/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2006  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/
#include "fuse_i.h"

#include <linux/init.h>
#include <linux/module.h>
#include <linux/poll.h>
#include <linux/uio.h>
#include <linux/miscdevice.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/slab.h>
MODULE_ALIAS_MISCDEV(FUSE_MINOR);

static kmem_cache_t *fuse_req_cachep;
static struct fuse_conn *fuse_get_conn(struct file *file)
{
	/*
	 * Lockless access is OK, because file->private_data is set
	 * once during mount and is valid until the file is released.
	 */
	return file->private_data;
}
static void fuse_request_init(struct fuse_req *req)
{
	memset(req, 0, sizeof(*req));
	INIT_LIST_HEAD(&req->list);
	init_waitqueue_head(&req->waitq);
	atomic_set(&req->count, 1);
}
struct fuse_req *fuse_request_alloc(void)
{
	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, SLAB_KERNEL);
	if (req)
		fuse_request_init(req);
	return req;
}
void fuse_request_free(struct fuse_req *req)
{
	kmem_cache_free(fuse_req_cachep, req);
}
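/* Block all signals except SIGKILL, so a waiting requester can only
   be interrupted by a fatal signal */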
static void block_sigs(sigset_t *oldset)
{
	sigset_t mask;

	siginitsetinv(&mask, sigmask(SIGKILL));
	sigprocmask(SIG_BLOCK, &mask, oldset);
}
static void restore_sigs(sigset_t *oldset)
{
	sigprocmask(SIG_SETMASK, oldset, NULL);
}
/*
 * Reset request, so that it can be reused
 *
 * The caller must be _very_ careful to make sure that it is holding
 * the only reference to req
 */
void fuse_reset_request(struct fuse_req *req)
{
	BUG_ON(atomic_read(&req->count) != 1);
	fuse_request_init(req);
}
static void __fuse_get_request(struct fuse_req *req)
{
	atomic_inc(&req->count);
}
/* Must be called with > 1 refcount */
static void __fuse_put_request(struct fuse_req *req)
{
	BUG_ON(atomic_read(&req->count) < 2);
	atomic_dec(&req->count);
}
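/*
 * Allocate a request and fill in the header with the caller's
 * credentials.  The fc->num_waiting counter incremented here is
 * dropped again in fuse_put_request().
 */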
struct fuse_req *fuse_get_req(struct fuse_conn *fc)
{
	struct fuse_req *req = fuse_request_alloc();
	if (!req)
		return ERR_PTR(-ENOMEM);

	atomic_inc(&fc->num_waiting);
	fuse_request_init(req);
	req->in.h.uid = current->fsuid;
	req->in.h.gid = current->fsgid;
	req->in.h.pid = current->pid;
	return req;
}
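/* Drop a reference to the request; the last put also drops the
   num_waiting count and frees the request */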
void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
{
	if (atomic_dec_and_test(&req->count)) {
		atomic_dec(&fc->num_waiting);
		fuse_request_free(req);
	}
}
void fuse_release_background(struct fuse_conn *fc, struct fuse_req *req)
{
	/* Drop the object references taken by background_request() */
	iput(req->inode);
	iput(req->inode2);
	if (req->file)
		fput(req->file);
	spin_lock(&fc->lock);
	list_del(&req->bg_entry);
	spin_unlock(&fc->lock);
}
/*
 * This function is called when a request is finished.  Either a reply
 * has arrived or it was interrupted (and not yet sent) or some error
 * occurred during communication with userspace, or the device file
 * was closed.  In case of a background request the references to the
 * stored objects are released.  The requester thread is woken up (if
 * still waiting), the 'end' callback is called if given, else the
 * reference to the request is released
 *
 * Releasing the extra reference for foreground requests must be done
 * within the same locked region as setting the state to finished.
 * This is because fuse_reset_request() may be called after the
 * request is finished and it must be the sole possessor.  If the
 * request is interrupted and put in the background, it will return
 * with an error and hence never be reset and reused.
 *
 * Called with fc->lock held, unlocks it
 */
static void request_end(struct fuse_conn *fc, struct fuse_req *req)
{
	list_del(&req->list);
	req->state = FUSE_REQ_FINISHED;
	if (!req->background) {
		spin_unlock(&fc->lock);
		wake_up(&req->waitq);
		fuse_put_request(fc, req);
	} else {
		void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
		req->end = NULL;
		spin_unlock(&fc->lock);
		down_read(&fc->sbput_sem);
		if (fc->mounted)
			fuse_release_background(fc, req);
		up_read(&fc->sbput_sem);
		if (end)
			end(fc, req);
		else
			fuse_put_request(fc, req);
	}
}
/*
 * Unfortunately request interruption doesn't just solve the deadlock
 * problem, it causes problems too.  These stem from the fact that an
 * interrupted request continues to be processed in userspace, while
 * all the locks and object references (inode and file) held during
 * the operation are released.
 *
 * To release the locks is exactly why there's a need to interrupt the
 * request, so there's not a lot that can be done about this, except
 * introduce additional locking in userspace.
 *
 * More important is to keep inode and file references until userspace
 * has replied, otherwise FORGET and RELEASE could be sent while the
 * inode/file is still used by the filesystem.
 *
 * For this reason the concept of a "background" request is
 * introduced.  An interrupted request is backgrounded if it has
 * already been sent to userspace.  Backgrounding involves getting an
 * extra reference to the inode(s) or file used in the request, and
 * adding the request to the fc->background list.  When a reply is
 * received for a background request, the object references are
 * released, and the request is removed from the list.  If the
 * filesystem is unmounted while there are still background requests,
 * the list is walked and references are released as if a reply was
 * received.
 *
 * There's one more use for a background request.  The RELEASE message
 * is always sent as background, since it doesn't return an error or
 * inode.
 */
static void background_request(struct fuse_conn *fc, struct fuse_req *req)
{
	req->background = 1;
	list_add(&req->bg_entry, &fc->background);
	if (req->inode)
		req->inode = igrab(req->inode);
	if (req->inode2)
		req->inode2 = igrab(req->inode2);
	if (req->file)
		get_file(req->file);
}
/* Called with fc->lock held.  Releases, and then reacquires it. */
static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
{
	sigset_t oldset;

	spin_unlock(&fc->lock);
	block_sigs(&oldset);
	wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
	restore_sigs(&oldset);
	spin_lock(&fc->lock);
	if (req->state == FUSE_REQ_FINISHED && !req->interrupted)
		return;

	if (!req->interrupted) {
		req->out.h.error = -EINTR;
		req->interrupted = 1;
	}
	if (req->locked) {
		/* This is uninterruptible sleep, because data is
		   being copied to/from the buffers of req.  During
		   locked state, there mustn't be any filesystem
		   operation (e.g. page fault), since that could lead
		   to deadlock */
		spin_unlock(&fc->lock);
		wait_event(req->waitq, !req->locked);
		spin_lock(&fc->lock);
	}
	if (req->state == FUSE_REQ_PENDING) {
		list_del(&req->list);
		__fuse_put_request(req);
	} else if (req->state == FUSE_REQ_SENT)
		background_request(fc, req);
}
static unsigned len_args(unsigned numargs, struct fuse_arg *args)
{
	unsigned i, nbytes = 0;

	for (i = 0; i < numargs; i++)
		nbytes += args[i].size;
	return nbytes;
}
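/*
 * Assign the next unique request ID (zero is reserved), fill in the
 * total request length, queue the request on the pending list, and
 * wake up any readers.  Called with fc->lock held.
 */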
static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
{
	fc->reqctr++;
	/* zero is special */
	if (fc->reqctr == 0)
		fc->reqctr = 1;
	req->in.h.unique = fc->reqctr;
	req->in.h.len = sizeof(struct fuse_in_header) +
		len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
	list_add_tail(&req->list, &fc->pending);
	req->state = FUSE_REQ_PENDING;
	wake_up(&fc->waitq);
	kill_fasync(&fc->fasync, SIGIO, POLL_IN);
}
/*
 * This can only be interrupted by a SIGKILL
 */
void request_send(struct fuse_conn *fc, struct fuse_req *req)
{
	req->isreply = 1;
	spin_lock(&fc->lock);
	if (!fc->connected)
		req->out.h.error = -ENOTCONN;
	else if (fc->conn_error)
		req->out.h.error = -ECONNREFUSED;
	else {
		queue_request(fc, req);
		/* acquire extra reference, since request is still needed
		   after request_end() */
		__fuse_get_request(req);

		request_wait_answer(fc, req);
	}
	spin_unlock(&fc->lock);
}
static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
{
	spin_lock(&fc->lock);
	if (fc->connected) {
		queue_request(fc, req);
		spin_unlock(&fc->lock);
	} else {
		req->out.h.error = -ENOTCONN;
		request_end(fc, req);
	}
}
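/* Send a request for which no reply is expected (e.g. FORGET) */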
void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
{
	req->isreply = 0;
	request_send_nowait(fc, req);
}
void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
{
	req->isreply = 1;
	spin_lock(&fc->lock);
	background_request(fc, req);
	spin_unlock(&fc->lock);
	request_send_nowait(fc, req);
}
/*
 * Lock the request.  Up to the next unlock_request() there mustn't be
 * anything that could cause a page-fault.  If the request was already
 * interrupted, bail out.
 */
static int lock_request(struct fuse_conn *fc, struct fuse_req *req)
{
	int err = 0;
	if (req) {
		spin_lock(&fc->lock);
		if (req->interrupted)
			err = -ENOENT;
		else
			req->locked = 1;
		spin_unlock(&fc->lock);
	}
	return err;
}
/*
 * Unlock request.  If it was interrupted during being locked, the
 * requester thread is currently waiting for it to be unlocked, so
 * wake it up.
 */
static void unlock_request(struct fuse_conn *fc, struct fuse_req *req)
{
	if (req) {
		spin_lock(&fc->lock);
		req->locked = 0;
		if (req->interrupted)
			wake_up(&req->waitq);
		spin_unlock(&fc->lock);
	}
}
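/*
 * State for copying a request to/from the userspace buffer one page
 * at a time: the current iovec segment, the user page currently
 * pinned and mapped with kmap_atomic(), and the number of bytes left
 * in that mapping.
 */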
struct fuse_copy_state {
	struct fuse_conn *fc;
	int write;
	struct fuse_req *req;
	const struct iovec *iov;
	unsigned long nr_segs;
	unsigned long seglen;
	unsigned long addr;
	struct page *pg;
	void *mapaddr;
	void *buf;
	unsigned len;
};

static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
			   int write, struct fuse_req *req,
			   const struct iovec *iov, unsigned long nr_segs)
{
	memset(cs, 0, sizeof(*cs));
	cs->fc = fc;
	cs->write = write;
	cs->req = req;
	cs->iov = iov;
	cs->nr_segs = nr_segs;
}
/* Unmap and put previous page of userspace buffer */
static void fuse_copy_finish(struct fuse_copy_state *cs)
{
	if (cs->mapaddr) {
		kunmap_atomic(cs->mapaddr, KM_USER0);
		if (cs->write) {
			flush_dcache_page(cs->pg);
			set_page_dirty_lock(cs->pg);
		}
		put_page(cs->pg);
		cs->mapaddr = NULL;
	}
}
/*
 * Get another pagefull of userspace buffer, and map it to kernel
 * address space, and lock request
 */
static int fuse_copy_fill(struct fuse_copy_state *cs)
{
	unsigned long offset;
	int err;

	unlock_request(cs->fc, cs->req);
	fuse_copy_finish(cs);
	if (!cs->seglen) {
		BUG_ON(!cs->nr_segs);
		cs->seglen = cs->iov[0].iov_len;
		cs->addr = (unsigned long) cs->iov[0].iov_base;
		cs->iov++;
		cs->nr_segs--;
	}
	down_read(&current->mm->mmap_sem);
	err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0,
			     &cs->pg, NULL);
	up_read(&current->mm->mmap_sem);
	if (err < 0)
		return err;
	BUG_ON(err != 1);
	offset = cs->addr % PAGE_SIZE;
	cs->mapaddr = kmap_atomic(cs->pg, KM_USER0);
	cs->buf = cs->mapaddr + offset;
	cs->len = min(PAGE_SIZE - offset, cs->seglen);
	cs->seglen -= cs->len;
	cs->addr += cs->len;

	return lock_request(cs->fc, cs->req);
}
/* Do as much copy to/from userspace buffer as we can */
static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
{
	unsigned ncpy = min(*size, cs->len);
	if (val) {
		if (cs->write)
			memcpy(cs->buf, *val, ncpy);
		else
			memcpy(*val, cs->buf, ncpy);
		*val += ncpy;
	}
	*size -= ncpy;
	cs->len -= ncpy;
	cs->buf += ncpy;
	return ncpy;
}
/*
 * Copy a page in the request to/from the userspace buffer.  Must be
 * done atomically
 */
static int fuse_copy_page(struct fuse_copy_state *cs, struct page *page,
			  unsigned offset, unsigned count, int zeroing)
{
	if (page && zeroing && count < PAGE_SIZE) {
		void *mapaddr = kmap_atomic(page, KM_USER1);
		memset(mapaddr, 0, PAGE_SIZE);
		kunmap_atomic(mapaddr, KM_USER1);
	}
	while (count) {
		int err;
		if (!cs->len && (err = fuse_copy_fill(cs)))
			return err;
		if (page) {
			void *mapaddr = kmap_atomic(page, KM_USER1);
			void *buf = mapaddr + offset;
			offset += fuse_copy_do(cs, &buf, &count);
			kunmap_atomic(mapaddr, KM_USER1);
		} else
			offset += fuse_copy_do(cs, NULL, &count);
	}
	if (page && !cs->write)
		flush_dcache_page(page);
	return 0;
}
/* Copy pages in the request to/from userspace buffer */
static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
			   int zeroing)
{
	unsigned i;
	struct fuse_req *req = cs->req;
	unsigned offset = req->page_offset;
	unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);

	for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
		struct page *page = req->pages[i];
		int err = fuse_copy_page(cs, page, offset, count, zeroing);
		if (err)
			return err;

		nbytes -= count;
		count = min(nbytes, (unsigned) PAGE_SIZE);
		offset = 0;
	}
	return 0;
}
/* Copy a single argument in the request to/from userspace buffer */
static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
{
	while (size) {
		int err;
		if (!cs->len && (err = fuse_copy_fill(cs)))
			return err;
		fuse_copy_do(cs, &val, &size);
	}
	return 0;
}
/* Copy request arguments to/from userspace buffer */
static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
			  unsigned argpages, struct fuse_arg *args,
			  int zeroing)
{
	int err = 0;
	unsigned i;

	for (i = 0; !err && i < numargs; i++) {
		struct fuse_arg *arg = &args[i];
		if (i == numargs - 1 && argpages)
			err = fuse_copy_pages(cs, arg->size, zeroing);
		else
			err = fuse_copy_one(cs, arg->value, arg->size);
	}
	return err;
}
/* Wait until a request is available on the pending list */
static void request_wait(struct fuse_conn *fc)
{
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue_exclusive(&fc->waitq, &wait);
	while (fc->connected && list_empty(&fc->pending)) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (signal_pending(current))
			break;

		spin_unlock(&fc->lock);
		schedule();
		spin_lock(&fc->lock);
	}
	set_current_state(TASK_RUNNING);
	remove_wait_queue(&fc->waitq, &wait);
}
/*
 * Read a single request into the userspace filesystem's buffer.  This
 * function waits until a request is available, then removes it from
 * the pending list and copies the request data to the userspace
 * buffer.  If no reply is needed (FORGET) or the request has been
 * interrupted or there was an error during the copying then it's
 * finished by calling request_end().  Otherwise add it to the
 * processing list, and set the 'sent' flag.
 */
static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
			      unsigned long nr_segs, loff_t *off)
{
	int err;
	struct fuse_req *req;
	struct fuse_in *in;
	struct fuse_copy_state cs;
	unsigned reqsize;
	struct fuse_conn *fc = fuse_get_conn(file);
	if (!fc)
		return -EPERM;

 restart:
	spin_lock(&fc->lock);
	err = -EAGAIN;
	if ((file->f_flags & O_NONBLOCK) && fc->connected &&
	    list_empty(&fc->pending))
		goto err_unlock;

	request_wait(fc);
	err = -ENODEV;
	if (!fc->connected)
		goto err_unlock;
	err = -ERESTARTSYS;
	if (list_empty(&fc->pending))
		goto err_unlock;

	req = list_entry(fc->pending.next, struct fuse_req, list);
	req->state = FUSE_REQ_READING;
	list_move(&req->list, &fc->io);

	in = &req->in;
	reqsize = in->h.len;
	/* If request is too large, reply with an error and restart the read */
	if (iov_length(iov, nr_segs) < reqsize) {
		req->out.h.error = -EIO;
		/* SETXATTR is special, since it may contain too large data */
		if (in->h.opcode == FUSE_SETXATTR)
			req->out.h.error = -E2BIG;
		request_end(fc, req);
		goto restart;
	}
	spin_unlock(&fc->lock);
	fuse_copy_init(&cs, fc, 1, req, iov, nr_segs);
	err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
	if (!err)
		err = fuse_copy_args(&cs, in->numargs, in->argpages,
				     (struct fuse_arg *) in->args, 0);
	fuse_copy_finish(&cs);
	spin_lock(&fc->lock);
	req->locked = 0;
	if (!err && req->interrupted)
		err = -ENOENT;
	if (err) {
		if (!req->interrupted)
			req->out.h.error = -EIO;
		request_end(fc, req);
		return err;
	}
	if (!req->isreply)
		request_end(fc, req);
	else {
		req->state = FUSE_REQ_SENT;
		list_move_tail(&req->list, &fc->processing);
		spin_unlock(&fc->lock);
	}
	return reqsize;

 err_unlock:
	spin_unlock(&fc->lock);
	return err;
}
static ssize_t fuse_dev_read(struct file *file, char __user *buf,
			     size_t nbytes, loff_t *off)
{
	struct iovec iov;
	iov.iov_len = nbytes;
	iov.iov_base = buf;
	return fuse_dev_readv(file, &iov, 1, off);
}
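/*
 * For reference, the userspace side of the protocol implemented by
 * the read and write paths here looks roughly as follows (a minimal
 * sketch; BUFSIZE is hypothetical and must be large enough for the
 * biggest request, error handling omitted):
 *
 *	char buf[BUFSIZE];
 *	ssize_t n = read(fd, buf, sizeof(buf));      // one whole request
 *	struct fuse_in_header *in = (void *) buf;    // followed by args
 *	...handle in->opcode...
 *	struct fuse_out_header out = {
 *		.len    = sizeof(out),               // header + reply args
 *		.error  = 0,                         // 0 or negative errno
 *		.unique = in->unique,                // echoed from request
 *	};
 *	write(fd, &out, out.len);                    // one whole reply
 *
 * fuse_dev_writev() below rejects replies whose 'len' doesn't match
 * the number of bytes written, whose 'unique' matches no request on
 * the processing list, or whose 'error' is outside [-999, 0].
 */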
/* Look up request on processing list by unique ID */
static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
{
	struct list_head *entry;

	list_for_each(entry, &fc->processing) {
		struct fuse_req *req;
		req = list_entry(entry, struct fuse_req, list);
		if (req->in.h.unique == unique)
			return req;
	}
	return NULL;
}
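/*
 * Copy the reply arguments from the userspace buffer, validating the
 * reply size.  A short reply is only allowed when the last argument
 * is variable length ('argvar'), in which case it is truncated to the
 * number of bytes actually supplied.
 */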
static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
			 unsigned nbytes)
{
	unsigned reqsize = sizeof(struct fuse_out_header);

	if (out->h.error)
		return nbytes != reqsize ? -EINVAL : 0;

	reqsize += len_args(out->numargs, out->args);

	if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
		return -EINVAL;
	else if (reqsize > nbytes) {
		struct fuse_arg *lastarg = &out->args[out->numargs-1];
		unsigned diffsize = reqsize - nbytes;
		if (diffsize > lastarg->size)
			return -EINVAL;
		lastarg->size -= diffsize;
	}
	return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
			      out->page_zeroing);
}
/*
 * Write a single reply to a request.  First the header is copied from
 * the write buffer.  The request is then searched on the processing
 * list by the unique ID found in the header.  If found, then remove
 * it from the list and copy the rest of the buffer to the request.
 * The request is finished by calling request_end()
 */
static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
			       unsigned long nr_segs, loff_t *off)
{
	int err;
	unsigned nbytes = iov_length(iov, nr_segs);
	struct fuse_req *req;
	struct fuse_out_header oh;
	struct fuse_copy_state cs;
	struct fuse_conn *fc = fuse_get_conn(file);
	if (!fc)
		return -EPERM;

	fuse_copy_init(&cs, fc, 0, NULL, iov, nr_segs);
	if (nbytes < sizeof(struct fuse_out_header))
		return -EINVAL;

	err = fuse_copy_one(&cs, &oh, sizeof(oh));
	if (err)
		goto err_finish;
	err = -EINVAL;
	if (!oh.unique || oh.error <= -1000 || oh.error > 0 ||
	    oh.len != nbytes)
		goto err_finish;

	spin_lock(&fc->lock);
	err = -ENOENT;
	if (!fc->connected)
		goto err_unlock;

	req = request_find(fc, oh.unique);
	err = -EINVAL;
	if (!req)
		goto err_unlock;

	if (req->interrupted) {
		spin_unlock(&fc->lock);
		fuse_copy_finish(&cs);
		spin_lock(&fc->lock);
		request_end(fc, req);
		return -ENOENT;
	}
	list_move(&req->list, &fc->io);
	req->out.h = oh;
	req->locked = 1;
	cs.req = req;
	spin_unlock(&fc->lock);

	err = copy_out_args(&cs, &req->out, nbytes);
	fuse_copy_finish(&cs);
	spin_lock(&fc->lock);
	req->locked = 0;
	if (!err) {
		if (req->interrupted)
			err = -ENOENT;
	} else if (!req->interrupted)
		req->out.h.error = -EIO;
	request_end(fc, req);

	return err ? err : nbytes;

 err_unlock:
	spin_unlock(&fc->lock);
 err_finish:
	fuse_copy_finish(&cs);
	return err;
}
static ssize_t fuse_dev_write(struct file *file, const char __user *buf,
			      size_t nbytes, loff_t *off)
{
	struct iovec iov;
	iov.iov_len = nbytes;
	iov.iov_base = (char __user *) buf;
	return fuse_dev_writev(file, &iov, 1, off);
}
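/*
 * The device is always writable; it is readable when a request is
 * waiting on the pending list, and reports POLLERR when the
 * connection is gone.
 */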
static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
{
	unsigned mask = POLLOUT | POLLWRNORM;
	struct fuse_conn *fc = fuse_get_conn(file);
	if (!fc)
		return POLLERR;

	poll_wait(file, &fc->waitq, wait);

	spin_lock(&fc->lock);
	if (!fc->connected)
		mask = POLLERR;
	else if (!list_empty(&fc->pending))
		mask |= POLLIN | POLLRDNORM;
	spin_unlock(&fc->lock);

	return mask;
}
/*
 * Abort all requests on the given list (pending or processing)
 *
 * This function releases and reacquires fc->lock
 */
static void end_requests(struct fuse_conn *fc, struct list_head *head)
{
	while (!list_empty(head)) {
		struct fuse_req *req;
		req = list_entry(head->next, struct fuse_req, list);
		req->out.h.error = -ECONNABORTED;
		request_end(fc, req);
		spin_lock(&fc->lock);
	}
}
/*
 * Abort requests under I/O
 *
 * The requests are set to interrupted and finished, and the request
 * waiter is woken up.  This will make request_wait_answer() wait
 * until the request is unlocked and then return.
 *
 * If the request is asynchronous, then the end function needs to be
 * called after waiting for the request to be unlocked (if it was
 * locked).
 */
static void end_io_requests(struct fuse_conn *fc)
{
	while (!list_empty(&fc->io)) {
		struct fuse_req *req =
			list_entry(fc->io.next, struct fuse_req, list);
		void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;

		req->interrupted = 1;
		req->out.h.error = -ECONNABORTED;
		req->state = FUSE_REQ_FINISHED;
		list_del_init(&req->list);
		wake_up(&req->waitq);
		if (end) {
			req->end = NULL;
			/* The end function will consume this reference */
			__fuse_get_request(req);
			spin_unlock(&fc->lock);
			wait_event(req->waitq, !req->locked);
			end(fc, req);
			spin_lock(&fc->lock);
		}
	}
}
/*
 * Abort all requests.
 *
 * Emergency exit in case of a malicious or accidental deadlock, or
 * just a hung filesystem.
 *
 * The same effect is usually achievable through killing the
 * filesystem daemon and all users of the filesystem.  The exception
 * is the combination of an asynchronous request and the tricky
 * deadlock (see Documentation/filesystems/fuse.txt).
 *
 * During the aborting, progression of requests from the pending and
 * processing lists onto the io list, and progression of new requests
 * onto the pending list is prevented by fc->connected being false.
 *
 * Progression of requests under I/O to the processing list is
 * prevented by the req->interrupted flag being true for these
 * requests.  For this reason requests on the io list must be aborted
 * first.
 */
void fuse_abort_conn(struct fuse_conn *fc)
{
	spin_lock(&fc->lock);
	if (fc->connected) {
		fc->connected = 0;
		end_io_requests(fc);
		end_requests(fc, &fc->pending);
		end_requests(fc, &fc->processing);
		wake_up_all(&fc->waitq);
		kill_fasync(&fc->fasync, SIGIO, POLL_IN);
	}
	spin_unlock(&fc->lock);
}
static int fuse_dev_release(struct inode *inode, struct file *file)
{
	struct fuse_conn *fc = fuse_get_conn(file);
	if (fc) {
		spin_lock(&fc->lock);
		fc->connected = 0;
		end_requests(fc, &fc->pending);
		end_requests(fc, &fc->processing);
		spin_unlock(&fc->lock);
		fasync_helper(-1, file, 0, &fc->fasync);
		kobject_put(&fc->kobj);
	}
	return 0;
}
static int fuse_dev_fasync(int fd, struct file *file, int on)
{
	struct fuse_conn *fc = fuse_get_conn(file);
	if (!fc)
		return -EPERM;

	/* No locking - fasync_helper does its own locking */
	return fasync_helper(fd, file, on, &fc->fasync);
}
const struct file_operations fuse_dev_operations = {
	.owner		= THIS_MODULE,
	.llseek		= no_llseek,
	.read		= fuse_dev_read,
	.readv		= fuse_dev_readv,
	.write		= fuse_dev_write,
	.writev		= fuse_dev_writev,
	.poll		= fuse_dev_poll,
	.release	= fuse_dev_release,
	.fasync		= fuse_dev_fasync,
};
static struct miscdevice fuse_miscdevice = {
	.minor = FUSE_MINOR,
	.name  = "fuse",
	.fops = &fuse_dev_operations,
};
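/* Create the request slab cache and register the /dev/fuse misc device */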
int __init fuse_dev_init(void)
{
	int err = -ENOMEM;
	fuse_req_cachep = kmem_cache_create("fuse_request",
					    sizeof(struct fuse_req),
					    0, 0, NULL, NULL);
	if (!fuse_req_cachep)
		goto out;

	err = misc_register(&fuse_miscdevice);
	if (err)
		goto out_cache_clean;

	return 0;

 out_cache_clean:
	kmem_cache_destroy(fuse_req_cachep);
 out:
	return err;
}
void fuse_dev_cleanup(void)
{
	misc_deregister(&fuse_miscdevice);
	kmem_cache_destroy(fuse_req_cachep);
}