]> git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
cuse: implement memory mapping
authorTejun Heo <htejun@gmail.com>
Tue, 13 Dec 2011 10:58:50 +0000 (11:58 +0100)
committerMiklos Szeredi <mszeredi@suse.cz>
Tue, 13 Dec 2011 10:58:50 +0000 (11:58 +0100)
This implements memory mapping of char devices.

Unlike memory maps for regular files this needs to allow more than one
mapping to be associated with an open device.

The mapping is identified by a 64bit map ID.  This is used in place of
the node ID in the STORE and RETRIEVE notifications.

Original patch by Tejun Heo.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
fs/fuse/cuse.c
fs/fuse/dev.c
fs/fuse/fuse_i.h
fs/fuse/inode.c
include/linux/fuse.h

index 53df9fe088f12731fc8c68b3b4e5c0523db3a79c..fc75f012c7fafbc2e0dc8a770cb0a75808992aa6 100644 (file)
@@ -48,6 +48,8 @@
 #include <linux/spinlock.h>
 #include <linux/stat.h>
 #include <linux/module.h>
+#include <linux/mman.h>
+#include <linux/pagemap.h>
 
 #include "fuse_i.h"
 
@@ -174,6 +176,419 @@ static long cuse_file_compat_ioctl(struct file *file, unsigned int cmd,
        return fuse_do_ioctl(file, cmd, arg, flags);
 }
 
+/*
+ * A directly mmapped region, identified by a server-assigned 64bit map
+ * ID.  One region may back several mmap() calls if the server replies
+ * to FUSE_MMAP with the same mapid.  Regions live on fc->dmmap_list.
+ */
+struct fuse_dmmap_region {
+       u64 mapid;              /* map ID from the FUSE_MMAP reply */
+       u64 size;               /* region size in bytes from the reply */
+       pgoff_t nr_pages;       /* pages needed to cover @size */
+       struct page **pages;    /* slots filled lazily at fault time */
+       struct list_head list;  /* on fc->dmmap_list, under fc->lock */
+       atomic_t ref;           /* refcount; final put takes fc->lock */
+};
+
+/*
+ * fuse_dmmap_vm represents the result of a single mmap() call, which
+ * can be shared by multiple client vmas created by forking.
+ */
+struct fuse_dmmap_vm {
+       atomic_t open_count;                    /* number of vmas sharing this mapping */
+       struct fuse_dmmap_region *region;       /* backing region, holds one ref */
+};
+
+/*
+ * Drop a reference on @fdr.  The final put unlinks the region from
+ * fc->dmmap_list (atomic_dec_and_lock() takes fc->lock only when the
+ * count hits zero), releases every page that was faulted in, and frees
+ * the region.
+ */
+static void fuse_dmmap_region_put(struct fuse_conn *fc,
+                                 struct fuse_dmmap_region *fdr)
+{
+       if (atomic_dec_and_lock(&fdr->ref, &fc->lock)) {
+               pgoff_t idx;
+
+               list_del(&fdr->list);
+               spin_unlock(&fc->lock);
+
+               /* page slots are populated sparsely; skip empty ones */
+               for (idx = 0; idx < fdr->nr_pages; idx++)
+                       if (fdr->pages[idx])
+                               put_page(fdr->pages[idx]);
+
+               kfree(fdr->pages);
+               kfree(fdr);
+       }
+}
+
+/* A vma using this mapping was duplicated (e.g. fork): bump the count */
+static void fuse_dmmap_vm_open(struct vm_area_struct *vma)
+{
+       struct fuse_dmmap_vm *fdvm = vma->vm_private_data;
+
+       /* vma copied */
+       atomic_inc(&fdvm->open_count);
+}
+
+/*
+ * A vma using this mapping is going away.  When the last sharer is
+ * gone, notify the server with FUSE_MUNMAP, drop the region reference
+ * taken at mmap time and free the fuse_dmmap_vm.
+ */
+static void fuse_dmmap_vm_close(struct vm_area_struct *vma)
+{
+       struct fuse_dmmap_vm *fdvm = vma->vm_private_data;
+       struct fuse_file *ff = vma->vm_file->private_data;
+       struct fuse_conn *fc = ff->fc;
+       struct fuse_req *req;
+       struct fuse_munmap_in *inarg;
+
+       if (!atomic_dec_and_test(&fdvm->open_count))
+               return;
+       /*
+        * Notify server that the mmap region has been unmapped.
+        * Failing this might lead to resource leak in server, don't
+        * fail.
+        */
+       req = fuse_get_req_nofail(fc, vma->vm_file);
+       inarg = &req->misc.munmap_in;
+
+       inarg->fh = ff->fh;
+       inarg->mapid = fdvm->region->mapid;
+       inarg->size = fdvm->region->size;
+
+       req->in.h.opcode = FUSE_MUNMAP;
+       req->in.h.nodeid = ff->nodeid;
+       req->in.numargs = 1;
+       req->in.args[0].size = sizeof(*inarg);
+       req->in.args[0].value = inarg;
+
+       fuse_request_send(fc, req);
+       fuse_dmmap_region_put(fc, fdvm->region);
+       kfree(fdvm);
+}
+
+/*
+ * Return the page at @index in @fdr with an extra reference, lazily
+ * allocating a zeroed highmem page for an empty slot.  fc->lock is
+ * dropped across the allocation and the slot rechecked afterwards; if
+ * another task installed a page meanwhile, the freshly allocated one
+ * is released.  Returns NULL only on allocation failure.
+ */
+static struct page *fuse_dmmap_find_or_create_page(struct fuse_conn *fc,
+                                          struct fuse_dmmap_region *fdr,
+                                          pgoff_t index)
+{
+       struct page *new_page = NULL;
+       struct page *page;
+
+       BUG_ON(index >= fdr->nr_pages);
+
+       spin_lock(&fc->lock);
+       page = fdr->pages[index];
+       if (!page) {
+               spin_unlock(&fc->lock);
+               /* need to allocate and install a new page */
+               new_page = alloc_page(GFP_HIGHUSER | __GFP_ZERO);
+               if (!new_page)
+                       return NULL;
+
+               /* try to install, check whether someone else already did it */
+               spin_lock(&fc->lock);
+               page = fdr->pages[index];
+               if (!page) {
+                       page = fdr->pages[index] = new_page;
+                       new_page = NULL;
+               }
+       }
+       get_page(page);
+       spin_unlock(&fc->lock);
+
+       /* lost the race: drop the page we allocated for nothing */
+       if (new_page)
+               put_page(new_page);
+
+       return page;
+}
+
+/*
+ * Page fault handler: hand out the (lazily populated) region page for
+ * the faulting offset.  The reference taken by
+ * fuse_dmmap_find_or_create_page() is returned to the fault core via
+ * vmf->page.
+ */
+static int fuse_dmmap_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+       struct fuse_dmmap_vm *fdvm = vma->vm_private_data;
+       struct fuse_dmmap_region *fdr = fdvm->region;
+       struct fuse_file *ff = vma->vm_file->private_data;
+       struct fuse_conn *fc = ff->fc;
+
+       /* faults beyond the server-declared region size get SIGBUS */
+       if (vmf->pgoff >= fdr->nr_pages)
+               return VM_FAULT_SIGBUS;
+
+       vmf->page = fuse_dmmap_find_or_create_page(fc, fdr, vmf->pgoff);
+       if (!vmf->page)
+               return VM_FAULT_OOM;
+
+       return 0;
+}
+
+/* vma operations for direct-mmap regions */
+static const struct vm_operations_struct fuse_dmmap_vm_ops = {
+       .open           = fuse_dmmap_vm_open,
+       .close          = fuse_dmmap_vm_close,
+       .fault          = fuse_dmmap_vm_fault,
+};
+
+/*
+ * Look up a region by @mapid on fc->dmmap_list and take a reference.
+ * Caller must hold fc->lock.  Returns NULL if not found.
+ */
+static struct fuse_dmmap_region *fuse_dmmap_find_locked(struct fuse_conn *fc,
+                                                       u64 mapid)
+{
+       struct fuse_dmmap_region *curr;
+       struct fuse_dmmap_region *fdr = NULL;
+
+       list_for_each_entry(curr, &fc->dmmap_list, list) {
+               if (curr->mapid == mapid) {
+                       fdr = curr;
+                       atomic_inc(&fdr->ref);
+                       break;
+               }
+       }
+
+       return fdr;
+}
+
+/* Locking wrapper around fuse_dmmap_find_locked() */
+static struct fuse_dmmap_region *fuse_dmmap_find(struct fuse_conn *fc,
+                                                u64 mapid)
+{
+       struct fuse_dmmap_region *fdr;
+
+       spin_lock(&fc->lock);
+       fdr = fuse_dmmap_find_locked(fc, mapid);
+       spin_unlock(&fc->lock);
+
+       return fdr;
+}
+
+/*
+ * Find the region for @mapid or create a new one of @size bytes.
+ * Returns the region with a reference held, or ERR_PTR: -EIO if the
+ * size does not match an existing region (or overflows), -ENOMEM on
+ * allocation failure.  Creation is raced against other tasks by
+ * re-checking the list under fc->lock before inserting.
+ */
+static struct fuse_dmmap_region *fuse_dmmap_get(struct fuse_conn *fc,
+                                               u64 mapid, u64 size)
+{
+       struct fuse_dmmap_region *fdr;
+       pgoff_t nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+       /*
+        * NOTE(review): the shift is evaluated in pgoff_t width before
+        * the loff_t cast, so on 32bit it may wrap and slip past this
+        * check for very large @size -- confirm.
+        */
+       if ((loff_t) (nr_pages << PAGE_SHIFT) < size)
+               return ERR_PTR(-EIO);
+
+       fdr = fuse_dmmap_find(fc, mapid);
+       if (fdr) {
+               /* server must report a consistent size for a given mapid */
+               if (fdr->size != size) {
+                       fuse_dmmap_region_put(fc, fdr);
+                       return ERR_PTR(-EIO);
+               }
+       } else {
+               struct fuse_dmmap_region *tmp;
+
+               fdr = kzalloc(sizeof(struct fuse_dmmap_region), GFP_KERNEL);
+               if (!fdr)
+                       return ERR_PTR(-ENOMEM);
+
+               atomic_set(&fdr->ref, 1);
+               fdr->mapid = mapid;
+               fdr->size = size;
+               fdr->nr_pages = nr_pages;
+
+               fdr->pages = kzalloc(sizeof(struct page *) * nr_pages,
+                                    GFP_KERNEL);
+               if (!fdr->pages) {
+                       kfree(fdr);
+                       return ERR_PTR(-ENOMEM);
+               }
+
+               /* re-check under the lock: someone may have created it */
+               spin_lock(&fc->lock);
+               tmp = fuse_dmmap_find_locked(fc, mapid);
+               if (tmp) {
+                       kfree(fdr->pages);
+                       kfree(fdr);
+                       fdr = tmp;
+               } else {
+                       list_add(&fdr->list, &fc->dmmap_list);
+               }
+               spin_unlock(&fc->lock);
+       }
+
+       return fdr;
+}
+
+/*
+ * mmap() on a CUSE device.  Sends FUSE_MMAP to the server describing
+ * the requested mapping; the server replies with a mapid and region
+ * size.  The vma is then attached to the (possibly shared) region via
+ * a fresh fuse_dmmap_vm.  An ENOSYS reply disables direct mmap for
+ * this connection.
+ */
+static int cuse_mmap(struct file *file, struct vm_area_struct *vma)
+{
+       struct fuse_file *ff = file->private_data;
+       struct fuse_conn *fc = ff->fc;
+       struct fuse_dmmap_vm *fdvm;
+       struct fuse_dmmap_region *fdr;
+       struct fuse_req *req = NULL;
+       struct fuse_mmap_in inarg;
+       struct fuse_mmap_out outarg;
+       int err;
+
+       if (fc->no_dmmap)
+               return -ENOSYS;
+
+       req = fuse_get_req(fc);
+       if (IS_ERR(req))
+               return PTR_ERR(req);
+
+       /* ask server whether this mmap is okay and what the offset should be */
+       memset(&inarg, 0, sizeof(inarg));
+       inarg.fh = ff->fh;
+       inarg.addr = vma->vm_start;
+       inarg.len = vma->vm_end - vma->vm_start;
+       inarg.prot = ((vma->vm_flags & VM_READ) ? PROT_READ : 0) |
+                    ((vma->vm_flags & VM_WRITE) ? PROT_WRITE : 0) |
+                    ((vma->vm_flags & VM_EXEC) ? PROT_EXEC : 0);
+       inarg.flags = ((vma->vm_flags & VM_GROWSDOWN) ? MAP_GROWSDOWN : 0) |
+                     ((vma->vm_flags & VM_DENYWRITE) ? MAP_DENYWRITE : 0) |
+                     ((vma->vm_flags & VM_EXECUTABLE) ? MAP_EXECUTABLE : 0) |
+                     ((vma->vm_flags & VM_LOCKED) ? MAP_LOCKED : 0);
+       inarg.offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
+
+       req->in.h.opcode = FUSE_MMAP;
+       req->in.h.nodeid = ff->nodeid;
+       req->in.numargs = 1;
+       req->in.args[0].size = sizeof(inarg);
+       req->in.args[0].value = &inarg;
+       req->out.numargs = 1;
+       req->out.args[0].size = sizeof(outarg);
+       req->out.args[0].value = &outarg;
+
+       fuse_request_send(fc, req);
+       err = req->out.h.error;
+       if (err) {
+               /* remember ENOSYS so later mmaps fail without a round trip */
+               if (err == -ENOSYS)
+                       fc->no_dmmap = 1;
+               goto free_req;
+       }
+
+       /* look up or create the shared region for the reported mapid */
+       fdr = fuse_dmmap_get(fc, outarg.mapid, outarg.size);
+       err = PTR_ERR(fdr);
+       if (IS_ERR(fdr))
+               goto free_req;
+
+       err = -ENOMEM;
+       fdvm = kzalloc(sizeof(*fdvm), GFP_KERNEL);
+       if (!fdvm) {
+               fuse_dmmap_region_put(fc, fdr);
+               goto free_req;
+       }
+       atomic_set(&fdvm->open_count, 1);
+       fdvm->region = fdr;
+
+       vma->vm_ops = &fuse_dmmap_vm_ops;
+       vma->vm_private_data = fdvm;
+       vma->vm_flags |= VM_DONTEXPAND;         /* disallow expansion for now */
+       err = 0;
+
+free_req:
+       fuse_put_request(fc, req);
+       return err;
+}
+
+/*
+ * FUSE_NOTIFY_STORE handler for CUSE: copy @size bytes of server data
+ * at @pos into the direct-mmap region.  For CUSE the notification's
+ * @nodeid field carries the map ID.  Writes beyond the region size are
+ * silently clamped (matching fuse_notify_store() semantics); pages are
+ * faulted in as needed.  Returns -ENOENT if no such region exists.
+ */
+static int fuse_notify_store_to_dmmap(struct fuse_conn *fc,
+                                     struct fuse_copy_state *cs,
+                                     u64 nodeid, u32 size, u64 pos)
+{
+       struct fuse_dmmap_region *fdr;
+       pgoff_t index;
+       unsigned int off;
+       int err;
+
+       fdr = fuse_dmmap_find(fc, nodeid);
+       if (!fdr)
+               return -ENOENT;
+
+       index = pos >> PAGE_SHIFT;
+       off = pos & ~PAGE_MASK;
+       /* clamp the copy to the region boundary */
+       if (pos > fdr->size)
+               size = 0;
+       else if (size > fdr->size - pos)
+               size = fdr->size - pos;
+
+       while (size) {
+               struct page *page;
+               unsigned int this_num;
+
+               err = -ENOMEM;
+               page = fuse_dmmap_find_or_create_page(fc, fdr, index);
+               if (!page)
+                       goto out_iput;
+
+               this_num = min_t(unsigned, size, PAGE_SIZE - off);
+               err = fuse_copy_page(cs, &page, off, this_num, 0);
+               put_page(page);
+
+               if (err)
+                       goto out_iput;
+
+               /* only the first page can start at a non-zero offset */
+               size -= this_num;
+               off = 0;
+               index++;
+       }
+
+       err = 0;
+
+out_iput:
+       fuse_dmmap_region_put(fc, fdr);
+
+       return err;
+}
+
+/* Request-end callback: drop the page refs taken while building the
+ * FUSE_NOTIFY_REPLY request. */
+static void fuse_retrieve_dmmap_end(struct fuse_conn *fc, struct fuse_req *req)
+{
+       release_pages(req->pages, req->num_pages, 0);
+}
+
+/*
+ * FUSE_NOTIFY_RETRIEVE handler for CUSE: read back region contents and
+ * send them to the server in a FUSE_NOTIFY_REPLY.  outarg->nodeid
+ * carries the map ID.  Pages never faulted in are represented by the
+ * zero page.  Returns -ENOENT if no such region exists.
+ */
+static int fuse_notify_retrieve_from_dmmap(struct fuse_conn *fc,
+                               struct fuse_notify_retrieve_out *outarg)
+{
+       struct fuse_dmmap_region *fdr;
+       struct fuse_req *req;
+       pgoff_t index;
+       unsigned int num;
+       unsigned int offset;
+       size_t total_len = 0;
+       int err;
+
+       fdr = fuse_dmmap_find(fc, outarg->nodeid);
+       if (!fdr)
+               return -ENOENT;
+
+       req = fuse_get_req(fc);
+       err = PTR_ERR(req);
+       if (IS_ERR(req))
+               goto out_put_region;
+
+       offset = outarg->offset & ~PAGE_MASK;
+
+       req->in.h.opcode = FUSE_NOTIFY_REPLY;
+       req->in.h.nodeid = outarg->nodeid;
+       req->in.numargs = 2;
+       req->in.argpages = 1;
+       req->page_offset = offset;
+       req->end = fuse_retrieve_dmmap_end;
+
+       index = outarg->offset >> PAGE_SHIFT;
+       num = outarg->size;
+       /* clamp the retrieve to the region boundary */
+       if (outarg->offset > fdr->size)
+               num = 0;
+       else if (outarg->offset + num > fdr->size)
+               num = fdr->size - outarg->offset;
+
+       while (num && req->num_pages < FUSE_MAX_PAGES_PER_REQ) {
+               struct page *page;
+               unsigned int this_num;
+
+               BUG_ON(index >= fdr->nr_pages);
+               spin_lock(&fc->lock);
+               page = fdr->pages[index];
+               if (!page)
+                       page = ZERO_PAGE(0);
+               get_page(page);
+               spin_unlock(&fc->lock);
+
+               /*
+                * NOTE(review): 'offset' is never reset to 0 after the
+                * first page, unlike fuse_retrieve() in dev.c which does
+                * 'offset = 0' at the end of its loop -- this_num looks
+                * wrong for later pages of a non page-aligned retrieve.
+                * Confirm against dev.c.
+                */
+               this_num = min_t(unsigned, num, PAGE_SIZE - offset);
+               req->pages[req->num_pages] = page;
+               req->num_pages++;
+
+               num -= this_num;
+               total_len += this_num;
+               index++;
+       }
+       req->misc.retrieve_in.offset = outarg->offset;
+       req->misc.retrieve_in.size = total_len;
+       req->in.args[0].size = sizeof(req->misc.retrieve_in);
+       req->in.args[0].value = &req->misc.retrieve_in;
+       req->in.args[1].size = total_len;
+
+       /* on failure the end callback won't run; drop page refs here */
+       err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
+       if (err)
+               fuse_retrieve_dmmap_end(fc, req);
+
+out_put_region:
+       fuse_dmmap_region_put(fc, fdr);
+
+       return err;
+}
+
+
 static const struct file_operations cuse_frontend_fops = {
        .owner                  = THIS_MODULE,
        .read                   = cuse_read,
@@ -183,7 +598,8 @@ static const struct file_operations cuse_frontend_fops = {
        .unlocked_ioctl         = cuse_file_ioctl,
        .compat_ioctl           = cuse_file_compat_ioctl,
        .poll                   = fuse_file_poll,
-       .llseek         = noop_llseek,
+       .llseek                 = noop_llseek,
+       .mmap                   = cuse_mmap,
 };
 
 
@@ -463,6 +879,8 @@ static void cuse_fc_release(struct fuse_conn *fc)
 
 static const struct fuse_conn_operations cuse_ops = {
        .release = cuse_fc_release,
+       .notify_store = fuse_notify_store_to_dmmap,
+       .notify_retrieve = fuse_notify_retrieve_from_dmmap,
 };
 
 /**
index f1f59948c3b49debe70351652e7053e95e421fc3..e1b7a06ba312f96a1b2e80fa3d17fa04125f7501 100644 (file)
@@ -480,6 +480,7 @@ int fuse_request_send_notify_reply(struct fuse_conn *fc,
 
        return err;
 }
+EXPORT_SYMBOL_GPL(fuse_request_send_notify_reply);
 
 /*
  * Called under fc->lock
@@ -850,6 +851,7 @@ int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
                flush_dcache_page(page);
        return 0;
 }
+EXPORT_SYMBOL_GPL(fuse_copy_page);
 
 /* Copy pages in the request to/from userspace buffer */
 static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
index 9542f5bd725809ce18f6ee76684a1d8e079c3c24..c878fa9451cb4e33c8320e513a18a8afbf975939 100644 (file)
@@ -285,6 +285,7 @@ struct fuse_req {
                } write;
                struct fuse_notify_retrieve_in retrieve_in;
                struct fuse_lk_in lk_in;
+               struct fuse_munmap_in munmap_in;
        } misc;
 
        /** page vector */
@@ -484,6 +485,9 @@ struct fuse_conn {
        /** Is poll not implemented by fs? */
        unsigned no_poll:1;
 
+       /** Is direct mmap not implemented by fs? */
+       unsigned no_dmmap:1;
+
        /** Do multi-page cached writes */
        unsigned big_writes:1;
 
@@ -532,6 +536,9 @@ struct fuse_conn {
        /** Read/write semaphore to hold when accessing sb. */
        struct rw_semaphore killsb;
 
+       /** List of direct mmaps (currently CUSE only) */
+       struct list_head dmmap_list;
+
        /** Operations that fuse and cuse can implement differently */
        const struct fuse_conn_operations *ops;
 };
index 4bf887f33663c8f72bed1fb54d81e0cffabe8d96..7ffb64a7fa962e6552bc631ad525de895d765b70 100644 (file)
@@ -542,6 +542,7 @@ void fuse_conn_init(struct fuse_conn *fc)
        fc->blocked = 1;
        fc->attr_version = 1;
        get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
+       INIT_LIST_HEAD(&fc->dmmap_list);
 }
 EXPORT_SYMBOL_GPL(fuse_conn_init);
 
index 8ba2c9460b28fb90bfee024aa8f73601f6c4cf68..bc18853d8590ea9c24bb45b54961c9e30c9192f9 100644 (file)
@@ -54,6 +54,7 @@
  * 7.18
  *  - add FUSE_IOCTL_DIR flag
  *  - add FUSE_NOTIFY_DELETE
+ *  - add FUSE_MMAP and FUSE_MUNMAP
  */
 
 #ifndef _LINUX_FUSE_H
@@ -278,6 +279,8 @@ enum fuse_opcode {
        FUSE_POLL          = 40,
        FUSE_NOTIFY_REPLY  = 41,
        FUSE_BATCH_FORGET  = 42,
+       FUSE_MMAP          = 43,
+       FUSE_MUNMAP        = 44,
 
        /* CUSE specific operations */
        CUSE_INIT          = 4096,
@@ -571,6 +574,28 @@ struct fuse_notify_poll_wakeup_out {
        __u64   kh;
 };
 
+/* FUSE_MMAP request body: describes the mapping the client asked for */
+struct fuse_mmap_in {
+       __u64   fh;             /* file handle of the mmapped file */
+       __u64   addr;           /* vm_start of the vma */
+       __u64   len;            /* length of the vma in bytes */
+       __u32   prot;           /* PROT_READ/WRITE/EXEC derived from vm_flags */
+       __u32   flags;          /* MAP_* flags derived from vm_flags */
+       __u64   offset;         /* byte offset (vm_pgoff << PAGE_SHIFT) */
+};
+
+/* FUSE_MMAP reply body */
+struct fuse_mmap_out {
+       __u64   mapid;          /* Mmap ID, same namespace as Inode ID */
+       __u64   size;           /* Size of memory region */
+       __u64   reserved;
+};
+
+/* FUSE_MUNMAP request body, sent when the last sharing vma closes */
+struct fuse_munmap_in {
+       __u64   fh;             /* file handle the region was mapped from */
+       __u64   mapid;          /* Mmap ID, same namespace as Inode ID */
+       __u64   size;           /* Size of memory region */
+       __u64   reserved;
+};
+
 struct fuse_in_header {
        __u32   len;
        __u32   opcode;