]> git.karo-electronics.de Git - karo-tx-linux.git/blobdiff - fs/fuse/file.c
Merge remote-tracking branch 'hid/for-next'
[karo-tx-linux.git] / fs / fuse / file.c
index cc3a6c4437e454ac9b35aa46ad13a12ef97a97c1..26c33f36179af2061fd57641da00fdf0f60a0a92 100644 (file)
@@ -1179,9 +1179,10 @@ static inline void fuse_page_descs_length_init(struct fuse_req *req,
                        req->page_descs[i].offset;
 }
 
-static inline unsigned long fuse_get_user_addr(const struct iov_iter *ii)
+/*
+ * Address the iterator currently refers to: the iov_base of the iovec
+ * returned by iov_iter_iovec() plus ii->iov_offset.
+ */
+static inline unsigned long fuse_get_user_addr(struct iov_iter *ii)
 {
-       return (unsigned long)ii->iov->iov_base + ii->iov_offset;
+       struct iovec *seg = iov_iter_iovec(ii);
+       unsigned long base = (unsigned long)seg->iov_base;
+
+       return base + ii->iov_offset;
 }
 
 static inline size_t fuse_get_frag_size(const struct iov_iter *ii,
@@ -1270,9 +1271,8 @@ static inline int fuse_iter_npages(const struct iov_iter *ii_p)
        return min(npages, FUSE_MAX_PAGES_PER_REQ);
 }
 
-ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
-                      unsigned long nr_segs, size_t count, loff_t *ppos,
-                      int write)
+ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *ii,
+                      size_t count, loff_t *ppos, int write)
 {
        struct file *file = io->file;
        struct fuse_file *ff = file->private_data;
@@ -1281,14 +1281,11 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
        loff_t pos = *ppos;
        ssize_t res = 0;
        struct fuse_req *req;
-       struct iov_iter ii;
-
-       iov_iter_init(&ii, iov, nr_segs, count, 0);
 
        if (io->async)
-               req = fuse_get_req_for_background(fc, fuse_iter_npages(&ii));
+               req = fuse_get_req_for_background(fc, fuse_iter_npages(ii));
        else
-               req = fuse_get_req(fc, fuse_iter_npages(&ii));
+               req = fuse_get_req(fc, fuse_iter_npages(ii));
        if (IS_ERR(req))
                return PTR_ERR(req);
 
@@ -1296,7 +1293,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
                size_t nres;
                fl_owner_t owner = current->files;
                size_t nbytes = min(count, nmax);
-               int err = fuse_get_user_pages(req, &ii, &nbytes, write);
+               int err = fuse_get_user_pages(req, ii, &nbytes, write);
                if (err) {
                        res = err;
                        break;
@@ -1326,9 +1323,9 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
                        fuse_put_request(fc, req);
                        if (io->async)
                                req = fuse_get_req_for_background(fc,
-                                       fuse_iter_npages(&ii));
+                                       fuse_iter_npages(ii));
                        else
-                               req = fuse_get_req(fc, fuse_iter_npages(&ii));
+                               req = fuse_get_req(fc, fuse_iter_npages(ii));
                        if (IS_ERR(req))
                                break;
                }
@@ -1342,10 +1339,8 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
 }
 EXPORT_SYMBOL_GPL(fuse_direct_io);
 
-static ssize_t __fuse_direct_read(struct fuse_io_priv *io,
-                                 const struct iovec *iov,
-                                 unsigned long nr_segs, loff_t *ppos,
-                                 size_t count)
+static ssize_t __fuse_direct_read(struct fuse_io_priv *io, struct iov_iter *ii,
+                                 loff_t *ppos, size_t count)
 {
        ssize_t res;
        struct file *file = io->file;
@@ -1354,7 +1349,7 @@ static ssize_t __fuse_direct_read(struct fuse_io_priv *io,
        if (is_bad_inode(inode))
                return -EIO;
 
-       res = fuse_direct_io(io, iov, nr_segs, count, ppos, 0);
+       res = fuse_direct_io(io, ii, count, ppos, 0);
 
        fuse_invalidate_attr(inode);
 
@@ -1366,21 +1361,24 @@ static ssize_t fuse_direct_read(struct file *file, char __user *buf,
 {
        struct fuse_io_priv io = { .async = 0, .file = file };
        struct iovec iov = { .iov_base = buf, .iov_len = count };
-       return __fuse_direct_read(&io, &iov, 1, ppos, count);
+       struct iov_iter ii;
+
+       iov_iter_init(&ii, &iov, 1, count, 0);
+
+       return __fuse_direct_read(&io, &ii, ppos, count);
 }
 
-static ssize_t __fuse_direct_write(struct fuse_io_priv *io,
-                                  const struct iovec *iov,
-                                  unsigned long nr_segs, loff_t *ppos)
+static ssize_t __fuse_direct_write(struct fuse_io_priv *io, struct iov_iter *ii,
+                                  loff_t *ppos)
 {
        struct file *file = io->file;
        struct inode *inode = file_inode(file);
-       size_t count = iov_length(iov, nr_segs);
+       size_t count = iov_iter_count(ii);
        ssize_t res;
 
        res = generic_write_checks(file, ppos, &count, 0);
        if (!res)
-               res = fuse_direct_io(io, iov, nr_segs, count, ppos, 1);
+               res = fuse_direct_io(io, ii, count, ppos, 1);
 
        fuse_invalidate_attr(inode);
 
@@ -1391,6 +1389,7 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
                                 size_t count, loff_t *ppos)
 {
        struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
+       struct iov_iter ii;
        struct inode *inode = file_inode(file);
        ssize_t res;
        struct fuse_io_priv io = { .async = 0, .file = file };
@@ -1398,9 +1397,11 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
        if (is_bad_inode(inode))
                return -EIO;
 
+       iov_iter_init(&ii, &iov, 1, count, 0);
+
        /* Don't allow parallel writes to the same file */
        mutex_lock(&inode->i_mutex);
-       res = __fuse_direct_write(&io, &iov, 1, ppos);
+       res = __fuse_direct_write(&io, &ii, ppos);
        if (res > 0)
                fuse_write_update_size(inode, *ppos);
        mutex_unlock(&inode->i_mutex);
@@ -1414,7 +1415,9 @@ static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
 
        for (i = 0; i < req->num_pages; i++)
                __free_page(req->pages[i]);
-       fuse_file_put(req->ff, false);
+
+       if (req->ff)
+               fuse_file_put(req->ff, false);
 }
 
 static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
@@ -1434,12 +1437,12 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
 }
 
 /* Called under fc->lock, may release and reacquire it */
-static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req)
+static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req,
+                               loff_t size)
 __releases(fc->lock)
 __acquires(fc->lock)
 {
        struct fuse_inode *fi = get_fuse_inode(req->inode);
-       loff_t size = i_size_read(req->inode);
        struct fuse_write_in *inarg = &req->misc.write.in;
        __u64 data_size = req->num_pages * PAGE_CACHE_SIZE;
 
@@ -1480,12 +1483,13 @@ __acquires(fc->lock)
 {
        struct fuse_conn *fc = get_fuse_conn(inode);
        struct fuse_inode *fi = get_fuse_inode(inode);
+       size_t crop = i_size_read(inode);
        struct fuse_req *req;
 
        while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) {
                req = list_entry(fi->queued_writes.next, struct fuse_req, list);
                list_del_init(&req->list);
-               fuse_send_writepage(fc, req);
+               fuse_send_writepage(fc, req, crop);
        }
 }
 
@@ -1496,6 +1500,40 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
 
        mapping_set_error(inode->i_mapping, req->out.h.error);
        spin_lock(&fc->lock);
+       while (req->misc.write.next) {
+               struct fuse_conn *fc = get_fuse_conn(inode);
+               struct fuse_write_in *inarg = &req->misc.write.in;
+               struct fuse_req *next = req->misc.write.next;
+               req->misc.write.next = next->misc.write.next;
+               next->misc.write.next = NULL;
+               next->ff = fuse_file_get(req->ff);
+               list_add(&next->writepages_entry, &fi->writepages);
+
+               /*
+                * Skip fuse_flush_writepages() to make it easy to crop requests
+                * based on primary request size.
+                *
+                * 1st case (trivial): there are no concurrent activities using
+                * fuse_set/release_nowrite.  Then we're on safe side because
+                * fuse_flush_writepages() would call fuse_send_writepage()
+                * anyway.
+                *
+                * 2nd case: someone called fuse_set_nowrite and it is waiting
+                * now for completion of all in-flight requests.  This happens
+                * rarely and no more than once per page, so this should be
+                * okay.
+                *
+                * 3rd case: someone (e.g. fuse_do_setattr()) is in the middle
+                * of fuse_set_nowrite..fuse_release_nowrite section.  The fact
+                * that fuse_set_nowrite returned implies that all in-flight
+                * requests were completed along with all of their secondary
+                * requests.  Further primary requests are blocked by negative
+                * writectr.  Hence there cannot be any in-flight requests and
+                * no invocations of fuse_writepage_end() while we're in
+                * fuse_set_nowrite..fuse_release_nowrite section.
+                */
+               fuse_send_writepage(fc, next, inarg->offset + inarg->size);
+       }
        fi->writectr--;
        fuse_writepage_finish(fc, req);
        spin_unlock(&fc->lock);
@@ -1548,6 +1586,7 @@ static int fuse_writepage_locked(struct page *page)
 
        copy_highpage(tmp_page, page);
        req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
+       req->misc.write.next = NULL;
        req->in.argpages = 1;
        req->num_pages = 1;
        req->pages[0] = tmp_page;
@@ -1580,6 +1619,18 @@ static int fuse_writepage(struct page *page, struct writeback_control *wbc)
 {
        int err;
 
+       if (fuse_page_is_writeback(page->mapping->host, page->index)) {
+               /*
+                * ->writepages() should be called for sync() and friends.  We
+                * should only get here on direct reclaim and then we are
+                * allowed to skip a page which is already in flight
+                */
+               WARN_ON(wbc->sync_mode == WB_SYNC_ALL);
+
+               redirty_page_for_writepage(wbc, page);
+               return 0;
+       }
+
        err = fuse_writepage_locked(page);
        unlock_page(page);
 
@@ -1612,6 +1663,67 @@ static void fuse_writepages_send(struct fuse_fill_wb_data *data)
                end_page_writeback(data->orig_pages[i]);
 }
 
+/*
+ * Check whether @page is already covered by a write request on the inode's
+ * fi->writepages list and, if so, fold @new_req into that request.
+ *
+ * @new_req: request prepared for @page; its num_pages must still be 0.  It is
+ *           taken off fi->writepages for the scan and put back only when no
+ *           overlapping request is found.
+ * @page:    page-cache page being written back.
+ *
+ * When an overlapping request is found, one of two things happens:
+ *  - if a single-page request for the same index is still in FUSE_REQ_INIT
+ *    or FUSE_REQ_PENDING state, its page is overwritten with the contents of
+ *    @page, the writeback accounting done for @new_req is undone and
+ *    @new_req is freed;
+ *  - otherwise @new_req is linked into that request's misc.write.next chain.
+ *
+ * Takes and releases fc->lock internally.
+ *
+ * Returns true if an overlapping request was found (so @new_req has been
+ * either freed or chained), false if @new_req was put back on the list.
+ */
+static bool fuse_writepage_in_flight(struct fuse_req *new_req,
+                                    struct page *page)
+{
+       struct fuse_conn *fc = get_fuse_conn(new_req->inode);
+       struct fuse_inode *fi = get_fuse_inode(new_req->inode);
+       struct fuse_req *tmp;
+       struct fuse_req *old_req;
+       bool found = false;
+       pgoff_t curr_index;
+
+       BUG_ON(new_req->num_pages != 0);
+
+       spin_lock(&fc->lock);
+       list_del(&new_req->writepages_entry);
+       /* Look for a listed request whose page range contains page->index. */
+       list_for_each_entry(old_req, &fi->writepages, writepages_entry) {
+               BUG_ON(old_req->inode != new_req->inode);
+               curr_index = old_req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
+               if (curr_index <= page->index &&
+                   page->index < curr_index + old_req->num_pages) {
+                       found = true;
+                       break;
+               }
+       }
+       if (!found) {
+               list_add(&new_req->writepages_entry, &fi->writepages);
+               goto out_unlock;
+       }
+
+       /* From here on new_req owns exactly one page (freed via pages[0]). */
+       new_req->num_pages = 1;
+       /*
+        * Walk the found request and its chained requests; prefer the last
+        * single-page request that targets exactly this page index.
+        */
+       for (tmp = old_req; tmp != NULL; tmp = tmp->misc.write.next) {
+               BUG_ON(tmp->inode != new_req->inode);
+               curr_index = tmp->misc.write.in.offset >> PAGE_CACHE_SHIFT;
+               if (tmp->num_pages == 1 &&
+                   curr_index == page->index) {
+                       old_req = tmp;
+               }
+       }
+
+       if (old_req->num_pages == 1 && (old_req->state == FUSE_REQ_INIT ||
+                                       old_req->state == FUSE_REQ_PENDING)) {
+               struct backing_dev_info *bdi = page->mapping->backing_dev_info;
+
+               /* Request not sent yet: just refresh the data it carries. */
+               copy_highpage(old_req->pages[0], page);
+               spin_unlock(&fc->lock);
+
+               dec_bdi_stat(bdi, BDI_WRITEBACK);
+               /*
+                * NR_WRITEBACK_TEMP was charged to the request's temporary
+                * page, not to the page-cache page, and the two may belong to
+                * different zones; undo it on the page it was charged to,
+                * before that page is freed below.
+                */
+               dec_zone_page_state(new_req->pages[0], NR_WRITEBACK_TEMP);
+               bdi_writeout_inc(bdi);
+               fuse_writepage_free(fc, new_req);
+               fuse_request_free(new_req);
+               goto out;
+       } else {
+               /* Chain new_req right behind the selected request. */
+               new_req->misc.write.next = old_req->misc.write.next;
+               old_req->misc.write.next = new_req;
+       }
+out_unlock:
+       spin_unlock(&fc->lock);
+out:
+       return found;
+}
+
 static int fuse_writepages_fill(struct page *page,
                struct writeback_control *wbc, void *_data)
 {
@@ -1620,6 +1732,7 @@ static int fuse_writepages_fill(struct page *page,
        struct inode *inode = data->inode;
        struct fuse_conn *fc = get_fuse_conn(inode);
        struct page *tmp_page;
+       bool is_writeback;
        int err;
 
        if (!data->ff) {
@@ -1629,15 +1742,20 @@ static int fuse_writepages_fill(struct page *page,
                        goto out_unlock;
        }
 
-       if (req) {
-               BUG_ON(!req->num_pages);
-               if (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
-                   (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_write ||
-                   req->pages[req->num_pages - 1]->index + 1 != page->index) {
+       /*
+        * Being under writeback is unlikely but possible.  For example direct
+        * read to an mmaped fuse file will set the page dirty twice; once when
+        * the pages are faulted with get_user_pages(), and then after the read
+        * completed.
+        */
+       is_writeback = fuse_page_is_writeback(inode, page->index);
 
-                       fuse_writepages_send(data);
-                       data->req = NULL;
-               }
+       if (req && req->num_pages &&
+           (is_writeback || req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
+            (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_write ||
+            data->orig_pages[req->num_pages - 1]->index + 1 != page->index)) {
+               fuse_writepages_send(data);
+               data->req = NULL;
        }
        err = -ENOMEM;
        tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
@@ -1669,6 +1787,7 @@ static int fuse_writepages_fill(struct page *page,
 
                fuse_write_fill(req, data->ff, page_offset(page), 0);
                req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
+               req->misc.write.next = NULL;
                req->in.argpages = 1;
                req->background = 1;
                req->num_pages = 0;
@@ -1690,6 +1809,13 @@ static int fuse_writepages_fill(struct page *page,
 
        inc_bdi_stat(page->mapping->backing_dev_info, BDI_WRITEBACK);
        inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
+
+       err = 0;
+       if (is_writeback && fuse_writepage_in_flight(req, page)) {
+               end_page_writeback(page);
+               data->req = NULL;
+               goto out_unlock;
+       }
        data->orig_pages[req->num_pages] = page;
 
        /*
@@ -1700,7 +1826,6 @@ static int fuse_writepages_fill(struct page *page,
        req->num_pages++;
        spin_unlock(&fc->lock);
 
-       err = 0;
 out_unlock:
        unlock_page(page);
 
@@ -2052,30 +2177,17 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov,
        while (iov_iter_count(&ii)) {
                struct page *page = pages[page_idx++];
                size_t todo = min_t(size_t, PAGE_SIZE, iov_iter_count(&ii));
-               void *kaddr;
-
-               kaddr = kmap(page);
-
-               while (todo) {
-                       char __user *uaddr = ii.iov->iov_base + ii.iov_offset;
-                       size_t iov_len = ii.iov->iov_len - ii.iov_offset;
-                       size_t copy = min(todo, iov_len);
-                       size_t left;
+               size_t left;
 
-                       if (!to_user)
-                               left = copy_from_user(kaddr, uaddr, copy);
-                       else
-                               left = copy_to_user(uaddr, kaddr, copy);
-
-                       if (unlikely(left))
-                               return -EFAULT;
+               if (!to_user)
+                       left = iov_iter_copy_from_user(page, &ii, 0, todo);
+               else
+                       left = iov_iter_copy_to_user(page, &ii, 0, todo);
 
-                       iov_iter_advance(&ii, copy);
-                       todo -= copy;
-                       kaddr += copy;
-               }
+               if (unlikely(left))
+                       return -EFAULT;
 
-               kunmap(page);
+               iov_iter_advance(&ii, todo);
        }
 
        return 0;
@@ -2569,8 +2681,8 @@ static inline loff_t fuse_round_up(loff_t off)
 }
 
 static ssize_t
-fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-                       loff_t offset, unsigned long nr_segs)
+fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *ii,
+                       loff_t offset)
 {
        ssize_t ret = 0;
        struct file *file = iocb->ki_filp;
@@ -2579,7 +2691,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
        loff_t pos = 0;
        struct inode *inode;
        loff_t i_size;
-       size_t count = iov_length(iov, nr_segs);
+       size_t count = iov_iter_count(ii);
        struct fuse_io_priv *io;
 
        pos = offset;
@@ -2620,9 +2732,9 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
                io->async = false;
 
        if (rw == WRITE)
-               ret = __fuse_direct_write(io, iov, nr_segs, &pos);
+               ret = __fuse_direct_write(io, ii, &pos);
        else
-               ret = __fuse_direct_read(io, iov, nr_segs, &pos, count);
+               ret = __fuse_direct_read(io, ii, &pos, count);
 
        if (io->async) {
                fuse_aio_complete(io, ret < 0 ? ret : 0, -1);