git.karo-electronics.de Git - karo-tx-linux.git/blobdiff - fs/fuse/file.c
fuse: add support of async IO
[karo-tx-linux.git] / fs / fuse / file.c
index 1ee2fc92bc3e49d5357dd6f6a9bccdccb7cd2801..4002889fbcc11a7ce0d44831a55e63075c770794 100644 (file)
@@ -506,6 +506,98 @@ static void fuse_release_user_pages(struct fuse_req *req, int write)
        }
 }
 
+/**
+ * In case of short read, the caller sets 'pos' to the position of
+ * actual end of fuse request in IO request. Otherwise, if bytes_requested
+ * == bytes_transferred or rw == WRITE, the caller sets 'pos' to -1.
+ *
+ * An example:
+ * User requested DIO read of 64K. It was splitted into two 32K fuse requests,
+ * both submitted asynchronously. The first of them was ACKed by userspace as
+ * fully completed (req->out.args[0].size == 32K) resulting in pos == -1. The
+ * second request was ACKed as short, e.g. only 1K was read, resulting in
+ * pos == 33K.
+ *
+ * Thus, when all fuse requests are completed, the minimal non-negative 'pos'
+ * will be equal to the length of the longest contiguous fragment of
+ * transferred data starting from the beginning of IO request.
+ */
+static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
+{
+       int left;
+
+       spin_lock(&io->lock);
+       if (err)
+               io->err = io->err ? : err;
+       else if (pos >= 0 && (io->bytes < 0 || pos < io->bytes))
+               io->bytes = pos;
+
+       left = --io->reqs;
+       spin_unlock(&io->lock);
+
+       if (!left) {
+               long res;
+
+               if (io->err)
+                       res = io->err;
+               else if (io->bytes >= 0 && io->write)
+                       res = -EIO;
+               else {
+                       res = io->bytes < 0 ? io->size : io->bytes;
+
+                       if (!is_sync_kiocb(io->iocb)) {
+                               struct path *path = &io->iocb->ki_filp->f_path;
+                               struct inode *inode = path->dentry->d_inode;
+                               struct fuse_conn *fc = get_fuse_conn(inode);
+                               struct fuse_inode *fi = get_fuse_inode(inode);
+
+                               spin_lock(&fc->lock);
+                               fi->attr_version = ++fc->attr_version;
+                               spin_unlock(&fc->lock);
+                       }
+               }
+
+               aio_complete(io->iocb, res, 0);
+               kfree(io);
+       }
+}
+
+static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_req *req)
+{
+       struct fuse_io_priv *io = req->io;
+       ssize_t pos = -1;
+
+       fuse_release_user_pages(req, !io->write);
+
+       if (io->write) {
+               if (req->misc.write.in.size != req->misc.write.out.size)
+                       pos = req->misc.write.in.offset - io->offset +
+                               req->misc.write.out.size;
+       } else {
+               if (req->misc.read.in.size != req->out.args[0].size)
+                       pos = req->misc.read.in.offset - io->offset +
+                               req->out.args[0].size;
+       }
+
+       fuse_aio_complete(io, req->out.h.error, pos);
+}
+
+static size_t fuse_async_req_send(struct fuse_conn *fc, struct fuse_req *req,
+               size_t num_bytes, struct fuse_io_priv *io)
+{
+       spin_lock(&io->lock);
+       io->size += num_bytes;
+       io->reqs++;
+       spin_unlock(&io->lock);
+
+       req->io = io;
+       req->end = fuse_aio_complete_req;
+
+       fuse_request_send_background(fc, req);
+
+       return num_bytes;
+}
+
 static size_t fuse_send_read(struct fuse_req *req, struct file *file,
                             loff_t pos, size_t count, fl_owner_t owner)
 {