index 6b868f0e0c4c019c3b68712ca230deb979711a98..a5630703eb565f4c0d2062bbe7b3ab1617dc2213 100644 (file)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -167,10 +167,25 @@ static int __init aio_setup(void)
 }
 __initcall(aio_setup);
 
+static void put_aio_ring_file(struct kioctx *ctx)
+{
+       struct file *aio_ring_file = ctx->aio_ring_file;
+       if (aio_ring_file) {
+               truncate_setsize(aio_ring_file->f_inode, 0);
+
+               /* Prevent further access to the kioctx from migratepages */
+               spin_lock(&aio_ring_file->f_inode->i_mapping->private_lock);
+               aio_ring_file->f_inode->i_mapping->private_data = NULL;
+               ctx->aio_ring_file = NULL;
+               spin_unlock(&aio_ring_file->f_inode->i_mapping->private_lock);
+
+               fput(aio_ring_file);
+       }
+}
+
 static void aio_free_ring(struct kioctx *ctx)
 {
        int i;
-       struct file *aio_ring_file = ctx->aio_ring_file;
 
        for (i = 0; i < ctx->nr_pages; i++) {
                pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i,
@@ -178,14 +193,10 @@ static void aio_free_ring(struct kioctx *ctx)
                put_page(ctx->ring_pages[i]);
        }
 
+       put_aio_ring_file(ctx);
+
        if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages)
                kfree(ctx->ring_pages);
-
-       if (aio_ring_file) {
-               truncate_setsize(aio_ring_file->f_inode, 0);
-               fput(aio_ring_file);
-               ctx->aio_ring_file = NULL;
-       }
 }
 
 static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
@@ -207,9 +218,8 @@ static int aio_set_page_dirty(struct page *page)
 static int aio_migratepage(struct address_space *mapping, struct page *new,
                        struct page *old, enum migrate_mode mode)
 {
-       struct kioctx *ctx = mapping->private_data;
+       struct kioctx *ctx;
        unsigned long flags;
-       unsigned idx = old->index;
        int rc;
 
        /* Writeback must be complete */
@@ -224,10 +234,23 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
 
        get_page(new);
 
-       spin_lock_irqsave(&ctx->completion_lock, flags);
-       migrate_page_copy(new, old);
-       ctx->ring_pages[idx] = new;
-       spin_unlock_irqrestore(&ctx->completion_lock, flags);
+       /* We can potentially race against kioctx teardown here.  Use the
+        * address_space's private data lock to protect the mapping's
+        * private_data.
+        */
+       spin_lock(&mapping->private_lock);
+       ctx = mapping->private_data;
+       if (ctx) {
+               pgoff_t idx;
+               spin_lock_irqsave(&ctx->completion_lock, flags);
+               migrate_page_copy(new, old);
+               idx = old->index;
+               if (idx < (pgoff_t)ctx->nr_pages)
+                       ctx->ring_pages[idx] = new;
+               spin_unlock_irqrestore(&ctx->completion_lock, flags);
+       } else
+               rc = -EBUSY;
+       spin_unlock(&mapping->private_lock);
 
        return rc;
 }
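
The comment above and the new put_aio_ring_file() helper implement a small publish/consume protocol around mapping->private_data: teardown clears the pointer under mapping->private_lock before dropping the last file reference, and aio_migratepage() re-reads it under the same lock and returns -EBUSY if the kioctx is already gone. The standalone user-space sketch below restates that pattern with a pthread mutex purely as an illustration; every name in it (struct mapping, teardown(), use_mapping(), ctx_stub) is invented for the example and is not kernel code.

/*
 * Standalone user-space sketch (not kernel code) of the publish/consume
 * pattern used above: teardown clears a shared pointer under a lock before
 * dropping the last reference, and any concurrent user re-reads the pointer
 * under the same lock and backs off if it is already NULL.
 */
#include <pthread.h>
#include <stdio.h>

static int ctx_stub;			/* stands in for a live kioctx */

struct mapping {
	pthread_mutex_t private_lock;	/* stands in for mapping->private_lock */
	void *private_data;		/* stands in for mapping->private_data */
};

/* Teardown side, analogous to put_aio_ring_file(). */
static void teardown(struct mapping *m)
{
	pthread_mutex_lock(&m->private_lock);
	m->private_data = NULL;		/* later lookups must observe NULL... */
	pthread_mutex_unlock(&m->private_lock);
	/* ...before the final reference is dropped (the fput() in the patch). */
}

/* Concurrent user, analogous to aio_migratepage(). */
static int use_mapping(struct mapping *m)
{
	int rc;

	pthread_mutex_lock(&m->private_lock);
	if (m->private_data)
		rc = 1;			/* context still live, safe to use it */
	else
		rc = -1;		/* gone: the -EBUSY case in the patch */
	pthread_mutex_unlock(&m->private_lock);
	return rc;
}

int main(void)
{
	struct mapping m = { PTHREAD_MUTEX_INITIALIZER, &ctx_stub };

	printf("before teardown: %d\n", use_mapping(&m));	/* prints 1 */
	teardown(&m);
	printf("after teardown:  %d\n", use_mapping(&m));	/* prints -1 */
	return 0;
}

The point mirrored from the patch is the ordering: the teardown path publishes NULL while still holding the lock, so a migration that wins the race sees a consistent pointer and one that loses simply bails out instead of touching freed memory.
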
@@ -617,8 +640,7 @@ out_freepcpu:
 out_freeref:
        free_percpu(ctx->users.pcpu_count);
 out_freectx:
-       if (ctx->aio_ring_file)
-               fput(ctx->aio_ring_file);
+       put_aio_ring_file(ctx);
        kmem_cache_free(kioctx_cachep, ctx);
        pr_debug("error allocating ioctx %d\n", err);
        return ERR_PTR(err);
@@ -855,6 +877,10 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
                iocb->ki_ctx = ERR_PTR(-EXDEV);
                wake_up_process(iocb->ki_obj.tsk);
                return;
+       } else if (is_kernel_kiocb(iocb)) {
+               iocb->ki_obj.complete(iocb->ki_user_data, res);
+               aio_kernel_free(iocb);
+               return;
        }
 
        /*
@@ -1173,13 +1199,55 @@ static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
        return 0;
 }
 
+static ssize_t aio_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+       struct file *file = iocb->ki_filp;
+       ssize_t ret;
+
+       if (unlikely(!is_kernel_kiocb(iocb)))
+               return -EINVAL;
+
+       if (unlikely(!(file->f_mode & FMODE_READ)))
+               return -EBADF;
+
+       ret = security_file_permission(file, MAY_READ);
+       if (unlikely(ret))
+               return ret;
+
+       if (!file->f_op->read_iter)
+               return -EINVAL;
+
+       return file->f_op->read_iter(iocb, iter, iocb->ki_pos);
+}
+
+static ssize_t aio_write_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+       struct file *file = iocb->ki_filp;
+       ssize_t ret;
+
+       if (unlikely(!is_kernel_kiocb(iocb)))
+               return -EINVAL;
+
+       if (unlikely(!(file->f_mode & FMODE_WRITE)))
+               return -EBADF;
+
+       ret = security_file_permission(file, MAY_WRITE);
+       if (unlikely(ret))
+               return ret;
+
+       if (!file->f_op->write_iter)
+               return -EINVAL;
+
+       return file->f_op->write_iter(iocb, iter, iocb->ki_pos);
+}
+
 /*
  * aio_setup_iocb:
  *     Performs the initial checks and aio retry method
  *     setup for the kiocb at the time of io submission.
  */
 static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
-                           char __user *buf, bool compat)
+                           void *buf, bool compat)
 {
        struct file *file = req->ki_filp;
        ssize_t ret;
@@ -1194,14 +1262,14 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
        case IOCB_CMD_PREADV:
                mode    = FMODE_READ;
                rw      = READ;
-               rw_op   = file->f_op->aio_read;
+               rw_op   = do_aio_read;
                goto rw_common;
 
        case IOCB_CMD_PWRITE:
        case IOCB_CMD_PWRITEV:
                mode    = FMODE_WRITE;
                rw      = WRITE;
-               rw_op   = file->f_op->aio_write;
+               rw_op   = do_aio_write;
                goto rw_common;
 rw_common:
                if (unlikely(!(file->f_mode & mode)))
@@ -1244,6 +1312,14 @@ rw_common:
                        file_end_write(file);
                break;
 
+       case IOCB_CMD_READ_ITER:
+               ret = aio_read_iter(req, buf);
+               break;
+
+       case IOCB_CMD_WRITE_ITER:
+               ret = aio_write_iter(req, buf);
+               break;
+
        case IOCB_CMD_FDSYNC:
                if (!file->f_op->aio_fsync)
                        return -EINVAL;
@@ -1281,6 +1357,80 @@ rw_common:
        return 0;
 }
 
+/*
+ * This allocates an iocb that will be used to submit and track completion of
+ * an IO that is issued from kernel space.
+ *
+ * The caller is expected to call the appropriate aio_kernel_init_*() functions
+ * and then call aio_kernel_submit().  From then on, forward progress is
+ * guaranteed by the file system aio method.  Eventually the caller's
+ * completion callback will be called.
+ *
+ * These iocbs are special.  They don't have a context, we don't limit the
+ * number pending, and they can't be canceled.
+ */
+struct kiocb *aio_kernel_alloc(gfp_t gfp)
+{
+       return kzalloc(sizeof(struct kiocb), gfp);
+}
+EXPORT_SYMBOL_GPL(aio_kernel_alloc);
+
+void aio_kernel_free(struct kiocb *iocb)
+{
+       kfree(iocb);
+}
+EXPORT_SYMBOL_GPL(aio_kernel_free);
+
+/*
+ * nr and the ptr later passed to aio_kernel_submit() can be bytes and a buffer, or segs and an iovec.
+ */
+void aio_kernel_init_rw(struct kiocb *iocb, struct file *filp,
+                       size_t nr, loff_t off)
+{
+       iocb->ki_filp = filp;
+       iocb->ki_nbytes = nr;
+       iocb->ki_pos = off;
+       iocb->ki_ctx = (void *)-1;
+}
+EXPORT_SYMBOL_GPL(aio_kernel_init_rw);
+
+void aio_kernel_init_callback(struct kiocb *iocb,
+                             void (*complete)(u64 user_data, long res),
+                             u64 user_data)
+{
+       iocb->ki_obj.complete = complete;
+       iocb->ki_user_data = user_data;
+}
+EXPORT_SYMBOL_GPL(aio_kernel_init_callback);
+
+/*
+ * The iocb is our responsibility once this is called.  The caller must not
+ * reference it.
+ *
+ * Callers must be prepared for their iocb completion callback to be called the
+ * moment they enter this function.  The completion callback may be called from
+ * any context.
+ *
+ * Returns: 0: the iocb completion callback will be called with the op result
+ * negative errno: the operation was not submitted and the iocb was freed
+ */
+int aio_kernel_submit(struct kiocb *iocb, unsigned op, void *ptr)
+{
+       int ret;
+
+       BUG_ON(!is_kernel_kiocb(iocb));
+       BUG_ON(!iocb->ki_obj.complete);
+       BUG_ON(!iocb->ki_filp);
+
+       ret = aio_run_iocb(iocb, op, ptr, 0);
+
+       if (ret)
+               aio_kernel_free(iocb);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(aio_kernel_submit);
+
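
As a usage illustration (not part of the patch), the sketch below shows how a kernel-side caller might drive the interface exported above: aio_kernel_alloc(), aio_kernel_init_rw(), aio_kernel_init_callback() and aio_kernel_submit() with the IOCB_CMD_READ_ITER opcode handled in aio_run_iocb(). The names submit_kernel_read() and my_read_done() are invented for the example, and it assumes the three-argument iov_iter_init() helper and header layout of kernels from this era; the iovec and iov_iter are caller-owned and must stay alive until the completion callback runs, since the I/O is asynchronous.

#include <linux/aio.h>
#include <linux/fs.h>
#include <linux/uio.h>

/* Completion callback: may be invoked from any context, even before
 * aio_kernel_submit() returns. */
static void my_read_done(u64 user_data, long res)
{
	pr_debug("kernel aio read finished: cookie %llu, result %ld\n",
		 (unsigned long long)user_data, res);
}

/*
 * Queue an asynchronous read of @len bytes at @pos from @filp into @buf.
 * @iov and @iter are caller-owned storage that must remain valid until
 * my_read_done() has been called.  On failure aio_kernel_submit() has
 * already freed the iocb.
 */
static int submit_kernel_read(struct file *filp, void *buf, size_t len,
			      loff_t pos, struct iovec *iov,
			      struct iov_iter *iter)
{
	struct kiocb *iocb;

	iocb = aio_kernel_alloc(GFP_KERNEL);
	if (!iocb)
		return -ENOMEM;

	iov->iov_base = buf;
	iov->iov_len = len;
	iov_iter_init(iter, iov, 1, len, 0);

	aio_kernel_init_rw(iocb, filp, len, pos);
	aio_kernel_init_callback(iocb, my_read_done, (u64)(unsigned long)buf);

	return aio_kernel_submit(iocb, IOCB_CMD_READ_ITER, iter);
}

Because aio_kernel_init_rw() marks the iocb with the (void *)-1 context sentinel, aio_complete() takes the new is_kernel_kiocb() branch above, calls my_read_done() directly instead of posting to a user-space ring, and then frees the iocb with aio_kernel_free().
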
 static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
                         struct iocb *iocb, bool compat)
 {
@@ -1340,7 +1490,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
        req->ki_nbytes = iocb->aio_nbytes;
 
        ret = aio_run_iocb(req, iocb->aio_lio_opcode,
-                          (char __user *)(unsigned long)iocb->aio_buf,
+                          (void *)(unsigned long)iocb->aio_buf,
                           compat);
        if (ret)
                goto out_put_req;