}
__initcall(aio_setup);
+static void put_aio_ring_file(struct kioctx *ctx)
+{
+ struct file *aio_ring_file = ctx->aio_ring_file;
+ if (aio_ring_file) {
+ truncate_setsize(aio_ring_file->f_inode, 0);
+
+ /* Prevent further access to the kioctx from migratepages */
+ spin_lock(&aio_ring_file->f_inode->i_mapping->private_lock);
+ aio_ring_file->f_inode->i_mapping->private_data = NULL;
+ ctx->aio_ring_file = NULL;
+ spin_unlock(&aio_ring_file->f_inode->i_mapping->private_lock);
+
+ fput(aio_ring_file);
+ }
+}
+
static void aio_free_ring(struct kioctx *ctx)
{
int i;
- struct file *aio_ring_file = ctx->aio_ring_file;
for (i = 0; i < ctx->nr_pages; i++) {
pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i,
put_page(ctx->ring_pages[i]);
}
+ put_aio_ring_file(ctx);
+
if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages)
kfree(ctx->ring_pages);
-
- if (aio_ring_file) {
- truncate_setsize(aio_ring_file->f_inode, 0);
- fput(aio_ring_file);
- ctx->aio_ring_file = NULL;
- }
}
static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
static int aio_migratepage(struct address_space *mapping, struct page *new,
struct page *old, enum migrate_mode mode)
{
- struct kioctx *ctx = mapping->private_data;
+ struct kioctx *ctx;
unsigned long flags;
- unsigned idx = old->index;
int rc;
/* Writeback must be complete */
get_page(new);
- spin_lock_irqsave(&ctx->completion_lock, flags);
- migrate_page_copy(new, old);
- ctx->ring_pages[idx] = new;
- spin_unlock_irqrestore(&ctx->completion_lock, flags);
+ /* We can potentially race against kioctx teardown here. Use the
+ * address_space's private data lock to protect the mapping's
+ * private_data.
+ */
+ spin_lock(&mapping->private_lock);
+ ctx = mapping->private_data;
+ if (ctx) {
+ pgoff_t idx;
+ spin_lock_irqsave(&ctx->completion_lock, flags);
+ migrate_page_copy(new, old);
+ idx = old->index;
+ if (idx < (pgoff_t)ctx->nr_pages)
+ ctx->ring_pages[idx] = new;
+ spin_unlock_irqrestore(&ctx->completion_lock, flags);
+ } else
+ rc = -EBUSY;
+ spin_unlock(&mapping->private_lock);
return rc;
}
out_freeref:
free_percpu(ctx->users.pcpu_count);
out_freectx:
- if (ctx->aio_ring_file)
- fput(ctx->aio_ring_file);
+ put_aio_ring_file(ctx);
kmem_cache_free(kioctx_cachep, ctx);
pr_debug("error allocating ioctx %d\n", err);
return ERR_PTR(err);
iocb->ki_ctx = ERR_PTR(-EXDEV);
wake_up_process(iocb->ki_obj.tsk);
return;
+ } else if (is_kernel_kiocb(iocb)) {
+ iocb->ki_obj.complete(iocb->ki_user_data, res);
+ aio_kernel_free(iocb);
+ return;
}
/*
return 0;
}
+static ssize_t aio_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+ struct file *file = iocb->ki_filp;
+ ssize_t ret;
+
+ if (unlikely(!is_kernel_kiocb(iocb)))
+ return -EINVAL;
+
+ if (unlikely(!(file->f_mode & FMODE_READ)))
+ return -EBADF;
+
+ ret = security_file_permission(file, MAY_READ);
+ if (unlikely(ret))
+ return ret;
+
+ if (!file->f_op->read_iter)
+ return -EINVAL;
+
+ return file->f_op->read_iter(iocb, iter, iocb->ki_pos);
+}
+
+static ssize_t aio_write_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+ struct file *file = iocb->ki_filp;
+ ssize_t ret;
+
+ if (unlikely(!is_kernel_kiocb(iocb)))
+ return -EINVAL;
+
+ if (unlikely(!(file->f_mode & FMODE_WRITE)))
+ return -EBADF;
+
+ ret = security_file_permission(file, MAY_WRITE);
+ if (unlikely(ret))
+ return ret;
+
+ if (!file->f_op->write_iter)
+ return -EINVAL;
+
+ return file->f_op->write_iter(iocb, iter, iocb->ki_pos);
+}
+
/*
* aio_setup_iocb:
* Performs the initial checks and aio retry method
* setup for the kiocb at the time of io submission.
*/
static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
- char __user *buf, bool compat)
+ void *buf, bool compat)
{
struct file *file = req->ki_filp;
ssize_t ret;
case IOCB_CMD_PREADV:
mode = FMODE_READ;
rw = READ;
- rw_op = file->f_op->aio_read;
+ rw_op = do_aio_read;
goto rw_common;
case IOCB_CMD_PWRITE:
case IOCB_CMD_PWRITEV:
mode = FMODE_WRITE;
rw = WRITE;
- rw_op = file->f_op->aio_write;
+ rw_op = do_aio_write;
goto rw_common;
rw_common:
if (unlikely(!(file->f_mode & mode)))
file_end_write(file);
break;
+ case IOCB_CMD_READ_ITER:
+ ret = aio_read_iter(req, buf);
+ break;
+
+ case IOCB_CMD_WRITE_ITER:
+ ret = aio_write_iter(req, buf);
+ break;
+
case IOCB_CMD_FDSYNC:
if (!file->f_op->aio_fsync)
return -EINVAL;
return 0;
}
+/*
+ * This allocates an iocb that will be used to submit and track completion of
+ * an IO that is issued from kernel space.
+ *
+ * The caller is expected to call the appropriate aio_kernel_init_() functions
+ * and then call aio_kernel_submit(). From that point forward progress is
+ * guaranteed by the file system aio method. Eventually the caller's
+ * completion callback will be called.
+ *
+ * These iocbs are special. They don't have a context, we don't limit the
+ * number pending, and they can't be canceled.
+ */
+struct kiocb *aio_kernel_alloc(gfp_t gfp)
+{
+ return kzalloc(sizeof(struct kiocb), gfp);
+}
+EXPORT_SYMBOL_GPL(aio_kernel_alloc);
+
+void aio_kernel_free(struct kiocb *iocb)
+{
+ kfree(iocb);
+}
+EXPORT_SYMBOL_GPL(aio_kernel_free);
+
+/*
+ * ptr and count can be a buff and bytes or an iov and segs.
+ */
+void aio_kernel_init_rw(struct kiocb *iocb, struct file *filp,
+ size_t nr, loff_t off)
+{
+ iocb->ki_filp = filp;
+ iocb->ki_nbytes = nr;
+ iocb->ki_pos = off;
+ iocb->ki_ctx = (void *)-1;
+}
+EXPORT_SYMBOL_GPL(aio_kernel_init_rw);
+
+void aio_kernel_init_callback(struct kiocb *iocb,
+ void (*complete)(u64 user_data, long res),
+ u64 user_data)
+{
+ iocb->ki_obj.complete = complete;
+ iocb->ki_user_data = user_data;
+}
+EXPORT_SYMBOL_GPL(aio_kernel_init_callback);
+
+/*
+ * The iocb is our responsibility once this is called. The caller must not
+ * reference it.
+ *
+ * Callers must be prepared for their iocb completion callback to be called the
+ * moment they enter this function. The completion callback may be called from
+ * any context.
+ *
+ * Returns: 0: the iocb completion callback will be called with the op result
+ * negative errno: the operation was not submitted and the iocb was freed
+ */
+int aio_kernel_submit(struct kiocb *iocb, unsigned op, void *ptr)
+{
+ int ret;
+
+ BUG_ON(!is_kernel_kiocb(iocb));
+ BUG_ON(!iocb->ki_obj.complete);
+ BUG_ON(!iocb->ki_filp);
+
+ ret = aio_run_iocb(iocb, op, ptr, 0);
+
+ if (ret)
+ aio_kernel_free(iocb);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(aio_kernel_submit);
+
static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
struct iocb *iocb, bool compat)
{
req->ki_nbytes = iocb->aio_nbytes;
ret = aio_run_iocb(req, iocb->aio_lio_opcode,
- (char __user *)(unsigned long)iocb->aio_buf,
+ (void *)(unsigned long)iocb->aio_buf,
compat);
if (ret)
goto out_put_req;