struct work_struct free_work;
+ /*
+ * signals when all in-flight requests are done
+ */
+ struct completion *requests_done;
+
struct {
/*
* This counts the number of available slots in the ringbuffer,
}
EXPORT_SYMBOL(kiocb_set_cancel_fn);
-static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb)
+static int kiocb_cancel(struct kiocb *kiocb)
{
kiocb_cancel_fn *old, *cancel;
{
struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
+ /* At this point we know that there are no any in-flight requests */
+ if (ctx->requests_done)
+ complete(ctx->requests_done);
+
INIT_WORK(&ctx->free_work, free_ioctx);
schedule_work(&ctx->free_work);
}
struct kiocb, ki_list);
list_del_init(&req->ki_list);
- kiocb_cancel(ctx, req);
+ kiocb_cancel(req);
}
spin_unlock_irq(&ctx->ctx_lock);
* when the processes owning a context have all exited to encourage
* the rapid destruction of the kioctx.
*/
-static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx)
+static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
+ struct completion *requests_done)
{
- if (!atomic_xchg(&ctx->dead, 1)) {
- struct kioctx_table *table;
+ struct kioctx_table *table;
- spin_lock(&mm->ioctx_lock);
- rcu_read_lock();
- table = rcu_dereference(mm->ioctx_table);
+ if (atomic_xchg(&ctx->dead, 1))
+ return -EINVAL;
- WARN_ON(ctx != table->table[ctx->id]);
- table->table[ctx->id] = NULL;
- rcu_read_unlock();
- spin_unlock(&mm->ioctx_lock);
- /* percpu_ref_kill() will do the necessary call_rcu() */
- wake_up_all(&ctx->wait);
+ spin_lock(&mm->ioctx_lock);
+ rcu_read_lock();
+ table = rcu_dereference(mm->ioctx_table);
- /*
- * It'd be more correct to do this in free_ioctx(), after all
- * the outstanding kiocbs have finished - but by then io_destroy
- * has already returned, so io_setup() could potentially return
- * -EAGAIN with no ioctxs actually in use (as far as userspace
- * could tell).
- */
- aio_nr_sub(ctx->max_reqs);
+ WARN_ON(ctx != table->table[ctx->id]);
+ table->table[ctx->id] = NULL;
+ rcu_read_unlock();
+ spin_unlock(&mm->ioctx_lock);
- if (ctx->mmap_size)
- vm_munmap(ctx->mmap_base, ctx->mmap_size);
+ /* percpu_ref_kill() will do the necessary call_rcu() */
+ wake_up_all(&ctx->wait);
- percpu_ref_kill(&ctx->users);
- }
+ /*
+ * It'd be more correct to do this in free_ioctx(), after all
+ * the outstanding kiocbs have finished - but by then io_destroy
+ * has already returned, so io_setup() could potentially return
+ * -EAGAIN with no ioctxs actually in use (as far as userspace
+ * could tell).
+ */
+ aio_nr_sub(ctx->max_reqs);
+
+ if (ctx->mmap_size)
+ vm_munmap(ctx->mmap_base, ctx->mmap_size);
+
+ ctx->requests_done = requests_done;
+ percpu_ref_kill(&ctx->users);
+ return 0;
}
/* wait_on_sync_kiocb:
*/
ctx->mmap_size = 0;
- kill_ioctx(mm, ctx);
+ kill_ioctx(mm, ctx, NULL);
}
}
if (!IS_ERR(ioctx)) {
ret = put_user(ioctx->user_id, ctxp);
if (ret)
- kill_ioctx(current->mm, ioctx);
+ kill_ioctx(current->mm, ioctx, NULL);
percpu_ref_put(&ioctx->users);
}
{
struct kioctx *ioctx = lookup_ioctx(ctx);
if (likely(NULL != ioctx)) {
- kill_ioctx(current->mm, ioctx);
+ struct completion requests_done =
+ COMPLETION_INITIALIZER_ONSTACK(requests_done);
+ int ret;
+
+ /* Pass requests_done to kill_ioctx() where it can be set
+ * in a thread-safe way. If we try to set it here then we have
+ * a race condition if two io_destroy() called simultaneously.
+ */
+ ret = kill_ioctx(current->mm, ioctx, &requests_done);
percpu_ref_put(&ioctx->users);
- return 0;
+
+ /* Wait until all IO for the context are done. Otherwise kernel
+ * keep using user-space buffers even if user thinks the context
+ * is destroyed.
+ */
+ if (!ret)
+ wait_for_completion(&requests_done);
+
+ return ret;
}
pr_debug("EINVAL: io_destroy: invalid context id\n");
return -EINVAL;
kiocb = lookup_kiocb(ctx, iocb, key);
if (kiocb)
- ret = kiocb_cancel(ctx, kiocb);
+ ret = kiocb_cancel(kiocb);
else
ret = -EINVAL;