]> git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
next-20160111/aio
author Stephen Rothwell <sfr@canb.auug.org.au>
Thu, 11 Feb 2016 03:39:49 +0000 (14:39 +1100)
committer Stephen Rothwell <sfr@canb.auug.org.au>
Thu, 11 Feb 2016 03:39:49 +0000 (14:39 +1100)
30 files changed:
drivers/gpu/drm/drm_lock.c
drivers/gpu/drm/ttm/ttm_lock.c
fs/aio.c
fs/attr.c
fs/binfmt_flat.c
fs/fuse/dev.c
fs/internal.h
fs/pipe.c
fs/read_write.c
fs/splice.c
include/linux/aio.h
include/linux/fs.h
include/linux/sched.h
include/uapi/linux/aio_abi.h
init/Kconfig
kernel/auditsc.c
kernel/signal.c
kernel/sysctl.c
mm/filemap.c
net/atm/common.c
net/ax25/af_ax25.c
net/caif/caif_socket.c
net/core/stream.c
net/decnet/af_decnet.c
net/irda/af_irda.c
net/netrom/af_netrom.c
net/rose/af_rose.c
net/sctp/socket.c
net/unix/af_unix.c
net/x25/af_x25.c

index daa2ff12101ba366d82be5b550ba5cf5738a6ff2..3565563ac6335b531f707d06bb62f60de6f39ff3 100644 (file)
@@ -83,7 +83,7 @@ int drm_legacy_lock(struct drm_device *dev, void *data,
                __set_current_state(TASK_INTERRUPTIBLE);
                if (!master->lock.hw_lock) {
                        /* Device has been unregistered */
-                       send_sig(SIGTERM, current, 0);
+                       io_send_sig(SIGTERM);
                        ret = -EINTR;
                        break;
                }
index f154fb1929bd18e300e226d8fe89925b9e8b3754..816be914979b2ce60c07edf834f4951519b759a5 100644 (file)
@@ -68,7 +68,7 @@ static bool __ttm_read_lock(struct ttm_lock *lock)
 
        spin_lock(&lock->lock);
        if (unlikely(lock->kill_takers)) {
-               send_sig(lock->signal, current, 0);
+               io_send_sig(lock->signal);
                spin_unlock(&lock->lock);
                return false;
        }
@@ -101,7 +101,7 @@ static bool __ttm_read_trylock(struct ttm_lock *lock, bool *locked)
 
        spin_lock(&lock->lock);
        if (unlikely(lock->kill_takers)) {
-               send_sig(lock->signal, current, 0);
+               io_send_sig(lock->signal);
                spin_unlock(&lock->lock);
                return false;
        }
@@ -151,7 +151,7 @@ static bool __ttm_write_lock(struct ttm_lock *lock)
 
        spin_lock(&lock->lock);
        if (unlikely(lock->kill_takers)) {
-               send_sig(lock->signal, current, 0);
+               io_send_sig(lock->signal);
                spin_unlock(&lock->lock);
                return false;
        }
index 155f84253f331a4d9d13ffac3d1ea70322b09b11..56bcdf4105f4c76706bb4407cceca49698ef115b 100644 (file)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -48,6 +48,7 @@
 
 #define AIO_RING_MAGIC                 0xa10a10a1
 #define AIO_RING_COMPAT_FEATURES       1
+#define AIO_RING_COMPAT_THREADED       2
 #define AIO_RING_INCOMPAT_FEATURES     0
 struct aio_ring {
        unsigned        id;     /* kernel internal index number */
@@ -154,8 +155,12 @@ struct kioctx {
        struct file             *aio_ring_file;
 
        unsigned                id;
+       struct mm_struct        *mm;
 };
 
+struct aio_kiocb;
+typedef long (*aio_thread_work_fn_t)(struct aio_kiocb *iocb);
+
 /*
  * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either
  * cancelled or completed (this makes a certain amount of sense because
@@ -186,12 +191,29 @@ struct aio_kiocb {
         * this is the underlying eventfd context to deliver events to.
         */
        struct eventfd_ctx      *ki_eventfd;
+
+       struct iov_iter         ki_iter;
+       struct iovec            *ki_iovec;
+       struct iovec            ki_inline_vecs[UIO_FASTIOV];
+
+       // Fields used for threaded aio helper.
+       struct task_struct      *ki_submit_task;
+#if IS_ENABLED(CONFIG_AIO_THREAD)
+       struct task_struct      *ki_cancel_task;
+       unsigned long           ki_data;
+       unsigned long           ki_rlimit_fsize;
+       aio_thread_work_fn_t    ki_work_fn;
+       struct work_struct      ki_work;
+#endif
 };
 
 /*------ sysctl variables----*/
 static DEFINE_SPINLOCK(aio_nr_lock);
 unsigned long aio_nr;          /* current system wide number of aio requests */
 unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */
+#if IS_ENABLED(CONFIG_AIO_THREAD)
+unsigned long aio_auto_threads = 0;    /* Currently disabled by default */
+#endif
 /*----end sysctl variables---*/
 
 static struct kmem_cache       *kiocb_cachep;
@@ -202,6 +224,8 @@ static struct vfsmount *aio_mnt;
 static const struct file_operations aio_ring_fops;
 static const struct address_space_operations aio_ctx_aops;
 
+static void aio_complete(struct kiocb *kiocb, long res, long res2);
+
 static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
 {
        struct qstr this = QSTR_INIT("[aio]", 5);
@@ -518,6 +542,10 @@ static int aio_setup_ring(struct kioctx *ctx)
        ring->head = ring->tail = 0;
        ring->magic = AIO_RING_MAGIC;
        ring->compat_features = AIO_RING_COMPAT_FEATURES;
+#if IS_ENABLED(CONFIG_AIO_THREAD)
+       if (aio_auto_threads & 1)
+               ring->compat_features |= AIO_RING_COMPAT_THREADED;
+#endif
        ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
        ring->header_length = sizeof(struct aio_ring);
        kunmap_atomic(ring);
@@ -568,6 +596,26 @@ static int kiocb_cancel(struct aio_kiocb *kiocb)
        return cancel(&kiocb->common);
 }
 
+struct mm_struct *aio_get_mm(struct kiocb *req)
+{
+       if (req->ki_complete == aio_complete) {
+               struct aio_kiocb *iocb;
+               iocb = container_of(req, struct aio_kiocb, common);
+               return iocb->ki_ctx->mm;
+       }
+       return NULL;
+}
+
+struct task_struct *aio_get_task(struct kiocb *req)
+{
+       if (req->ki_complete == aio_complete) {
+               struct aio_kiocb *iocb;
+               iocb = container_of(req, struct aio_kiocb, common);
+               return iocb->ki_submit_task;
+       }
+       return current;
+}
+
 static void free_ioctx(struct work_struct *work)
 {
        struct kioctx *ctx = container_of(work, struct kioctx, free_work);
@@ -719,6 +767,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
                return ERR_PTR(-ENOMEM);
 
        ctx->max_reqs = nr_events;
+       ctx->mm = mm;
 
        spin_lock_init(&ctx->ctx_lock);
        spin_lock_init(&ctx->completion_lock);
@@ -1011,6 +1060,7 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
        percpu_ref_get(&ctx->reqs);
 
        req->ki_ctx = ctx;
+       req->ki_iovec = req->ki_inline_vecs;
        return req;
 out_put:
        put_reqs_available(ctx, 1);
@@ -1023,6 +1073,10 @@ static void kiocb_free(struct aio_kiocb *req)
                fput(req->common.ki_filp);
        if (req->ki_eventfd != NULL)
                eventfd_ctx_put(req->ki_eventfd);
+       if (req->ki_iovec != req->ki_inline_vecs)
+               kfree(req->ki_iovec);
+       if (req->ki_submit_task)
+               put_task_struct(req->ki_submit_task);
        kmem_cache_free(kiocb_cachep, req);
 }
 
@@ -1269,6 +1323,8 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
 
                if (unlikely(copy_from_user(&ts, timeout, sizeof(ts))))
                        return -EFAULT;
+               if (!timespec_valid(&ts))
+                       return -EINVAL;
 
                until = timespec_to_ktime(ts);
        }
@@ -1396,26 +1452,281 @@ static int aio_setup_vectored_rw(int rw, char __user *buf, size_t len,
                                len, UIO_FASTIOV, iovec, iter);
 }
 
+#if IS_ENABLED(CONFIG_AIO_THREAD)
+/* aio_thread_queue_iocb_cancel_early:
+ *     Early stage cancellation helper function for threaded aios.  This
+ *     is used prior to the iocb being assigned to a worker thread.
+ */
+static int aio_thread_queue_iocb_cancel_early(struct kiocb *iocb)
+{
+       return 0;
+}
+
+/* aio_thread_queue_iocb_cancel:
+ *     Late stage cancellation method for threaded aios.  Once an iocb is
+ *     assigned to a worker thread, we use a fatal signal to interrupt an
+ *     in-progress operation.
+ */
+static int aio_thread_queue_iocb_cancel(struct kiocb *kiocb)
+{
+       struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, common);
+       if (iocb->ki_cancel_task) {
+               force_sig(SIGKILL, iocb->ki_cancel_task);
+               return 0;
+       }
+       return -EAGAIN;
+}
+
+/* aio_thread_fn:
+ *     Entry point for worker to perform threaded aio.  Handles issues
+ *     arising due to cancellation using signals.
+ */
+static void aio_thread_fn(struct work_struct *work)
+{
+       struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, ki_work);
+       kiocb_cancel_fn *old_cancel;
+       long ret;
+
+       iocb->ki_cancel_task = current;
+       current->kiocb = &iocb->common;         /* For io_send_sig(). */
+       BUG_ON(atomic_read(&current->signal->sigcnt) != 1);
+
+       /* Check for early stage cancellation and switch to late stage
+        * cancellation if it has not already occurred.
+        */
+       old_cancel = cmpxchg(&iocb->ki_cancel,
+                            aio_thread_queue_iocb_cancel_early,
+                            aio_thread_queue_iocb_cancel);
+       if (old_cancel != KIOCB_CANCELLED)
+               ret = iocb->ki_work_fn(iocb);
+       else
+               ret = -EINTR;
+
+       current->kiocb = NULL;
+       if (unlikely(ret == -ERESTARTSYS || ret == -ERESTARTNOINTR ||
+                    ret == -ERESTARTNOHAND || ret == -ERESTART_RESTARTBLOCK))
+               ret = -EINTR;
+
+       /* Completion serializes cancellation by taking ctx_lock, so
+        * aio_complete() will not return until after force_sig() in
+        * aio_thread_queue_iocb_cancel().  This should ensure that
+        * the signal is pending before being flushed in this thread.
+        */
+       aio_complete(&iocb->common, ret, 0);
+       if (fatal_signal_pending(current))
+               flush_signals(current);
+}
+
+#define AIO_THREAD_NEED_TASK   0x0001  /* Need aio_kiocb->ki_submit_task */
+
+/* aio_thread_queue_iocb
+ *     Queues an aio_kiocb for dispatch to a worker thread.  Prepares the
+ *     aio_kiocb for cancellation.  The caller must provide a function to
+ *     execute the operation in work_fn.  The flags may be provided as an
+ *     ORed set of AIO_THREAD_xxx values.
+ */
+static ssize_t aio_thread_queue_iocb(struct aio_kiocb *iocb,
+                                    aio_thread_work_fn_t work_fn,
+                                    unsigned flags)
+{
+       INIT_WORK(&iocb->ki_work, aio_thread_fn);
+       iocb->ki_work_fn = work_fn;
+       if (flags & AIO_THREAD_NEED_TASK) {
+               iocb->ki_submit_task = current;
+               get_task_struct(iocb->ki_submit_task);
+       }
+
+       /* Cancellation needs to be always available for operations performed
+        * using helper threads.  Prior to the iocb being assigned to a worker
+        * thread, we need to record that a cancellation has occurred.  We
+        * can do this by having a minimal helper function that is recorded in
+        * ki_cancel.
+        */
+       kiocb_set_cancel_fn(&iocb->common, aio_thread_queue_iocb_cancel_early);
+       queue_work(system_long_wq, &iocb->ki_work);
+       return -EIOCBQUEUED;
+}
+
+static long aio_thread_op_read_iter(struct aio_kiocb *iocb)
+{
+       struct file *filp;
+       long ret;
+
+       use_mm(iocb->ki_ctx->mm);
+       filp = iocb->common.ki_filp;
+
+       if (filp->f_op->read_iter) {
+               struct kiocb sync_kiocb;
+               init_sync_kiocb(&sync_kiocb, filp);
+               sync_kiocb.ki_pos = iocb->common.ki_pos;
+               ret = filp->f_op->read_iter(&sync_kiocb, &iocb->ki_iter);
+       } else if (filp->f_op->read)
+               ret = do_loop_readv_writev(filp, &iocb->ki_iter,
+                                          &iocb->common.ki_pos,
+                                          filp->f_op->read);
+       else
+               ret = -EINVAL;
+       unuse_mm(iocb->ki_ctx->mm);
+       return ret;
+}
+
+ssize_t generic_async_read_iter_non_direct(struct kiocb *iocb,
+                                          struct iov_iter *iter)
+{
+       if ((iocb->ki_flags & IOCB_DIRECT) ||
+           (iocb->ki_complete != aio_complete))
+               return iocb->ki_filp->f_op->read_iter(iocb, iter);
+       return generic_async_read_iter(iocb, iter);
+}
+EXPORT_SYMBOL(generic_async_read_iter_non_direct);
+
+ssize_t generic_async_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+       struct aio_kiocb *req;
+
+       req = container_of(iocb, struct aio_kiocb, common);
+       BUG_ON(iter != &req->ki_iter);
+
+       return aio_thread_queue_iocb(req, aio_thread_op_read_iter,
+                                    AIO_THREAD_NEED_TASK);
+}
+EXPORT_SYMBOL(generic_async_read_iter);
+
+static long aio_thread_op_write_iter(struct aio_kiocb *iocb)
+{
+       u64 saved_rlim_fsize;
+       struct file *filp;
+       long ret;
+
+       use_mm(iocb->ki_ctx->mm);
+       filp = iocb->common.ki_filp;
+       saved_rlim_fsize = rlimit(RLIMIT_FSIZE);
+       current->signal->rlim[RLIMIT_FSIZE].rlim_cur = iocb->ki_rlimit_fsize;
+
+       if (filp->f_op->write_iter) {
+               struct kiocb sync_kiocb;
+               init_sync_kiocb(&sync_kiocb, filp);
+               sync_kiocb.ki_pos = iocb->common.ki_pos;
+               ret = filp->f_op->write_iter(&sync_kiocb, &iocb->ki_iter);
+       } else if (filp->f_op->write)
+               ret = do_loop_readv_writev(filp, &iocb->ki_iter,
+                                          &iocb->common.ki_pos,
+                                          (io_fn_t)filp->f_op->write);
+       else
+               ret = -EINVAL;
+       current->signal->rlim[RLIMIT_FSIZE].rlim_cur = saved_rlim_fsize;
+       unuse_mm(iocb->ki_ctx->mm);
+       return ret;
+}
+
+ssize_t generic_async_write_iter_non_direct(struct kiocb *iocb,
+                                           struct iov_iter *iter)
+{
+       if ((iocb->ki_flags & IOCB_DIRECT) ||
+           (iocb->ki_complete != aio_complete))
+               return iocb->ki_filp->f_op->write_iter(iocb, iter);
+       return generic_async_write_iter(iocb, iter);
+}
+EXPORT_SYMBOL(generic_async_write_iter_non_direct);
+
+ssize_t generic_async_write_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+       struct aio_kiocb *req;
+
+       req = container_of(iocb, struct aio_kiocb, common);
+       BUG_ON(iter != &req->ki_iter);
+       req->ki_rlimit_fsize = rlimit(RLIMIT_FSIZE);
+
+       return aio_thread_queue_iocb(req, aio_thread_op_write_iter,
+                                    AIO_THREAD_NEED_TASK);
+}
+EXPORT_SYMBOL(generic_async_write_iter);
+
+static long aio_thread_op_fsync(struct aio_kiocb *iocb)
+{
+       return vfs_fsync(iocb->common.ki_filp, 0);
+}
+
+static long aio_thread_op_fdatasync(struct aio_kiocb *iocb)
+{
+       return vfs_fsync(iocb->common.ki_filp, 1);
+}
+
+ssize_t generic_async_fsync(struct kiocb *iocb, int datasync)
+{
+       struct aio_kiocb *req;
+
+       BUG_ON(iocb->ki_complete != aio_complete);
+       req = container_of(iocb, struct aio_kiocb, common);
+
+       return aio_thread_queue_iocb(req, datasync ? aio_thread_op_fdatasync
+                                                  : aio_thread_op_fsync, 0);
+}
+EXPORT_SYMBOL(generic_async_fsync);
+
+static long aio_thread_op_poll(struct aio_kiocb *iocb)
+{
+       struct file *file = iocb->common.ki_filp;
+       short events = iocb->ki_data;
+       struct poll_wqueues table;
+       unsigned int mask;
+       ssize_t ret = 0;
+
+       poll_initwait(&table);
+       events |= POLLERR | POLLHUP;
+
+       for (;;) {
+               mask = DEFAULT_POLLMASK;
+               if (file->f_op && file->f_op->poll) {
+                       table.pt._key = events;
+                       mask = file->f_op->poll(file, &table.pt);
+               }
+               /* Mask out unneeded events. */
+               mask &= events;
+               ret = mask;
+               if (mask)
+                       break;
+
+               ret = -EINTR;
+               if (signal_pending(current))
+                       break;
+
+               poll_schedule_timeout(&table, TASK_INTERRUPTIBLE, NULL, 0);
+       }
+
+       poll_freewait(&table);
+       return ret;
+}
+#endif /* IS_ENABLED(CONFIG_AIO_THREAD) */
+
 /*
  * aio_run_iocb:
  *     Performs the initial checks and io submission.
  */
-static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
+static ssize_t aio_run_iocb(struct aio_kiocb *req, unsigned opcode,
                            char __user *buf, size_t len, bool compat)
 {
-       struct file *file = req->ki_filp;
-       ssize_t ret;
+       struct file *file = req->common.ki_filp;
+       ssize_t ret = -EINVAL;
        int rw;
        fmode_t mode;
        rw_iter_op *iter_op;
-       struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
-       struct iov_iter iter;
 
        switch (opcode) {
        case IOCB_CMD_PREAD:
        case IOCB_CMD_PREADV:
                mode    = FMODE_READ;
                rw      = READ;
+               iter_op = file->f_op->async_read_iter;
+               if (iter_op)
+                       goto rw_common;
+#if IS_ENABLED(CONFIG_AIO_THREAD)
+               if ((aio_auto_threads & 1) &&
+                   (file->f_op->read_iter || file->f_op->read)) {
+                       iter_op = generic_async_read_iter;
+                       goto rw_common;
+               }
+#endif
                iter_op = file->f_op->read_iter;
                goto rw_common;
 
@@ -1423,6 +1734,16 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
        case IOCB_CMD_PWRITEV:
                mode    = FMODE_WRITE;
                rw      = WRITE;
+               iter_op = file->f_op->async_write_iter;
+               if (iter_op)
+                       goto rw_common;
+#if IS_ENABLED(CONFIG_AIO_THREAD)
+               if ((aio_auto_threads & 1) &&
+                   (file->f_op->write_iter || file->f_op->write)) {
+                       iter_op = generic_async_write_iter;
+                       goto rw_common;
+               }
+#endif
                iter_op = file->f_op->write_iter;
                goto rw_common;
 rw_common:
@@ -1434,48 +1755,55 @@ rw_common:
 
                if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV)
                        ret = aio_setup_vectored_rw(rw, buf, len,
-                                               &iovec, compat, &iter);
+                                                   &req->ki_iovec, compat,
+                                                   &req->ki_iter);
                else {
-                       ret = import_single_range(rw, buf, len, iovec, &iter);
-                       iovec = NULL;
+                       ret = import_single_range(rw, buf, len, req->ki_iovec,
+                                                 &req->ki_iter);
                }
                if (!ret)
-                       ret = rw_verify_area(rw, file, &req->ki_pos,
-                                            iov_iter_count(&iter));
-               if (ret < 0) {
-                       kfree(iovec);
+                       ret = rw_verify_area(rw, file, &req->common.ki_pos,
+                                            iov_iter_count(&req->ki_iter));
+               if (ret < 0)
                        return ret;
-               }
-
-               len = ret;
 
                if (rw == WRITE)
                        file_start_write(file);
 
-               ret = iter_op(req, &iter);
+               ret = iter_op(&req->common, &req->ki_iter);
 
                if (rw == WRITE)
                        file_end_write(file);
-               kfree(iovec);
                break;
 
        case IOCB_CMD_FDSYNC:
-               if (!file->f_op->aio_fsync)
-                       return -EINVAL;
-
-               ret = file->f_op->aio_fsync(req, 1);
+               if (file->f_op->aio_fsync)
+                       ret = file->f_op->aio_fsync(&req->common, 1);
+#if IS_ENABLED(CONFIG_AIO_THREAD)
+               else if (file->f_op->fsync && (aio_auto_threads & 1))
+                       ret = generic_async_fsync(&req->common, 1);
+#endif
                break;
 
        case IOCB_CMD_FSYNC:
-               if (!file->f_op->aio_fsync)
-                       return -EINVAL;
+               if (file->f_op->aio_fsync)
+                       ret = file->f_op->aio_fsync(&req->common, 0);
+#if IS_ENABLED(CONFIG_AIO_THREAD)
+               else if (file->f_op->fsync && (aio_auto_threads & 1))
+                       ret = generic_async_fsync(&req->common, 0);
+#endif
+               break;
 
-               ret = file->f_op->aio_fsync(req, 0);
+       case IOCB_CMD_POLL:
+#if IS_ENABLED(CONFIG_AIO_THREAD)
+               if (aio_auto_threads & 1)
+                       ret = aio_thread_queue_iocb(req, aio_thread_op_poll, 0);
+#endif
                break;
 
        default:
                pr_debug("EINVAL: no operation provided\n");
-               return -EINVAL;
+               break;
        }
 
        if (ret != -EIOCBQUEUED) {
@@ -1487,7 +1815,7 @@ rw_common:
                             ret == -ERESTARTNOHAND ||
                             ret == -ERESTART_RESTARTBLOCK))
                        ret = -EINTR;
-               aio_complete(req, ret, 0);
+               aio_complete(&req->common, ret, 0);
        }
 
        return 0;
@@ -1554,7 +1882,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
        req->ki_user_iocb = user_iocb;
        req->ki_user_data = iocb->aio_data;
 
-       ret = aio_run_iocb(&req->common, iocb->aio_lio_opcode,
+       ret = aio_run_iocb(req, iocb->aio_lio_opcode,
                           (char __user *)(unsigned long)iocb->aio_buf,
                           iocb->aio_nbytes,
                           compat);
index 25b24d0f6c8810c86ef79319e091fc65231733d0..07b8834c6a0fbe7cda39a47b51b01cea49d21c2f 100644 (file)
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -118,7 +118,7 @@ int inode_newsize_ok(const struct inode *inode, loff_t offset)
 
        return 0;
 out_sig:
-       send_sig(SIGXFSZ, current, 0);
+       io_send_sig(SIGXFSZ);
 out_big:
        return -EFBIG;
 }
index f723cd3a455cbde47e5be8cabb798e0725eefb8e..51cf839059878cb03ceb5c72be25d9c28fca48e3 100644 (file)
@@ -373,7 +373,7 @@ calc_reloc(unsigned long r, struct lib_info *p, int curid, int internalp)
 
 failed:
        printk(", killing %s!\n", current->comm);
-       send_sig(SIGSEGV, current, 0);
+       io_send_sig(SIGSEGV);
 
        return RELOC_FAILED;
 }
index ebb5e37455a07acd86f5fbf1b76d474e99b937fb..20ffc529c5d4711ad0fbdb4b94f5cefb1bdfc1c7 100644 (file)
@@ -1391,7 +1391,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
        pipe_lock(pipe);
 
        if (!pipe->readers) {
-               send_sig(SIGPIPE, current, 0);
+               io_send_sig(SIGPIPE);
                if (!ret)
                        ret = -EPIPE;
                goto out_unlock;
index b71deeecea17939d85bce1c54f30da77f1c4d8a1..ae322b8e43a8f4aeff432bcb6b010a6355dfe9e1 100644 (file)
@@ -16,6 +16,9 @@ struct path;
 struct mount;
 struct shrink_control;
 
+typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
+typedef ssize_t (*iter_fn_t)(struct kiocb *, struct iov_iter *);
+
 /*
  * block_dev.c
  */
@@ -135,6 +138,9 @@ extern long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc);
  * read_write.c
  */
 extern int rw_verify_area(int, struct file *, const loff_t *, size_t);
+extern ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
+                loff_t *ppos, io_fn_t fn);
+
 
 /*
  * pipe.c
index ab8dad3ccb6a8bac13a2eab25eeb2b68791d60f5..c1c1b2674f115db4409bc308238d937b180495fc 100644 (file)
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -357,7 +357,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
        __pipe_lock(pipe);
 
        if (!pipe->readers) {
-               send_sig(SIGPIPE, current, 0);
+               io_send_sig(SIGPIPE);
                ret = -EPIPE;
                goto out;
        }
@@ -392,7 +392,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
                int bufs;
 
                if (!pipe->readers) {
-                       send_sig(SIGPIPE, current, 0);
+                       io_send_sig(SIGPIPE);
                        if (!ret)
                                ret = -EPIPE;
                        break;
index 324ec271cc4e64868c34e3ff2f28ac2c0542475e..aeaeb28db75b191a2b1f3219c07f4f5a1a758b6f 100644 (file)
@@ -22,9 +22,6 @@
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 
-typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
-typedef ssize_t (*iter_fn_t)(struct kiocb *, struct iov_iter *);
-
 const struct file_operations generic_ro_fops = {
        .llseek         = generic_file_llseek,
        .read_iter      = generic_file_read_iter,
@@ -707,7 +704,7 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
 }
 
 /* Do it by hand, with file-ops */
-static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
+ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
                loff_t *ppos, io_fn_t fn)
 {
        ssize_t ret = 0;
index 82bc0d64fc38d538b6482adf7d5d279318405069..62e1946ead3ed715ff4219c610f122f1bb8951c9 100644 (file)
@@ -193,7 +193,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
 
        for (;;) {
                if (!pipe->readers) {
-                       send_sig(SIGPIPE, current, 0);
+                       io_send_sig(SIGPIPE);
                        if (!ret)
                                ret = -EPIPE;
                        break;
@@ -1767,7 +1767,7 @@ static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
 
        while (pipe->nrbufs >= pipe->buffers) {
                if (!pipe->readers) {
-                       send_sig(SIGPIPE, current, 0);
+                       io_send_sig(SIGPIPE);
                        ret = -EPIPE;
                        break;
                }
@@ -1818,7 +1818,7 @@ retry:
 
        do {
                if (!opipe->readers) {
-                       send_sig(SIGPIPE, current, 0);
+                       io_send_sig(SIGPIPE);
                        if (!ret)
                                ret = -EPIPE;
                        break;
@@ -1922,7 +1922,7 @@ static int link_pipe(struct pipe_inode_info *ipipe,
 
        do {
                if (!opipe->readers) {
-                       send_sig(SIGPIPE, current, 0);
+                       io_send_sig(SIGPIPE);
                        if (!ret)
                                ret = -EPIPE;
                        break;
index 9eb42dbc5582ace99283629f0905861ac820c7d5..9aa576b97b6e1a4c1ee883738bebad166308d80d 100644 (file)
@@ -17,6 +17,11 @@ extern void exit_aio(struct mm_struct *mm);
 extern long do_io_submit(aio_context_t ctx_id, long nr,
                         struct iocb __user *__user *iocbpp, bool compat);
 void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel);
+struct mm_struct *aio_get_mm(struct kiocb *req);
+struct task_struct *aio_get_task(struct kiocb *req);
+struct iov_iter;
+ssize_t generic_async_read_iter(struct kiocb *iocb, struct iov_iter *iter);
+ssize_t generic_async_write_iter(struct kiocb *iocb, struct iov_iter *iter);
 #else
 static inline void exit_aio(struct mm_struct *mm) { }
 static inline long do_io_submit(aio_context_t ctx_id, long nr,
@@ -24,10 +29,13 @@ static inline long do_io_submit(aio_context_t ctx_id, long nr,
                                bool compat) { return 0; }
 static inline void kiocb_set_cancel_fn(struct kiocb *req,
                                       kiocb_cancel_fn *cancel) { }
+static inline struct mm_struct *aio_get_mm(struct kiocb *req) { return NULL; }
+static inline struct task_struct *aio_get_task(struct kiocb *req) { return current; }
 #endif /* CONFIG_AIO */
 
 /* for sysctl: */
 extern unsigned long aio_nr;
 extern unsigned long aio_max_nr;
+extern unsigned long aio_auto_threads;
 
 #endif /* __LINUX__AIO_H */
index a401dc8ad85d3174c1c0ca992f55b7b30d6e3358..7e798e2aa4647fb3cfd5c50c3473e6e885b2cafb 100644 (file)
@@ -1631,6 +1631,8 @@ struct file_operations {
        ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
        ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
        ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
+       ssize_t (*async_read_iter) (struct kiocb *, struct iov_iter *);
+       ssize_t (*async_write_iter) (struct kiocb *, struct iov_iter *);
        int (*iterate) (struct file *, struct dir_context *);
        unsigned int (*poll) (struct file *, struct poll_table_struct *);
        long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
index 98db9e2e04fe836ef2248f14a6a81b56a828302c..34a4f0728794953e3c73c46f407fa902fdb21a2d 100644 (file)
@@ -1654,6 +1654,11 @@ struct task_struct {
 /* journalling filesystem info */
        void *journal_info;
 
+/* threaded aio info */
+#if IS_ENABLED(CONFIG_AIO)
+       struct kiocb *kiocb;
+#endif
+
 /* stacked block device info */
        struct bio_list *bio_list;
 
@@ -2524,6 +2529,7 @@ extern __must_check bool do_notify_parent(struct task_struct *, int);
 extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent);
 extern void force_sig(int, struct task_struct *);
 extern int send_sig(int, struct task_struct *, int);
+extern int io_send_sig(int signal);
 extern int zap_other_threads(struct task_struct *p);
 extern struct sigqueue *sigqueue_alloc(void);
 extern void sigqueue_free(struct sigqueue *);
index bb2554f7fbd12a677015d48703ccf681357e84c6..7639fb14a9079066e5cf30482bdf578de5c5a5a1 100644 (file)
@@ -39,8 +39,8 @@ enum {
        IOCB_CMD_FDSYNC = 3,
        /* These two are experimental.
         * IOCB_CMD_PREADX = 4,
-        * IOCB_CMD_POLL = 5,
         */
+       IOCB_CMD_POLL = 5,
        IOCB_CMD_NOOP = 6,
        IOCB_CMD_PREADV = 7,
        IOCB_CMD_PWRITEV = 8,
index 22320804fbafdc57b7228b68392199657a7c2f49..76b72124f6196b35b5be1adb2b58168c42283df3 100644 (file)
@@ -1550,6 +1550,19 @@ config AIO
          by some high performance threaded applications. Disabling
          this option saves about 7k.
 
+config AIO_THREAD
+       bool "Support kernel thread based AIO" if EXPERT
+       depends on AIO
+       default y
+       help
+          This option enables using kernel thread based AIO which implements
+          asynchronous operations using the kernel's queue_work() mechanism.
+          The automatic use of threads for async operations is currently
+          disabled by default until the security implications for usage
+          are completely understood.  This functionality can be enabled at
+          runtime if this option is enabled by setting the fs.aio-auto-threads
+          sysctl to one.
+
 config ADVISE_SYSCALLS
        bool "Enable madvise/fadvise syscalls" if EXPERT
        default y
index 195ffaee50b984c690409023db023e1301144771..d7acaed84a0d224da80dbf00c98b0bef3a2f808c 100644 (file)
@@ -1025,7 +1025,7 @@ static int audit_log_single_execve_arg(struct audit_context *context,
         * any.
         */
        if (WARN_ON_ONCE(len < 0 || len > MAX_ARG_STRLEN - 1)) {
-               send_sig(SIGKILL, current, 0);
+               io_send_sig(SIGKILL);
                return -1;
        }
 
@@ -1043,7 +1043,7 @@ static int audit_log_single_execve_arg(struct audit_context *context,
                 */
                if (ret) {
                        WARN_ON(1);
-                       send_sig(SIGKILL, current, 0);
+                       io_send_sig(SIGKILL);
                        return -1;
                }
                buf[to_send] = '\0';
@@ -1107,7 +1107,7 @@ static int audit_log_single_execve_arg(struct audit_context *context,
                        ret = 0;
                if (ret) {
                        WARN_ON(1);
-                       send_sig(SIGKILL, current, 0);
+                       io_send_sig(SIGKILL);
                        return -1;
                }
                buf[to_send] = '\0';
index 0508544c8ced0d96913905dc53af68f38b6ee618..3256c7e1c43aed014593514a195fdcb32ee01651 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/compat.h>
 #include <linux/cn_proc.h>
 #include <linux/compiler.h>
+#include <linux/aio.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/signal.h>
@@ -1422,6 +1423,25 @@ int send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
        return do_send_sig_info(sig, info, p, false);
 }
 
+/* io_send_sig: send a signal caused by an i/o operation
+ *
+ * Use this helper when a signal is being sent to the task that is responsible
+ * for a user initiated operation.  Most commonly this is used to send signals
+ * like SIGPIPE or SIGXFSZ that are the result of attempting a read or write
+ * operation.  This is used by aio to direct a signal to the correct task in
+ * the case of async operations.
+ */
+int io_send_sig(int sig)
+{
+       struct task_struct *task = current;
+#if IS_ENABLED(CONFIG_AIO)
+       if (task->kiocb)
+               task = aio_get_task(task->kiocb);
+#endif
+       return send_sig(sig, task, 0);
+}
+EXPORT_SYMBOL(io_send_sig);
+
 #define __si_special(priv) \
        ((priv) ? SEND_SIG_PRIV : SEND_SIG_NOINFO)
 
index f5102fabef7f525f6c79d66756336c262e465caa..f930ec2974f8e2143b30f85cfe07fb6084e5037a 100644 (file)
@@ -1710,6 +1710,15 @@ static struct ctl_table fs_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_doulongvec_minmax,
        },
+#if IS_ENABLED(CONFIG_AIO_THREAD)
+       {
+               .procname       = "aio-auto-threads",
+               .data           = &aio_auto_threads,
+               .maxlen         = sizeof(aio_auto_threads),
+               .mode           = 0644,
+               .proc_handler   = proc_doulongvec_minmax,
+       },
+#endif
 #endif /* CONFIG_AIO */
 #ifdef CONFIG_INOTIFY_USER
        {
index bc943867d68c68dab4109fe715c8d37d6c72757a..0720c9d193653b151eb047999cd897272b788ac9 100644 (file)
@@ -2424,7 +2424,7 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
 
        if (limit != RLIM_INFINITY) {
                if (iocb->ki_pos >= limit) {
-                       send_sig(SIGXFSZ, current, 0);
+                       io_send_sig(SIGXFSZ);
                        return -EFBIG;
                }
                iov_iter_truncate(from, limit - (unsigned long)pos);
@@ -2435,8 +2435,10 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
         */
        if (unlikely(pos + iov_iter_count(from) > MAX_NON_LFS &&
                                !(file->f_flags & O_LARGEFILE))) {
-               if (pos >= MAX_NON_LFS)
+               if (pos >= MAX_NON_LFS) {
+                       io_send_sig(SIGXFSZ);
                        return -EFBIG;
+               }
                iov_iter_truncate(from, MAX_NON_LFS - (unsigned long)pos);
        }
 
index 6dc12305799e45180bc8091ee2a99f11e3400fa7..2a480a4e39efef8b431ae736d1975bef3ae7329e 100644 (file)
@@ -591,7 +591,7 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size)
            test_bit(ATM_VF_CLOSE, &vcc->flags) ||
            !test_bit(ATM_VF_READY, &vcc->flags)) {
                error = -EPIPE;
-               send_sig(SIGPIPE, current, 0);
+               io_send_sig(SIGPIPE);
                goto out;
        }
        if (!size) {
@@ -620,7 +620,7 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size)
                    test_bit(ATM_VF_CLOSE, &vcc->flags) ||
                    !test_bit(ATM_VF_READY, &vcc->flags)) {
                        error = -EPIPE;
-                       send_sig(SIGPIPE, current, 0);
+                       io_send_sig(SIGPIPE);
                        break;
                }
                prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
index fbd0acf80b13236bd8c768bc8bf5d69d6a7e7125..8dfd84c6441bb90d264849da3b67cbe51c0af829 100644 (file)
@@ -1457,7 +1457,7 @@ static int ax25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
        }
 
        if (sk->sk_shutdown & SEND_SHUTDOWN) {
-               send_sig(SIGPIPE, current, 0);
+               io_send_sig(SIGPIPE);
                err = -EPIPE;
                goto out;
        }
index aa209b1066c9699a12510055e70ae79b33ee05b7..ba8d8e24d74bba0bfc2dd5181e8de2fcd6855717 100644 (file)
@@ -663,7 +663,7 @@ static int caif_stream_sendmsg(struct socket *sock, struct msghdr *msg,
 
 pipe_err:
        if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
-               send_sig(SIGPIPE, current, 0);
+               io_send_sig(SIGPIPE);
        err = -EPIPE;
 out_err:
        return sent ? : err;
index 159516a11b7e84f7e64e0acfa2dd55dab82307bd..3f5176cd056479655a23462b16aa6042c709fd6e 100644 (file)
@@ -182,7 +182,7 @@ int sk_stream_error(struct sock *sk, int flags, int err)
        if (err == -EPIPE)
                err = sock_error(sk) ? : -EPIPE;
        if (err == -EPIPE && !(flags & MSG_NOSIGNAL))
-               send_sig(SIGPIPE, current, 0);
+               io_send_sig(SIGPIPE);
        return err;
 }
 EXPORT_SYMBOL(sk_stream_error);
index 13d6b1a6e0fc2b0730827d93d154d6464a3e58ec..47ca4040ade8caa503709df417908ce285ad3fd7 100644 (file)
@@ -1954,7 +1954,7 @@ static int dn_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
        if (sk->sk_shutdown & SEND_SHUTDOWN) {
                err = -EPIPE;
                if (!(flags & MSG_NOSIGNAL))
-                       send_sig(SIGPIPE, current, 0);
+                       io_send_sig(SIGPIPE);
                goto out_err;
        }
 
index 923abd6b3064074f39f84644b2d3d1f068403f51..f9c6b55419adf0360953c8b9407170d210049656 100644 (file)
@@ -1539,7 +1539,7 @@ static int irda_sendmsg_dgram(struct socket *sock, struct msghdr *msg,
        lock_sock(sk);
 
        if (sk->sk_shutdown & SEND_SHUTDOWN) {
-               send_sig(SIGPIPE, current, 0);
+               io_send_sig(SIGPIPE);
                err = -EPIPE;
                goto out;
        }
@@ -1622,7 +1622,7 @@ static int irda_sendmsg_ultra(struct socket *sock, struct msghdr *msg,
 
        err = -EPIPE;
        if (sk->sk_shutdown & SEND_SHUTDOWN) {
-               send_sig(SIGPIPE, current, 0);
+               io_send_sig(SIGPIPE);
                goto out;
        }
 
index ed212ffc1d9d3159ccbf4b8ac5681606b8446069..b5eaecc71254b63a049fd13cacee57d17aae7d2e 100644 (file)
@@ -1044,7 +1044,7 @@ static int nr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
        }
 
        if (sk->sk_shutdown & SEND_SHUTDOWN) {
-               send_sig(SIGPIPE, current, 0);
+               io_send_sig(SIGPIPE);
                err = -EPIPE;
                goto out;
        }
index 129d357d27229f4f819c2e104c913cd04be4a28e..954725c2148647882e926468c46ae1a574154e34 100644 (file)
@@ -1065,7 +1065,7 @@ static int rose_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
                return -EADDRNOTAVAIL;
 
        if (sk->sk_shutdown & SEND_SHUTDOWN) {
-               send_sig(SIGPIPE, current, 0);
+               io_send_sig(SIGPIPE);
                return -EPIPE;
        }
 
index de8eabf03eed9b904afd78e9af6f2ab0b172cd2b..311c0cbb4a8d23f9488210e2cbae573b8f6e2e78 100644 (file)
@@ -1554,7 +1554,7 @@ static int sctp_error(struct sock *sk, int flags, int err)
        if (err == -EPIPE)
                err = sock_error(sk) ? : -EPIPE;
        if (err == -EPIPE && !(flags & MSG_NOSIGNAL))
-               send_sig(SIGPIPE, current, 0);
+               io_send_sig(SIGPIPE);
        return err;
 }
 
index a6d6654697779060ecdeeb34c75fcfe7e618f433..dd5b22a3fc91e54e403aa7829c79d0e45b0ccb0c 100644 (file)
@@ -1936,7 +1936,7 @@ pipe_err_free:
        kfree_skb(skb);
 pipe_err:
        if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
-               send_sig(SIGPIPE, current, 0);
+               io_send_sig(SIGPIPE);
        err = -EPIPE;
 out_err:
        scm_destroy(&scm);
@@ -2053,7 +2053,7 @@ err_unlock:
 err:
        kfree_skb(newskb);
        if (send_sigpipe && !(flags & MSG_NOSIGNAL))
-               send_sig(SIGPIPE, current, 0);
+               io_send_sig(SIGPIPE);
        if (!init_scm)
                scm_destroy(&scm);
        return err;
index a750f330b8ddca8e0ea3e5d8eb88e68f6038e149..102dd0309fc87f1ea115ea8582920a83c7de4716 100644 (file)
@@ -1103,7 +1103,7 @@ static int x25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 
        rc = -EPIPE;
        if (sk->sk_shutdown & SEND_SHUTDOWN) {
-               send_sig(SIGPIPE, current, 0);
+               io_send_sig(SIGPIPE);
                goto out;
        }