From df5494a362633e8e72bc1ff72b53b8f05d75f77d Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Tue, 5 Nov 2013 16:54:46 +1100 Subject: [PATCH] kthread: make kthread_create() killable Any user process caller of wait_for_completion(), except the global init process, might be chosen by the OOM killer while waiting for a complete() call by some other process which does memory allocation. A problem like CVE-2012-4398 "kernel: request_module() OOM local DoS" can then happen. When such a caller is chosen by the OOM killer while it is waiting for the completion in TASK_UNINTERRUPTIBLE state, the system will be kept stressed due to memory starvation because the OOM killer cannot kill it. kthread_create() is one such caller, and this patch fixes the problem for kthreadd by making kthread_create() killable - the same approach used for fixing CVE-2012-4398. Signed-off-by: Tetsuo Handa Cc: Oleg Nesterov Acked-by: David Rientjes Signed-off-by: Andrew Morton --- kernel/kthread.c | 73 ++++++++++++++++++++++++++++++++++++------------ mm/bounce.c | 2 -- 2 files changed, 55 insertions(+), 20 deletions(-) diff --git a/kernel/kthread.c b/kernel/kthread.c index 760e86df8c20..b5ae3ee860a9 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -33,7 +33,7 @@ struct kthread_create_info /* Result passed back to kthread_create() from kthreadd. */ struct task_struct *result; - struct completion done; + struct completion *done; struct list_head list; }; @@ -178,6 +178,7 @@ static int kthread(void *_create) struct kthread_create_info *create = _create; int (*threadfn)(void *data) = create->threadfn; void *data = create->data; + struct completion *done; struct kthread self; int ret; @@ -187,10 +188,16 @@ static int kthread(void *_create) init_completion(&self.parked); current->vfork_done = &self.exited; + /* If user was SIGKILLed, I release the structure.
*/ + done = xchg(&create->done, NULL); + if (!done) { + kfree(create); + do_exit(-EINTR); + } /* OK, tell user we're spawned, wait for stop or wakeup */ __set_current_state(TASK_UNINTERRUPTIBLE); create->result = current; - complete(&create->done); + complete(done); schedule(); ret = -EINTR; @@ -223,8 +230,15 @@ static void create_kthread(struct kthread_create_info *create) /* We want our own signal handler (we take no signals by default). */ pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD); if (pid < 0) { + /* If user was SIGKILLed, I release the structure. */ + struct completion *done = xchg(&create->done, NULL); + + if (!done) { + kfree(create); + return; + } create->result = ERR_PTR(pid); - complete(&create->done); + complete(done); } } @@ -255,36 +269,59 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), const char namefmt[], ...) { - struct kthread_create_info create; - - create.threadfn = threadfn; - create.data = data; - create.node = node; - init_completion(&create.done); + DECLARE_COMPLETION_ONSTACK(done); + struct task_struct *task; + struct kthread_create_info *create = kmalloc(sizeof(*create), + GFP_KERNEL); + + if (!create) + return ERR_PTR(-ENOMEM); + create->threadfn = threadfn; + create->data = data; + create->node = node; + create->done = &done; spin_lock(&kthread_create_lock); - list_add_tail(&create.list, &kthread_create_list); + list_add_tail(&create->list, &kthread_create_list); spin_unlock(&kthread_create_lock); wake_up_process(kthreadd_task); - wait_for_completion(&create.done); - - if (!IS_ERR(create.result)) { + /* + * Wait for completion in killable state, for I might be chosen by + * the OOM killer while kthreadd is trying to allocate memory for + * new kernel thread. + */ + if (unlikely(wait_for_completion_killable(&done))) { + /* + * If I was SIGKILLed before kthreadd (or new kernel thread) + * calls complete(), leave the cleanup of this structure to + * that thread. 
+ */ + if (xchg(&create->done, NULL)) + return ERR_PTR(-ENOMEM); + /* + * kthreadd (or new kernel thread) will call complete() + * shortly. + */ + wait_for_completion(&done); + } + task = create->result; + if (!IS_ERR(task)) { static const struct sched_param param = { .sched_priority = 0 }; va_list args; va_start(args, namefmt); - vsnprintf(create.result->comm, sizeof(create.result->comm), - namefmt, args); + vsnprintf(task->comm, sizeof(task->comm), namefmt, args); va_end(args); /* * root may have changed our (kthreadd's) priority or CPU mask. * The kernel thread should not inherit these properties. */ - sched_setscheduler_nocheck(create.result, SCHED_NORMAL, &param); - set_cpus_allowed_ptr(create.result, cpu_all_mask); + sched_setscheduler_nocheck(task, SCHED_NORMAL, &param); + set_cpus_allowed_ptr(task, cpu_all_mask); } - return create.result; + kfree(create); + return task; } EXPORT_SYMBOL(kthread_create_on_node); diff --git a/mm/bounce.c b/mm/bounce.c index 5a7d58fb883b..c9f0a4339a7d 100644 --- a/mm/bounce.c +++ b/mm/bounce.c @@ -204,8 +204,6 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, struct bio_vec *to, *from; unsigned i; - if (force) - goto bounce; bio_for_each_segment(from, *bio_orig, i) if (page_to_pfn(from->bv_page) > queue_bounce_pfn(q)) goto bounce; -- 2.39.5