sched: Fix a race between __kthread_bind() and sched_setaffinity()

author Peter Zijlstra <peterz@infradead.org>

Fri, 15 May 2015 15:43:34 +0000 (17:43 +0200)

committer Ingo Molnar <mingo@kernel.org>

Wed, 12 Aug 2015 10:06:09 +0000 (12:06 +0200)
author Peter Zijlstra <peterz@infradead.org>
Fri, 15 May 2015 15:43:34 +0000 (17:43 +0200)
committer Ingo Molnar <mingo@kernel.org>
Wed, 12 Aug 2015 10:06:09 +0000 (12:06 +0200)
diff --git a/include/linux/kthread.h b/include/linux/kthread.h

index 13d55206ccf67a1666db7be5c5097f0835e3bf42..869b21dcf503a8220b7ed4628bb642b10867c83e 100644 (file)
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -38,6 +38,7 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
  })
  
  void kthread_bind(struct task_struct *k, unsigned int cpu);
+void kthread_bind_mask(struct task_struct *k, const struct cpumask *mask);
  int kthread_stop(struct task_struct *k);
  bool kthread_should_stop(void);
  bool kthread_should_park(void);
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 44dca5b35de6ae0de970b2494c870e4f28929942..81bb4577274becf86a1bd487995d6e5130f8e839 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2203,13 +2203,6 @@ static inline void calc_load_enter_idle(void) { }
  static inline void calc_load_exit_idle(void) { }
  #endif /* CONFIG_NO_HZ_COMMON */
  
-#ifndef CONFIG_CPUMASK_OFFSTACK
-static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
-{
-       return set_cpus_allowed_ptr(p, &new_mask);
-}
-#endif
-
  /*
   * Do not use outside of architecture code which knows its limitations.
   *
diff --git a/kernel/kthread.c b/kernel/kthread.c

index 10e489c448fe4e934e2c203ca2aa7a8d0679bb5e..7c40a189becc5ed2579177ccace6cabb73f39c6a 100644 (file)
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -325,16 +325,30 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
  }
  EXPORT_SYMBOL(kthread_create_on_node);
  
-static void __kthread_bind(struct task_struct *p, unsigned int cpu, long state)
+static void __kthread_bind_mask(struct task_struct *p, const struct cpumask *mask, long state)
  {
-       /* Must have done schedule() in kthread() before we set_task_cpu */
+       unsigned long flags;
+
         if (!wait_task_inactive(p, state)) {
                 WARN_ON(1);
                 return;
         }
+
         /* It's safe because the task is inactive. */
-       do_set_cpus_allowed(p, cpumask_of(cpu));
+       raw_spin_lock_irqsave(&p->pi_lock, flags);
+       do_set_cpus_allowed(p, mask);
         p->flags |= PF_NO_SETAFFINITY;
+       raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+}
+
+static void __kthread_bind(struct task_struct *p, unsigned int cpu, long state)
+{
+       __kthread_bind_mask(p, cpumask_of(cpu), state);
+}
+
+void kthread_bind_mask(struct task_struct *p, const struct cpumask *mask)
+{
+       __kthread_bind_mask(p, mask, TASK_UNINTERRUPTIBLE);
  }
  
  /**
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index ea6d74345e60ca892707636f5ab1a1e88e958483..2e3b983da836a70bc642489e6779b7e8fbf2433b 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1153,6 +1153,8 @@ static int migration_cpu_stop(void *data)
  
  void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
  {
+       lockdep_assert_held(&p->pi_lock);
+
         if (p->sched_class->set_cpus_allowed)
                 p->sched_class->set_cpus_allowed(p, new_mask);
  
@@ -1169,7 +1171,8 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
   * task must not exit() & deallocate itself prematurely. The
   * call is not atomic; no spinlocks may be held.
   */
-int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
+static int __set_cpus_allowed_ptr(struct task_struct *p,
+                                 const struct cpumask *new_mask, bool check)
  {
         unsigned long flags;
         struct rq *rq;
@@ -1178,6 +1181,15 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
  
         rq = task_rq_lock(p, &flags);
  
+       /*
+        * Must re-check here, to close a race against __kthread_bind(),
+        * sched_setaffinity() is not guaranteed to observe the flag.
+        */
+       if (check && (p->flags & PF_NO_SETAFFINITY)) {
+               ret = -EINVAL;
+               goto out;
+       }
+
         if (cpumask_equal(&p->cpus_allowed, new_mask))
                 goto out;
  
@@ -1214,6 +1226,11 @@ out:
  
         return ret;
  }
+
+int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
+{
+       return __set_cpus_allowed_ptr(p, new_mask, false);
+}
  EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
  
  void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
@@ -1595,6 +1612,15 @@ static void update_avg(u64 *avg, u64 sample)
         s64 diff = sample - *avg;
         *avg += diff >> 3;
  }
+
+#else
+
+static inline int __set_cpus_allowed_ptr(struct task_struct *p,
+                                        const struct cpumask *new_mask, bool check)
+{
+       return set_cpus_allowed_ptr(p, new_mask);
+}
+
  #endif /* CONFIG_SMP */
  
  static void
@@ -4340,7 +4366,7 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
         }
  #endif
  again:
-       retval = set_cpus_allowed_ptr(p, new_mask);
+       retval = __set_cpus_allowed_ptr(p, new_mask, true);
  
         if (!retval) {
                 cpuset_cpus_allowed(p, cpus_allowed);
@@ -4865,7 +4891,8 @@ void init_idle(struct task_struct *idle, int cpu)
         struct rq *rq = cpu_rq(cpu);
         unsigned long flags;
  
-       raw_spin_lock_irqsave(&rq->lock, flags);
+       raw_spin_lock_irqsave(&idle->pi_lock, flags);
+       raw_spin_lock(&rq->lock);
  
         __sched_fork(0, idle);
         idle->state = TASK_RUNNING;
@@ -4891,7 +4918,8 @@ void init_idle(struct task_struct *idle, int cpu)
  #if defined(CONFIG_SMP)
         idle->on_cpu = 1;
  #endif
-       raw_spin_unlock_irqrestore(&rq->lock, flags);
+       raw_spin_unlock(&rq->lock);
+       raw_spin_unlock_irqrestore(&idle->pi_lock, flags);
  
         /* Set the preempt count _outside_ the spinlocks! */
         init_idle_preempt_count(idle, cpu);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c

index 4c4f06176f748616b180254e94eda6bbed7dde25..f5782d5fd196964ba220544be9cb8204f294e79c 100644 (file)
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1714,9 +1714,7 @@ static struct worker *create_worker(struct worker_pool *pool)
                 goto fail;
  
         set_user_nice(worker->task, pool->attrs->nice);
-
-       /* prevent userland from meddling with cpumask of workqueue workers */
-       worker->task->flags |= PF_NO_SETAFFINITY;
+       kthread_bind_mask(worker->task, pool->attrs->cpumask);
  
         /* successful, attach the worker to the pool */
         worker_attach_to_pool(worker, pool);
@@ -3856,7 +3854,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
                 }
  
                 wq->rescuer = rescuer;
-               rescuer->task->flags |= PF_NO_SETAFFINITY;
+               kthread_bind_mask(rescuer->task, cpu_possible_mask);
                 wake_up_process(rescuer->task);
         }
author	Peter Zijlstra <peterz@infradead.org>
	Fri, 15 May 2015 15:43:34 +0000 (17:43 +0200)
committer	Ingo Molnar <mingo@kernel.org>
	Wed, 12 Aug 2015 10:06:09 +0000 (12:06 +0200)
include/linux/kthread.h		patch \| blob \| history
include/linux/sched.h		patch \| blob \| history
kernel/kthread.c		patch \| blob \| history
kernel/sched/core.c		patch \| blob \| history
kernel/workqueue.c		patch \| blob \| history