sched, numa, mm: Introduce tsk_home_node()

author Peter Zijlstra <a.p.zijlstra@chello.nl>

Sat, 3 Mar 2012 16:05:16 +0000 (17:05 +0100)

committer Ingo Molnar <mingo@kernel.org>

Sun, 28 Oct 2012 16:31:08 +0000 (17:31 +0100)
author Peter Zijlstra <a.p.zijlstra@chello.nl>
Sat, 3 Mar 2012 16:05:16 +0000 (17:05 +0100)
committer Ingo Molnar <mingo@kernel.org>
Sun, 28 Oct 2012 16:31:08 +0000 (17:31 +0100)
diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig

index cb8f9920f4dd873b69050e81c4a46fc7e40f5007..1210cc74f86292a6a3b79b025d0519335821226c 100644 (file)
--- a/arch/sh/mm/Kconfig
+++ b/arch/sh/mm/Kconfig
@@ -111,6 +111,7 @@ config VSYSCALL
  config NUMA
         bool "Non Uniform Memory Access (NUMA) Support"
         depends on MMU && SYS_SUPPORTS_NUMA && EXPERIMENTAL
+       select EMBEDDED_NUMA
         default n
         help
           Some SH systems have many various memories scattered around
diff --git a/include/linux/init_task.h b/include/linux/init_task.h

index 6d087c5f57f79e5a22ffa9a440061b5079838f53..b4405b6077749900e1cc2fb9ff9b15b5ace4161b 100644 (file)
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -143,6 +143,13 @@ extern struct task_group root_task_group;
  
  #define INIT_TASK_COMM "swapper"
  
+#ifdef CONFIG_SCHED_NUMA
+# define INIT_TASK_NUMA(tsk)                                           \
+       .node = -1,
+#else
+# define INIT_TASK_NUMA(tsk)
+#endif
+
  /*
   *  INIT_TASK is used to set up the first task table, touch at
   * your own risk!. Base=0, limit=0x1fffff (=2MB)
@@ -210,6 +217,7 @@ extern struct task_group root_task_group;
         INIT_TRACE_RECURSION                                            \
         INIT_TASK_RCU_PREEMPT(tsk)                                      \
         INIT_CPUSET_SEQ                                                 \
+       INIT_TASK_NUMA(tsk)                                             \
  }
  
  
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 0dd42a02df2e851e0847df640dd559f3689c36ed..fad12390a447f277d3573c4e2837d3aff2d020c8 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1478,6 +1478,9 @@ struct task_struct {
         struct mempolicy *mempolicy;    /* Protected by alloc_lock */
         short il_next;
         short pref_node_fork;
+#endif
+#ifdef CONFIG_SCHED_NUMA
+       int node;
  #endif
         struct rcu_head rcu;
  
@@ -1553,6 +1556,15 @@ struct task_struct {
  /* Future-safe accessor for struct task_struct's cpus_allowed. */
  #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
  
+static inline int tsk_home_node(struct task_struct *p)
+{
+#ifdef CONFIG_SCHED_NUMA
+       return p->node;
+#else
+       return -1;
+#endif
+}
+
  /*
   * Priority of a process goes from 0..MAX_PRIO-1, valid RT
   * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
diff --git a/init/Kconfig b/init/Kconfig

index 6fdd6e339326a079a2b6b5a3a1d2940b2bd81bd8..c8d1f593187b6c2c42e2cc99e9943dc90dd0008b 100644 (file)
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -696,6 +696,20 @@ config LOG_BUF_SHIFT
  config HAVE_UNSTABLE_SCHED_CLOCK
         bool
  
+#
+# For architectures that (ab)use NUMA to represent different memory regions
+# all cpu-local but of different latencies, such as SuperH.
+#
+config EMBEDDED_NUMA
+       bool
+
+config SCHED_NUMA
+       bool "Memory placement aware NUMA scheduler"
+       default n
+       depends on SMP && NUMA && MIGRATION && !EMBEDDED_NUMA
+       help
+         This option adds support for automatic NUMA aware memory/task placement.
+
  menuconfig CGROUPS
         boolean "Control Group support"
         depends on EVENTFD
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index 2d8927fda712f5ee1e19f1fe364fbad557d8a736..76f939b01d54e5ca9879482c74f3be81eda3b4af 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5959,6 +5959,42 @@ static struct sched_domain_topology_level default_topology[] = {
  
  static struct sched_domain_topology_level *sched_domain_topology = default_topology;
  
+#ifdef CONFIG_SCHED_NUMA
+
+/*
+ * Requeues a task ensuring it's on the right load-balance list so
+ * that it might get migrated to its new home.
+ *
+ * Note that we cannot actively migrate ourselves since our callers
+ * can be from atomic context. We rely on the regular load-balance
+ * mechanisms to move us around -- it's all preference anyway.
+ */
+void sched_setnode(struct task_struct *p, int node)
+{
+       unsigned long flags;
+       int on_rq, running;
+       struct rq *rq;
+
+       rq = task_rq_lock(p, &flags);
+       on_rq = p->on_rq;
+       running = task_current(rq, p);
+
+       if (on_rq)
+               dequeue_task(rq, p, 0);
+       if (running)
+               p->sched_class->put_prev_task(rq, p);
+
+       p->node = node;
+
+       if (running)
+               p->sched_class->set_curr_task(rq);
+       if (on_rq)
+               enqueue_task(rq, p, 0);
+       task_rq_unlock(rq, p, &flags);
+}
+
+#endif /* CONFIG_SCHED_NUMA */
+
  #ifdef CONFIG_NUMA
  
  static int sched_domains_numa_levels;
author	Peter Zijlstra <a.p.zijlstra@chello.nl>
	Sat, 3 Mar 2012 16:05:16 +0000 (17:05 +0100)
committer	Ingo Molnar <mingo@kernel.org>
	Sun, 28 Oct 2012 16:31:08 +0000 (17:31 +0100)
arch/sh/mm/Kconfig		patch \| blob \| history
include/linux/init_task.h		patch \| blob \| history
include/linux/sched.h		patch \| blob \| history
init/Kconfig		patch \| blob \| history
kernel/sched/core.c		patch \| blob \| history