/* numa_scan_seq prevents two threads setting pte_numa */
int numa_scan_seq;
+
+ /*
+ * The first node a task was scheduled on. If a task runs on
+ * a different node than Make PTE Scan Go Now.
+ */
+ int first_nid;
#endif
struct uprobes_state uprobes_state;
};
+/* first nid will either be a valid NID or one of these values */
+#define NUMA_PTE_SCAN_INIT -1
+#define NUMA_PTE_SCAN_ACTIVE -2
+
static inline void mm_init_cpumask(struct mm_struct *mm)
{
#ifdef CONFIG_CPUMASK_OFFSTACK
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
mm->pmd_huge_pte = NULL;
+#endif
+#ifdef CONFIG_NUMA_BALANCING
+ mm->first_nid = NUMA_PTE_SCAN_INIT;
#endif
if (!mm_init(mm, tsk))
goto fail_nomem;
if (p->flags & PF_EXITING)
return;
+ /*
+ * We do not care about task placement until a task runs on a node
+ * other than the first one used by the address space. This is
+ * largely because migrations are driven by what CPU the task
+ * is running on. If it's never scheduled on another node, it'll
+ * not migrate so why bother trapping the fault.
+ */
+ if (mm->first_nid == NUMA_PTE_SCAN_INIT)
+ mm->first_nid = numa_node_id();
+ if (mm->first_nid != NUMA_PTE_SCAN_ACTIVE) {
+ /* Are we running on a new node yet? */
+ if (numa_node_id() == mm->first_nid &&
+ !sched_feat_numa(NUMA_FORCE))
+ return;
+
+ mm->first_nid = NUMA_PTE_SCAN_ACTIVE;
+ }
+
/*
* Reset the scan period if enough time has gone by. Objective is that
* scanning will be reduced if pages are properly placed. As tasks
/*
* Apply the automatic NUMA scheduling policy. Enabled automatically
* at runtime if running on a NUMA machine. Can be controlled via
- * numa_balancing=
+ * numa_balancing=. Allow PTE scanning to be forced on UMA machines
+ * for debugging the core machinery.
*/
#ifdef CONFIG_NUMA_BALANCING
SCHED_FEAT(NUMA, false)
+SCHED_FEAT(NUMA_FORCE, false)
#endif