From f9848081ba2cf98628df165cc2784c32fe9fc9e2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 18 Jul 2012 22:06:47 +0200 Subject: [PATCH] sched, numa, mm/mpol: Add MPOL_F_HOME Add MPOL_F_HOME, to implement multi-stage home node binding. Suggested-by: Andrea Arcangeli Suggested-by: Rik van Riel Signed-off-by: Peter Zijlstra Cc: Paul Turner Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/n/tip-ec1gnZa9xaqwlsSjsfqjyxcl@git.kernel.org Signed-off-by: Ingo Molnar --- include/uapi/linux/mempolicy.h | 1 + mm/mempolicy.c | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h index 6a1baae3775d..a2bcd04a48ff 100644 --- a/include/uapi/linux/mempolicy.h +++ b/include/uapi/linux/mempolicy.h @@ -69,6 +69,7 @@ enum mpol_rebind_step { #define MPOL_F_LOCAL (1 << 1) /* preferred local allocation */ #define MPOL_F_REBINDING (1 << 2) /* identify policies in rebinding */ #define MPOL_F_MOF (1 << 3) /* this policy wants migrate on fault */ +#define MPOL_F_HOME (1 << 4) /* this is the home-node policy */ #endif /* _UAPI_LINUX_MEMPOLICY_H */ diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 97e6d9950670..1ac0479e85a3 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2216,6 +2216,7 @@ static void sp_free(struct sp_node *n) * @page - page to be checked * @vma - vm area where page mapped * @addr - virtual address where page mapped + * @multi - use multi-stage node binding * * Lookup current policy node id for vma,addr and "compare to" page's * node id. @@ -2278,6 +2279,37 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long default: BUG(); } + + /* + * Multi-stage node selection is used in conjunction with a periodic + * migration fault to build a temporal task<->page relation. By + * using a two-stage filter we remove short/unlikely relations. 
+ * + * Using P(p) ~ n_p / n_t as per frequentist probability, we can + * equate a task's usage of a particular page (n_p) per total usage + * of this page (n_t) (in a given time-span) to a probability. + * + * Our periodic faults will then sample this probability and getting + * the same result twice in a row, given these samples are fully + * independent, is then given by P(p)^2, provided our sample period + * is sufficiently short compared to the usage pattern. + * + * This quadratic squishes small probabilities, making it less likely + * we act on an unlikely task<->page relation. + */ + if (pol->flags & MPOL_F_HOME) { + int last_nid; + + /* + * Migrate towards the current node, depends on + * task_numa_placement() details. + */ + polnid = numa_node_id(); + last_nid = page_xchg_last_nid(page, polnid); + if (last_nid != polnid) + goto out; + } + if (curnid != polnid) ret = polnid; out: @@ -2470,7 +2502,7 @@ void __init numa_policy_init(void) preferred_node_policy[nid] = (struct mempolicy) { .refcnt = ATOMIC_INIT(1), .mode = MPOL_PREFERRED, - .flags = MPOL_F_MOF, + .flags = MPOL_F_MOF | MPOL_F_HOME, .v = { .preferred_node = nid, }, }; } -- 2.39.5