Merge remote-tracking branch 'imx-mxs/for-next'

[karo-tx-linux.git] / mm / mempolicy.c
diff --git a/mm/mempolicy.c b/mm/mempolicy.c

index 04729647f359c7c1fa3a91058cc1044c0db2df8d..71cb253368cb72b1bf99d390ca9360ccc277c29b 100644 (file)
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1679,6 +1679,30 @@ struct mempolicy *get_vma_policy(struct task_struct *task,
         return pol;
  }
  
+/*
+ * vma_policy_mof - test whether the policy selected for @vma has MPOL_F_MOF set
+ * @task: task passed to get_task_policy() to obtain the fallback policy
+ * @vma: VMA to inspect; may be NULL, in which case only the fallback is used
+ *
+ * Policy selection, in order:
+ *   1. vma->vm_ops->get_policy(), queried at vma->vm_start, if the hook exists.
+ *      This path returns immediately; a NULL result yields false and does
+ *      not fall through to the later candidates.
+ *   2. vma->vm_policy, if non-NULL.
+ *   3. Whatever get_task_policy(@task) returned.
+ *   4. default_policy, if none of the above produced a policy.
+ *
+ * Returns true if the selected policy's flags contain MPOL_F_MOF.
+ */
+bool vma_policy_mof(struct task_struct *task, struct vm_area_struct *vma)
+{
+       /* Start from the task-level policy; a VMA-level policy overrides it. */
+       struct mempolicy *pol = get_task_policy(task);
+       if (vma) {
+               if (vma->vm_ops && vma->vm_ops->get_policy) {
+                       bool ret = false;
+
+                       pol = vma->vm_ops->get_policy(vma, vma->vm_start);
+                       if (pol && (pol->flags & MPOL_F_MOF))
+                               ret = true;
+                       /*
+                        * NOTE(review): presumably drops a reference that
+                        * get_policy() may have taken; the flag is sampled
+                        * into 'ret' beforehand so 'pol' is not touched after
+                        * this call -- confirm against mpol_cond_put().
+                        */
+                       mpol_cond_put(pol);
+
+                       return ret;
+               } else if (vma->vm_policy) {
+                       pol = vma->vm_policy;
+               }
+       }
+
+       /* Neither the VMA nor the task supplied a policy: use default_policy. */
+       if (!pol)
+               return default_policy.flags & MPOL_F_MOF;
+
+       return pol->flags & MPOL_F_MOF;
+}
+
  static int apply_policy_zone(struct mempolicy *policy, enum zone_type zone)
  {
         enum zone_type dynamic_policy_zone = policy_zone;
@@ -2277,6 +2301,35 @@ static void sp_free(struct sp_node *n)
         kmem_cache_free(sn_cache, n);
  }
  
+#ifdef CONFIG_NUMA_BALANCING
+/*
+ * numa_migrate_deferred - decide whether a migration should be skipped for @p
+ * @p: task whose numa_migrate_deferred counter is consulted
+ * @last_cpupid: cpupid value checked against @p via cpupid_match_pid()
+ *
+ * Returns false when cpupid_match_pid(@p, @last_cpupid) is true. Otherwise,
+ * if p->numa_migrate_deferred is non-zero, it is decremented and true is
+ * returned; once the counter is zero the function returns false.
+ */
+static bool numa_migrate_deferred(struct task_struct *p, int last_cpupid)
+{
+       /* Never defer a private fault */
+       if (cpupid_match_pid(p, last_cpupid))
+               return false;
+
+       /* Each deferral consumes one unit of the per-task budget. */
+       if (p->numa_migrate_deferred) {
+               p->numa_migrate_deferred--;
+               return true;
+       }
+       return false;
+}
+
+/*
+ * defer_numa_migrate - reload @p's deferral counter
+ * @p: task whose numa_migrate_deferred field is set
+ *
+ * Sets p->numa_migrate_deferred to the current value of
+ * sysctl_numa_balancing_migrate_deferred.
+ */
+static inline void defer_numa_migrate(struct task_struct *p)
+{
+       p->numa_migrate_deferred = sysctl_numa_balancing_migrate_deferred;
+}
+#else
+/* !CONFIG_NUMA_BALANCING stub: always returns false (never defers). */
+static inline bool numa_migrate_deferred(struct task_struct *p, int last_cpupid)
+{
+       return false;
+}
+
+/* !CONFIG_NUMA_BALANCING stub: does nothing. */
+static inline void defer_numa_migrate(struct task_struct *p)
+{
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
  /**
   * mpol_misplaced - check whether current page node is valid in policy
   *
@@ -2300,6 +2353,8 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
         struct zone *zone;
         int curnid = page_to_nid(page);
         unsigned long pgoff;
+       int thiscpu = raw_smp_processor_id();
+       int thisnid = cpu_to_node(thiscpu);
         int polnid = -1;
         int ret = -1;
  
@@ -2348,9 +2403,11 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
  
         /* Migrate the page towards the node whose CPU is referencing it */
         if (pol->flags & MPOL_F_MORON) {
-               int last_nid;
+               int last_cpupid;
+               int this_cpupid;
  
-               polnid = numa_node_id();
+               polnid = thisnid;
+               this_cpupid = cpu_pid_to_cpupid(thiscpu, current->pid);
  
                 /*
                  * Multi-stage node selection is used in conjunction
@@ -2373,8 +2430,25 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
                  * it less likely we act on an unlikely task<->page
                  * relation.
                  */
-               last_nid = page_nid_xchg_last(page, polnid);
-               if (last_nid != polnid)
+               last_cpupid = page_cpupid_xchg_last(page, this_cpupid);
+               if (!cpupid_pid_unset(last_cpupid) && cpupid_to_nid(last_cpupid) != thisnid) {
+
+                       /* See sysctl_numa_balancing_migrate_deferred comment */
+                       if (!cpupid_match_pid(current, last_cpupid))
+                               defer_numa_migrate(current);
+
+                       goto out;
+               }
+
+               /*
+                * The quadratic filter above reduces extraneous migration
+                * of shared pages somewhat. This code reduces it even more,
+                * reducing the overhead of page migrations of shared pages.
+                * This makes workloads with shared pages rely more on
+                * "move task near its memory", and less on "move memory
+                * towards its task", which is exactly what we want.
+                */
+               if (numa_migrate_deferred(current, last_cpupid))
                         goto out;
         }