Merge branch 'master' into csb1725

[mv-sheeva.git] / arch / x86 / mm / tlb.c
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c

index c03f14ab666742d6960ff3339ebcfe28a003308b..6acc724d5d8ff759f93290a2591c84945f9e6bd2 100644 (file)
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -5,6 +5,7 @@
  #include <linux/smp.h>
  #include <linux/interrupt.h>
  #include <linux/module.h>
+#include <linux/cpu.h>
  
  #include <asm/tlbflush.h>
  #include <asm/mmu_context.h>
@@ -52,6 +53,8 @@ union smp_flush_state {
     want false sharing in the per cpu data segment. */
  static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS];
  
+static DEFINE_PER_CPU_READ_MOSTLY(int, tlb_vector_offset);
+
  /*
   * We cannot call mmdrop() because we are in interrupt context,
   * instead update mm->cpu_vm_mask.
@@ -173,7 +176,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
         union smp_flush_state *f;
  
         /* Caller has disabled preemption */
-       sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
+       sender = this_cpu_read(tlb_vector_offset);
         f = &flush_state[sender];
  
         /*
@@ -218,6 +221,48 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
         flush_tlb_others_ipi(cpumask, mm, va);
  }
  
+static void __cpuinit calculate_tlb_offset(void)
+{
+       int cpu, node, nr_node_vecs, idx = 0;
+       /*
+        * Set each CPU's tlb_vector_offset: every online node gets
+        * nr_node_vecs consecutive offsets (wrapping if nodes outnumber the
+        * vectors), shared round-robin by its CPUs. The values change at
+        * runtime; that is not inconsistent, as the write is atomic on x86.
+        * Lock contention may rise briefly until every CPU has been updated.
+        * Note: if NUM_INVALIDATE_TLB_VECTORS % nr_online_nodes != 0, some
+        * vectors may go unused.
+        */
+       if (nr_online_nodes > NUM_INVALIDATE_TLB_VECTORS)
+               nr_node_vecs = 1;
+       else
+               nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes;
+
+       for_each_online_node(node) {
+               int node_offset = (idx % NUM_INVALIDATE_TLB_VECTORS) *
+                       nr_node_vecs;
+               int cpu_offset = 0;
+               for_each_cpu(cpu, cpumask_of_node(node)) {
+                       per_cpu(tlb_vector_offset, cpu) = node_offset +
+                               cpu_offset;
+                       cpu_offset++;
+                       cpu_offset = cpu_offset % nr_node_vecs;
+               }
+               idx++; /* ordinal of the node just processed */
+       }
+}
+
+static int __cpuinit tlb_cpuhp_notify(struct notifier_block *n,
+               unsigned long action, void *hcpu) /* n, hcpu are unused */
+{
+       switch (action & 0xf) { /* low nibble only; TODO confirm mask */
+       case CPU_ONLINE:
+       case CPU_DEAD: /* handled the same as CPU_ONLINE */
+               calculate_tlb_offset(); /* recompute all offsets */
+       }
+       return NOTIFY_OK; /* returned for every action */
+}
+
  static int __cpuinit init_smp_flush(void)
  {
         int i;
@@ -225,6 +270,8 @@ static int __cpuinit init_smp_flush(void)
         for (i = 0; i < ARRAY_SIZE(flush_state); i++)
                 raw_spin_lock_init(&flush_state[i].tlbstate_lock);
  
+       calculate_tlb_offset();
+       hotcpu_notifier(tlb_cpuhp_notify, 0);
         return 0;
  }
  core_initcall(init_smp_flush);