From: Vineet Gupta <vgupta@synopsys.com>
Date: Fri, 26 Jul 2013 17:07:36 +0000 (-0700)
Subject: ARC: [ASID] SMP enabled allocation
X-Git-Tag: next-20130802~104^2~2
X-Git-Url: https://git.karo-electronics.de/?a=commitdiff_plain;h=f100635031f067f4efb00607a20c053e66efd399;p=karo-tx-linux.git

ARC: [ASID] SMP enabled allocation

-Per CPU ASID management/allocation
-Forcing ASID refresh on task migration

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---

diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h
index 3529a46859b1..baf923f689c1 100644
--- a/arch/arc/include/asm/mmu.h
+++ b/arch/arc/include/asm/mmu.h
@@ -48,7 +48,7 @@
 #ifndef __ASSEMBLY__
 
 typedef struct {
-	unsigned long asid;	/* Hw PID + Generation cycle */
+	unsigned long asid[NR_CPUS];	/* Hw PID + Generation cycle */
 } mm_context_t;
 
 #ifdef CONFIG_ARC_DBG_TLB_PARANOIA
diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h
index cf1d7c145ef6..f02f8d8a2973 100644
--- a/arch/arc/include/asm/mmu_context.h
+++ b/arch/arc/include/asm/mmu_context.h
@@ -30,13 +30,13 @@
  * "Fast Context Switch" i.e. no TLB flush on ctxt-switch
  *
  * Linux assigns each task a unique ASID. A simple round-robin allocation
- * of H/w ASID is done using software tracker @asid_cache.
+ * of H/w ASID is done using software tracker @asid_cpu.
  * When it reaches max 255, the allocation cycle starts afresh by flushing
  * the entire TLB and wrapping ASID back to zero.
  *
  * A new allocation cycle, post rollover, could potentially reassign an ASID
  * to a different task. Thus the rule is to refresh the ASID in a new cycle.
- * The 32 bit @asid_cache (and mm->asid) have 8 bits MMU PID and rest 24 bits
+ * The 32 bit @asid_cpu (and @asid_mm) have 8 bits MMU PID and rest 24 bits
  * serve as cycle/generation indicator and natural 32 bit unsigned math
  * automagically increments the generation when lower 8 bits rollover.
  */
@@ -47,9 +47,11 @@
 #define MM_CTXT_FIRST_CYCLE	(MM_CTXT_ASID_MASK + 1)
 #define MM_CTXT_NO_ASID		0UL
 
-#define hw_pid(mm)		(mm->context.asid & MM_CTXT_ASID_MASK)
+#define asid_mm(mm, cpu)	mm->context.asid[cpu]
+#define hw_pid(mm, cpu)		(asid_mm(mm, cpu) & MM_CTXT_ASID_MASK)
 
-extern unsigned int asid_cache;
+DECLARE_PER_CPU(unsigned int, asid_cache);
+#define asid_cpu(cpu)		per_cpu(asid_cache, cpu)
 
 /*
  * Get a new ASID if task doesn't have a valid one (unalloc or from prev cycle)
@@ -57,6 +59,7 @@ extern unsigned int asid_cache;
  */
 static inline void get_new_mmu_context(struct mm_struct *mm)
 {
+	const unsigned int cpu = smp_processor_id();
 	unsigned long flags;
 
 	local_irq_save(flags);
@@ -68,11 +71,11 @@ static inline void get_new_mmu_context(struct mm_struct *mm)
 	 * context, setting it to invalid value, which the check below would
 	 * catch too
 	 */
-	if (!((mm->context.asid ^ asid_cache) & MM_CTXT_CYCLE_MASK))
+	if (!((asid_mm(mm, cpu) ^ asid_cpu(cpu)) & MM_CTXT_CYCLE_MASK))
 		goto set_hw;
 
 	/* move to new ASID */
-	if (!(++asid_cache & MM_CTXT_ASID_MASK)) {	/* ASID roll-over */
+	if (!(++asid_cpu(cpu) & MM_CTXT_ASID_MASK)) {	/* ASID roll-over */
 		flush_tlb_all();
 	}
 
@@ -80,14 +83,14 @@ static inline void get_new_mmu_context(struct mm_struct *mm)
 	 * If the container itself wrapped around, set it to a non zero
 	 * "generation" to distinguish from no context
 	 */
-	if (!asid_cache)
-		asid_cache = MM_CTXT_FIRST_CYCLE;
+	if (!asid_cpu(cpu))
+		asid_cpu(cpu) = MM_CTXT_FIRST_CYCLE;
 
 	/* Assign new ASID to tsk */
-	mm->context.asid = asid_cache;
+	asid_mm(mm, cpu) = asid_cpu(cpu);
 
 set_hw:
-	write_aux_reg(ARC_REG_PID, hw_pid(mm) | MMU_ENABLE);
+	write_aux_reg(ARC_REG_PID, hw_pid(mm, cpu) | MMU_ENABLE);
 
 	local_irq_restore(flags);
 }
@@ -99,16 +102,44 @@ set_hw:
 static inline int
 init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
-	mm->context.asid = MM_CTXT_NO_ASID;
+	int i;
+
+	for (i = 0; i < NR_CPUS; i++)
+		asid_mm(mm, i) = MM_CTXT_NO_ASID;
+
 	return 0;
 }
 
+static inline void destroy_context(struct mm_struct *mm)
+{
+	asid_mm(mm, smp_processor_id()) = MM_CTXT_NO_ASID;
+}
+
 /* Prepare the MMU for task: setup PID reg with allocated ASID
     If task doesn't have an ASID (never alloc or stolen, get a new ASID)
 */
 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 			     struct task_struct *tsk)
 {
+#ifdef CONFIG_SMP
+	const int cpu = smp_processor_id();
+	int migrating;
+
+	/*
+	 * If @next is migrating to a different CPU, force an ASID refresh by
+	 * relinquishing its current value on this CPU, as required by the new
+	 * implementation of get_new_mmu_context().
+	 * Use Case:
+	 *	Task t1 migrates to a different core, forks, migrates back to
+	 *	orig core. COW semantics requires it to have a new ASID now
+	 *	so that pre-fork TLB entries can't be used.
+	 */
+	cpumask_clear_cpu(cpu, mm_cpumask(prev));
+	migrating = !cpumask_test_and_set_cpu(cpu, mm_cpumask(next));
+	if (migrating)
+		destroy_context(next);
+#endif
+
 #ifndef CONFIG_SMP
 	/* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */
 	write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd);
@@ -126,11 +157,6 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
  */
 #define activate_mm(prev, next)		switch_mm(prev, next, NULL)
 
-static inline void destroy_context(struct mm_struct *mm)
-{
-	mm->context.asid = MM_CTXT_NO_ASID;
-}
-
 /* it seemed that deactivate_mm( ) is a reasonable place to do book-keeping
  * for retiring-mm. However destroy_context( ) still needs to do that because
  * between mm_release( ) = >deactive_mm( ) and
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index bca3052c956d..482a42bdc051 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -128,6 +128,7 @@ void start_kernel_secondary(void)
 	atomic_inc(&mm->mm_users);
 	atomic_inc(&mm->mm_count);
 	current->active_mm = mm;
+	cpumask_set_cpu(cpu, mm_cpumask(mm));
 
 	notify_cpu_starting(cpu);
 	set_cpu_online(cpu, true);
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index ecf5a113335c..8fed015a7116 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -100,7 +100,7 @@
 
 
 /* A copy of the ASID from the PID reg is kept in asid_cache */
-unsigned int asid_cache = MM_CTXT_FIRST_CYCLE;
+DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE;
 
 /*
  * Utility Routine to erase a J-TLB entry
@@ -275,6 +275,7 @@ noinline void local_flush_tlb_mm(struct mm_struct *mm)
 void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 			   unsigned long end)
 {
+	const unsigned int cpu = smp_processor_id();
 	unsigned long flags;
 
 	/* If range @start to @end is more than 32 TLB entries deep,
@@ -298,9 +299,9 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 
 	local_irq_save(flags);
 
-	if (vma->vm_mm->context.asid != MM_CTXT_NO_ASID) {
+	if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID) {
 		while (start < end) {
-			tlb_entry_erase(start | hw_pid(vma->vm_mm));
+			tlb_entry_erase(start | hw_pid(vma->vm_mm, cpu));
 			start += PAGE_SIZE;
 		}
 	}
@@ -347,6 +348,7 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
 
 void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
 {
+	const unsigned int cpu = smp_processor_id();
 	unsigned long flags;
 
 	/* Note that it is critical that interrupts are DISABLED between
@@ -354,8 +356,8 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
 	 */
 	local_irq_save(flags);
 
-	if (vma->vm_mm->context.asid != MM_CTXT_NO_ASID) {
-		tlb_entry_erase((page & PAGE_MASK) | hw_pid(vma->vm_mm));
+	if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID) {
+		tlb_entry_erase((page & PAGE_MASK) | hw_pid(vma->vm_mm, cpu));
 		utlb_invalidate();
 	}
 
@@ -679,7 +681,7 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
 void print_asid_mismatch(int is_fast_path)
 {
 	int pid_sw, pid_hw;
-	pid_sw = hw_pid(current->active_mm);
+	pid_sw = hw_pid(current->active_mm, smp_processor_id());
 	pid_hw = read_aux_reg(ARC_REG_PID) & 0xff;
 
 	pr_emerg("ASID Mismatch in %s Path Handler: sw-pid=0x%x hw-pid=0x%x\n",