From: Vineet Gupta Date: Fri, 26 Jul 2013 17:07:36 +0000 (-0700) Subject: ARC: [ASID] SMP enabled allocation X-Git-Tag: next-20130802~104^2~2 X-Git-Url: https://git.karo-electronics.de/?a=commitdiff_plain;h=f100635031f067f4efb00607a20c053e66efd399;p=karo-tx-linux.git ARC: [ASID] SMP enabled allocation -Per CPU ASID management/allocation -Forcing ASID refresh on task migration Signed-off-by: Vineet Gupta --- diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h index 3529a46859b1..baf923f689c1 100644 --- a/arch/arc/include/asm/mmu.h +++ b/arch/arc/include/asm/mmu.h @@ -48,7 +48,7 @@ #ifndef __ASSEMBLY__ typedef struct { - unsigned long asid; /* Hw PID + Generation cycle */ + unsigned long asid[NR_CPUS]; /* Hw PID + Generation cycle */ } mm_context_t; #ifdef CONFIG_ARC_DBG_TLB_PARANOIA diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h index cf1d7c145ef6..f02f8d8a2973 100644 --- a/arch/arc/include/asm/mmu_context.h +++ b/arch/arc/include/asm/mmu_context.h @@ -30,13 +30,13 @@ * "Fast Context Switch" i.e. no TLB flush on ctxt-switch * * Linux assigns each task a unique ASID. A simple round-robin allocation - * of H/w ASID is done using software tracker @asid_cache. + * of H/w ASID is done using software tracker @asid_cpu. * When it reaches max 255, the allocation cycle starts afresh by flushing * the entire TLB and wrapping ASID back to zero. * * A new allocation cycle, post rollover, could potentially reassign an ASID * to a different task. Thus the rule is to refresh the ASID in a new cycle. - * The 32 bit @asid_cache (and mm->asid) have 8 bits MMU PID and rest 24 bits + * The 32 bit @asid_cpu (and mm->asid) have 8 bits MMU PID and rest 24 bits * serve as cycle/generation indicator and natural 32 bit unsigned math * automagically increments the generation when lower 8 bits rollover. */ @@ -47,9 +47,11 @@ #define MM_CTXT_FIRST_CYCLE (MM_CTXT_ASID_MASK + 1) #define MM_CTXT_NO_ASID 0UL -#define hw_pid(mm) (mm->context.asid & MM_CTXT_ASID_MASK) +#define asid_mm(mm, cpu) mm->context.asid[cpu] +#define hw_pid(mm, cpu) (asid_mm(mm, cpu) & MM_CTXT_ASID_MASK) -extern unsigned int asid_cache; +DECLARE_PER_CPU(unsigned int, asid_cache); +#define asid_cpu(cpu) per_cpu(asid_cache, cpu) /* * Get a new ASID if task doesn't have a valid one (unalloc or from prev cycle) @@ -57,6 +59,7 @@ extern unsigned int asid_cache; */ static inline void get_new_mmu_context(struct mm_struct *mm) { + const unsigned int cpu = smp_processor_id(); unsigned long flags; local_irq_save(flags); @@ -68,11 +71,11 @@ static inline void get_new_mmu_context(struct mm_struct *mm) * context, setting it to invalid value, which the check below would * catch too */ - if (!((mm->context.asid ^ asid_cache) & MM_CTXT_CYCLE_MASK)) + if (!((asid_mm(mm, cpu) ^ asid_cpu(cpu)) & MM_CTXT_CYCLE_MASK)) goto set_hw; /* move to new ASID */ - if (!(++asid_cache & MM_CTXT_ASID_MASK)) { /* ASID roll-over */ + if (!(++asid_cpu(cpu) & MM_CTXT_ASID_MASK)) { /* ASID roll-over */ flush_tlb_all(); } @@ -80,14 +83,14 @@ static inline void get_new_mmu_context(struct mm_struct *mm) * If the container itself wrapped around, set it to a non zero * "generation" to distinguish from no context */ - if (!asid_cache) - asid_cache = MM_CTXT_FIRST_CYCLE; + if (!asid_cpu(cpu)) + asid_cpu(cpu) = MM_CTXT_FIRST_CYCLE; /* Assign new ASID to tsk */ - mm->context.asid = asid_cache; + asid_mm(mm, cpu) = asid_cpu(cpu); set_hw: - write_aux_reg(ARC_REG_PID, hw_pid(mm) | MMU_ENABLE); + write_aux_reg(ARC_REG_PID, hw_pid(mm, cpu) | MMU_ENABLE); local_irq_restore(flags); } @@ -99,16 +102,44 @@ set_hw: static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { - mm->context.asid = MM_CTXT_NO_ASID; + int i; + + for (i = 0; i < NR_CPUS; i++) + asid_mm(mm, i) = MM_CTXT_NO_ASID; + return 0; } +static inline void destroy_context(struct mm_struct *mm) +{ + asid_mm(mm, smp_processor_id()) = MM_CTXT_NO_ASID; +} + /* Prepare the MMU for task: setup PID reg with allocated ASID If task doesn't have an ASID (never alloc or stolen, get a new ASID) */ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { +#ifdef CONFIG_SMP + const int cpu = smp_processor_id(); + int migrating; + + /* + * If @next is migrating to a different CPU, force an ASID refresh (by + * relinquishing current value as required by new implementation + * of get_new_mmu_context() + * Use Case: + * Task t1 migrates to a different core, forks, migrates back to + * orig core. COW semantics requires it to have a new ASID now + * so that pre-fork TLB entries can't be used. + */ + cpumask_clear_cpu(cpu, mm_cpumask(prev)); + migrating = !cpumask_test_and_set_cpu(cpu, mm_cpumask(next)); + if (migrating) + destroy_context(next); +#endif + #ifndef CONFIG_SMP /* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */ write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd); @@ -126,11 +157,6 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, */ #define activate_mm(prev, next) switch_mm(prev, next, NULL) -static inline void destroy_context(struct mm_struct *mm) -{ - mm->context.asid = MM_CTXT_NO_ASID; -} - /* it seemed that deactivate_mm( ) is a reasonable place to do book-keeping * for retiring-mm. However destroy_context( ) still needs to do that because * between mm_release( ) = >deactive_mm( ) and diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index bca3052c956d..482a42bdc051 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -128,6 +128,7 @@ void start_kernel_secondary(void) atomic_inc(&mm->mm_users); atomic_inc(&mm->mm_count); current->active_mm = mm; + cpumask_set_cpu(cpu, mm_cpumask(mm)); notify_cpu_starting(cpu); set_cpu_online(cpu, true); diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index ecf5a113335c..8fed015a7116 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -100,7 +100,7 @@ /* A copy of the ASID from the PID reg is kept in asid_cache */ -unsigned int asid_cache = MM_CTXT_FIRST_CYCLE; +DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE; /* * Utility Routine to erase a J-TLB entry @@ -275,6 +275,7 @@ noinline void local_flush_tlb_mm(struct mm_struct *mm) void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { + const unsigned int cpu = smp_processor_id(); unsigned long flags; /* If range @start to @end is more than 32 TLB entries deep, @@ -298,9 +299,9 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, local_irq_save(flags); - if (vma->vm_mm->context.asid != MM_CTXT_NO_ASID) { + if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID) { while (start < end) { - tlb_entry_erase(start | hw_pid(vma->vm_mm)); + tlb_entry_erase(start | hw_pid(vma->vm_mm, cpu)); start += PAGE_SIZE; } } @@ -347,6 +348,7 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) { + const unsigned int cpu = smp_processor_id(); unsigned long flags; /* Note that it is critical that interrupts are DISABLED between @@ -354,8 +356,8 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) */ local_irq_save(flags); - if (vma->vm_mm->context.asid != MM_CTXT_NO_ASID) { - tlb_entry_erase((page & PAGE_MASK) | hw_pid(vma->vm_mm)); + if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID) { + tlb_entry_erase((page & PAGE_MASK) | hw_pid(vma->vm_mm, cpu)); utlb_invalidate(); } @@ -679,7 +681,7 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, void print_asid_mismatch(int is_fast_path) { int pid_sw, pid_hw; - pid_sw = hw_pid(current->active_mm); + pid_sw = hw_pid(current->active_mm, smp_processor_id()); pid_hw = read_aux_reg(ARC_REG_PID) & 0xff; pr_emerg("ASID Mismatch in %s Path Handler: sw-pid=0x%x hw-pid=0x%x\n",