arch/powerpc/mm/mmu_context_nohash.c

   1 /*
   2  * This file contains the routines for handling the MMU on those
   3  * PowerPC implementations where the MMU is not using the hash
   4  * table, such as 8xx, 4xx, BookE's etc...
   5  *
   6  * Copyright 2008 Ben Herrenschmidt <benh@kernel.crashing.org>
   7  *                IBM Corp.
   8  *
   9  *  Derived from previous arch/powerpc/mm/mmu_context.c
  10  *  and arch/powerpc/include/asm/mmu_context.h
  11  *
  12  *  This program is free software; you can redistribute it and/or
  13  *  modify it under the terms of the GNU General Public License
  14  *  as published by the Free Software Foundation; either version
  15  *  2 of the License, or (at your option) any later version.
  16  *
  17  * TODO:
  18  *
  19  *   - The global context lock will not scale very well
  20  *   - The maps should be dynamically allocated to allow for processors
  21  *     that support more PID bits at runtime
  22  *   - Implement flush_tlb_mm() by making the context stale and picking
  23  *     a new one
  24  *   - More aggressively clear stale map bits and maybe find some way to
  25  *     also clear mm->cpu_vm_mask bits when processes are migrated
  26  */
  27
  28 #undef DEBUG
  29 #define DEBUG_STEAL_ONLY
  30 #undef DEBUG_MAP_CONSISTENCY
  31 /*#define DEBUG_CLAMP_LAST_CONTEXT   15 */
  32
  33 #include <linux/kernel.h>
  34 #include <linux/mm.h>
  35 #include <linux/init.h>
  36 #include <linux/spinlock.h>
  37 #include <linux/bootmem.h>
  38 #include <linux/notifier.h>
  39 #include <linux/cpu.h>
  40
  41 #include <asm/mmu_context.h>
  42 #include <asm/tlbflush.h>
  43
  44 static unsigned int first_context, last_context;
  45 static unsigned int next_context, nr_free_contexts;
  46 static unsigned long *context_map;
  47 static unsigned long *stale_map[NR_CPUS];
  48 static struct mm_struct **context_mm;
  49 static spinlock_t context_lock = SPIN_LOCK_UNLOCKED;
  50
  51 #define CTX_MAP_SIZE    \
  52         (sizeof(unsigned long) * (last_context / BITS_PER_LONG + 1))
  53
  54
  55 /* Steal a context from a task that has one at the moment.
  56  *
  57  * This is used when we are running out of available PID numbers
  58  * on the processors.
  59  *
  60  * This isn't an LRU system, it just frees up each context in
  61  * turn (sort-of pseudo-random replacement :).  This would be the
  62  * place to implement an LRU scheme if anyone was motivated to do it.
  63  *  -- paulus
  64  *
  65  * For context stealing, we use a slightly different approach for
  66  * SMP and UP. Basically, the UP one is simpler and doesn't use
  67  * the stale map as we can just flush the local CPU
  68  *  -- benh
  69  */
  70 #ifdef CONFIG_SMP
  71 static unsigned int steal_context_smp(unsigned int id)
  72 {
  73         struct mm_struct *mm;
  74         unsigned int cpu, max;
  75
  76  again:
  77         max = last_context - first_context;
  78
  79         /* Attempt to free next_context first and then loop until we manage */
  80         while (max--) {
  81                 /* Pick up the victim mm */
  82                 mm = context_mm[id];
  83
  84                 /* We have a candidate victim, check if it's active, on SMP
  85                  * we cannot steal active contexts
  86                  */
  87                 if (mm->context.active) {
  88                         id++;
  89                         if (id > last_context)
  90                                 id = first_context;
  91                         continue;
  92                 }
  93                 pr_debug("[%d] steal context %d from mm @%p\n",
  94                          smp_processor_id(), id, mm);
  95
  96                 /* Mark this mm has having no context anymore */
  97                 mm->context.id = MMU_NO_CONTEXT;
  98
  99                 /* Mark it stale on all CPUs that used this mm */
 100                 for_each_cpu(cpu, mm_cpumask(mm))
 101                         __set_bit(id, stale_map[cpu]);
 102                 return id;
 103         }
 104
 105         /* This will happen if you have more CPUs than available contexts,
 106          * all we can do here is wait a bit and try again
 107          */
 108         spin_unlock(&context_lock);
 109         cpu_relax();
 110         spin_lock(&context_lock);
 111         goto again;
 112 }
 113 #endif  /* CONFIG_SMP */
 114
 115 /* Note that this will also be called on SMP if all other CPUs are
 116  * offlined, which means that it may be called for cpu != 0. For
 117  * this to work, we somewhat assume that CPUs that are onlined
 118  * come up with a fully clean TLB (or are cleaned when offlined)
 119  */
 120 static unsigned int steal_context_up(unsigned int id)
 121 {
 122         struct mm_struct *mm;
 123         int cpu = smp_processor_id();
 124
 125         /* Pick up the victim mm */
 126         mm = context_mm[id];
 127
 128         pr_debug("[%d] steal context %d from mm @%p\n", cpu, id, mm);
 129
 130         /* Mark this mm has having no context anymore */
 131         mm->context.id = MMU_NO_CONTEXT;
 132
 133         /* Flush the TLB for that context */
 134         local_flush_tlb_mm(mm);
 135
 136         /* XXX This clear should ultimately be part of local_flush_tlb_mm */
 137         __clear_bit(id, stale_map[cpu]);
 138
 139         return id;
 140 }
 141
 142 #ifdef DEBUG_MAP_CONSISTENCY
 143 static void context_check_map(void)
 144 {
 145         unsigned int id, nrf, nact;
 146
 147         nrf = nact = 0;
 148         for (id = first_context; id <= last_context; id++) {
 149                 int used = test_bit(id, context_map);
 150                 if (!used)
 151                         nrf++;
 152                 if (used != (context_mm[id] != NULL))
 153                         pr_err("MMU: Context %d is %s and MM is %p !\n",
 154                                id, used ? "used" : "free", context_mm[id]);
 155                 if (context_mm[id] != NULL)
 156                         nact += context_mm[id]->context.active;
 157         }
 158         if (nrf != nr_free_contexts) {
 159                 pr_err("MMU: Free context count out of sync ! (%d vs %d)\n",
 160                        nr_free_contexts, nrf);
 161                 nr_free_contexts = nrf;
 162         }
 163         if (nact > num_online_cpus())
 164                 pr_err("MMU: More active contexts than CPUs ! (%d vs %d)\n",
 165                        nact, num_online_cpus());
 166         if (first_context > 0 && !test_bit(0, context_map))
 167                 pr_err("MMU: Context 0 has been freed !!!\n");
 168 }
 169 #else
 170 static void context_check_map(void) { }
 171 #endif
 172
 173 void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
 174 {
 175         unsigned int id, cpu = smp_processor_id();
 176         unsigned long *map;
 177
 178         /* No lockless fast path .. yet */
 179         spin_lock(&context_lock);
 180
 181 #ifndef DEBUG_STEAL_ONLY
 182         pr_debug("[%d] activating context for mm @%p, active=%d, id=%d\n",
 183                  cpu, next, next->context.active, next->context.id);
 184 #endif
 185
 186 #ifdef CONFIG_SMP
 187         /* Mark us active and the previous one not anymore */
 188         next->context.active++;
 189         if (prev) {
 190 #ifndef DEBUG_STEAL_ONLY
 191                 pr_debug(" old context %p active was: %d\n",
 192                          prev, prev->context.active);
 193 #endif
 194                 WARN_ON(prev->context.active < 1);
 195                 prev->context.active--;
 196         }
 197 #endif /* CONFIG_SMP */
 198
 199         /* If we already have a valid assigned context, skip all that */
 200         id = next->context.id;
 201         if (likely(id != MMU_NO_CONTEXT))
 202                 goto ctxt_ok;
 203
 204         /* We really don't have a context, let's try to acquire one */
 205         id = next_context;
 206         if (id > last_context)
 207                 id = first_context;
 208         map = context_map;
 209
 210         /* No more free contexts, let's try to steal one */
 211         if (nr_free_contexts == 0) {
 212 #ifdef CONFIG_SMP
 213                 if (num_online_cpus() > 1) {
 214                         id = steal_context_smp(id);
 215                         goto stolen;
 216                 }
 217 #endif /* CONFIG_SMP */
 218                 id = steal_context_up(id);
 219                 goto stolen;
 220         }
 221         nr_free_contexts--;
 222
 223         /* We know there's at least one free context, try to find it */
 224         while (__test_and_set_bit(id, map)) {
 225                 id = find_next_zero_bit(map, last_context+1, id);
 226                 if (id > last_context)
 227                         id = first_context;
 228         }
 229  stolen:
 230         next_context = id + 1;
 231         context_mm[id] = next;
 232         next->context.id = id;
 233
 234 #ifndef DEBUG_STEAL_ONLY
 235         pr_debug("[%d] picked up new id %d, nrf is now %d\n",
 236                  cpu, id, nr_free_contexts);
 237 #endif
 238
 239         context_check_map();
 240  ctxt_ok:
 241
 242         /* If that context got marked stale on this CPU, then flush the
 243          * local TLB for it and unmark it before we use it
 244          */
 245         if (test_bit(id, stale_map[cpu])) {
 246                 pr_debug("[%d] flushing stale context %d for mm @%p !\n",
 247                          cpu, id, next);
 248                 local_flush_tlb_mm(next);
 249
 250                 /* XXX This clear should ultimately be part of local_flush_tlb_mm */
 251                 __clear_bit(id, stale_map[cpu]);
 252         }
 253
 254         /* Flick the MMU and release lock */
 255         set_context(id, next->pgd);
 256         spin_unlock(&context_lock);
 257 }
 258
 259 /*
 260  * Set up the context for a new address space.
 261  */
 262 int init_new_context(struct task_struct *t, struct mm_struct *mm)
 263 {
 264         mm->context.id = MMU_NO_CONTEXT;
 265         mm->context.active = 0;
 266
 267         return 0;
 268 }
 269
 270 /*
 271  * We're finished using the context for an address space.
 272  */
 273 void destroy_context(struct mm_struct *mm)
 274 {
 275         unsigned int id;
 276
 277         if (mm->context.id == MMU_NO_CONTEXT)
 278                 return;
 279
 280         WARN_ON(mm->context.active != 0);
 281
 282         spin_lock(&context_lock);
 283         id = mm->context.id;
 284         if (id != MMU_NO_CONTEXT) {
 285                 __clear_bit(id, context_map);
 286                 mm->context.id = MMU_NO_CONTEXT;
 287 #ifdef DEBUG_MAP_CONSISTENCY
 288                 mm->context.active = 0;
 289                 context_mm[id] = NULL;
 290 #endif
 291                 nr_free_contexts++;
 292         }
 293         spin_unlock(&context_lock);
 294 }
 295
 296 #ifdef CONFIG_SMP
 297
 298 static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self,
 299                                             unsigned long action, void *hcpu)
 300 {
 301         unsigned int cpu = (unsigned int)(long)hcpu;
 302
 303         /* We don't touch CPU 0 map, it's allocated at aboot and kept
 304          * around forever
 305          */
 306         if (cpu == 0)
 307                 return NOTIFY_OK;
 308
 309         switch (action) {
 310         case CPU_ONLINE:
 311         case CPU_ONLINE_FROZEN:
 312                 pr_debug("MMU: Allocating stale context map for CPU %d\n", cpu);
 313                 stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
 314                 break;
 315 #ifdef CONFIG_HOTPLUG_CPU
 316         case CPU_DEAD:
 317         case CPU_DEAD_FROZEN:
 318                 pr_debug("MMU: Freeing stale context map for CPU %d\n", cpu);
 319                 kfree(stale_map[cpu]);
 320                 stale_map[cpu] = NULL;
 321                 break;
 322 #endif
 323         }
 324         return NOTIFY_OK;
 325 }
 326
 327 static struct notifier_block __cpuinitdata mmu_context_cpu_nb = {
 328         .notifier_call  = mmu_context_cpu_notify,
 329 };
 330
 331 #endif /* CONFIG_SMP */
 332
 333 /*
 334  * Initialize the context management stuff.
 335  */
 336 void __init mmu_context_init(void)
 337 {
 338         /* Mark init_mm as being active on all possible CPUs since
 339          * we'll get called with prev == init_mm the first time
 340          * we schedule on a given CPU
 341          */
 342         init_mm.context.active = NR_CPUS;
 343
 344         /*
 345          *   The MPC8xx has only 16 contexts.  We rotate through them on each
 346          * task switch.  A better way would be to keep track of tasks that
 347          * own contexts, and implement an LRU usage.  That way very active
 348          * tasks don't always have to pay the TLB reload overhead.  The
 349          * kernel pages are mapped shared, so the kernel can run on behalf
 350          * of any task that makes a kernel entry.  Shared does not mean they
 351          * are not protected, just that the ASID comparison is not performed.
 352          *      -- Dan
 353          *
 354          * The IBM4xx has 256 contexts, so we can just rotate through these
 355          * as a way of "switching" contexts.  If the TID of the TLB is zero,
 356          * the PID/TID comparison is disabled, so we can use a TID of zero
 357          * to represent all kernel pages as shared among all contexts.
 358          *      -- Dan
 359          */
 360         if (mmu_has_feature(MMU_FTR_TYPE_8xx)) {
 361                 first_context = 0;
 362                 last_context = 15;
 363         } else {
 364                 first_context = 1;
 365                 last_context = 255;
 366         }
 367
 368 #ifdef DEBUG_CLAMP_LAST_CONTEXT
 369         last_context = DEBUG_CLAMP_LAST_CONTEXT;
 370 #endif
 371         /*
 372          * Allocate the maps used by context management
 373          */
 374         context_map = alloc_bootmem(CTX_MAP_SIZE);
 375         context_mm = alloc_bootmem(sizeof(void *) * (last_context + 1));
 376         stale_map[0] = alloc_bootmem(CTX_MAP_SIZE);
 377
 378 #ifdef CONFIG_SMP
 379         register_cpu_notifier(&mmu_context_cpu_nb);
 380 #endif
 381
 382         printk(KERN_INFO
 383                "MMU: Allocated %zu bytes of context maps for %d contexts\n",
 384                2 * CTX_MAP_SIZE + (sizeof(void *) * (last_context + 1)),
 385                last_context - first_context + 1);
 386
 387         /*
 388          * Some processors have too few contexts to reserve one for
 389          * init_mm, and require using context 0 for a normal task.
 390          * Other processors reserve the use of context zero for the kernel.
 391          * This code assumes first_context < 32.
 392          */
 393         context_map[0] = (1 << first_context) - 1;
 394         next_context = first_context;
 395         nr_free_contexts = last_context - first_context + 1;
 396 }
 397