mm/numa.c

   1 /*
   2  * Generic NUMA page table entry support. This code reuses
   3  * PROT_NONE: an architecture can choose to use its own
   4  * implementation, by setting CONFIG_ARCH_SUPPORTS_NUMA_BALANCING
   5  * and not setting CONFIG_ARCH_WANTS_NUMA_GENERIC_PGPROT.
   6  */
   7 #include <linux/mm.h>
   8
   9 static inline pgprot_t vma_prot_none(struct vm_area_struct *vma)
  10 {
  11         /*
  12          * obtain PROT_NONE by removing READ|WRITE|EXEC privs
  13          */
  14         vm_flags_t vmflags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
  15
  16         return pgprot_modify(vma->vm_page_prot, vm_get_page_prot(vmflags));
  17 }
  18
  19 bool pte_numa(struct vm_area_struct *vma, pte_t pte)
  20 {
  21         /*
  22          * For NUMA page faults, we use PROT_NONE ptes in VMAs with
  23          * "normal" vma->vm_page_prot protections.  Genuine PROT_NONE
  24          * VMAs should never get here, because the fault handling code
  25          * will notice that the VMA has no read or write permissions.
  26          *
  27          * This means we cannot get 'special' PROT_NONE faults from genuine
  28          * PROT_NONE maps, nor from PROT_WRITE file maps that do dirty
  29          * tracking.
  30          *
  31          * Neither case is really interesting for our current use though so we
  32          * don't care.
  33          */
  34         if (pte_same(pte, pte_modify(pte, vma->vm_page_prot)))
  35                 return false;
  36
  37         return pte_same(pte, pte_modify(pte, vma_prot_none(vma)));
  38 }
  39
  40 pte_t pte_mknuma(struct vm_area_struct *vma, pte_t pte)
  41 {
  42         return pte_modify(pte, vma_prot_none(vma));
  43 }
  44
  45 #ifdef CONFIG_ARCH_USES_NUMA_GENERIC_PGPROT_HUGEPAGE
  46 bool pmd_numa(struct vm_area_struct *vma, pmd_t pmd)
  47 {
  48         /*
  49          * See pte_numa() above
  50          */
  51         if (pmd_same(pmd, pmd_modify(pmd, vma->vm_page_prot)))
  52                 return false;
  53
  54         return pmd_same(pmd, pmd_modify(pmd, vma_prot_none(vma)));
  55 }
  56 #endif
  57
  58 /*
  59  * The scheduler uses this function to mark a range of virtual
  60  * memory inaccessible to user-space, for the purposes of probing
  61  * the composition of the working set.
  62  *
  63  * The resulting page faults will be demultiplexed into:
  64  *
  65  *    mm/memory.c::do_numa_page()
  66  *    mm/huge_memory.c::do_huge_pmd_numa_page()
  67  *
  68  * This generic version simply uses PROT_NONE.
  69  */
  70 unsigned long change_prot_numa(struct vm_area_struct *vma, unsigned long start, unsigned long end)
  71 {
  72         return change_protection(vma, start, end, vma_prot_none(vma), 0);
  73 }