mm: avoid spurious 'bad pmd' warning messages

author Ross Zwisler <ross.zwisler@linux.intel.com>
Fri, 2 Jun 2017 21:46:34 +0000 (14:46 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 2 Jun 2017 22:07:37 +0000 (15:07 -0700)
diff --git a/mm/memory.c b/mm/memory.c

index 6ff5d729ded0ecd3a5607d10248697a786091f7e..2e65df1831d941dcd1282c56312bdbd153df0a79 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3029,6 +3029,17 @@ static int __do_fault(struct vm_fault *vmf)
         return ret;
  }
  
+/*
+ * The ordering of these checks is important for pmds with _PAGE_DEVMAP set.
+ * If we check pmd_trans_unstable() first we will trip the bad_pmd() check
+ * inside of pmd_none_or_trans_huge_or_clear_bad(). This will end up correctly
+ * returning 1 but not before it spams dmesg with the pmd_clear_bad() output.
+ */
+static int pmd_devmap_trans_unstable(pmd_t *pmd)
+{
+       return pmd_devmap(*pmd) || pmd_trans_unstable(pmd);
+}
+
  static int pte_alloc_one_map(struct vm_fault *vmf)
  {
         struct vm_area_struct *vma = vmf->vma;
@@ -3052,18 +3063,27 @@ static int pte_alloc_one_map(struct vm_fault *vmf)
  map_pte:
         /*
          * If a huge pmd materialized under us just retry later.  Use
-        * pmd_trans_unstable() instead of pmd_trans_huge() to ensure the pmd
-        * didn't become pmd_trans_huge under us and then back to pmd_none, as
-        * a result of MADV_DONTNEED running immediately after a huge pmd fault
-        * in a different thread of this mm, in turn leading to a misleading
-        * pmd_trans_huge() retval.  All we have to ensure is that it is a
-        * regular pmd that we can walk with pte_offset_map() and we can do that
-        * through an atomic read in C, which is what pmd_trans_unstable()
-        * provides.
+        * pmd_trans_unstable() via pmd_devmap_trans_unstable() instead of
+        * pmd_trans_huge() to ensure the pmd didn't become pmd_trans_huge
+        * under us and then back to pmd_none, as a result of MADV_DONTNEED
+        * running immediately after a huge pmd fault in a different thread of
+        * this mm, in turn leading to a misleading pmd_trans_huge() retval.
+        * All we have to ensure is that it is a regular pmd that we can walk
+        * with pte_offset_map() and we can do that through an atomic read in
+        * C, which is what pmd_trans_unstable() provides.
          */
-       if (pmd_trans_unstable(vmf->pmd) || pmd_devmap(*vmf->pmd))
+       if (pmd_devmap_trans_unstable(vmf->pmd))
                 return VM_FAULT_NOPAGE;
  
+       /*
+        * At this point we know that our vmf->pmd points to a page of ptes
+        * and it cannot become pmd_none(), pmd_devmap() or pmd_trans_huge()
+        * for the duration of the fault.  If a racing MADV_DONTNEED runs and
+        * we zap the ptes pointed to by our vmf->pmd, the vmf->ptl will still
+        * be valid and we will re-check to make sure the vmf->pte isn't
+        * pte_none() under vmf->ptl protection when we return to
+        * alloc_set_pte().
+        */
         vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
                         &vmf->ptl);
         return 0;
@@ -3690,7 +3710,7 @@ static int handle_pte_fault(struct vm_fault *vmf)
                 vmf->pte = NULL;
         } else {
                 /* See comment in pte_alloc_one_map() */
-               if (pmd_trans_unstable(vmf->pmd) || pmd_devmap(*vmf->pmd))
+               if (pmd_devmap_trans_unstable(vmf->pmd))
                         return 0;
                 /*
                  * A regular pmd is established and it can't morph into a huge
author	Ross Zwisler <ross.zwisler@linux.intel.com>
	Fri, 2 Jun 2017 21:46:34 +0000 (14:46 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 2 Jun 2017 22:07:37 +0000 (15:07 -0700)