mm: hugetlbfs: close race during teardown of hugetlbfs shared page tables

author Mel Gorman <mgorman@suse.de>

Tue, 31 Jul 2012 23:46:20 +0000 (16:46 -0700)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Wed, 15 Aug 2012 14:53:02 +0000 (07:53 -0700)
author Mel Gorman <mgorman@suse.de>
Tue, 31 Jul 2012 23:46:20 +0000 (16:46 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 15 Aug 2012 14:53:02 +0000 (07:53 -0700)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index e198831276a3eab77b4a89fc0e1457a5a45d025d..19558df403257bc8b083a1682e6d28770680b4b2 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2393,6 +2393,22 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
  {
         mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex);
         __unmap_hugepage_range(vma, start, end, ref_page);
+       /*
+        * Clear this flag so that x86's huge_pmd_share page_table_shareable
+        * test will fail on a vma being torn down, and not grab a page table
+        * on its way out.  We're lucky that the flag has such an appropriate
+        * name, and can in fact be safely cleared here. We could clear it
+        * before the __unmap_hugepage_range above, but all that's necessary
+        * is to clear it before releasing the i_mmap_mutex below.
+        *
+        * This works because in the contexts this is called, the VMA is
+        * going to be destroyed. It is not vulnerable to madvise(DONTNEED)
+        * because madvise is not supported on hugetlbfs. The same applies
+        * for direct IO. unmap_hugepage_range() is only being called just
+        * before free_pgtables() so clearing VM_MAYSHARE will not cause
+        * surprises later.
+        */
+       vma->vm_flags &= ~VM_MAYSHARE;
         mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
  }
  
@@ -2959,9 +2975,14 @@ void hugetlb_change_protection(struct vm_area_struct *vma,
                 }
         }
         spin_unlock(&mm->page_table_lock);
-       mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
-
+       /*
+        * Must flush TLB before releasing i_mmap_mutex: x86's huge_pmd_unshare
+        * may have cleared our pud entry and done put_page on the page table:
+        * once we release i_mmap_mutex, another task can do the final put_page
+        * and that page table be reused and filled with junk.
+        */
         flush_tlb_range(vma, start, end);
+       mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
  }
  
  int hugetlb_reserve_pages(struct inode *inode,
author	Mel Gorman <mgorman@suse.de>
	Tue, 31 Jul 2012 23:46:20 +0000 (16:46 -0700)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Wed, 15 Aug 2012 14:53:02 +0000 (07:53 -0700)