thp: freeze khugepaged and ksmd

[mv-sheeva.git] / mm / huge_memory.c
diff --git a/mm/huge_memory.c b/mm/huge_memory.c

index 30c3cec8202396a0cc223361f999fc094083b7ab..45b6d53bcfbc5bec3b5ba81c20d1abb7473933e6 100644 (file)
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -15,6 +15,7 @@
  #include <linux/mm_inline.h>
  #include <linux/kthread.h>
  #include <linux/khugepaged.h>
+#include <linux/freezer.h>
  #include <asm/tlb.h>
  #include <asm/pgalloc.h>
  #include "internal.h"
@@ -487,7 +488,15 @@ static int __init hugepage_init(void)
         int err;
  #ifdef CONFIG_SYSFS
         static struct kobject *hugepage_kobj;
+#endif
+
+       err = -EINVAL;
+       if (!has_transparent_hugepage()) {
+               transparent_hugepage_flags = 0;
+               goto out;
+       }
  
+#ifdef CONFIG_SYSFS
         err = -ENOMEM;
         hugepage_kobj = kobject_create_and_add("transparent_hugepage", mm_kobj);
         if (unlikely(!hugepage_kobj)) {
@@ -1075,8 +1084,16 @@ pmd_t *page_check_address_pmd(struct page *page,
                 goto out;
         if (pmd_page(*pmd) != page)
                 goto out;
-       VM_BUG_ON(flag == PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG &&
-                 pmd_trans_splitting(*pmd));
+       /*
+        * split_vma() may create temporary aliased mappings. There is
+        * no risk as long as all huge pmd are found and have their
+        * splitting bit set before __split_huge_page_refcount
+        * runs. Finding the same huge pmd more than once during the
+        * same rmap walk is not a problem.
+        */
+       if (flag == PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG &&
+           pmd_trans_splitting(*pmd))
+               goto out;
         if (pmd_trans_huge(*pmd)) {
                 VM_BUG_ON(flag == PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG &&
                           !pmd_trans_splitting(*pmd));
@@ -1616,7 +1633,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
                 VM_BUG_ON(PageLRU(page));
  
                 /* If there is no mapped pte young don't collapse the page */
-               if (pte_young(pteval))
+               if (pte_young(pteval) || PageReferenced(page) ||
+                   mmu_notifier_test_young(vma->vm_mm, address))
                         referenced = 1;
         }
         if (unlikely(!referenced))
@@ -1876,7 +1894,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
                 /* cannot use mapcount: can't collapse if there's a gup pin */
                 if (page_count(page) != 1)
                         goto out_unmap;
-               if (pte_young(pteval))
+               if (pte_young(pteval) || PageReferenced(page) ||
+                   mmu_notifier_test_young(vma->vm_mm, address))
                         referenced = 1;
         }
         if (referenced)
@@ -2067,6 +2086,9 @@ static void khugepaged_do_scan(struct page **hpage)
                         break;
  #endif
  
+               if (unlikely(kthread_should_stop() || freezing(current)))
+                       break;
+
                 spin_lock(&khugepaged_mm_lock);
                 if (!khugepaged_scan.mm_slot)
                         pass_through_head++;
@@ -2129,6 +2151,9 @@ static void khugepaged_loop(void)
                 if (hpage)
                         put_page(hpage);
  #endif
+               try_to_freeze();
+               if (unlikely(kthread_should_stop()))
+                       break;
                 if (khugepaged_has_work()) {
                         DEFINE_WAIT(wait);
                         if (!khugepaged_scan_sleep_millisecs)
@@ -2139,8 +2164,8 @@ static void khugepaged_loop(void)
                                         khugepaged_scan_sleep_millisecs));
                         remove_wait_queue(&khugepaged_wait, &wait);
                 } else if (khugepaged_enabled())
-                       wait_event_interruptible(khugepaged_wait,
-                                                khugepaged_wait_event());
+                       wait_event_freezable(khugepaged_wait,
+                                            khugepaged_wait_event());
         }
  }
  
@@ -2148,6 +2173,7 @@ static int khugepaged(void *none)
  {
         struct mm_slot *mm_slot;
  
+       set_freezable();
         set_user_nice(current, 19);
  
         /* serialize with start_khugepaged() */
@@ -2162,6 +2188,8 @@ static int khugepaged(void *none)
                 mutex_lock(&khugepaged_mutex);
                 if (!khugepaged_enabled())
                         break;
+               if (unlikely(kthread_should_stop()))
+                       break;
         }
  
         spin_lock(&khugepaged_mm_lock);
@@ -2196,3 +2224,71 @@ void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd)
         put_page(page);
         BUG_ON(pmd_trans_huge(*pmd));
  }
+
+static void split_huge_page_address(struct mm_struct *mm,
+                                   unsigned long address)
+{
+       pgd_t *pgd;
+       pud_t *pud;
+       pmd_t *pmd;
+
+       VM_BUG_ON(!(address & ~HPAGE_PMD_MASK)); /* address must not be hpage aligned */
+
+       pgd = pgd_offset(mm, address);
+       if (!pgd_present(*pgd))
+               return;
+
+       pud = pud_offset(pgd, address);
+       if (!pud_present(*pud))
+               return;
+
+       pmd = pmd_offset(pud, address);
+       if (!pmd_present(*pmd))
+               return;
+       /*
+        * Caller holds mmap_sem in write mode, so a huge pmd cannot
+        * materialize from under us.
+        */
+       split_huge_page_pmd(mm, pmd);
+}
+
+void __vma_adjust_trans_huge(struct vm_area_struct *vma,
+                            unsigned long start,
+                            unsigned long end,
+                            long adjust_next)
+{
+       /*
+        * If the new start address isn't hpage aligned and it could
+        * previously contain a hugepage: check if we need to split
+        * a huge pmd.
+        */
+       if (start & ~HPAGE_PMD_MASK &&
+           (start & HPAGE_PMD_MASK) >= vma->vm_start &&
+           (start & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= vma->vm_end)
+               split_huge_page_address(vma->vm_mm, start);
+
+       /*
+        * If the new end address isn't hpage aligned and it could
+        * previously contain a hugepage: check if we need to split
+        * a huge pmd.
+        */
+       if (end & ~HPAGE_PMD_MASK &&
+           (end & HPAGE_PMD_MASK) >= vma->vm_start &&
+           (end & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= vma->vm_end)
+               split_huge_page_address(vma->vm_mm, end);
+
+       /*
+        * If we're also updating vma->vm_next->vm_start (by adjust_next
+        * pages) and the new vm_next->vm_start isn't hpage aligned and it
+        * could previously contain a hugepage: check if we need to split it.
+        */
+       if (adjust_next > 0) {
+               struct vm_area_struct *next = vma->vm_next;
+               unsigned long nstart = next->vm_start;
+               nstart += adjust_next << PAGE_SHIFT;
+               if (nstart & ~HPAGE_PMD_MASK &&
+                   (nstart & HPAGE_PMD_MASK) >= next->vm_start &&
+                   (nstart & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= next->vm_end)
+                       split_huge_page_address(next->vm_mm, nstart);
+       }
+}