Merge branch 'for-3.20/bdi' of git://git.kernel.dk/linux-block

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 817a875f2b8c0338dc9e56511b92d99a7ff7b992..cb7be110cad33a956f90cdf654a42f38b445c79e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -171,12 +171,7 @@ static int start_khugepaged(void)
 }
 
 static atomic_t huge_zero_refcount;
-static struct page *huge_zero_page __read_mostly;
-
-static inline bool is_huge_zero_page(struct page *page)
-{
-       return ACCESS_ONCE(huge_zero_page) == page;
-}
+struct page *huge_zero_page __read_mostly;
 
 static inline bool is_huge_zero_pmd(pmd_t pmd)
 {
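
huge_zero_page loses its "static" here and the is_huge_zero_page() helper
disappears from this file. Presumably the helper moves to a shared header
(include/linux/huge_mm.h would be the natural home) so code outside
huge_memory.c can recognize the huge zero page; a minimal sketch of that
header version, assuming it keeps the same ACCESS_ONCE comparison as the
deleted body:

	extern struct page *huge_zero_page;

	static inline bool is_huge_zero_page(struct page *page)
	{
		return ACCESS_ONCE(huge_zero_page) == page;
	}

is_huge_zero_pmd() itself stays behind, as the surrounding context shows.
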
@@ -766,15 +761,6 @@ static inline gfp_t alloc_hugepage_gfpmask(int defrag, gfp_t extra_gfp)
        return (GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_WAIT)) | extra_gfp;
 }
 
-static inline struct page *alloc_hugepage_vma(int defrag,
-                                             struct vm_area_struct *vma,
-                                             unsigned long haddr, int nd,
-                                             gfp_t extra_gfp)
-{
-       return alloc_pages_vma(alloc_hugepage_gfpmask(defrag, extra_gfp),
-                              HPAGE_PMD_ORDER, vma, haddr, nd);
-}
-
 /* Caller must hold page table lock. */
 static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
                struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd,
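
The file-local alloc_hugepage_vma() wrapper is dropped, and its two jobs,
computing the gfp mask and performing the allocation, are split apart.
Callers now invoke alloc_hugepage_gfpmask() themselves and hand the result
to what is presumably a generic alloc_hugepage_vma() that takes an explicit
gfp mask and allocation order. The new call pattern, taken verbatim from
the hunks below:

	gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0);
	page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);

One visible consequence: numa_node_id() no longer appears at the call
sites, so node selection must now happen inside the generic helper.
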
@@ -795,6 +781,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
                               unsigned long address, pmd_t *pmd,
                               unsigned int flags)
 {
+       gfp_t gfp;
        struct page *page;
        unsigned long haddr = address & HPAGE_PMD_MASK;
 
@@ -829,8 +816,8 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
                }
                return 0;
        }
-       page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
-                       vma, haddr, numa_node_id(), 0);
+       gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0);
+       page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
        if (unlikely(!page)) {
                count_vm_event(THP_FAULT_FALLBACK);
                return VM_FAULT_FALLBACK;
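
This is the fault-path half of that conversion; the COW path in
do_huge_pmd_wp_page() gets the identical two-line pattern in the next hunk.
The mask still decides the fault's latency profile: alloc_hugepage_gfpmask()
starts from GFP_TRANSHUGE and strips __GFP_WAIT when defragmentation is
disabled for the VMA, so a failed huge allocation returns immediately and
the VM_FAULT_FALLBACK path above services the fault with small pages.
Expanded, the two possible masks (extra_gfp is 0 here):

	gfp_t gfp_defrag   = GFP_TRANSHUGE;			/* may reclaim/compact */
	gfp_t gfp_nodefrag = GFP_TRANSHUGE & ~__GFP_WAIT;	/* fail fast */
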
@@ -1118,10 +1105,12 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
        spin_unlock(ptl);
 alloc:
        if (transparent_hugepage_enabled(vma) &&
-           !transparent_hugepage_debug_cow())
-               new_page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
-                                             vma, haddr, numa_node_id(), 0);
-       else
+           !transparent_hugepage_debug_cow()) {
+               gfp_t gfp;
+
+               gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0);
+               new_page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
+       } else
                new_page = NULL;
 
        if (unlikely(!new_page)) {
@@ -1423,26 +1412,6 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
        return ret;
 }
 
-int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
-               unsigned long addr, unsigned long end,
-               unsigned char *vec)
-{
-       spinlock_t *ptl;
-       int ret = 0;
-
-       if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
-               /*
-                * All logical pages in the range are present
-                * if backed by a huge page.
-                */
-               spin_unlock(ptl);
-               memset(vec, 1, (end - addr) >> PAGE_SHIFT);
-               ret = 1;
-       }
-
-       return ret;
-}
-
 int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
                  unsigned long old_addr,
                  unsigned long new_addr, unsigned long old_end,
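
mincore_huge_pmd() is removed outright and nothing in this file replaces
it, so the huge-pmd case has presumably moved into mm/mincore.c itself,
likely as part of a conversion of mincore() to the generic page-table
walker. The invariant being preserved is simple: under the pmd lock, a
huge pmd means every page in the range is resident. A hedged sketch of how
a walker-based version could express this (the callback name and walker
details are assumptions, not taken from this diff):

	static int mincore_pte_range(pmd_t *pmd, unsigned long addr,
				     unsigned long end, struct mm_walk *walk)
	{
		spinlock_t *ptl;
		unsigned char *vec = walk->private;

		if (pmd_trans_huge_lock(pmd, walk->vma, &ptl) == 1) {
			/* a huge page backs the range: all pages present */
			memset(vec, 1, (end - addr) >> PAGE_SHIFT);
			spin_unlock(ptl);
			return 0;
		}

		/* ...otherwise walk the individual ptes (elided)... */
		return 0;
	}
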
@@ -2148,7 +2117,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 {
        struct page *page;
        pte_t *_pte;
-       int referenced = 0, none = 0;
+       int none = 0;
+       bool referenced = false, writable = false;
        for (_pte = pte; _pte < pte+HPAGE_PMD_NR;
             _pte++, address += PAGE_SIZE) {
                pte_t pteval = *_pte;
@@ -2158,7 +2128,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
                        else
                                goto out;
                }
-               if (!pte_present(pteval) || !pte_write(pteval))
+               if (!pte_present(pteval))
                        goto out;
                page = vm_normal_page(vma, address, pteval);
                if (unlikely(!page))
@@ -2168,9 +2138,6 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
                VM_BUG_ON_PAGE(!PageAnon(page), page);
                VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
 
-               /* cannot use mapcount: can't collapse if there's a gup pin */
-               if (page_count(page) != 1)
-                       goto out;
                /*
                 * We can do it before isolate_lru_page because the
                 * page can't be freed from under us. NOTE: PG_lock
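
The gup-pin check deleted here is not gone: the next hunk reinstates it
after trylock_page(), so the refcount is sampled while the page is locked,
and the expected count is extended to tolerate the extra reference a
swap-cache page holds. The accounting behind the new test:

	/*
	 * Expected references on a collapse candidate:
	 *   1                       - the single pte mapping found by the scan
	 *   + !!PageSwapCache(page) - the swap cache's reference, if any
	 * Anything beyond that (e.g. a get_user_pages() pin) means the page
	 * must not be collapsed out from under its user.
	 */
	if (page_count(page) != 1 + !!PageSwapCache(page))
		goto out;	/* after unlock_page(), as the hunk shows */
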
@@ -2179,6 +2146,29 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
                 */
                if (!trylock_page(page))
                        goto out;
+
+               /*
+                * cannot use mapcount: can't collapse if there's a gup pin.
+                * The page must only be referenced by the scanned process
+                * and page swap cache.
+                */
+               if (page_count(page) != 1 + !!PageSwapCache(page)) {
+                       unlock_page(page);
+                       goto out;
+               }
+               if (pte_write(pteval)) {
+                       writable = true;
+               } else {
+                       if (PageSwapCache(page) && !reuse_swap_page(page)) {
+                               unlock_page(page);
+                               goto out;
+                       }
+                       /*
+                        * Page is not in the swap cache. It can be collapsed
+                        * into a THP.
+                        */
+               }
+
                /*
                 * Isolate the page to avoid collapsing an hugepage
                 * currently in use by the VM.
@@ -2195,9 +2185,9 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
                /* If there is no mapped pte young don't collapse the page */
                if (pte_young(pteval) || PageReferenced(page) ||
                    mmu_notifier_test_young(vma->vm_mm, address))
-                       referenced = 1;
+                       referenced = true;
        }
-       if (likely(referenced))
+       if (likely(referenced && writable))
                return 1;
 out:
        release_pte_pages(pte, _pte);
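
Taken together, these hunks relax the collapse policy in
__collapse_huge_page_isolate(). Previously every pte had to be present and
writable; now every pte must be present, at least one must be writable, and
read-only pages get one extra safety check: if such a page still sits in
the swap cache, reuse_swap_page() must confirm it can be reused exclusively
by this mm, since collapsing a still-shared swap-cache page would fold
someone else's data into a writable huge page. A hypothetical predicate
condensing the per-page policy (illustration only, not a kernel function):

	static bool collapse_page_ok(pte_t pteval, struct page *page,
				     bool *writable)
	{
		if (pte_write(pteval)) {
			*writable = true;	/* one writable pte suffices */
			return true;
		}
		/*
		 * Read-only is acceptable unless a swap-cache copy might
		 * still be shared; reuse_swap_page() needs the page locked,
		 * which holds here (trylock_page() succeeded above).
		 */
		return !PageSwapCache(page) || reuse_swap_page(page);
	}

The final test, referenced && writable, then keeps khugepaged from
collapsing ranges that are either cold or entirely read-only.
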
@@ -2550,11 +2540,12 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 {
        pmd_t *pmd;
        pte_t *pte, *_pte;
-       int ret = 0, referenced = 0, none = 0;
+       int ret = 0, none = 0;
        struct page *page;
        unsigned long _address;
        spinlock_t *ptl;
        int node = NUMA_NO_NODE;
+       bool writable = false, referenced = false;
 
        VM_BUG_ON(address & ~HPAGE_PMD_MASK);
 
@@ -2573,8 +2564,11 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
                        else
                                goto out_unmap;
                }
-               if (!pte_present(pteval) || !pte_write(pteval))
+               if (!pte_present(pteval))
                        goto out_unmap;
+               if (pte_write(pteval))
+                       writable = true;
+
                page = vm_normal_page(vma, _address, pteval);
                if (unlikely(!page))
                        goto out_unmap;
@@ -2591,14 +2585,18 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
                VM_BUG_ON_PAGE(PageCompound(page), page);
                if (!PageLRU(page) || PageLocked(page) || !PageAnon(page))
                        goto out_unmap;
-               /* cannot use mapcount: can't collapse if there's a gup pin */
-               if (page_count(page) != 1)
+               /*
+                * cannot use mapcount: can't collapse if there's a gup pin.
+                * The page must only be referenced by the scanned process
+                * and page swap cache.
+                */
+               if (page_count(page) != 1 + !!PageSwapCache(page))
                        goto out_unmap;
                if (pte_young(pteval) || PageReferenced(page) ||
                    mmu_notifier_test_young(vma->vm_mm, address))
-                       referenced = 1;
+                       referenced = true;
        }
-       if (referenced)
+       if (referenced && writable)
                ret = 1;
 out_unmap:
        pte_unmap_unlock(pte, ptl);
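
khugepaged_scan_pmd() mirrors the same policy at scan time, so a collapse
is only attempted when __collapse_huge_page_isolate() can plausibly
succeed: the scan likewise accepts read-only ptes, aggregates writability
into the new writable flag, applies the swap-cache-aware pin check, and
reports a candidate only when referenced && writable. As a worked example
of the behavioral change: a 2MB-aligned region where one page was written
and the rest were faulted in read-only (say, just swapped back in) could
never be collapsed before, because the first !pte_write() pte aborted the
scan; now it qualifies, while never-written or gup-pinned ranges are still
skipped.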