mm: get rid of __GFP_OTHER_NODE
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d4a6e40015128c626f606e339474d8b23d90607e..72339a646fb14620d0b11fcda4ca65ce03e7d270 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -285,6 +285,15 @@ static ssize_t use_zero_page_store(struct kobject *kobj,
 }
 static struct kobj_attribute use_zero_page_attr =
        __ATTR(use_zero_page, 0644, use_zero_page_show, use_zero_page_store);
+
+static ssize_t hpage_pmd_size_show(struct kobject *kobj,
+               struct kobj_attribute *attr, char *buf)
+{
+       return sprintf(buf, "%lu\n", HPAGE_PMD_SIZE);
+}
+static struct kobj_attribute hpage_pmd_size_attr =
+       __ATTR_RO(hpage_pmd_size);
+
 #ifdef CONFIG_DEBUG_VM
 static ssize_t debug_cow_show(struct kobject *kobj,
                                struct kobj_attribute *attr, char *buf)
@@ -307,6 +316,7 @@ static struct attribute *hugepage_attr[] = {
        &enabled_attr.attr,
        &defrag_attr.attr,
        &use_zero_page_attr.attr,
+       &hpage_pmd_size_attr.attr,
 #if defined(CONFIG_SHMEM) && defined(CONFIG_TRANSPARENT_HUGE_PAGECACHE)
        &shmem_enabled_attr.attr,
 #endif
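
[Note] The new read-only hpage_pmd_size attribute gives userspace the THP PMD size directly. A minimal reader sketch, assuming the attribute is exposed with the rest of hugepage_attr[] under /sys/kernel/mm/transparent_hugepage/:

	/* Hypothetical standalone reader for the new sysfs file. */
	#include <stdio.h>
	#include <stdlib.h>

	int main(void)
	{
		unsigned long sz;
		FILE *f = fopen("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", "r");

		if (!f || fscanf(f, "%lu", &sz) != 1) {
			perror("hpage_pmd_size");
			return EXIT_FAILURE;
		}
		fclose(f);
		printf("PMD huge page size: %lu bytes\n", sz);
		return EXIT_SUCCESS;
	}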
@@ -532,13 +542,13 @@ unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
 }
 EXPORT_SYMBOL_GPL(thp_get_unmapped_area);
 
-static int __do_huge_pmd_anonymous_page(struct fault_env *fe, struct page *page,
+static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
                gfp_t gfp)
 {
-       struct vm_area_struct *vma = fe->vma;
+       struct vm_area_struct *vma = vmf->vma;
        struct mem_cgroup *memcg;
        pgtable_t pgtable;
-       unsigned long haddr = fe->address & HPAGE_PMD_MASK;
+       unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
 
        VM_BUG_ON_PAGE(!PageCompound(page), page);
 
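[Note] The bulk of this diff is a mechanical rename: struct fault_env becomes struct vm_fault, and every fe-> access turns into vmf->. For orientation, an abridged sketch of the structure as this file uses it (the real definition in include/linux/mm.h carries additional members):

	/* Abridged; only the members touched in this file are shown. */
	struct vm_fault {
		struct vm_area_struct *vma;	/* target VMA */
		unsigned int flags;		/* FAULT_FLAG_xxx flags */
		unsigned long address;		/* faulting virtual address */
		pmd_t *pmd;			/* PMD entry covering 'address' */
		pte_t *pte;			/* PTE pointer, once mapped */
		spinlock_t *ptl;		/* page-table lock to take */
	};
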
@@ -563,9 +573,9 @@ static int __do_huge_pmd_anonymous_page(struct fault_env *fe, struct page *page,
         */
        __SetPageUptodate(page);
 
-       fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
-       if (unlikely(!pmd_none(*fe->pmd))) {
-               spin_unlock(fe->ptl);
+       vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+       if (unlikely(!pmd_none(*vmf->pmd))) {
+               spin_unlock(vmf->ptl);
                mem_cgroup_cancel_charge(page, memcg, true);
                put_page(page);
                pte_free(vma->vm_mm, pgtable);
@@ -576,11 +586,11 @@ static int __do_huge_pmd_anonymous_page(struct fault_env *fe, struct page *page,
                if (userfaultfd_missing(vma)) {
                        int ret;
 
-                       spin_unlock(fe->ptl);
+                       spin_unlock(vmf->ptl);
                        mem_cgroup_cancel_charge(page, memcg, true);
                        put_page(page);
                        pte_free(vma->vm_mm, pgtable);
-                       ret = handle_userfault(fe, VM_UFFD_MISSING);
+                       ret = handle_userfault(vmf, VM_UFFD_MISSING);
                        VM_BUG_ON(ret & VM_FAULT_FALLBACK);
                        return ret;
                }
@@ -590,11 +600,11 @@ static int __do_huge_pmd_anonymous_page(struct fault_env *fe, struct page *page,
                page_add_new_anon_rmap(page, vma, haddr, true);
                mem_cgroup_commit_charge(page, memcg, false, true);
                lru_cache_add_active_or_unevictable(page, vma);
-               pgtable_trans_huge_deposit(vma->vm_mm, fe->pmd, pgtable);
-               set_pmd_at(vma->vm_mm, haddr, fe->pmd, entry);
+               pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
+               set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
                add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
                atomic_long_inc(&vma->vm_mm->nr_ptes);
-               spin_unlock(fe->ptl);
+               spin_unlock(vmf->ptl);
                count_vm_event(THP_FAULT_ALLOC);
        }
 
@@ -641,12 +651,12 @@ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
        return true;
 }
 
-int do_huge_pmd_anonymous_page(struct fault_env *fe)
+int do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 {
-       struct vm_area_struct *vma = fe->vma;
+       struct vm_area_struct *vma = vmf->vma;
        gfp_t gfp;
        struct page *page;
-       unsigned long haddr = fe->address & HPAGE_PMD_MASK;
+       unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
 
        if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
                return VM_FAULT_FALLBACK;
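
[Note] haddr is the fault address rounded down to a PMD boundary, and the range check above rejects VMAs that cannot hold a full huge page. A worked sketch, assuming the common x86-64 case where HPAGE_PMD_SHIFT is 21 (2 MiB pages):

	#define HPAGE_PMD_SIZE	(1UL << 21)		/* 2 MiB, x86-64 default */
	#define HPAGE_PMD_MASK	(~(HPAGE_PMD_SIZE - 1))

	/* 0x7f1234567890 rounds down to the 2 MiB boundary below it: */
	unsigned long haddr = 0x7f1234567890UL & HPAGE_PMD_MASK;
	/* haddr == 0x7f1234400000; the fault falls back to small pages
	 * unless [haddr, haddr + HPAGE_PMD_SIZE) fits inside the VMA. */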
@@ -654,7 +664,7 @@ int do_huge_pmd_anonymous_page(struct fault_env *fe)
                return VM_FAULT_OOM;
        if (unlikely(khugepaged_enter(vma, vma->vm_flags)))
                return VM_FAULT_OOM;
-       if (!(fe->flags & FAULT_FLAG_WRITE) &&
+       if (!(vmf->flags & FAULT_FLAG_WRITE) &&
                        !mm_forbids_zeropage(vma->vm_mm) &&
                        transparent_hugepage_use_zero_page()) {
                pgtable_t pgtable;
@@ -670,22 +680,22 @@ int do_huge_pmd_anonymous_page(struct fault_env *fe)
                        count_vm_event(THP_FAULT_FALLBACK);
                        return VM_FAULT_FALLBACK;
                }
-               fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
+               vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
                ret = 0;
                set = false;
-               if (pmd_none(*fe->pmd)) {
+               if (pmd_none(*vmf->pmd)) {
                        if (userfaultfd_missing(vma)) {
-                               spin_unlock(fe->ptl);
-                               ret = handle_userfault(fe, VM_UFFD_MISSING);
+                               spin_unlock(vmf->ptl);
+                               ret = handle_userfault(vmf, VM_UFFD_MISSING);
                                VM_BUG_ON(ret & VM_FAULT_FALLBACK);
                        } else {
                                set_huge_zero_page(pgtable, vma->vm_mm, vma,
-                                                  haddr, fe->pmd, zero_page);
-                               spin_unlock(fe->ptl);
+                                                  haddr, vmf->pmd, zero_page);
+                               spin_unlock(vmf->ptl);
                                set = true;
                        }
                } else
-                       spin_unlock(fe->ptl);
+                       spin_unlock(vmf->ptl);
                if (!set)
                        pte_free(vma->vm_mm, pgtable);
                return ret;
@@ -697,7 +707,7 @@ int do_huge_pmd_anonymous_page(struct fault_env *fe)
                return VM_FAULT_FALLBACK;
        }
        prep_transhuge_page(page);
-       return __do_huge_pmd_anonymous_page(fe, page, gfp);
+       return __do_huge_pmd_anonymous_page(vmf, page, gfp);
 }
 
 static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
@@ -737,8 +747,9 @@ int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 
        if (addr < vma->vm_start || addr >= vma->vm_end)
                return VM_FAULT_SIGBUS;
-       if (track_pfn_insert(vma, &pgprot, pfn))
-               return VM_FAULT_SIGBUS;
+
+       track_pfn_insert(vma, &pgprot, pfn);
+
        insert_pfn_pmd(vma, addr, pmd, pfn, pgprot, write);
        return VM_FAULT_NOPAGE;
 }
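
[Note] The SIGBUS path disappears because a companion patch in this series makes track_pfn_insert() unable to fail: it now only adjusts the pgprot for PAT. The updated prototype, as best I can tell from the call site:

	/* Returns nothing; the lookup cannot fail for a single pfn. */
	void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
			      pfn_t pfn);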
@@ -868,30 +879,30 @@ out:
        return ret;
 }
 
-void huge_pmd_set_accessed(struct fault_env *fe, pmd_t orig_pmd)
+void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd)
 {
        pmd_t entry;
        unsigned long haddr;
 
-       fe->ptl = pmd_lock(fe->vma->vm_mm, fe->pmd);
-       if (unlikely(!pmd_same(*fe->pmd, orig_pmd)))
+       vmf->ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
+       if (unlikely(!pmd_same(*vmf->pmd, orig_pmd)))
                goto unlock;
 
        entry = pmd_mkyoung(orig_pmd);
-       haddr = fe->address & HPAGE_PMD_MASK;
-       if (pmdp_set_access_flags(fe->vma, haddr, fe->pmd, entry,
-                               fe->flags & FAULT_FLAG_WRITE))
-               update_mmu_cache_pmd(fe->vma, fe->address, fe->pmd);
+       haddr = vmf->address & HPAGE_PMD_MASK;
+       if (pmdp_set_access_flags(vmf->vma, haddr, vmf->pmd, entry,
+                               vmf->flags & FAULT_FLAG_WRITE))
+               update_mmu_cache_pmd(vmf->vma, vmf->address, vmf->pmd);
 
 unlock:
-       spin_unlock(fe->ptl);
+       spin_unlock(vmf->ptl);
 }
 
-static int do_huge_pmd_wp_page_fallback(struct fault_env *fe, pmd_t orig_pmd,
+static int do_huge_pmd_wp_page_fallback(struct vm_fault *vmf, pmd_t orig_pmd,
                struct page *page)
 {
-       struct vm_area_struct *vma = fe->vma;
-       unsigned long haddr = fe->address & HPAGE_PMD_MASK;
+       struct vm_area_struct *vma = vmf->vma;
+       unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
        struct mem_cgroup *memcg;
        pgtable_t pgtable;
        pmd_t _pmd;
@@ -908,9 +919,8 @@ static int do_huge_pmd_wp_page_fallback(struct fault_env *fe, pmd_t orig_pmd,
        }
 
        for (i = 0; i < HPAGE_PMD_NR; i++) {
-               pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE |
-                                              __GFP_OTHER_NODE, vma,
-                                              fe->address, page_to_nid(page));
+               pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE, vma,
+                                              vmf->address, page_to_nid(page));
                if (unlikely(!pages[i] ||
                             mem_cgroup_try_charge(pages[i], vma->vm_mm,
                                     GFP_KERNEL, &memcg, false))) {
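
[Note] This hunk is the one the commit subject refers to: __GFP_OTHER_NODE only told the allocator that the pages were being allocated on behalf of a remote node, so the NUMA vmstat counters would record them as NUMA_OTHER rather than NUMA_LOCAL. The hint is redundant, since the same fact can be derived from the page after allocation. A rough, hypothetical sketch of the idea (numa_account() is not a real kernel function):

	static void numa_account(struct zone *z, struct page *page)
	{
		/* Local if the page sits on the node we are running on. */
		if (page_to_nid(page) == numa_node_id())
			__inc_zone_state(z, NUMA_LOCAL);
		else
			__inc_zone_state(z, NUMA_OTHER);
	}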
@@ -941,15 +951,15 @@ static int do_huge_pmd_wp_page_fallback(struct fault_env *fe, pmd_t orig_pmd,
        mmun_end   = haddr + HPAGE_PMD_SIZE;
        mmu_notifier_invalidate_range_start(vma->vm_mm, mmun_start, mmun_end);
 
-       fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
-       if (unlikely(!pmd_same(*fe->pmd, orig_pmd)))
+       vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+       if (unlikely(!pmd_same(*vmf->pmd, orig_pmd)))
                goto out_free_pages;
        VM_BUG_ON_PAGE(!PageHead(page), page);
 
-       pmdp_huge_clear_flush_notify(vma, haddr, fe->pmd);
+       pmdp_huge_clear_flush_notify(vma, haddr, vmf->pmd);
        /* leave pmd empty until pte is filled */
 
-       pgtable = pgtable_trans_huge_withdraw(vma->vm_mm, fe->pmd);
+       pgtable = pgtable_trans_huge_withdraw(vma->vm_mm, vmf->pmd);
        pmd_populate(vma->vm_mm, &_pmd, pgtable);
 
        for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
@@ -958,20 +968,20 @@ static int do_huge_pmd_wp_page_fallback(struct fault_env *fe, pmd_t orig_pmd,
                entry = maybe_mkwrite(pte_mkdirty(entry), vma);
                memcg = (void *)page_private(pages[i]);
                set_page_private(pages[i], 0);
-               page_add_new_anon_rmap(pages[i], fe->vma, haddr, false);
+               page_add_new_anon_rmap(pages[i], vmf->vma, haddr, false);
                mem_cgroup_commit_charge(pages[i], memcg, false, false);
                lru_cache_add_active_or_unevictable(pages[i], vma);
-               fe->pte = pte_offset_map(&_pmd, haddr);
-               VM_BUG_ON(!pte_none(*fe->pte));
-               set_pte_at(vma->vm_mm, haddr, fe->pte, entry);
-               pte_unmap(fe->pte);
+               vmf->pte = pte_offset_map(&_pmd, haddr);
+               VM_BUG_ON(!pte_none(*vmf->pte));
+               set_pte_at(vma->vm_mm, haddr, vmf->pte, entry);
+               pte_unmap(vmf->pte);
        }
        kfree(pages);
 
        smp_wmb(); /* make pte visible before pmd */
-       pmd_populate(vma->vm_mm, fe->pmd, pgtable);
+       pmd_populate(vma->vm_mm, vmf->pmd, pgtable);
        page_remove_rmap(page, true);
-       spin_unlock(fe->ptl);
+       spin_unlock(vmf->ptl);
 
        mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
 
@@ -982,7 +992,7 @@ out:
        return ret;
 
 out_free_pages:
-       spin_unlock(fe->ptl);
+       spin_unlock(vmf->ptl);
        mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
        for (i = 0; i < HPAGE_PMD_NR; i++) {
                memcg = (void *)page_private(pages[i]);
@@ -994,23 +1004,23 @@ out_free_pages:
        goto out;
 }
 
-int do_huge_pmd_wp_page(struct fault_env *fe, pmd_t orig_pmd)
+int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
 {
-       struct vm_area_struct *vma = fe->vma;
+       struct vm_area_struct *vma = vmf->vma;
        struct page *page = NULL, *new_page;
        struct mem_cgroup *memcg;
-       unsigned long haddr = fe->address & HPAGE_PMD_MASK;
+       unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
        unsigned long mmun_start;       /* For mmu_notifiers */
        unsigned long mmun_end;         /* For mmu_notifiers */
        gfp_t huge_gfp;                 /* for allocation and charge */
        int ret = 0;
 
-       fe->ptl = pmd_lockptr(vma->vm_mm, fe->pmd);
+       vmf->ptl = pmd_lockptr(vma->vm_mm, vmf->pmd);
        VM_BUG_ON_VMA(!vma->anon_vma, vma);
        if (is_huge_zero_pmd(orig_pmd))
                goto alloc;
-       spin_lock(fe->ptl);
-       if (unlikely(!pmd_same(*fe->pmd, orig_pmd)))
+       spin_lock(vmf->ptl);
+       if (unlikely(!pmd_same(*vmf->pmd, orig_pmd)))
                goto out_unlock;
 
        page = pmd_page(orig_pmd);
@@ -1023,13 +1033,13 @@ int do_huge_pmd_wp_page(struct fault_env *fe, pmd_t orig_pmd)
                pmd_t entry;
                entry = pmd_mkyoung(orig_pmd);
                entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
-               if (pmdp_set_access_flags(vma, haddr, fe->pmd, entry,  1))
-                       update_mmu_cache_pmd(vma, fe->address, fe->pmd);
+               if (pmdp_set_access_flags(vma, haddr, vmf->pmd, entry,  1))
+                       update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
                ret |= VM_FAULT_WRITE;
                goto out_unlock;
        }
        get_page(page);
-       spin_unlock(fe->ptl);
+       spin_unlock(vmf->ptl);
 alloc:
        if (transparent_hugepage_enabled(vma) &&
            !transparent_hugepage_debug_cow()) {
@@ -1042,12 +1052,12 @@ alloc:
                prep_transhuge_page(new_page);
        } else {
                if (!page) {
-                       split_huge_pmd(vma, fe->pmd, fe->address);
+                       split_huge_pmd(vma, vmf->pmd, vmf->address);
                        ret |= VM_FAULT_FALLBACK;
                } else {
-                       ret = do_huge_pmd_wp_page_fallback(fe, orig_pmd, page);
+                       ret = do_huge_pmd_wp_page_fallback(vmf, orig_pmd, page);
                        if (ret & VM_FAULT_OOM) {
-                               split_huge_pmd(vma, fe->pmd, fe->address);
+                               split_huge_pmd(vma, vmf->pmd, vmf->address);
                                ret |= VM_FAULT_FALLBACK;
                        }
                        put_page(page);
@@ -1059,7 +1069,7 @@ alloc:
        if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm,
                                        huge_gfp, &memcg, true))) {
                put_page(new_page);
-               split_huge_pmd(vma, fe->pmd, fe->address);
+               split_huge_pmd(vma, vmf->pmd, vmf->address);
                if (page)
                        put_page(page);
                ret |= VM_FAULT_FALLBACK;
@@ -1079,11 +1089,11 @@ alloc:
        mmun_end   = haddr + HPAGE_PMD_SIZE;
        mmu_notifier_invalidate_range_start(vma->vm_mm, mmun_start, mmun_end);
 
-       spin_lock(fe->ptl);
+       spin_lock(vmf->ptl);
        if (page)
                put_page(page);
-       if (unlikely(!pmd_same(*fe->pmd, orig_pmd))) {
-               spin_unlock(fe->ptl);
+       if (unlikely(!pmd_same(*vmf->pmd, orig_pmd))) {
+               spin_unlock(vmf->ptl);
                mem_cgroup_cancel_charge(new_page, memcg, true);
                put_page(new_page);
                goto out_mn;
@@ -1091,12 +1101,12 @@ alloc:
                pmd_t entry;
                entry = mk_huge_pmd(new_page, vma->vm_page_prot);
                entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
-               pmdp_huge_clear_flush_notify(vma, haddr, fe->pmd);
+               pmdp_huge_clear_flush_notify(vma, haddr, vmf->pmd);
                page_add_new_anon_rmap(new_page, vma, haddr, true);
                mem_cgroup_commit_charge(new_page, memcg, false, true);
                lru_cache_add_active_or_unevictable(new_page, vma);
-               set_pmd_at(vma->vm_mm, haddr, fe->pmd, entry);
-               update_mmu_cache_pmd(vma, fe->address, fe->pmd);
+               set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
+               update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
                if (!page) {
                        add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
                } else {
@@ -1106,13 +1116,13 @@ alloc:
                }
                ret |= VM_FAULT_WRITE;
        }
-       spin_unlock(fe->ptl);
+       spin_unlock(vmf->ptl);
 out_mn:
        mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
 out:
        return ret;
 out_unlock:
-       spin_unlock(fe->ptl);
+       spin_unlock(vmf->ptl);
        return ret;
 }
 
@@ -1185,12 +1195,12 @@ out:
 }
 
 /* NUMA hinting page fault entry point for trans huge pmds */
-int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd)
+int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
 {
-       struct vm_area_struct *vma = fe->vma;
+       struct vm_area_struct *vma = vmf->vma;
        struct anon_vma *anon_vma = NULL;
        struct page *page;
-       unsigned long haddr = fe->address & HPAGE_PMD_MASK;
+       unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
        int page_nid = -1, this_nid = numa_node_id();
        int target_nid, last_cpupid = -1;
        bool page_locked;
@@ -1198,8 +1208,8 @@ int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd)
        bool was_writable;
        int flags = 0;
 
-       fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
-       if (unlikely(!pmd_same(pmd, *fe->pmd)))
+       vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+       if (unlikely(!pmd_same(pmd, *vmf->pmd)))
                goto out_unlock;
 
        /*
@@ -1207,9 +1217,9 @@ int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd)
         * without disrupting NUMA hinting information. Do not relock and
         * check_same as the page may no longer be mapped.
         */
-       if (unlikely(pmd_trans_migrating(*fe->pmd))) {
-               page = pmd_page(*fe->pmd);
-               spin_unlock(fe->ptl);
+       if (unlikely(pmd_trans_migrating(*vmf->pmd))) {
+               page = pmd_page(*vmf->pmd);
+               spin_unlock(vmf->ptl);
                wait_on_page_locked(page);
                goto out;
        }
@@ -1242,7 +1252,7 @@ int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd)
 
        /* Migration could have started since the pmd_trans_migrating check */
        if (!page_locked) {
-               spin_unlock(fe->ptl);
+               spin_unlock(vmf->ptl);
                wait_on_page_locked(page);
                page_nid = -1;
                goto out;
@@ -1253,12 +1263,12 @@ int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd)
         * to serialises splits
         */
        get_page(page);
-       spin_unlock(fe->ptl);
+       spin_unlock(vmf->ptl);
        anon_vma = page_lock_anon_vma_read(page);
 
        /* Confirm the PMD did not change while page_table_lock was released */
-       spin_lock(fe->ptl);
-       if (unlikely(!pmd_same(pmd, *fe->pmd))) {
+       spin_lock(vmf->ptl);
+       if (unlikely(!pmd_same(pmd, *vmf->pmd))) {
                unlock_page(page);
                put_page(page);
                page_nid = -1;
@@ -1276,9 +1286,9 @@ int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd)
         * Migrate the THP to the requested node, returns with page unlocked
         * and access rights restored.
         */
-       spin_unlock(fe->ptl);
+       spin_unlock(vmf->ptl);
        migrated = migrate_misplaced_transhuge_page(vma->vm_mm, vma,
-                               fe->pmd, pmd, fe->address, page, target_nid);
+                               vmf->pmd, pmd, vmf->address, page, target_nid);
        if (migrated) {
                flags |= TNF_MIGRATED;
                page_nid = target_nid;
@@ -1293,18 +1303,19 @@ clear_pmdnuma:
        pmd = pmd_mkyoung(pmd);
        if (was_writable)
                pmd = pmd_mkwrite(pmd);
-       set_pmd_at(vma->vm_mm, haddr, fe->pmd, pmd);
-       update_mmu_cache_pmd(vma, fe->address, fe->pmd);
+       set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd);
+       update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
        unlock_page(page);
 out_unlock:
-       spin_unlock(fe->ptl);
+       spin_unlock(vmf->ptl);
 
 out:
        if (anon_vma)
                page_unlock_anon_vma_read(anon_vma);
 
        if (page_nid != -1)
-               task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR, fe->flags);
+               task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR,
+                               vmf->flags);
 
        return 0;
 }
@@ -1322,6 +1333,8 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
        struct mm_struct *mm = tlb->mm;
        bool ret = false;
 
+       tlb_remove_check_page_size_change(tlb, HPAGE_PMD_SIZE);
+
        ptl = pmd_trans_huge_lock(pmd, vma);
        if (!ptl)
                goto out_unlocked;
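
[Note] tlb_remove_check_page_size_change() is introduced elsewhere in this series; it tells the mmu_gather batching code that the caller is about to queue pages of a different size. A hedged sketch of the intent — the stock helper may only record the size for debug checks, while architectures such as ppc64 override it to force a flush on a size change:

	static inline void
	tlb_remove_check_page_size_change(struct mmu_gather *tlb,
					  unsigned int page_size)
	{
		/* Drain queued pages so one flush never mixes granularities. */
		if (tlb->page_size && tlb->page_size != page_size)
			tlb_flush_mmu(tlb);
		tlb->page_size = page_size;
	}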
@@ -1377,12 +1390,23 @@ out_unlocked:
        return ret;
 }
 
+static inline void zap_deposited_table(struct mm_struct *mm, pmd_t *pmd)
+{
+       pgtable_t pgtable;
+
+       pgtable = pgtable_trans_huge_withdraw(mm, pmd);
+       pte_free(mm, pgtable);
+       atomic_long_dec(&mm->nr_ptes);
+}
+
 int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
                 pmd_t *pmd, unsigned long addr)
 {
        pmd_t orig_pmd;
        spinlock_t *ptl;
 
+       tlb_remove_check_page_size_change(tlb, HPAGE_PMD_SIZE);
+
        ptl = __pmd_trans_huge_lock(pmd, vma);
        if (!ptl)
                return 0;
@@ -1398,12 +1422,12 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
        if (vma_is_dax(vma)) {
                spin_unlock(ptl);
                if (is_huge_zero_pmd(orig_pmd))
-                       tlb_remove_page(tlb, pmd_page(orig_pmd));
+                       tlb_remove_page_size(tlb, pmd_page(orig_pmd), HPAGE_PMD_SIZE);
        } else if (is_huge_zero_pmd(orig_pmd)) {
                pte_free(tlb->mm, pgtable_trans_huge_withdraw(tlb->mm, pmd));
                atomic_long_dec(&tlb->mm->nr_ptes);
                spin_unlock(ptl);
-               tlb_remove_page(tlb, pmd_page(orig_pmd));
+               tlb_remove_page_size(tlb, pmd_page(orig_pmd), HPAGE_PMD_SIZE);
        } else {
                struct page *page = pmd_page(orig_pmd);
                page_remove_rmap(page, true);
@@ -1416,6 +1440,8 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
                        atomic_long_dec(&tlb->mm->nr_ptes);
                        add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
                } else {
+                       if (arch_needs_pgtable_deposit())
+                               zap_deposited_table(tlb->mm, pmd);
                        add_mm_counter(tlb->mm, MM_FILEPAGES, -HPAGE_PMD_NR);
                }
                spin_unlock(ptl);
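
[Note] arch_needs_pgtable_deposit() is also new. Most architectures keep the generic stub and never deposit a PTE table for file-backed huge PMDs; ppc64's hash MMU is the case this hook exists for. To the best of my knowledge the generic fallback is simply:

	/* Generic stub; overridden by architectures (e.g. ppc64 hash)
	 * that park a PTE page under every huge PMD mapping. */
	#ifndef arch_needs_pgtable_deposit
	#define arch_needs_pgtable_deposit() (false)
	#endif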
@@ -1424,6 +1450,21 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
        return 1;
 }
 
+#ifndef pmd_move_must_withdraw
+static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl,
+                                        spinlock_t *old_pmd_ptl,
+                                        struct vm_area_struct *vma)
+{
+       /*
+        * With split pmd lock we also need to move preallocated
+        * PTE page table if new_pmd is on different PMD page table.
+        *
+        * We also don't deposit and withdraw tables for file pages.
+        */
+       return (new_pmd_ptl != old_pmd_ptl) && vma_is_anonymous(vma);
+}
+#endif
+
 bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
                  unsigned long new_addr, unsigned long old_end,
                  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
@@ -1461,8 +1502,7 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
                        force_flush = true;
                VM_BUG_ON(!pmd_none(*new_pmd));
 
-               if (pmd_move_must_withdraw(new_ptl, old_ptl) &&
-                               vma_is_anonymous(vma)) {
+               if (pmd_move_must_withdraw(new_ptl, old_ptl, vma)) {
                        pgtable_t pgtable;
                        pgtable = pgtable_trans_huge_withdraw(mm, old_pmd);
                        pgtable_trans_huge_deposit(mm, new_pmd, pgtable);
@@ -1588,6 +1628,12 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 
        if (!vma_is_anonymous(vma)) {
                _pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
+               /*
+                * We are going to unmap this huge page. So
+                * just go ahead and zap it
+                */
+               if (arch_needs_pgtable_deposit())
+                       zap_deposited_table(mm, pmd);
                if (vma_is_dax(vma))
                        return;
                page = pmd_page(_pmd);