X-Git-Url: https://git.karo-electronics.de/?a=blobdiff_plain;f=mm%2Fhuge_memory.c;h=827d9c81305115d4d3b0b4c22dfadf2fc20d2a10;hb=e05a1c6397a73d09389e033b6b2c25c954d2177c;hp=9539d6654bb90e8ab7f8078dd96cdbabd21a6903;hpb=78ca0e679203bbf74f8febd9725a1c8dd083d073;p=karo-tx-linux.git diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 9539d6654bb9..827d9c813051 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -12,12 +12,14 @@ #include #include #include +#include #include #include #include #include #include #include + #include #include #include "internal.h" @@ -37,7 +39,8 @@ unsigned long transparent_hugepage_flags __read_mostly = (1<nr_to_scan) + /* we can free zero page only if last reference remains */ + return atomic_read(&huge_zero_refcount) == 1 ? HPAGE_PMD_NR : 0; + + if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) { + unsigned long zero_pfn = xchg(&huge_zero_pfn, 0); + BUG_ON(zero_pfn == 0); + __free_page(__pfn_to_page(zero_pfn)); + } + + return 0; } +static struct shrinker huge_zero_page_shrinker = { + .shrink = shrink_huge_zero_page, + .seeks = DEFAULT_SEEKS, +}; + #ifdef CONFIG_SYSFS static ssize_t double_flag_show(struct kobject *kobj, @@ -310,6 +358,20 @@ static ssize_t defrag_store(struct kobject *kobj, static struct kobj_attribute defrag_attr = __ATTR(defrag, 0644, defrag_show, defrag_store); +static ssize_t use_zero_page_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return single_flag_show(kobj, attr, buf, + TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG); +} +static ssize_t use_zero_page_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + return single_flag_store(kobj, attr, buf, count, + TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG); +} +static struct kobj_attribute use_zero_page_attr = + __ATTR(use_zero_page, 0644, use_zero_page_show, use_zero_page_store); #ifdef CONFIG_DEBUG_VM static ssize_t debug_cow_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -331,6 +393,7 @@ static struct kobj_attribute debug_cow_attr = static struct attribute *hugepage_attr[] = { &enabled_attr.attr, &defrag_attr.attr, + &use_zero_page_attr.attr, #ifdef CONFIG_DEBUG_VM &debug_cow_attr.attr, #endif @@ -576,6 +639,8 @@ static int __init hugepage_init(void) goto out; } + register_shrinker(&huge_zero_page_shrinker); + /* * By default disable transparent hugepages on smaller systems, * where the extra memory used could hurt more than TLB overhead @@ -704,16 +769,20 @@ static inline struct page *alloc_hugepage(int defrag) } #endif -static void set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm, - struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd) +static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm, + struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd, + unsigned long zero_pfn) { pmd_t entry; - entry = pfn_pmd(huge_zero_pfn, vma->vm_page_prot); + if (!pmd_none(*pmd)) + return false; + entry = pfn_pmd(zero_pfn, vma->vm_page_prot); entry = pmd_wrprotect(entry); entry = pmd_mkhuge(entry); set_pmd_at(mm, haddr, pmd, entry); pgtable_trans_huge_deposit(mm, pgtable); mm->nr_ptes++; + return true; } int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, @@ -729,18 +798,28 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, return VM_FAULT_OOM; if (unlikely(khugepaged_enter(vma))) return VM_FAULT_OOM; - if (!(flags & FAULT_FLAG_WRITE)) { + if (!(flags & FAULT_FLAG_WRITE) && + transparent_hugepage_use_zero_page()) { pgtable_t pgtable; - if (unlikely(!huge_zero_pfn && init_huge_zero_pfn())) { - count_vm_event(THP_FAULT_FALLBACK); - goto out; - } + unsigned long zero_pfn; + bool set; pgtable = pte_alloc_one(mm, haddr); if (unlikely(!pgtable)) return VM_FAULT_OOM; + zero_pfn = get_huge_zero_page(); + if (unlikely(!zero_pfn)) { + pte_free(mm, pgtable); + count_vm_event(THP_FAULT_FALLBACK); + goto out; + } spin_lock(&mm->page_table_lock); - set_huge_zero_page(pgtable, mm, vma, haddr, pmd); + set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd, + zero_pfn); spin_unlock(&mm->page_table_lock); + if (!set) { + pte_free(mm, pgtable); + put_huge_zero_page(); + } return 0; } page = alloc_hugepage_vma(transparent_hugepage_defrag(vma), @@ -813,7 +892,17 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, * a page table. */ if (is_huge_zero_pmd(pmd)) { - set_huge_zero_page(pgtable, dst_mm, vma, addr, dst_pmd); + unsigned long zero_pfn; + bool set; + /* + * get_huge_zero_page() will never allocate a new page here, + * since we already have a zero page to copy. It just takes a + * reference. + */ + zero_pfn = get_huge_zero_page(); + set = set_huge_zero_page(pgtable, dst_mm, vma, addr, dst_pmd, + zero_pfn); + BUG_ON(!set); /* unexpected !pmd_none(dst_pmd) */ ret = 0; goto out_unlock; } @@ -870,7 +959,7 @@ unlock: static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, - pmd_t *pmd, unsigned long haddr) + pmd_t *pmd, pmd_t orig_pmd, unsigned long haddr) { pgtable_t pgtable; pmd_t _pmd; @@ -899,6 +988,9 @@ static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm, mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); spin_lock(&mm->page_table_lock); + if (unlikely(!pmd_same(*pmd, orig_pmd))) + goto out_free_page; + pmdp_clear_flush(vma, haddr, pmd); /* leave pmd empty until pte is filled */ @@ -923,6 +1015,7 @@ static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm, smp_wmb(); /* make pte visible before pmd */ pmd_populate(mm, pmd, pgtable); spin_unlock(&mm->page_table_lock); + put_huge_zero_page(); inc_mm_counter(mm, MM_ANONPAGES); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); @@ -930,6 +1023,12 @@ static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm, ret |= VM_FAULT_WRITE; out: return ret; +out_free_page: + spin_unlock(&mm->page_table_lock); + mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); + mem_cgroup_uncharge_page(page); + put_page(page); + goto out; } static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, @@ -1076,7 +1175,7 @@ alloc: count_vm_event(THP_FAULT_FALLBACK); if (is_huge_zero_pmd(orig_pmd)) { ret = do_huge_pmd_wp_zero_page_fallback(mm, vma, - address, pmd, haddr); + address, pmd, orig_pmd, haddr); } else { ret = do_huge_pmd_wp_page_fallback(mm, vma, address, pmd, orig_pmd, page, haddr); @@ -1123,9 +1222,10 @@ alloc: page_add_new_anon_rmap(new_page, vma, haddr); set_pmd_at(mm, haddr, pmd, entry); update_mmu_cache_pmd(vma, address, pmd); - if (is_huge_zero_pmd(orig_pmd)) + if (is_huge_zero_pmd(orig_pmd)) { add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR); - else { + put_huge_zero_page(); + } else { VM_BUG_ON(!PageHead(page)); page_remove_rmap(page); put_page(page); @@ -1202,6 +1302,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, if (is_huge_zero_pmd(orig_pmd)) { tlb->mm->nr_ptes--; spin_unlock(&tlb->mm->page_table_lock); + put_huge_zero_page(); } else { page = pmd_page(orig_pmd); page_remove_rmap(page); @@ -2511,6 +2612,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma, } smp_wmb(); /* make pte visible before pmd */ pmd_populate(mm, pmd, pgtable); + put_huge_zero_page(); } void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address,