From 39d6cb39a81744473e13c693a9f988a9e342018b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 17 Jul 2012 22:54:51 +0200 Subject: [PATCH] mm/mpol: Use special PROT_NONE to migrate pages Combine our previous PROT_NONE, mpol_misplaced and migrate_misplaced_page() pieces into an effective migrate-on-fault scheme. Suggested-by: Rik van Riel Signed-off-by: Peter Zijlstra Reviewed-by: Rik van Riel Cc: Paul Turner Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/n/tip-e98gyl8kr9jzooh2s4piuils@git.kernel.org Signed-off-by: Ingo Molnar --- mm/huge_memory.c | 41 ++++++++++++++++++++++++++++++++++++++++- mm/memory.c | 42 ++++++++++++++++++++++++++++++++++++------ 2 files changed, 76 insertions(+), 7 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 5d7b11480270..a147d29ba2fd 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include "internal.h" @@ -766,12 +767,48 @@ void do_huge_pmd_prot_none(struct mm_struct *mm, struct vm_area_struct *vma, unsigned int flags, pmd_t entry) { unsigned long haddr = address & HPAGE_PMD_MASK; + struct page *page = NULL; + int node; spin_lock(&mm->page_table_lock); if (unlikely(!pmd_same(*pmd, entry))) goto out_unlock; - /* do fancy stuff */ + if (unlikely(pmd_trans_splitting(entry))) { + spin_unlock(&mm->page_table_lock); + wait_split_huge_page(vma->anon_vma, pmd); + return; + } + +#ifdef CONFIG_NUMA + page = pmd_page(entry); + VM_BUG_ON(!PageCompound(page) || !PageHead(page)); + + get_page(page); + spin_unlock(&mm->page_table_lock); + + /* + * XXX should we serialize against split_huge_page ? + */ + + node = mpol_misplaced(page, vma, haddr); + if (node == -1) + goto do_fixup; + + /* + * Due to lacking code to migrate thp pages, we'll split + * (which preserves the special PROT_NONE) and re-take the + * fault on the normal pages. 
+ */ + split_huge_page(page); + put_page(page); + return; + +do_fixup: + spin_lock(&mm->page_table_lock); + if (unlikely(!pmd_same(*pmd, entry))) + goto out_unlock; +#endif /* change back to regular protection */ entry = pmd_modify(entry, vma->vm_page_prot); @@ -780,6 +817,8 @@ void do_huge_pmd_prot_none(struct mm_struct *mm, struct vm_area_struct *vma, out_unlock: spin_unlock(&mm->page_table_lock); + if (page) + put_page(page); } int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, diff --git a/mm/memory.c b/mm/memory.c index bea2ed5f4021..d896a2438eae 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include @@ -3441,16 +3442,41 @@ static int do_prot_none(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *ptep, pmd_t *pmd, unsigned int flags, pte_t entry) { + struct page *page = NULL; spinlock_t *ptl; - int ret = 0; + int node; - if (!pte_unmap_same(mm, pmd, ptep, entry)) - goto out; + ptl = pte_lockptr(mm, pmd); + spin_lock(ptl); + if (unlikely(!pte_same(*ptep, entry))) + goto unlock; +#ifdef CONFIG_NUMA /* - * Do fancy stuff... + * For NUMA systems we use the special PROT_NONE maps to drive + * lazy page migration, see MPOL_MF_LAZY and related. */ + page = vm_normal_page(vma, address, entry); + if (!page) + goto do_fixup_locked; + + get_page(page); + pte_unmap_unlock(ptep, ptl); + + node = mpol_misplaced(page, vma, address); + if (node == -1) + goto do_fixup; + /* + * Page migration will install a new pte with vma->vm_page_prot, + * otherwise fall-through to the fixup. Next time,.. perhaps. + */ + if (!migrate_misplaced_page(mm, page, node)) { + put_page(page); + return 0; + } + +do_fixup: /* * OK, nothing to do,.. change the protection back to what it * ought to be. 
@@ -3459,6 +3485,9 @@ static int do_prot_none(struct mm_struct *mm, struct vm_area_struct *vma, if (unlikely(!pte_same(*ptep, entry))) goto unlock; +do_fixup_locked: +#endif /* CONFIG_NUMA */ + flush_cache_page(vma, address, pte_pfn(entry)); ptep_modify_prot_start(mm, address, ptep); @@ -3468,8 +3497,9 @@ static int do_prot_none(struct mm_struct *mm, struct vm_area_struct *vma, update_mmu_cache(vma, address, ptep); unlock: pte_unmap_unlock(ptep, ptl); -out: - return ret; + if (page) + put_page(page); + return 0; } /* -- 2.39.2