From: Gerald Schaefer Date: Fri, 7 Sep 2012 00:23:43 +0000 (+1000) Subject: thp, s390: thp splitting backend for s390 X-Git-Tag: next-20120910~2^2~138 X-Git-Url: https://git.karo-electronics.de/?a=commitdiff_plain;h=6d8e3c3dc19e50f397f003b26c73d31236557e00;p=karo-tx-linux.git thp, s390: thp splitting backend for s390 This patch is part of the architecture backend for thp on s390. It provides the functions related to thp splitting, including serialization against gup. Unlike other archs, pmdp_splitting_flush() cannot use a tlb flushing operation to serialize against gup on s390, because that wouldn't be stopped by the disabled IRQs. So instead, smp_call_function() is called with an empty function, which will have the expected effect. Signed-off-by: Gerald Schaefer Cc: Andrea Arcangeli Cc: Andi Kleen Cc: Hugh Dickins Cc: Hillf Danton Cc: Martin Schwidefsky Cc: Heiko Carstens Signed-off-by: Andrew Morton --- diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 6bd7d7483017..c3f0775e71f1 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -347,6 +347,8 @@ extern struct page *vmemmap; #define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */ #define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */ +#define _SEGMENT_ENTRY_SPLIT_BIT 0 /* THP splitting bit number */ +#define _SEGMENT_ENTRY_SPLIT (1UL << _SEGMENT_ENTRY_SPLIT_BIT) /* Page status table bits for virtualization */ #define RCP_ACC_BITS 0xf000000000000000UL @@ -506,6 +508,10 @@ static inline int pmd_bad(pmd_t pmd) return (pmd_val(pmd) & mask) != _SEGMENT_ENTRY; } +#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH +extern void pmdp_splitting_flush(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp); + static inline int pte_none(pte_t pte) { return (pte_val(pte) & _PAGE_INVALID) && !(pte_val(pte) & _PAGE_SWT); @@ -1159,6 +1165,13 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) #define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address) #define pte_unmap(pte) do { } while (0) +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline int pmd_trans_splitting(pmd_t pmd) +{ + return pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT; +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + /* * 31 bit swap entry format: * A page-table entry has some bits we have to treat in a special way. diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index eeaf8023851f..60acb93a4680 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -115,7 +115,16 @@ static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, pmd = *pmdp; barrier(); next = pmd_addr_end(addr, end); - if (pmd_none(pmd)) + /* + * The pmd_trans_splitting() check below explains why + * pmdp_splitting_flush() has to serialize with + * smp_call_function() against our disabled IRQs, to stop + * this gup-fast code from running while we set the + * splitting bit in the pmd. Returning zero will take + * the slow path that will call wait_split_huge_page() + * if the pmd is still in splitting state. + */ + if (pmd_none(pmd) || pmd_trans_splitting(pmd)) return 0; if (unlikely(pmd_huge(pmd))) { if (!gup_huge_pmd(pmdp, pmd, addr, next, diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index d83ecbf02018..ddae19d63e52 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -866,3 +866,21 @@ bool kernel_page_present(struct page *page) return cc == 0; } #endif /* CONFIG_HIBERNATION && CONFIG_DEBUG_PAGEALLOC */ + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static void pmdp_splitting_flush_sync(void *arg) +{ + /* Simply deliver the interrupt */ +} + +void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, + pmd_t *pmdp) +{ + VM_BUG_ON(address & ~HPAGE_PMD_MASK); + if (!test_and_set_bit(_SEGMENT_ENTRY_SPLIT_BIT, + (unsigned long *) pmdp)) { + /* need to serialize against gup-fast (IRQ disabled) */ + smp_call_function(pmdp_splitting_flush_sync, NULL, 1); + } +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */