git.karo-electronics.de Git - karo-tx-linux.git/blobdiff - mm/gup.c
Add linux-next specific files for 20151105
[karo-tx-linux.git] / mm / gup.c
index a798293fc6486bac215ecb58ed071263a5f775f0..e95b0cb6ed8173e80ecf3ae7d8798b763b46f965 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -116,8 +116,21 @@ retry:
                }
        }
 
+       if (flags & FOLL_SPLIT && PageTransCompound(page)) {
+               int ret;
+               get_page(page);
+               pte_unmap_unlock(ptep, ptl);
+               lock_page(page);
+               ret = split_huge_page(page);
+               unlock_page(page);
+               put_page(page);
+               if (ret)
+                       return ERR_PTR(ret);
+               goto retry;
+       }
+
        if (flags & FOLL_GET)
-               get_page_foll(page);
+               get_page(page);
        if (flags & FOLL_TOUCH) {
                if ((flags & FOLL_WRITE) &&
                    !pte_dirty(pte) && !PageDirty(page))
@@ -129,7 +142,11 @@ retry:
                 */
                mark_page_accessed(page);
        }
-       if ((flags & FOLL_POPULATE) && (vma->vm_flags & VM_LOCKED)) {
+       if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
+               /* Do not mlock pte-mapped THP */
+               if (PageTransCompound(page))
+                       goto out;
+
                /*
                 * The preliminary mapping check is mainly to avoid the
                 * pointless overhead of lock_page on the ZERO_PAGE
@@ -220,27 +237,38 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
        }
        if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
                return no_page_table(vma, flags);
-       if (pmd_trans_huge(*pmd)) {
-               if (flags & FOLL_SPLIT) {
-                       split_huge_page_pmd(vma, address, pmd);
-                       return follow_page_pte(vma, address, pmd, flags);
-               }
-               ptl = pmd_lock(mm, pmd);
-               if (likely(pmd_trans_huge(*pmd))) {
-                       if (unlikely(pmd_trans_splitting(*pmd))) {
-                               spin_unlock(ptl);
-                               wait_split_huge_page(vma->anon_vma, pmd);
-                       } else {
-                               page = follow_trans_huge_pmd(vma, address,
-                                                            pmd, flags);
-                               spin_unlock(ptl);
-                               *page_mask = HPAGE_PMD_NR - 1;
-                               return page;
-                       }
-               } else
+       if (likely(!pmd_trans_huge(*pmd)))
+               return follow_page_pte(vma, address, pmd, flags);
+
+       ptl = pmd_lock(mm, pmd);
+       if (unlikely(!pmd_trans_huge(*pmd))) {
+               spin_unlock(ptl);
+               return follow_page_pte(vma, address, pmd, flags);
+       }
+       if (flags & FOLL_SPLIT) {
+               int ret;
+               page = pmd_page(*pmd);
+               if (is_huge_zero_page(page)) {
+                       spin_unlock(ptl);
+                       ret = 0;
+                       split_huge_pmd(vma, pmd, address);
+               } else {
+                       get_page(page);
                        spin_unlock(ptl);
+                       lock_page(page);
+                       ret = split_huge_page(page);
+                       unlock_page(page);
+                       put_page(page);
+               }
+
+               return ret ? ERR_PTR(ret) :
+                       follow_page_pte(vma, address, pmd, flags);
        }
-       return follow_page_pte(vma, address, pmd, flags);
+
+       page = follow_trans_huge_pmd(vma, address, pmd, flags);
+       spin_unlock(ptl);
+       *page_mask = HPAGE_PMD_NR - 1;
+       return page;
 }
 
 static int get_gate_page(struct mm_struct *mm, unsigned long address,
@@ -299,6 +327,9 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
        unsigned int fault_flags = 0;
        int ret;
 
+       /* mlock all present pages, but do not fault in new pages */
+       if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
+               return -ENOENT;
        /* For mm_populate(), just skip the stack guard page. */
        if ((*flags & FOLL_POPULATE) &&
                        (stack_guard_page_start(vma, address) ||
@@ -890,7 +921,9 @@ long populate_vma_page_range(struct vm_area_struct *vma,
        VM_BUG_ON_VMA(end   > vma->vm_end, vma);
        VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm);
 
-       gup_flags = FOLL_TOUCH | FOLL_POPULATE;
+       gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK;
+       if (vma->vm_flags & VM_LOCKONFAULT)
+               gup_flags &= ~FOLL_POPULATE;
        /*
         * We want to touch writable mappings with a write fault in order
         * to break COW, except for shared mappings because these don't COW
@@ -1030,9 +1063,6 @@ struct page *get_dump_page(unsigned long addr)
  *  *) HAVE_RCU_TABLE_FREE is enabled, and tlb_remove_table is used to free
  *      pages containing page tables.
  *
- *  *) THP splits will broadcast an IPI, this can be achieved by overriding
- *      pmdp_splitting_flush.
- *
  *  *) ptes can be read atomically by the architecture.
  *
  *  *) access_ok is sufficient to validate userspace address ranges.
@@ -1060,7 +1090,7 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
                 * for an example see gup_get_pte in arch/x86/mm/gup.c
                 */
                pte_t pte = READ_ONCE(*ptep);
-               struct page *page;
+               struct page *head, *page;
 
                /*
                 * Similar to the PMD case below, NUMA hinting must take slow
@@ -1072,15 +1102,17 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
 
                VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
                page = pte_page(pte);
+               head = compound_head(page);
 
-               if (!page_cache_get_speculative(page))
+               if (!page_cache_get_speculative(head))
                        goto pte_unmap;
 
                if (unlikely(pte_val(pte) != pte_val(*ptep))) {
-                       put_page(page);
+                       put_page(head);
                        goto pte_unmap;
                }
 
+               VM_BUG_ON_PAGE(compound_head(page) != head, page);
                pages[*nr] = page;
                (*nr)++;
 
@@ -1113,7 +1145,7 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
 static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
                unsigned long end, int write, struct page **pages, int *nr)
 {
-       struct page *head, *page, *tail;
+       struct page *head, *page;
        int refs;
 
        if (write && !pmd_write(orig))
@@ -1122,7 +1154,6 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
        refs = 0;
        head = pmd_page(orig);
        page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
-       tail = page;
        do {
                VM_BUG_ON_PAGE(compound_head(page) != head, page);
                pages[*nr] = page;
@@ -1143,24 +1174,13 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
                return 0;
        }
 
-       /*
-        * Any tail pages need their mapcount reference taken before we
-        * return. (This allows the THP code to bump their ref count when
-        * they are split into base pages).
-        */
-       while (refs--) {
-               if (PageTail(tail))
-                       get_huge_page_tail(tail);
-               tail++;
-       }
-
        return 1;
 }
 
 static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
                unsigned long end, int write, struct page **pages, int *nr)
 {
-       struct page *head, *page, *tail;
+       struct page *head, *page;
        int refs;
 
        if (write && !pud_write(orig))
@@ -1169,7 +1189,6 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
        refs = 0;
        head = pud_page(orig);
        page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
-       tail = page;
        do {
                VM_BUG_ON_PAGE(compound_head(page) != head, page);
                pages[*nr] = page;
@@ -1190,12 +1209,6 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
                return 0;
        }
 
-       while (refs--) {
-               if (PageTail(tail))
-                       get_huge_page_tail(tail);
-               tail++;
-       }
-
        return 1;
 }
 
@@ -1204,7 +1217,7 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
                        struct page **pages, int *nr)
 {
        int refs;
-       struct page *head, *page, *tail;
+       struct page *head, *page;
 
        if (write && !pgd_write(orig))
                return 0;
@@ -1212,7 +1225,6 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
        refs = 0;
        head = pgd_page(orig);
        page = head + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT);
-       tail = page;
        do {
                VM_BUG_ON_PAGE(compound_head(page) != head, page);
                pages[*nr] = page;
@@ -1233,12 +1245,6 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
                return 0;
        }
 
-       while (refs--) {
-               if (PageTail(tail))
-                       get_huge_page_tail(tail);
-               tail++;
-       }
-
        return 1;
 }
 
@@ -1253,7 +1259,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
                pmd_t pmd = READ_ONCE(*pmdp);
 
                next = pmd_addr_end(addr, end);
-               if (pmd_none(pmd) || pmd_trans_splitting(pmd))
+               if (pmd_none(pmd))
                        return 0;
 
                if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd))) {