}
}
+ if (flags & FOLL_SPLIT && PageTransCompound(page)) {
+ int ret;
+ get_page(page);
+ pte_unmap_unlock(ptep, ptl);
+ lock_page(page);
+ ret = split_huge_page(page);
+ unlock_page(page);
+ put_page(page);
+ if (ret)
+ return ERR_PTR(ret);
+ goto retry;
+ }
+
if (flags & FOLL_GET)
- get_page_foll(page);
+ get_page(page);
if (flags & FOLL_TOUCH) {
if ((flags & FOLL_WRITE) &&
!pte_dirty(pte) && !PageDirty(page))
*/
mark_page_accessed(page);
}
- if ((flags & FOLL_POPULATE) && (vma->vm_flags & VM_LOCKED)) {
+ if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
+ /* Do not mlock pte-mapped THP */
+ if (PageTransCompound(page))
+ goto out;
+
/*
* The preliminary mapping check is mainly to avoid the
* pointless overhead of lock_page on the ZERO_PAGE
}
if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
return no_page_table(vma, flags);
- if (pmd_trans_huge(*pmd)) {
- if (flags & FOLL_SPLIT) {
- split_huge_page_pmd(vma, address, pmd);
- return follow_page_pte(vma, address, pmd, flags);
- }
- ptl = pmd_lock(mm, pmd);
- if (likely(pmd_trans_huge(*pmd))) {
- if (unlikely(pmd_trans_splitting(*pmd))) {
- spin_unlock(ptl);
- wait_split_huge_page(vma->anon_vma, pmd);
- } else {
- page = follow_trans_huge_pmd(vma, address,
- pmd, flags);
- spin_unlock(ptl);
- *page_mask = HPAGE_PMD_NR - 1;
- return page;
- }
- } else
+ if (likely(!pmd_trans_huge(*pmd)))
+ return follow_page_pte(vma, address, pmd, flags);
+
+ ptl = pmd_lock(mm, pmd);
+ if (unlikely(!pmd_trans_huge(*pmd))) {
+ spin_unlock(ptl);
+ return follow_page_pte(vma, address, pmd, flags);
+ }
+ if (flags & FOLL_SPLIT) {
+ int ret;
+ page = pmd_page(*pmd);
+ if (is_huge_zero_page(page)) {
+ spin_unlock(ptl);
+ ret = 0;
+ split_huge_pmd(vma, pmd, address);
+ } else {
+ get_page(page);
spin_unlock(ptl);
+ lock_page(page);
+ ret = split_huge_page(page);
+ unlock_page(page);
+ put_page(page);
+ }
+
+ return ret ? ERR_PTR(ret) :
+ follow_page_pte(vma, address, pmd, flags);
}
- return follow_page_pte(vma, address, pmd, flags);
+
+ page = follow_trans_huge_pmd(vma, address, pmd, flags);
+ spin_unlock(ptl);
+ *page_mask = HPAGE_PMD_NR - 1;
+ return page;
}
static int get_gate_page(struct mm_struct *mm, unsigned long address,
unsigned int fault_flags = 0;
int ret;
+ /* mlock all present pages, but do not fault in new pages */
+ if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
+ return -ENOENT;
/* For mm_populate(), just skip the stack guard page. */
if ((*flags & FOLL_POPULATE) &&
(stack_guard_page_start(vma, address) ||
VM_BUG_ON_VMA(end > vma->vm_end, vma);
VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm);
- gup_flags = FOLL_TOUCH | FOLL_POPULATE;
+ gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK;
+ if (vma->vm_flags & VM_LOCKONFAULT)
+ gup_flags &= ~FOLL_POPULATE;
/*
* We want to touch writable mappings with a write fault in order
* to break COW, except for shared mappings because these don't COW
* *) HAVE_RCU_TABLE_FREE is enabled, and tlb_remove_table is used to free
* pages containing page tables.
*
- * *) THP splits will broadcast an IPI, this can be achieved by overriding
- * pmdp_splitting_flush.
- *
* *) ptes can be read atomically by the architecture.
*
* *) access_ok is sufficient to validate userspace address ranges.
* for an example see gup_get_pte in arch/x86/mm/gup.c
*/
pte_t pte = READ_ONCE(*ptep);
- struct page *page;
+ struct page *head, *page;
/*
* Similar to the PMD case below, NUMA hinting must take slow
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
page = pte_page(pte);
+ head = compound_head(page);
- if (!page_cache_get_speculative(page))
+ if (!page_cache_get_speculative(head))
goto pte_unmap;
if (unlikely(pte_val(pte) != pte_val(*ptep))) {
- put_page(page);
+ put_page(head);
goto pte_unmap;
}
+ VM_BUG_ON_PAGE(compound_head(page) != head, page);
pages[*nr] = page;
(*nr)++;
static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
- struct page *head, *page, *tail;
+ struct page *head, *page;
int refs;
if (write && !pmd_write(orig))
refs = 0;
head = pmd_page(orig);
page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
- tail = page;
do {
VM_BUG_ON_PAGE(compound_head(page) != head, page);
pages[*nr] = page;
return 0;
}
- /*
- * Any tail pages need their mapcount reference taken before we
- * return. (This allows the THP code to bump their ref count when
- * they are split into base pages).
- */
- while (refs--) {
- if (PageTail(tail))
- get_huge_page_tail(tail);
- tail++;
- }
-
return 1;
}
static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
- struct page *head, *page, *tail;
+ struct page *head, *page;
int refs;
if (write && !pud_write(orig))
refs = 0;
head = pud_page(orig);
page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
- tail = page;
do {
VM_BUG_ON_PAGE(compound_head(page) != head, page);
pages[*nr] = page;
return 0;
}
- while (refs--) {
- if (PageTail(tail))
- get_huge_page_tail(tail);
- tail++;
- }
-
return 1;
}
struct page **pages, int *nr)
{
int refs;
- struct page *head, *page, *tail;
+ struct page *head, *page;
if (write && !pgd_write(orig))
return 0;
refs = 0;
head = pgd_page(orig);
page = head + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT);
- tail = page;
do {
VM_BUG_ON_PAGE(compound_head(page) != head, page);
pages[*nr] = page;
return 0;
}
- while (refs--) {
- if (PageTail(tail))
- get_huge_page_tail(tail);
- tail++;
- }
-
return 1;
}
pmd_t pmd = READ_ONCE(*pmdp);
next = pmd_addr_end(addr, end);
- if (pmd_none(pmd) || pmd_trans_splitting(pmd))
+ if (pmd_none(pmd))
return 0;
if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd))) {