return retval;
}
+/*
+ * Prepare page for fast batched LRU putback via putback_lru_evictable_pagevec()
+ *
+ * The fast path is available only for evictable pages with single mapping.
+ * Then we can bypass the per-cpu pvec and get better performance.
+ * when mapcount > 1 we need try_to_munlock() which can fail.
+ * when !page_evictable(), we need the full redo logic of putback_lru_page to
+ * avoid leaving evictable page in unevictable list.
+ *
+ * In case of success, @page is added to @pvec and @pgrescued is incremented
+ * in case that the page was previously unevictable. @page is also unlocked.
+ */
+static bool __putback_lru_fast_prepare(struct page *page, struct pagevec *pvec,
+ int *pgrescued)
+{
+ VM_BUG_ON(PageLRU(page));
+ VM_BUG_ON(!PageLocked(page));
+
+ if (page_mapcount(page) <= 1 && page_evictable(page)) {
+ pagevec_add(pvec, page);
+ if (TestClearPageUnevictable(page))
+ (*pgrescued)++;
+ unlock_page(page);
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Putback multiple evictable pages to the LRU
+ *
+ * Batched putback of evictable pages that bypasses the per-cpu pvec. Some of
+ * the pages might have meanwhile become unevictable but that is OK.
+ */
+static void __putback_lru_fast(struct pagevec *pvec, int pgrescued)
+{
+ count_vm_events(UNEVICTABLE_PGMUNLOCKED, pagevec_count(pvec));
+ /*
+ *__pagevec_lru_add() calls release_pages() so we don't call
+ * put_page() explicitly
+ */
+ __pagevec_lru_add(pvec);
+ count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued);
+}
+
/*
* Munlock a batch of pages from the same zone
*
* The second phase finishes the munlock only for pages where isolation
* succeeded.
*
- * Note that pvec is modified during the process. Before returning
- * pagevec_reinit() is called on it.
+ * Note that the pagevec may be modified during the process.
*/
static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
{
int i;
int nr = pagevec_count(pvec);
+ int delta_munlocked = -nr;
+ struct pagevec pvec_putback;
+ int pgrescued = 0;
/* Phase 1: page isolation */
spin_lock_irq(&zone->lru_lock);
struct lruvec *lruvec;
int lru;
- /* we have disabled interrupts */
- __mod_zone_page_state(zone, NR_MLOCK, -1);
-
if (PageLRU(page)) {
lruvec = mem_cgroup_page_lruvec(page, zone);
lru = page_lru(page);
-
- get_page(page);
+ /*
+ * We already have pin from follow_page_mask()
+ * so we can spare the get_page() here.
+ */
ClearPageLRU(page);
del_page_from_lru_list(page, lruvec, lru);
} else {
*/
pvec->pages[i] = NULL;
put_page(page);
+ delta_munlocked++;
}
}
+ __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
spin_unlock_irq(&zone->lru_lock);
- /* Phase 2: page munlock and putback */
+ /* Phase 2: page munlock */
+ pagevec_init(&pvec_putback, 0);
for (i = 0; i < nr; i++) {
struct page *page = pvec->pages[i];
if (page) {
lock_page(page);
- __munlock_isolated_page(page);
- unlock_page(page);
- put_page(page); /* pin from follow_page_mask() */
+ if (!__putback_lru_fast_prepare(page, &pvec_putback,
+ &pgrescued)) {
+ /*
+ * Slow path. We don't want to lose the last
+ * pin before unlock_page()
+ */
+ get_page(page); /* for putback_lru_page() */
+ __munlock_isolated_page(page);
+ unlock_page(page);
+ put_page(page); /* from follow_page_mask() */
+ }
}
}
- pagevec_reinit(pvec);
+
+ /*
+ * Phase 3: page putback for pages that qualified for the fast path
+ * This will also call put_page() to return pin from follow_page_mask()
+ */
+ if (pagevec_count(&pvec_putback))
+ __putback_lru_fast(&pvec_putback, pgrescued);
+}
+
+/*
+ * Fill up pagevec for __munlock_pagevec using pte walk
+ *
+ * The function expects that the struct page corresponding to @start address is
+ * a non-TPH page already pinned and in the @pvec, and that it belongs to @zone.
+ *
+ * The rest of @pvec is filled by subsequent pages within the same pmd and same
+ * zone, as long as the pte's are present and vm_normal_page() succeeds. These
+ * pages also get pinned.
+ *
+ * Returns the address of the next page that should be scanned. This equals
+ * @start + PAGE_SIZE when no page could be added by the pte walk.
+ */
+static unsigned long __munlock_pagevec_fill(struct pagevec *pvec,
+ struct vm_area_struct *vma, int zoneid, unsigned long start,
+ unsigned long end)
+{
+ pte_t *pte;
+ spinlock_t *ptl;
+
+ /*
+ * Initialize pte walk starting at the already pinned page where we
+ * are sure that there is a pte.
+ */
+ pte = get_locked_pte(vma->vm_mm, start, &ptl);
+ end = min(end, pmd_addr_end(start, end));
+
+ /* The page next to the pinned page is the first we will try to get */
+ start += PAGE_SIZE;
+ while (start < end) {
+ struct page *page = NULL;
+ pte++;
+ if (pte_present(*pte))
+ page = vm_normal_page(vma, start, *pte);
+ /*
+ * Break if page could not be obtained or the page's node+zone does not
+ * match
+ */
+ if (!page || page_zone_id(page) != zoneid)
+ break;
+
+ get_page(page);
+ /*
+ * Increase the address that will be returned *before* the
+ * eventual break due to pvec becoming full by adding the page
+ */
+ start += PAGE_SIZE;
+ if (pagevec_add(pvec, page) == 0)
+ break;
+ }
+ pte_unmap_unlock(pte, ptl);
+ return start;
}
/*
void munlock_vma_pages_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
- struct pagevec pvec;
- struct zone *zone = NULL;
-
- pagevec_init(&pvec, 0);
vma->vm_flags &= ~VM_LOCKED;
while (start < end) {
- struct page *page;
+ struct page *page = NULL;
unsigned int page_mask, page_increm;
- struct zone *pagezone;
+ struct pagevec pvec;
+ struct zone *zone;
+ int zoneid;
+ pagevec_init(&pvec, 0);
/*
* Although FOLL_DUMP is intended for get_dump_page(),
* it just so happens that its special treatment of the
* has sneaked into the range, we won't oops here: great).
*/
page = follow_page_mask(vma, start, FOLL_GET | FOLL_DUMP,
- &page_mask);
+ &page_mask);
+
if (page && !IS_ERR(page)) {
- pagezone = page_zone(page);
- /* The whole pagevec must be in the same zone */
- if (pagezone != zone) {
- if (pagevec_count(&pvec))
- __munlock_pagevec(&pvec, zone);
- zone = pagezone;
- }
if (PageTransHuge(page)) {
- /*
- * THP pages are not handled by pagevec due
- * to their possible split (see below).
- */
- if (pagevec_count(&pvec))
- __munlock_pagevec(&pvec, zone);
lock_page(page);
/*
* Any THP page found by follow_page_mask() may
put_page(page); /* follow_page_mask() */
} else {
/*
- * Non-huge pages are handled in batches
- * via pagevec. The pin from
- * follow_page_mask() prevents them from
- * collapsing by THP.
+ * Non-huge pages are handled in batches via
+ * pagevec. The pin from follow_page_mask()
+ * prevents them from collapsing by THP.
+ */
+ pagevec_add(&pvec, page);
+ zone = page_zone(page);
+ zoneid = page_zone_id(page);
+
+ /*
+ * Try to fill the rest of pagevec using fast
+ * pte walk. This will also update start to
+ * the next page to process. Then munlock the
+ * pagevec.
*/
- if (pagevec_add(&pvec, page) == 0)
- __munlock_pagevec(&pvec, zone);
+ start = __munlock_pagevec_fill(&pvec, vma,
+ zoneid, start, end);
+ __munlock_pagevec(&pvec, zone);
+ goto next;
}
}
page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
start += page_increm * PAGE_SIZE;
+next:
cond_resched();
}
- if (pagevec_count(&pvec))
- __munlock_pagevec(&pvec, zone);
}
/*