return &zone->reclaim_stat;
}
-static unsigned long zone_nr_pages(struct zone *zone, struct scan_control *sc,
- enum lru_list lru)
+static unsigned long zone_nr_lru_pages(struct zone *zone,
+ struct scan_control *sc, enum lru_list lru)
{
if (!scanning_global_lru(sc))
return mem_cgroup_zone_nr_pages(sc->mem_cgroup, zone, lru);
static inline int is_page_cache_freeable(struct page *page)
{
- return page_count(page) - !!page_has_private(page) == 2;
+ /*
+ * A freeable page cache page is referenced only by the caller
+ * that isolated the page, the page cache radix tree and
+ * optional buffer heads at page->private.
+ *
+ * NOTE(review): with the !! dropped, the subtraction below is
+ * only correct if page_has_private() yields exactly 0 or 1
+ * rather than a raw flag mask -- confirm against its definition.
+ */
+ return page_count(page) - page_has_private(page) == 2;
}
static int may_write_to_queue(struct backing_dev_info *bdi)
* block, for some throttling. This happens by accident, because
* swap_backing_dev_info is bust: it doesn't reflect the
* congestion state of the swapdevs. Easy to fix, if needed.
- * See swapfile.c:page_queue_congested().
*/
if (!is_page_cache_freeable(page))
return PAGE_KEEP;
* unevictable page on [in]active list.
* We know how to handle that.
*/
- lru = active + page_is_file_cache(page);
+ lru = active + page_lru_base_type(page);
lru_cache_add_lru(page, lru);
} else {
/*
* processes. Try to unmap it here.
*/
if (page_mapped(page) && mapping) {
- switch (try_to_unmap(page, 0)) {
+ switch (try_to_unmap(page, TTU_UNMAP)) {
case SWAP_FAIL:
goto activate_locked;
case SWAP_AGAIN:
if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode))
return ret;
- if (mode != ISOLATE_BOTH && (!page_is_file_cache(page) != !file))
+ if (mode != ISOLATE_BOTH && page_is_file_cache(page) != file)
return ret;
/*
/* Check that we have not crossed a zone boundary. */
if (unlikely(page_zone_id(cursor_page) != zone_id))
continue;
+
+ /*
+ * If we don't have enough swap space, reclaiming
+ * anon pages which don't already have a swap slot is
+ * pointless.
+ */
+ if (nr_swap_pages <= 0 && PageAnon(cursor_page) &&
+ !PageSwapCache(cursor_page))
+ continue;
+
if (__isolate_lru_page(cursor_page, mode, file) == 0) {
list_move(&cursor_page->lru, dst);
mem_cgroup_del_lru(cursor_page);
if (file)
lru += LRU_FILE;
return isolate_lru_pages(nr, &z->lru[lru].list, dst, scanned, order,
- mode, !!file);
+ mode, file);
}
/*
struct page *page;
list_for_each_entry(page, page_list, lru) {
- lru = page_is_file_cache(page);
+ lru = page_lru_base_type(page);
if (PageActive(page)) {
lru += LRU_ACTIVE;
ClearPageActive(page);
SetPageLRU(page);
lru = page_lru(page);
add_page_to_lru_list(zone, page, lru);
- if (PageActive(page)) {
- int file = !!page_is_file_cache(page);
+ if (is_active_lru(lru)) {
+ int file = is_file_lru(lru);
reclaim_stat->recent_rotated[file]++;
}
if (!pagevec_add(&pvec, page)) {
while (!list_empty(list)) {
page = lru_to_page(list);
- prefetchw_prev_lru_page(page, list, flags);
VM_BUG_ON(PageLRU(page));
SetPageLRU(page);
- VM_BUG_ON(!PageActive(page));
- if (!is_active_lru(lru))
- ClearPageActive(page); /* we are de-activating */
-
list_move(&page->lru, &zone->lru[lru].list);
mem_cgroup_add_lru_list(page, lru);
pgmoved++;
if (scanning_global_lru(sc)) {
zone->pages_scanned += pgscanned;
}
- reclaim_stat->recent_scanned[!!file] += nr_taken;
+ reclaim_stat->recent_scanned[file] += nr_taken;
__count_zone_vm_events(PGREFILL, zone, pgscanned);
if (file)
}
}
+ ClearPageActive(page); /* we are de-activating */
list_add(&page->lru, &l_inactive);
}
* helps balance scan pressure between file and anonymous pages in
* get_scan_ratio.
*/
- reclaim_stat->recent_rotated[!!file] += nr_rotated;
+ reclaim_stat->recent_rotated[file] += nr_rotated;
move_active_pages_to_lru(zone, &l_active,
LRU_ACTIVE + file * LRU_FILE);
unsigned long ap, fp;
struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
- anon = zone_nr_pages(zone, sc, LRU_ACTIVE_ANON) +
- zone_nr_pages(zone, sc, LRU_INACTIVE_ANON);
- file = zone_nr_pages(zone, sc, LRU_ACTIVE_FILE) +
- zone_nr_pages(zone, sc, LRU_INACTIVE_FILE);
+ anon = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) +
+ zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
+ file = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) +
+ zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
if (scanning_global_lru(sc)) {
free = zone_page_state(zone, NR_FREE_PAGES);
enum lru_list l;
unsigned long nr_reclaimed = sc->nr_reclaimed;
unsigned long swap_cluster_max = sc->swap_cluster_max;
+ struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
int noswap = 0;
/* If we have no swap space, do not bother scanning anon pages. */
int file = is_file_lru(l);
unsigned long scan;
- scan = zone_nr_pages(zone, sc, l);
+ scan = zone_nr_lru_pages(zone, sc, l);
if (priority || noswap) {
scan >>= priority;
scan = (scan * percent[file]) / 100;
}
- if (scanning_global_lru(sc))
- nr[l] = nr_scan_try_batch(scan,
- &zone->lru[l].nr_saved_scan,
- swap_cluster_max);
- else
- nr[l] = scan;
+ nr[l] = nr_scan_try_batch(scan,
+ &reclaim_stat->nr_saved_scan[l],
+ swap_cluster_max);
}
while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
*
* If the caller is !__GFP_FS then the probability of a failure is reasonably
* high - the zone may be full of dirty or under-writeback pages, which this
- * caller can't do much about. We kick pdflush and take explicit naps in the
- * hope that some of these pages can be written. But if the allocating task
- * holds filesystem locks which prevent writeout this might not work, and the
- * allocation attempt will fail.
+ * caller can't do much about. We kick the writeback threads and take explicit
+ * naps in the hope that some of these pages can be written. But if the
+ * allocating task holds filesystem locks which prevent writeout this might not
+ * work, and the allocation attempt will fail.
*
* returns: 0, if no pages reclaimed
* else, the number of pages reclaimed
if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
continue;
- lru_pages += zone_lru_pages(zone);
+ lru_pages += zone_reclaimable_pages(zone);
}
}
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+/*
+ * Run a single shrink_zone() pass over @zone on behalf of memory cgroup
+ * @mem, with the scan restricted to node @nid via the nodemask.
+ *
+ * @gfp_mask:   caller's allocation flags; only the GFP_RECLAIM_MASK bits
+ *              are kept, the rest come from GFP_HIGHUSER_MOVABLE.
+ * @noswap:     when true, may_swap is cleared for this scan.
+ * @swappiness: copied into the scan control unchanged.
+ *
+ * Returns sc.nr_reclaimed as left by the shrink_zone() call.
+ */
+unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
+						gfp_t gfp_mask, bool noswap,
+						unsigned int swappiness,
+						struct zone *zone, int nid)
+{
+	nodemask_t nm = nodemask_of_node(nid);
+	struct scan_control sc = {
+		.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
+			    (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
+		.nodemask = &nm,
+		.nr_reclaimed = 0,
+		.nr_scanned = 0,
+		.may_writepage = !laptop_mode,
+		.may_unmap = 1,
+		.may_swap = !noswap,
+		.swap_cluster_max = SWAP_CLUSTER_MAX,
+		.swappiness = swappiness,
+		.order = 0,
+		.mem_cgroup = mem,
+		.isolate_pages = mem_cgroup_isolate_pages,
+	};
+
+	/*
+	 * NOTE: Although we could obtain a priority value, using it here
+	 * is not a good idea, since it limits the pages we can scan.
+	 * If we don't reclaim here, the shrink_zone from balance_pgdat
+	 * will pick up pages from other mem cgroups as well. So we
+	 * deliberately pass a priority of zero.
+	 */
+	shrink_zone(0, zone, &sc);
+
+	return sc.nr_reclaimed;
+}
+
unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
gfp_t gfp_mask,
bool noswap,
unsigned int swappiness)
{
+ struct zonelist *zonelist;
struct scan_control sc = {
.may_writepage = !laptop_mode,
.may_unmap = 1,
.isolate_pages = mem_cgroup_isolate_pages,
.nodemask = NULL, /* we don't care the placement */
};
- struct zonelist *zonelist;
sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
for (i = 0; i <= end_zone; i++) {
struct zone *zone = pgdat->node_zones + i;
- lru_pages += zone_lru_pages(zone);
+ lru_pages += zone_reclaimable_pages(zone);
}
/*
for (i = 0; i <= end_zone; i++) {
struct zone *zone = pgdat->node_zones + i;
int nr_slab;
+ int nid, zid;
if (!populated_zone(zone))
continue;
temp_priority[i] = priority;
sc.nr_scanned = 0;
note_zone_scanning_priority(zone, priority);
+
+ nid = pgdat->node_id;
+ zid = zone_idx(zone);
+ /*
+ * Call soft limit reclaim before calling shrink_zone.
+ * For now we ignore the return value
+ */
+ mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask,
+ nid, zid);
/*
* We put equal pressure on every zone, unless one
* zone has way too many pages free already.
if (zone_is_all_unreclaimable(zone))
continue;
if (nr_slab == 0 && zone->pages_scanned >=
- (zone_lru_pages(zone) * 6))
+ (zone_reclaimable_pages(zone) * 6))
zone_set_flag(zone,
ZONE_ALL_UNRECLAIMABLE);
/*
wake_up_interruptible(&pgdat->kswapd_wait);
}
-unsigned long global_lru_pages(void)
+/*
+ * Return the system-wide number of LRU pages considered reclaimable:
+ * the active and inactive file lists, plus the active and inactive
+ * anon lists only while nr_swap_pages > 0.
+ *
+ * The reclaimable count would be mostly accurate.
+ * The less readily reclaimable pages may be
+ * - mlocked pages, which will be moved to unevictable list when encountered
+ * - mapped pages, which may require several passes to be reclaimed
+ * - dirty pages, which are not "instantly" reclaimable
+ */
+unsigned long global_reclaimable_pages(void)
{
- return global_page_state(NR_ACTIVE_ANON)
- + global_page_state(NR_ACTIVE_FILE)
- + global_page_state(NR_INACTIVE_ANON)
- + global_page_state(NR_INACTIVE_FILE);
+ /* Same type as the return value; an int could truncate the sum. */
+ unsigned long nr;
+
+ nr = global_page_state(NR_ACTIVE_FILE) +
+ global_page_state(NR_INACTIVE_FILE);
+
+ /* Anon pages are only counted while swap space remains. */
+ if (nr_swap_pages > 0)
+ nr += global_page_state(NR_ACTIVE_ANON) +
+ global_page_state(NR_INACTIVE_ANON);
+
+ return nr;
+}
+
+/*
+ * Return the number of LRU pages in @zone considered reclaimable:
+ * the zone's active and inactive file lists, plus its active and
+ * inactive anon lists only while nr_swap_pages > 0.
+ */
+unsigned long zone_reclaimable_pages(struct zone *zone)
+{
+ /* Same type as the return value; an int could truncate the sum. */
+ unsigned long nr;
+
+ nr = zone_page_state(zone, NR_ACTIVE_FILE) +
+ zone_page_state(zone, NR_INACTIVE_FILE);
+
+ /* Anon pages are only counted while swap space remains. */
+ if (nr_swap_pages > 0)
+ nr += zone_page_state(zone, NR_ACTIVE_ANON) +
+ zone_page_state(zone, NR_INACTIVE_ANON);
+
+ return nr;
}
#ifdef CONFIG_HIBERNATION
{
struct zone *zone;
unsigned long nr_reclaimed = 0;
+ struct zone_reclaim_stat *reclaim_stat;
for_each_populated_zone(zone) {
enum lru_list l;
l == LRU_ACTIVE_FILE))
continue;
- zone->lru[l].nr_saved_scan += (lru_pages >> prio) + 1;
- if (zone->lru[l].nr_saved_scan >= nr_pages || pass > 3) {
+ reclaim_stat = get_reclaim_stat(zone, sc);
+ reclaim_stat->nr_saved_scan[l] +=
+ (lru_pages >> prio) + 1;
+ if (reclaim_stat->nr_saved_scan[l]
+ >= nr_pages || pass > 3) {
unsigned long nr_to_scan;
- zone->lru[l].nr_saved_scan = 0;
+ reclaim_stat->nr_saved_scan[l] = 0;
nr_to_scan = min(nr_pages, lru_pages);
nr_reclaimed += shrink_list(l, nr_to_scan, zone,
sc, prio);
current->reclaim_state = &reclaim_state;
- lru_pages = global_lru_pages();
+ lru_pages = global_reclaimable_pages();
nr_slab = global_page_state(NR_SLAB_RECLAIMABLE);
/* If slab caches are huge, it's better to hit them first */
while (nr_slab >= lru_pages) {
reclaim_state.reclaimed_slab = 0;
shrink_slab(sc.nr_scanned, sc.gfp_mask,
- global_lru_pages());
+ global_reclaimable_pages());
sc.nr_reclaimed += reclaim_state.reclaimed_slab;
if (sc.nr_reclaimed >= nr_pages)
goto out;
if (!sc.nr_reclaimed) {
do {
reclaim_state.reclaimed_slab = 0;
- shrink_slab(nr_pages, sc.gfp_mask, global_lru_pages());
+ shrink_slab(nr_pages, sc.gfp_mask,
+ global_reclaimable_pages());
sc.nr_reclaimed += reclaim_state.reclaimed_slab;
} while (sc.nr_reclaimed < nr_pages &&
reclaim_state.reclaimed_slab > 0);
retry:
ClearPageUnevictable(page);
if (page_evictable(page, NULL)) {
- enum lru_list l = LRU_INACTIVE_ANON + page_is_file_cache(page);
+ enum lru_list l = page_lru_base_type(page);
__dec_zone_state(zone, NR_UNEVICTABLE);
list_move(&page->lru, &zone->lru[l].list);
unsigned long scan_unevictable_pages;
int scan_unevictable_handler(struct ctl_table *table, int write,
- struct file *file, void __user *buffer,
+ void __user *buffer,
size_t *length, loff_t *ppos)
{
- proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
+ proc_doulongvec_minmax(table, write, buffer, length, ppos);
if (write && *(unsigned long *)table->data)
scan_all_zones_unevictable_pages();