X-Git-Url: https://git.karo-electronics.de/?a=blobdiff_plain;f=mm%2Fvmscan.c;h=70aa739c6b68beeca628dde8d947023ce4b18ca3;hb=c2eca00fec40a9b3551f0605834acad9336099d1;hp=e7c75a3c6b53fa02c831654a45fad8316f25100d;hpb=fd538803731e50367b7c59ce4ad3454426a3d671;p=karo-tx-linux.git diff --git a/mm/vmscan.c b/mm/vmscan.c index e7c75a3c6b53..70aa739c6b68 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -87,6 +87,7 @@ struct scan_control { /* The highest zone to isolate pages for reclaim from */ enum zone_type reclaim_idx; + /* Writepage batching in laptop mode; RECLAIM_WRITE */ unsigned int may_writepage:1; /* Can mapped pages be reclaimed? */ @@ -1055,6 +1056,15 @@ static unsigned long shrink_page_list(struct list_head *page_list, * throttling so we could easily OOM just because too many * pages are in writeback and there is nothing else to * reclaim. Wait for the writeback to complete. + * + * In cases 1) and 2) we activate the pages to get them out of + * the way while we continue scanning for clean pages on the + * inactive list and refilling from the active list. The + * observation here is that waiting for disk writes is more + * expensive than potentially causing reloads down the line. + * Since they're marked for immediate reclaim, they won't put + * memory pressure on the cache working set any longer than it + * takes to write them to disk. */ if (PageWriteback(page)) { /* Case 1 above */ @@ -1062,7 +1072,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, PageReclaim(page) && test_bit(PGDAT_WRITEBACK, &pgdat->flags)) { nr_immediate++; - goto keep_locked; + goto activate_locked; /* Case 2 above */ } else if (sane_reclaim(sc) || @@ -1080,7 +1090,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, */ SetPageReclaim(page); nr_writeback++; - goto keep_locked; + goto activate_locked; /* Case 3 above */ } else { @@ -1152,13 +1162,18 @@ static unsigned long shrink_page_list(struct list_head *page_list, if (PageDirty(page)) { /* - * Only kswapd can writeback filesystem pages to - * avoid risk of stack overflow but only writeback - * if many dirty pages have been encountered. + * Only kswapd can writeback filesystem pages + * to avoid risk of stack overflow. But avoid + * injecting inefficient single-page IO into + * flusher writeback as much as possible: only + * write pages when we've encountered many + * dirty pages, and when we've already scanned + * the rest of the LRU for clean pages and see + * the same dirty pages again (PageReclaim). */ if (page_is_file_cache(page) && - (!current_is_kswapd() || - !test_bit(PGDAT_DIRTY, &pgdat->flags))) { + (!current_is_kswapd() || !PageReclaim(page) || + !test_bit(PGDAT_DIRTY, &pgdat->flags))) { /* * Immediately reclaim when written back. * Similar in principal to deactivate_page() @@ -1168,7 +1183,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, inc_node_page_state(page, NR_VMSCAN_IMMEDIATE); SetPageReclaim(page); - goto keep_locked; + goto activate_locked; } if (references == PAGEREF_RECLAIM_CLEAN) @@ -1373,13 +1388,10 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode) * wants to isolate pages it will be able to operate on without * blocking - clean pages for the most part. * - * ISOLATE_CLEAN means that only clean pages should be isolated. This - * is used by reclaim when it is cannot write to backing storage - * * ISOLATE_ASYNC_MIGRATE is used to indicate that it only wants to pages * that it is possible to migrate without blocking */ - if (mode & (ISOLATE_CLEAN|ISOLATE_ASYNC_MIGRATE)) { + if (mode & ISOLATE_ASYNC_MIGRATE) { /* All the caller can do on PageWriteback is block */ if (PageWriteback(page)) return ret; @@ -1387,10 +1399,6 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode) if (PageDirty(page)) { struct address_space *mapping; - /* ISOLATE_CLEAN means only clean pages */ - if (mode & ISOLATE_CLEAN) - return ret; - /* * Only pages without mappings or that have a * ->migratepage callback are possible to migrate @@ -1701,30 +1709,6 @@ static int current_may_throttle(void) bdi_write_congested(current->backing_dev_info); } -static bool inactive_reclaimable_pages(struct lruvec *lruvec, - struct scan_control *sc, enum lru_list lru) -{ - int zid; - struct zone *zone; - int file = is_file_lru(lru); - struct pglist_data *pgdat = lruvec_pgdat(lruvec); - - if (!global_reclaim(sc)) - return true; - - for (zid = sc->reclaim_idx; zid >= 0; zid--) { - zone = &pgdat->node_zones[zid]; - if (!managed_zone(zone)) - continue; - - if (zone_page_state_snapshot(zone, NR_ZONE_LRU_BASE + - LRU_FILE * file) >= SWAP_CLUSTER_MAX) - return true; - } - - return false; -} - /* * shrink_inactive_list() is a helper for shrink_node(). It returns the number * of reclaimed pages @@ -1743,9 +1727,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, struct pglist_data *pgdat = lruvec_pgdat(lruvec); struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; - if (!inactive_reclaimable_pages(lruvec, sc, lru)) - return 0; - while (unlikely(too_many_isolated(pgdat, file, sc))) { congestion_wait(BLK_RW_ASYNC, HZ/10); @@ -1758,8 +1739,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, if (!sc->may_unmap) isolate_mode |= ISOLATE_UNMAPPED; - if (!sc->may_writepage) - isolate_mode |= ISOLATE_CLEAN; spin_lock_irq(&pgdat->lru_lock); @@ -1833,12 +1812,20 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, /* * If dirty pages are scanned that are not queued for IO, it - * implies that flushers are not keeping up. In this case, flag - * the pgdat PGDAT_DIRTY and kswapd will start writing pages from - * reclaim context. + * implies that flushers are not doing their job. This can + * happen when memory pressure pushes dirty pages to the end of + * the LRU before the dirty limits are breached and the dirty + * data has expired. It can also happen when the proportion of + * dirty pages grows not through writes but through memory + * pressure reclaiming all the clean cache. And in some cases, + * the flushers simply cannot keep up with the allocation + * rate. Nudge the flusher threads in case they are asleep, but + * also allow kswapd to start writing pages during reclaim. */ - if (stat.nr_unqueued_dirty == nr_taken) + if (stat.nr_unqueued_dirty == nr_taken) { + wakeup_flusher_threads(0, WB_REASON_VMSCAN); set_bit(PGDAT_DIRTY, &pgdat->flags); + } /* * If kswapd scans pages marked marked for immediate @@ -1956,8 +1943,6 @@ static void shrink_active_list(unsigned long nr_to_scan, if (!sc->may_unmap) isolate_mode |= ISOLATE_UNMAPPED; - if (!sc->may_writepage) - isolate_mode |= ISOLATE_CLEAN; spin_lock_irq(&pgdat->lru_lock); @@ -2233,7 +2218,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, * system is under heavy pressure. */ if (!inactive_list_is_low(lruvec, true, sc, false) && - lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, MAX_NR_ZONES) >> sc->priority) { + lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) { scan_balance = SCAN_FILE; goto out; } @@ -2300,7 +2285,7 @@ out: unsigned long size; unsigned long scan; - size = lruvec_lru_size(lruvec, lru, MAX_NR_ZONES); + size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx); scan = size >> sc->priority; if (!scan && pass && force_scan) @@ -2786,8 +2771,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, struct scan_control *sc) { int initial_priority = sc->priority; - unsigned long total_scanned = 0; - unsigned long writeback_threshold; retry: delayacct_freepages_start(); @@ -2800,7 +2783,6 @@ retry: sc->nr_scanned = 0; shrink_zones(zonelist, sc); - total_scanned += sc->nr_scanned; if (sc->nr_reclaimed >= sc->nr_to_reclaim) break; @@ -2813,20 +2795,6 @@ retry: */ if (sc->priority < DEF_PRIORITY - 2) sc->may_writepage = 1; - - /* - * Try to write back as many pages as we just scanned. This - * tends to cause slow streaming writers to write data to the - * disk smoothly, at the dirtying rate, which is nice. But - * that's undesirable in laptop mode, where we *want* lumpy - * writeout. So in laptop mode, write out the whole world. - */ - writeback_threshold = sc->nr_to_reclaim + sc->nr_to_reclaim / 2; - if (total_scanned > writeback_threshold) { - wakeup_flusher_threads(laptop_mode ? 0 : total_scanned, - WB_REASON_TRY_TO_FREE_PAGES); - sc->may_writepage = 1; - } } while (--sc->priority >= 0); delayacct_freepages_end(); @@ -3128,6 +3096,7 @@ static bool zone_balanced(struct zone *zone, int order, int classzone_idx) */ clear_bit(PGDAT_CONGESTED, &zone->zone_pgdat->flags); clear_bit(PGDAT_DIRTY, &zone->zone_pgdat->flags); + clear_bit(PGDAT_WRITEBACK, &zone->zone_pgdat->flags); return true; }