diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0d05f7f3b5329508bcd1b9771e043925d7c72715..70aa739c6b68beeca628dde8d947023ce4b18ca3 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1056,6 +1056,15 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                 *    throttling so we could easily OOM just because too many
                 *    pages are in writeback and there is nothing else to
                 *    reclaim. Wait for the writeback to complete.
+                *
+                * In cases 1) and 2) we activate the pages to get them out of
+                * the way while we continue scanning for clean pages on the
+                * inactive list and refilling from the active list. The
+                * observation here is that waiting for disk writes is more
+                * expensive than potentially causing reloads down the line.
+                * Since they're marked for immediate reclaim, they won't put
+                * memory pressure on the cache working set any longer than it
+                * takes to write them to disk.
                 */
                if (PageWriteback(page)) {
                        /* Case 1 above */
@@ -1063,7 +1072,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                            PageReclaim(page) &&
                            test_bit(PGDAT_WRITEBACK, &pgdat->flags)) {
                                nr_immediate++;
-                               goto keep_locked;
+                               goto activate_locked;
 
                        /* Case 2 above */
                        } else if (sane_reclaim(sc) ||
@@ -1081,7 +1090,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                                 */
                                SetPageReclaim(page);
                                nr_writeback++;
-                               goto keep_locked;
+                               goto activate_locked;
 
                        /* Case 3 above */
                        } else {
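
To make the three PageWriteback() cases above easier to follow, here is a rough, compilable userspace model of the branch; struct demo_page, writeback_verdict() and the verdict enum are illustrative stand-ins rather than kernel types, and the real case-2 condition has extra terms (such as may_enter_fs) that this hunk does not show.

#include <stdbool.h>
#include <stdio.h>

/*
 * Illustrative stand-ins only: not the kernel's struct page or pg_data_t,
 * just enough state to walk through the three cases above.
 */
struct demo_page {
	bool writeback;		/* PageWriteback() */
	bool reclaim;		/* PageReclaim()   */
};

enum verdict { ACTIVATE_LOCKED, WAIT_FOR_WRITEBACK };

/*
 * Cases 1) and 2) now end in ACTIVATE_LOCKED (park the page on the active
 * list and keep scanning) instead of the old keep_locked behaviour; only
 * case 3) still synchronously waits for the writeback to finish.
 */
static enum verdict writeback_verdict(const struct demo_page *page,
				      bool is_kswapd,
				      bool pgdat_writeback_flag,
				      bool sane_reclaim)
{
	if (is_kswapd && page->reclaim && pgdat_writeback_flag)
		return ACTIVATE_LOCKED;			/* Case 1 */
	if (sane_reclaim || !page->reclaim)
		return ACTIVATE_LOCKED;			/* Case 2 */
	return WAIT_FOR_WRITEBACK;			/* Case 3 */
}

int main(void)
{
	struct demo_page page = { .writeback = true, .reclaim = true };

	if (page.writeback)
		printf("%d\n", writeback_verdict(&page, true, true, true));
	return 0;
}
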
@@ -1153,13 +1162,18 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 
                if (PageDirty(page)) {
                        /*
-                        * Only kswapd can writeback filesystem pages to
-                        * avoid risk of stack overflow but only writeback
-                        * if many dirty pages have been encountered.
+                        * Only kswapd can writeback filesystem pages
+                        * to avoid risk of stack overflow. But avoid
+                        * injecting inefficient single-page IO into
+                        * flusher writeback as much as possible: only
+                        * write pages when we've encountered many
+                        * dirty pages, and when we've already scanned
+                        * the rest of the LRU for clean pages and see
+                        * the same dirty pages again (PageReclaim).
                         */
                        if (page_is_file_cache(page) &&
-                                       (!current_is_kswapd() ||
-                                        !test_bit(PGDAT_DIRTY, &pgdat->flags))) {
+                           (!current_is_kswapd() || !PageReclaim(page) ||
+                            !test_bit(PGDAT_DIRTY, &pgdat->flags))) {
                                /*
                                 * Immediately reclaim when written back.
                                 * Similar in principle to deactivate_page()
@@ -1169,7 +1183,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                                inc_node_page_state(page, NR_VMSCAN_IMMEDIATE);
                                SetPageReclaim(page);
 
-                               goto keep_locked;
+                               goto activate_locked;
                        }
 
                        if (references == PAGEREF_RECLAIM_CLEAN)
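
The rewritten PageDirty() condition reads more naturally when inverted: a dirty file page is only passed to pageout() when kswapd is doing the scan, the page already carries PageReclaim from an earlier pass, and the node is flagged PGDAT_DIRTY. A compilable toy version, with hypothetical names (demo_page, dirty_verdict) standing in for the kernel helpers:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the state consulted in the PageDirty() branch. */
struct demo_page {
	bool dirty;		/* PageDirty()          */
	bool reclaim;		/* PageReclaim(): the scanner saw it before */
	bool file_cache;	/* page_is_file_cache() */
};

enum verdict { ACTIVATE_LOCKED, TRY_PAGEOUT };

/*
 * kswapd only writes a dirty file page itself once it has already cycled
 * through the LRU (PageReclaim set) and the node as a whole is flagged
 * dirty (PGDAT_DIRTY); anything else is activated and left to the flushers.
 */
static enum verdict dirty_verdict(const struct demo_page *page,
				  bool is_kswapd,
				  bool pgdat_dirty_flag)
{
	if (page->file_cache &&
	    (!is_kswapd || !page->reclaim || !pgdat_dirty_flag))
		return ACTIVATE_LOCKED;
	return TRY_PAGEOUT;
}

int main(void)
{
	struct demo_page page = { .dirty = true, .reclaim = false, .file_cache = true };

	if (page.dirty)
		printf("%d\n", dirty_verdict(&page, true, true));	/* 0: activate */
	return 0;
}
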
@@ -1798,12 +1812,20 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 
                /*
                 * If dirty pages are scanned that are not queued for IO, it
-                * implies that flushers are not keeping up. In this case, flag
-                * the pgdat PGDAT_DIRTY and kswapd will start writing pages from
-                * reclaim context.
+                * implies that flushers are not doing their job. This can
+                * happen when memory pressure pushes dirty pages to the end of
+                * the LRU before the dirty limits are breached and the dirty
+                * data has expired. It can also happen when the proportion of
+                * dirty pages grows not through writes but through memory
+                * pressure reclaiming all the clean cache. And in some cases,
+                * the flushers simply cannot keep up with the allocation
+                * rate. Nudge the flusher threads in case they are asleep, but
+                * also allow kswapd to start writing pages during reclaim.
                 */
-               if (stat.nr_unqueued_dirty == nr_taken)
+               if (stat.nr_unqueued_dirty == nr_taken) {
+                       wakeup_flusher_threads(0, WB_REASON_VMSCAN);
                        set_bit(PGDAT_DIRTY, &pgdat->flags);
+               }
 
                /*
                 * If kswapd scans pages marked for immediate
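
The nr_unqueued_dirty == nr_taken test above fires when every dirty page in the batch was still unqueued for IO, the sign that the flushers are behind. A minimal userspace sketch of that decision, with maybe_engage_writeback() and wake_flushers_stub() as hypothetical stand-ins for the real shrink_inactive_list() logic and wakeup_flusher_threads() call:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins; not the kernel's scan stats or node state. */
struct demo_stats {
	unsigned long nr_taken;		/* pages isolated from the LRU        */
	unsigned long nr_unqueued_dirty;/* dirty pages not yet queued for IO  */
};

static void wake_flushers_stub(void)
{
	/* stands in for wakeup_flusher_threads(0, WB_REASON_VMSCAN) */
	puts("nudging flusher threads");
}

static bool maybe_engage_writeback(const struct demo_stats *stat,
				   bool *pgdat_dirty_flag)
{
	/* every dirty page in the batch was unqueued: flushers are behind */
	if (stat->nr_unqueued_dirty == stat->nr_taken) {
		wake_flushers_stub();
		*pgdat_dirty_flag = true;	/* lets kswapd call pageout() */
		return true;
	}
	return false;
}

int main(void)
{
	struct demo_stats stat = { .nr_taken = 32, .nr_unqueued_dirty = 32 };
	bool pgdat_dirty = false;

	maybe_engage_writeback(&stat, &pgdat_dirty);
	return 0;
}
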
@@ -2749,8 +2771,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
                                          struct scan_control *sc)
 {
        int initial_priority = sc->priority;
-       unsigned long total_scanned = 0;
-       unsigned long writeback_threshold;
 retry:
        delayacct_freepages_start();
 
@@ -2763,7 +2783,6 @@ retry:
                sc->nr_scanned = 0;
                shrink_zones(zonelist, sc);
 
-               total_scanned += sc->nr_scanned;
                if (sc->nr_reclaimed >= sc->nr_to_reclaim)
                        break;
 
@@ -2776,20 +2795,6 @@ retry:
                 */
                if (sc->priority < DEF_PRIORITY - 2)
                        sc->may_writepage = 1;
-
-               /*
-                * Try to write back as many pages as we just scanned.  This
-                * tends to cause slow streaming writers to write data to the
-                * disk smoothly, at the dirtying rate, which is nice.   But
-                * that's undesirable in laptop mode, where we *want* lumpy
-                * writeout.  So in laptop mode, write out the whole world.
-                */
-               writeback_threshold = sc->nr_to_reclaim + sc->nr_to_reclaim / 2;
-               if (total_scanned > writeback_threshold) {
-                       wakeup_flusher_threads(laptop_mode ? 0 : total_scanned,
-                                               WB_REASON_TRY_TO_FREE_PAGES);
-                       sc->may_writepage = 1;
-               }
        } while (--sc->priority >= 0);
 
        delayacct_freepages_end();
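
For context, the heuristic deleted above used to wake the flushers from direct reclaim once scanning exceeded 1.5x the reclaim target; a toy reconstruction of that arithmetic (illustrative values, not kernel code):

#include <stdio.h>

/* Once total_scanned passed nr_to_reclaim + nr_to_reclaim/2, the old code
 * woke the flushers and enabled sc->may_writepage.  That wakeup now lives
 * in shrink_inactive_list() instead (see the earlier hunk). */
int main(void)
{
	unsigned long nr_to_reclaim = 32, total_scanned = 50;
	unsigned long writeback_threshold = nr_to_reclaim + nr_to_reclaim / 2;

	printf("wake flushers: %d\n", total_scanned > writeback_threshold);
	return 0;
}
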
@@ -3091,6 +3096,7 @@ static bool zone_balanced(struct zone *zone, int order, int classzone_idx)
         */
        clear_bit(PGDAT_CONGESTED, &zone->zone_pgdat->flags);
        clear_bit(PGDAT_DIRTY, &zone->zone_pgdat->flags);
+       clear_bit(PGDAT_WRITEBACK, &zone->zone_pgdat->flags);
 
        return true;
 }