mm: throttle on IO only when there are too many dirty and writeback pages

[karo-tx-linux.git] / mm / page_alloc.c
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index 36ea50bf60ce31735ca3d244b1068bf17bc866e1..fca77c0ac77b0066fb5f5af35466f824e996f077 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3139,8 +3139,9 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
         for_each_zone_zonelist_nodemask(zone, z, ac->zonelist,
                         ac->high_zoneidx, ac->nodemask) {
                 unsigned long available;
+               unsigned long reclaimable;
  
-               available = zone_reclaimable_pages(zone);
+               available = reclaimable = zone_reclaimable_pages(zone);
                 available -= DIV_ROUND_UP(no_progress_loops * available,
                                           MAX_RECLAIM_RETRIES);
                 available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
@@ -3151,8 +3152,37 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
                  */
                 if (__zone_watermark_ok(zone, order, min_wmark_pages(zone),
                                 ac->high_zoneidx, alloc_flags, available)) {
-                       /* Wait for some writes to complete then retry */
-                       wait_iff_congested(zone, BLK_RW_ASYNC, HZ/50);
+                       unsigned long writeback;
+                       unsigned long dirty;
+
+                       writeback = zone_page_state_snapshot(zone, NR_WRITEBACK);
+                       dirty = zone_page_state_snapshot(zone, NR_FILE_DIRTY);
+
+                       /*
+                        * If we didn't make any progress and have a lot of
+                        * dirty + writeback pages then we should wait for
+                        * an IO to complete to slow down the reclaim and
+                        * prevent from pre mature OOM
+                        */
+                       if (!did_some_progress && 2*(writeback + dirty) > reclaimable) {
+                               congestion_wait(BLK_RW_ASYNC, HZ/10);
+                               return true;
+                       }
+
+                       /*
+                        * Memory allocation/reclaim might be called from a WQ
+                        * context and the current implementation of the WQ
+                        * concurrency control doesn't recognize that
+                        * a particular WQ is congested if the worker thread is
+                        * looping without ever sleeping. Therefore we have to
+                        * do a short sleep here rather than calling
+                        * cond_resched().
+                        */
+                       if (current->flags & PF_WQ_WORKER)
+                               schedule_timeout(1);
+                       else
+                               cond_resched();
+
                         return true;
                 }
         }