mm: page_alloc: embed OOM killing naturally into allocation slowpath

author Johannes Weiner <hannes@cmpxchg.org>

Mon, 26 Jan 2015 20:58:32 +0000 (12:58 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Mon, 26 Jan 2015 21:37:18 +0000 (13:37 -0800)
author Johannes Weiner <hannes@cmpxchg.org>
Mon, 26 Jan 2015 20:58:32 +0000 (12:58 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 26 Jan 2015 21:37:18 +0000 (13:37 -0800)
diff --git a/include/linux/oom.h b/include/linux/oom.h

index 853698c721f7d1547df181a4fbfc904a67758f72..76200984d1e22081954234d49f32653e18478717 100644 (file)
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -85,11 +85,6 @@ static inline void oom_killer_enable(void)
         oom_killer_disabled = false;
  }
  
-static inline bool oom_gfp_allowed(gfp_t gfp_mask)
-{
-       return (gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY);
-}
-
  extern struct task_struct *find_lock_task_mm(struct task_struct *p);
  
  static inline bool task_will_free_mem(struct task_struct *task)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index 7633c503a116c221e7447614c6d10ebaa38a0b1c..8e20f9c2fa5ab7a89fb29c5dbc3987ccd8690047 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2332,12 +2332,21 @@ static inline struct page *
  __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
         struct zonelist *zonelist, enum zone_type high_zoneidx,
         nodemask_t *nodemask, struct zone *preferred_zone,
-       int classzone_idx, int migratetype)
+       int classzone_idx, int migratetype, unsigned long *did_some_progress)
  {
         struct page *page;
  
-       /* Acquire the per-zone oom lock for each zone */
+       *did_some_progress = 0;
+
+       if (oom_killer_disabled)
+               return NULL;
+
+       /*
+        * Acquire the per-zone oom lock for each zone.  If that
+        * fails, somebody else is making progress for us.
+        */
         if (!oom_zonelist_trylock(zonelist, gfp_mask)) {
+               *did_some_progress = 1;
                 schedule_timeout_uninterruptible(1);
                 return NULL;
         }
@@ -2363,12 +2372,18 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
                 goto out;
  
         if (!(gfp_mask & __GFP_NOFAIL)) {
+               /* Coredumps can quickly deplete all memory reserves */
+               if (current->flags & PF_DUMPCORE)
+                       goto out;
                 /* The OOM killer will not help higher order allocs */
                 if (order > PAGE_ALLOC_COSTLY_ORDER)
                         goto out;
                 /* The OOM killer does not needlessly kill tasks for lowmem */
                 if (high_zoneidx < ZONE_NORMAL)
                         goto out;
+               /* The OOM killer does not compensate for light reclaim */
+               if (!(gfp_mask & __GFP_FS))
+                       goto out;
                 /*
                  * GFP_THISNODE contains __GFP_NORETRY and we never hit this.
                  * Sanity check for bare calls of __GFP_THISNODE, not real OOM.
@@ -2381,7 +2396,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
         }
         /* Exhausted what can be done so it's blamo time */
         out_of_memory(zonelist, gfp_mask, order, nodemask, false);
-
+       *did_some_progress = 1;
  out:
         oom_zonelist_unlock(zonelist, gfp_mask);
         return page;
@@ -2658,7 +2673,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
             (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
                 goto nopage;
  
-restart:
+retry:
         if (!(gfp_mask & __GFP_NO_KSWAPD))
                 wake_all_kswapds(order, zonelist, high_zoneidx,
                                 preferred_zone, nodemask);
@@ -2681,7 +2696,6 @@ restart:
                 classzone_idx = zonelist_zone_idx(preferred_zoneref);
         }
  
-rebalance:
         /* This is the last chance, in general, before the goto nopage. */
         page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
                         high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS,
@@ -2788,54 +2802,28 @@ rebalance:
         if (page)
                 goto got_pg;
  
-       /*
-        * If we failed to make any progress reclaiming, then we are
-        * running out of options and have to consider going OOM
-        */
-       if (!did_some_progress) {
-               if (oom_gfp_allowed(gfp_mask)) {
-                       if (oom_killer_disabled)
-                               goto nopage;
-                       /* Coredumps can quickly deplete all memory reserves */
-                       if ((current->flags & PF_DUMPCORE) &&
-                           !(gfp_mask & __GFP_NOFAIL))
-                               goto nopage;
-                       page = __alloc_pages_may_oom(gfp_mask, order,
-                                       zonelist, high_zoneidx,
-                                       nodemask, preferred_zone,
-                                       classzone_idx, migratetype);
-                       if (page)
-                               goto got_pg;
-
-                       if (!(gfp_mask & __GFP_NOFAIL)) {
-                               /*
-                                * The oom killer is not called for high-order
-                                * allocations that may fail, so if no progress
-                                * is being made, there are no other options and
-                                * retrying is unlikely to help.
-                                */
-                               if (order > PAGE_ALLOC_COSTLY_ORDER)
-                                       goto nopage;
-                               /*
-                                * The oom killer is not called for lowmem
-                                * allocations to prevent needlessly killing
-                                * innocent tasks.
-                                */
-                               if (high_zoneidx < ZONE_NORMAL)
-                                       goto nopage;
-                       }
-
-                       goto restart;
-               }
-       }
-
         /* Check if we should retry the allocation */
         pages_reclaimed += did_some_progress;
         if (should_alloc_retry(gfp_mask, order, did_some_progress,
                                                 pages_reclaimed)) {
+               /*
+                * If we fail to make progress by freeing individual
+                * pages, but the allocation wants us to keep going,
+                * start OOM killing tasks.
+                */
+               if (!did_some_progress) {
+                       page = __alloc_pages_may_oom(gfp_mask, order, zonelist,
+                                               high_zoneidx, nodemask,
+                                               preferred_zone, classzone_idx,
+                                               migratetype,&did_some_progress);
+                       if (page)
+                               goto got_pg;
+                       if (!did_some_progress)
+                               goto nopage;
+               }
                 /* Wait for some write requests to complete then retry */
                 wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50);
-               goto rebalance;
+               goto retry;
         } else {
                 /*
                  * High-order allocations do not necessarily loop after
author	Johannes Weiner <hannes@cmpxchg.org>
	Mon, 26 Jan 2015 20:58:32 +0000 (12:58 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 26 Jan 2015 21:37:18 +0000 (13:37 -0800)
include/linux/oom.h		patch | blob | history
mm/page_alloc.c		patch | blob | history