memcg, vmscan: integrate soft reclaim tighter with zone shrinking code

author Michal Hocko <mhocko@suse.cz>

Thu, 15 Aug 2013 23:39:30 +0000 (09:39 +1000)

committer Stephen Rothwell <sfr@canb.auug.org.au>

Thu, 22 Aug 2013 07:27:04 +0000 (17:27 +1000)
author Michal Hocko <mhocko@suse.cz>
Thu, 15 Aug 2013 23:39:30 +0000 (09:39 +1000)
committer Stephen Rothwell <sfr@canb.auug.org.au>
Thu, 22 Aug 2013 07:27:04 +0000 (17:27 +1000)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h

index 6c416092e3244d1e82b1f928ee0d31a6f6e46162..4b78661c68d0f56a2a09cc0849a763b3379356b5 100644 (file)
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -180,9 +180,7 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
         mem_cgroup_update_page_stat(page, idx, -1);
  }
  
-unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
-                                               gfp_t gfp_mask,
-                                               unsigned long *total_scanned);
+bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg);
  
  void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
  static inline void mem_cgroup_count_vm_event(struct mm_struct *mm,
@@ -359,11 +357,9 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
  }
  
  static inline
-unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
-                                           gfp_t gfp_mask,
-                                           unsigned long *total_scanned)
+bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg)
  {
-       return 0;
+       return false;
  }
  
  static inline void mem_cgroup_split_huge_fixup(struct page *head)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c

index 0cddfbcbc426c498376ee82416beefe6641e9372..bdf31c2de2315dd9bd4edb85e304f6d14349c74f 100644 (file)
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2004,57 +2004,28 @@ static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
  }
  #endif
  
-static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
-                                  struct zone *zone,
-                                  gfp_t gfp_mask,
-                                  unsigned long *total_scanned)
-{
-       struct mem_cgroup *victim = NULL;
-       int total = 0;
-       int loop = 0;
-       unsigned long excess;
-       unsigned long nr_scanned;
-       struct mem_cgroup_reclaim_cookie reclaim = {
-               .zone = zone,
-               .priority = 0,
-       };
-
-       excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT;
-
-       while (1) {
-               victim = mem_cgroup_iter(root_memcg, victim, &reclaim);
-               if (!victim) {
-                       loop++;
-                       if (loop >= 2) {
-                               /*
-                                * If we have not been able to reclaim
-                                * anything, it might because there are
-                                * no reclaimable pages under this hierarchy
-                                */
-                               if (!total)
-                                       break;
-                               /*
-                                * We want to do more targeted reclaim.
-                                * excess >> 2 is not to excessive so as to
-                                * reclaim too much, nor too less that we keep
-                                * coming back to reclaim from this cgroup
-                                */
-                               if (total >= (excess >> 2) ||
-                                       (loop > MEM_CGROUP_MAX_RECLAIM_LOOPS))
-                                       break;
-                       }
-                       continue;
-               }
-               if (!mem_cgroup_reclaimable(victim, false))
-                       continue;
-               total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false,
-                                                    zone, &nr_scanned);
-               *total_scanned += nr_scanned;
-               if (!res_counter_soft_limit_excess(&root_memcg->res))
-                       break;
+/*
+ * A group is eligible for the soft limit reclaim if
+ *     a) it is over its soft limit
+ *     b) any parent up the hierarchy is over its soft limit
+ */
+bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg)
+{
+       struct mem_cgroup *parent = memcg;
+
+       if (res_counter_soft_limit_excess(&memcg->res))
+               return true;
+
+       /*
+        * If any parent up the hierarchy is over its soft limit then we
+        * have to obey and reclaim from this group as well.
+        */
+       while((parent = parent_mem_cgroup(parent))) {
+               if (res_counter_soft_limit_excess(&parent->res))
+                       return true;
         }
-       mem_cgroup_iter_break(root_memcg, victim);
-       return total;
+
+       return false;
  }
  
  /*
@@ -4727,98 +4698,6 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
         return ret;
  }
  
-unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
-                                           gfp_t gfp_mask,
-                                           unsigned long *total_scanned)
-{
-       unsigned long nr_reclaimed = 0;
-       struct mem_cgroup_per_zone *mz, *next_mz = NULL;
-       unsigned long reclaimed;
-       int loop = 0;
-       struct mem_cgroup_tree_per_zone *mctz;
-       unsigned long long excess;
-       unsigned long nr_scanned;
-
-       if (order > 0)
-               return 0;
-
-       mctz = soft_limit_tree_node_zone(zone_to_nid(zone), zone_idx(zone));
-       /*
-        * This loop can run a while, specially if mem_cgroup's continuously
-        * keep exceeding their soft limit and putting the system under
-        * pressure
-        */
-       do {
-               if (next_mz)
-                       mz = next_mz;
-               else
-                       mz = mem_cgroup_largest_soft_limit_node(mctz);
-               if (!mz)
-                       break;
-
-               nr_scanned = 0;
-               reclaimed = mem_cgroup_soft_reclaim(mz->memcg, zone,
-                                                   gfp_mask, &nr_scanned);
-               nr_reclaimed += reclaimed;
-               *total_scanned += nr_scanned;
-               spin_lock(&mctz->lock);
-
-               /*
-                * If we failed to reclaim anything from this memory cgroup
-                * it is time to move on to the next cgroup
-                */
-               next_mz = NULL;
-               if (!reclaimed) {
-                       do {
-                               /*
-                                * Loop until we find yet another one.
-                                *
-                                * By the time we get the soft_limit lock
-                                * again, someone might have aded the
-                                * group back on the RB tree. Iterate to
-                                * make sure we get a different mem.
-                                * mem_cgroup_largest_soft_limit_node returns
-                                * NULL if no other cgroup is present on
-                                * the tree
-                                */
-                               next_mz =
-                               __mem_cgroup_largest_soft_limit_node(mctz);
-                               if (next_mz == mz)
-                                       css_put(&next_mz->memcg->css);
-                               else /* next_mz == NULL or other memcg */
-                                       break;
-                       } while (1);
-               }
-               __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
-               excess = res_counter_soft_limit_excess(&mz->memcg->res);
-               /*
-                * One school of thought says that we should not add
-                * back the node to the tree if reclaim returns 0.
-                * But our reclaim could return 0, simply because due
-                * to priority we are exposing a smaller subset of
-                * memory to reclaim from. Consider this as a longer
-                * term TODO.
-                */
-               /* If excess == 0, no tree ops */
-               __mem_cgroup_insert_exceeded(mz->memcg, mz, mctz, excess);
-               spin_unlock(&mctz->lock);
-               css_put(&mz->memcg->css);
-               loop++;
-               /*
-                * Could not reclaim anything and there are no more
-                * mem cgroups to try or we seem to be looping without
-                * reclaiming anything.
-                */
-               if (!nr_reclaimed &&
-                       (next_mz == NULL ||
-                       loop > MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS))
-                       break;
-       } while (!nr_reclaimed);
-       if (next_mz)
-               css_put(&next_mz->memcg->css);
-       return nr_reclaimed;
-}
-
  /**
   * mem_cgroup_force_empty_list - clears LRU of a group
   * @memcg: group to clear
diff --git a/mm/vmscan.c b/mm/vmscan.c

index 758540d3ca83bf866ecfe931031a696c2bea2136..cfc8cefcfc2817774c4483ed8988241d1df6e6d4 100644 (file)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -139,11 +139,21 @@ static bool global_reclaim(struct scan_control *sc)
  {
         return !sc->target_mem_cgroup;
  }
+
+static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc)
+{
+       return !mem_cgroup_disabled() && global_reclaim(sc);
+}
  #else
  static bool global_reclaim(struct scan_control *sc)
  {
         return true;
  }
+
+static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc)
+{
+       return false;
+}
  #endif
  
  static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru)
@@ -2111,7 +2121,8 @@ static inline bool should_continue_reclaim(struct zone *zone,
         }
  }
  
-static void shrink_zone(struct zone *zone, struct scan_control *sc)
+static void
+__shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
  {
         unsigned long nr_reclaimed, nr_scanned;
  
@@ -2130,6 +2141,12 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
                 do {
                         struct lruvec *lruvec;
  
+                       if (soft_reclaim &&
+                           !mem_cgroup_soft_reclaim_eligible(memcg)) {
+                               memcg = mem_cgroup_iter(root, memcg, &reclaim);
+                               continue;
+                       }
+
                         lruvec = mem_cgroup_zone_lruvec(zone, memcg);
  
                         shrink_lruvec(lruvec, sc);
@@ -2160,6 +2177,24 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
                                          sc->nr_scanned - nr_scanned, sc));
  }
  
+
+static void shrink_zone(struct zone *zone, struct scan_control *sc)
+{
+       bool do_soft_reclaim = mem_cgroup_should_soft_reclaim(sc);
+       unsigned long nr_scanned = sc->nr_scanned;
+
+       __shrink_zone(zone, sc, do_soft_reclaim);
+
+       /*
+        * Either no group is over its soft limit, or those that are have no
+        * pages in the zone we are reclaiming, so we have to reclaim everybody
+        */
+       if (do_soft_reclaim && (sc->nr_scanned == nr_scanned)) {
+               __shrink_zone(zone, sc, false);
+               return;
+       }
+}
+
  /* Returns true if compaction should go ahead for a high-order request */
  static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
  {
@@ -2221,8 +2256,6 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
  {
         struct zoneref *z;
         struct zone *zone;
-       unsigned long nr_soft_reclaimed;
-       unsigned long nr_soft_scanned;
         bool aborted_reclaim = false;
  
         /*
@@ -2262,18 +2295,6 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
                                         continue;
                                 }
                         }
-                       /*
-                        * This steals pages from memory cgroups over softlimit
-                        * and returns the number of reclaimed pages and
-                        * scanned pages. This works for global memory pressure
-                        * and balancing, not for a memcg's limit.
-                        */
-                       nr_soft_scanned = 0;
-                       nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
-                                               sc->order, sc->gfp_mask,
-                                               &nr_soft_scanned);
-                       sc->nr_reclaimed += nr_soft_reclaimed;
-                       sc->nr_scanned += nr_soft_scanned;
                         /* need some check for avoid more shrink_zone() */
                 }
  
@@ -2870,8 +2891,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
  {
         int i;
         int end_zone = 0;       /* Inclusive.  0 = ZONE_DMA */
-       unsigned long nr_soft_reclaimed;
-       unsigned long nr_soft_scanned;
         struct scan_control sc = {
                 .gfp_mask = GFP_KERNEL,
                 .priority = DEF_PRIORITY,
@@ -2986,15 +3005,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
  
                         sc.nr_scanned = 0;
  
-                       nr_soft_scanned = 0;
-                       /*
-                        * Call soft limit reclaim before calling shrink_zone.
-                        */
-                       nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
-                                                       order, sc.gfp_mask,
-                                                       &nr_soft_scanned);
-                       sc.nr_reclaimed += nr_soft_reclaimed;
-
                         /*
                          * There should be no need to raise the scanning
                          * priority if enough pages are already being scanned
author	Michal Hocko <mhocko@suse.cz>
	Thu, 15 Aug 2013 23:39:30 +0000 (09:39 +1000)
committer	Stephen Rothwell <sfr@canb.auug.org.au>
	Thu, 22 Aug 2013 07:27:04 +0000 (17:27 +1000)
include/linux/memcontrol.h		patch \| blob \| history
mm/memcontrol.c		patch \| blob \| history
mm/vmscan.c		patch \| blob \| history