mm: memcontrol: reclaim when shrinking memory.high below usage
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e234c21a5e6cb3d02104da2422548692fda1e881..5d081ff28663b6899e62ff5b144539830f926088 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -903,14 +903,20 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
                if (prev && reclaim->generation != iter->generation)
                        goto out_unlock;
 
-               do {
+               while (1) {
                        pos = READ_ONCE(iter->position);
+                       if (!pos || css_tryget(&pos->css))
+                               break;
                        /*
-                        * A racing update may change the position and
-                        * put the last reference, hence css_tryget(),
-                        * or retry to see the updated position.
+                        * css reference reached zero, so iter->position will
+                        * be cleared by ->css_released. However, we should not
+                        * rely on this happening soon, because ->css_released
+                        * is called from a work queue, and by busy-waiting we
+                        * might block it. So we clear iter->position right
+                        * away.
                         */
-               } while (pos && !css_tryget(&pos->css));
+                       (void)cmpxchg(&iter->position, pos, NULL);
+               }
        }
 
        if (pos)
@@ -956,17 +962,13 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
        }
 
        if (reclaim) {
-               if (cmpxchg(&iter->position, pos, memcg) == pos) {
-                       if (memcg)
-                               css_get(&memcg->css);
-                       if (pos)
-                               css_put(&pos->css);
-               }
-
                /*
-                * pairs with css_tryget when dereferencing iter->position
-                * above.
+                * The position could have already been updated by a competing
+                * thread, so check that the value hasn't changed since we read
+                * it to avoid reclaiming from the same cgroup twice.
                 */
+               (void)cmpxchg(&iter->position, pos, memcg);
+
                if (pos)
                        css_put(&pos->css);
 
@@ -999,6 +1001,28 @@ void mem_cgroup_iter_break(struct mem_cgroup *root,
                css_put(&prev->css);
 }
 
+static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
+{
+       struct mem_cgroup *memcg = dead_memcg;
+       struct mem_cgroup_reclaim_iter *iter;
+       struct mem_cgroup_per_zone *mz;
+       int nid, zid;
+       int i;
+
+       while ((memcg = parent_mem_cgroup(memcg))) {
+               for_each_node(nid) {
+                       for (zid = 0; zid < MAX_NR_ZONES; zid++) {
+                               mz = &memcg->nodeinfo[nid]->zoneinfo[zid];
+                               for (i = 0; i <= DEF_PRIORITY; i++) {
+                                       iter = &mz->iter[i];
+                                       cmpxchg(&iter->position,
+                                               dead_memcg, NULL);
+                               }
+                       }
+               }
+       }
+}
+
 /*
  * Iteration constructs for visiting all cgroups (under a tree).  If
  * loops are exited prematurely (break), mem_cgroup_iter_break() must
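Taken together, the iterator hunks above (with the css_released hook added further down) turn iter->position into a weak pointer: the read side takes its own reference with css_tryget() and clears the slot itself if the cached cgroup is already dead, the update side publishes the next position with a bare cmpxchg and no reference counting, and a dying cgroup scrubs itself out of every ancestor's iterators before it goes away. As a rough userspace analogy (illustrative names and a simplified refcount, not the kernel code):

#include <stdatomic.h>
#include <stddef.h>

/* Illustrative refcounted object; a count of zero means "dying". */
struct obj {
	atomic_int refs;
};

/* Like css_tryget(): succeeds only while the count is still positive. */
static int obj_tryget(struct obj *o)
{
	int r = atomic_load(&o->refs);

	while (r > 0) {
		if (atomic_compare_exchange_weak(&o->refs, &r, r + 1))
			return 1;
	}
	return 0;
}

/* Shared cache slot that holds no reference, like iter->position. */
static _Atomic(struct obj *) cached;

/* Read side: return a referenced object, clearing dead entries ourselves. */
static struct obj *cache_get(void)
{
	struct obj *pos;

	for (;;) {
		pos = atomic_load(&cached);
		if (!pos || obj_tryget(pos))
			return pos;
		/*
		 * The cached object is dying; don't wait for its release
		 * callback, clear the slot right away (the cmpxchg above).
		 */
		atomic_compare_exchange_strong(&cached, &pos, NULL);
	}
}

/* Update side: publish the new position only if nobody raced with us. */
static void cache_update(struct obj *old, struct obj *new)
{
	atomic_compare_exchange_strong(&cached, &old, new);
}

/* Release side: what invalidate_reclaim_iterators() does for each slot. */
static void cache_invalidate(struct obj *dead)
{
	atomic_compare_exchange_strong(&cached, &dead, NULL);
}

The point of the scheme is that the cache never pins an object: readers either take their own reference or clean up the stale entry themselves, so a dying cgroup's release path is never blocked by the iterator and no reference can be leaked through it.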
@@ -3498,16 +3522,17 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
 swap_buffers:
        /* Swap primary and spare array */
        thresholds->spare = thresholds->primary;
-       /* If all events are unregistered, free the spare array */
-       if (!new) {
-               kfree(thresholds->spare);
-               thresholds->spare = NULL;
-       }
 
        rcu_assign_pointer(thresholds->primary, new);
 
        /* To be sure that nobody uses thresholds */
        synchronize_rcu();
+
+       /* If all events are unregistered, free the spare array */
+       if (!new) {
+               kfree(thresholds->spare);
+               thresholds->spare = NULL;
+       }
 unlock:
        mutex_unlock(&memcg->thresholds_lock);
 }
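The reshuffle above is purely an ordering fix: after the swap, the spare array is exactly the set of entries that RCU readers of thresholds->primary may still be walking, so it must not be freed until synchronize_rcu() guarantees those readers have finished. A minimal kernel-style sketch of the publish-then-free pattern, using hypothetical cur_cfg/cfg_lock names rather than the thresholds code:

#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

/* Hypothetical RCU-published object, standing in for thresholds->primary. */
struct cfg {
	int val;
};

static struct cfg __rcu *cur_cfg;
static DEFINE_MUTEX(cfg_lock);		/* serializes updaters */

/* Publish 'new' (which may be NULL), then free the previous copy. */
static void replace_cfg(struct cfg *new)
{
	struct cfg *old;

	mutex_lock(&cfg_lock);
	old = rcu_dereference_protected(cur_cfg, lockdep_is_held(&cfg_lock));
	rcu_assign_pointer(cur_cfg, new);
	/* New readers see 'new'; existing readers may still use 'old'. */
	synchronize_rcu();
	/* Every reader that could have seen 'old' is now done with it. */
	kfree(old);
	mutex_unlock(&cfg_lock);
}

In the hunk above, "old" corresponds to the spare array (the previous primary) and "new" is either the shrunk copy or NULL when the last event is unregistered, which is why the kfree() has to move below synchronize_rcu().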
@@ -4324,6 +4349,13 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
        wb_memcg_offline(memcg);
 }
 
+static void mem_cgroup_css_released(struct cgroup_subsys_state *css)
+{
+       struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+
+       invalidate_reclaim_iterators(memcg);
+}
+
 static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
 {
        struct mem_cgroup *memcg = mem_cgroup_from_css(css);
@@ -5089,6 +5121,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
                                 char *buf, size_t nbytes, loff_t off)
 {
        struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+       unsigned long nr_pages;
        unsigned long high;
        int err;
 
@@ -5099,6 +5132,11 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
 
        memcg->high = high;
 
+       nr_pages = page_counter_read(&memcg->memory);
+       if (nr_pages > high)
+               try_to_free_mem_cgroup_pages(memcg, nr_pages - high,
+                                            GFP_KERNEL, true);
+
        memcg_wb_domain_size_changed(memcg);
        return nbytes;
 }
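memory_high_write() now does the work up front: if usage already exceeds the newly written high boundary, it asks reclaim for the difference instead of leaving the excess in place until the next charge bumps into the limit. From userspace this is just a write to the cgroup2 memory.high file; a minimal sketch, with the cgroup path being an example and reclaim being best-effort:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* Example path; the actual mount point and group name will differ. */
	const char *path = "/sys/fs/cgroup/mygroup/memory.high";
	const char *limit = "104857600\n";	/* 100M, in bytes */
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open memory.high");
		return 1;
	}
	/*
	 * If current usage is above the new value, the kernel now tries to
	 * reclaim the difference before this write returns.
	 */
	if (write(fd, limit, strlen(limit)) < 0)
		perror("write memory.high");
	close(fd);
	return 0;
}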
@@ -5185,6 +5223,7 @@ struct cgroup_subsys memory_cgrp_subsys = {
        .css_alloc = mem_cgroup_css_alloc,
        .css_online = mem_cgroup_css_online,
        .css_offline = mem_cgroup_css_offline,
+       .css_released = mem_cgroup_css_released,
        .css_free = mem_cgroup_css_free,
        .css_reset = mem_cgroup_css_reset,
        .can_attach = mem_cgroup_can_attach,