memcg/sl[au]b: shrink dead caches

author Glauber Costa <glommer@parallels.com>

Sat, 3 Nov 2012 00:42:32 +0000 (11:42 +1100)

committer Stephen Rothwell <sfr@canb.auug.org.au>

Thu, 8 Nov 2012 04:08:41 +0000 (15:08 +1100)
author Glauber Costa <glommer@parallels.com>
Sat, 3 Nov 2012 00:42:32 +0000 (11:42 +1100)
committer Stephen Rothwell <sfr@canb.auug.org.au>
Thu, 8 Nov 2012 04:08:41 +0000 (15:08 +1100)
diff --git a/include/linux/slab.h b/include/linux/slab.h

index b63152938123e9edf5d860642a98f253fd2762d0..e09aa3d94a7e2cf244f707ecf167d4626304da91 100644 (file)
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -213,7 +213,7 @@ struct memcg_cache_params {
                         struct kmem_cache *root_cache;
                         bool dead;
                         atomic_t nr_pages;
-                       struct work_struct destroy;
+                       struct delayed_work destroy;
                 };
         };
  };
diff --git a/mm/memcontrol.c b/mm/memcontrol.c

index 84d19e367e7f9d7b7e5302c077029924750506ec..cc9cc4da3bc01f28605f3cd5613599c7c8d869a8 100644 (file)
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3047,12 +3047,35 @@ static void kmem_cache_destroy_work_func(struct work_struct *w)
  {
         struct kmem_cache *cachep;
         struct memcg_cache_params *p;
+       struct delayed_work *dw = to_delayed_work(w);
  
-       p = container_of(w, struct memcg_cache_params, destroy);
+       p = container_of(dw, struct memcg_cache_params, destroy);
  
         cachep = memcg_params_to_cache(p);
  
-       if (!atomic_read(&cachep->memcg_params->nr_pages))
+       /*
+        * If we get down to 0 after shrink, we could delete right away.
+        * However, memcg_release_pages() already puts us back in the workqueue
+        * in that case. If we proceed deleting, we'll get a dangling
+        * reference, and removing the object from the workqueue in that case
+        * is unnecessary complication. We are not a fast path.
+        *
+        * Note that this case is fundamentally different from racing with
+        * shrink_slab(): if memcg_cgroup_destroy_cache() is called in
+        * kmem_cache_shrink, not only we would be reinserting a dead cache
+        * into the queue, but doing so from inside the worker racing to
+        * destroy it.
+        *
+        * So if we aren't down to zero, we'll just schedule a worker and try
+        * again
+        */
+       if (atomic_read(&cachep->memcg_params->nr_pages) != 0) {
+               kmem_cache_shrink(cachep);
+               if (atomic_read(&cachep->memcg_params->nr_pages) == 0)
+                       return;
+               /* Once per minute should be good enough. */
+               schedule_delayed_work(&cachep->memcg_params->destroy, 60 * HZ);
+       } else
                 kmem_cache_destroy(cachep);
  }
  
@@ -3061,11 +3084,31 @@ void mem_cgroup_destroy_cache(struct kmem_cache *cachep)
         if (!cachep->memcg_params->dead)
                 return;
  
+       /*
+        * There are many ways in which we can get here.
+        *
+        * We can get to a memory-pressure situation while the delayed work is
+        * still pending to run. The vmscan shrinkers can then release all
+        * cache memory and get us to destruction. If this is the case, we'll
+        * be executed twice, which is a bug (the second time will execute over
+        * bogus data). In this case, cancelling the work should be fine.
+        *
+        * But we can also get here from the worker itself, if
+        * kmem_cache_shrink is enough to shake all the remaining objects and
+        * get the page count to 0. In this case, we'll deadlock if we try to
+        * cancel the work (the worker runs with an internal lock held, which
+        * is the same lock we would hold for cancel_delayed_work_sync().)
+        *
+        * Since we can't possibly know who got us here, just refrain from
+        * running if there is already work pending
+        */
+       if (delayed_work_pending(&cachep->memcg_params->destroy))
+               return;
         /*
          * We have to defer the actual destroying to a workqueue, because
          * we might currently be in a context that cannot sleep.
          */
-       schedule_work(&cachep->memcg_params->destroy);
+       schedule_delayed_work(&cachep->memcg_params->destroy, 0);
  }
  
  static char *memcg_cache_name(struct mem_cgroup *memcg, struct kmem_cache *s)
@@ -3217,9 +3260,9 @@ static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
         list_for_each_entry(params, &memcg->memcg_slab_caches, list) {
                 cachep = memcg_params_to_cache(params);
                 cachep->memcg_params->dead = true;
-               INIT_WORK(&cachep->memcg_params->destroy,
-                         kmem_cache_destroy_work_func);
-               schedule_work(&cachep->memcg_params->destroy);
+               INIT_DELAYED_WORK(&cachep->memcg_params->destroy,
+                                 kmem_cache_destroy_work_func);
+               schedule_delayed_work(&cachep->memcg_params->destroy, 0);
         }
         mutex_unlock(&memcg->slab_caches_mutex);
  }
author	Glauber Costa <glommer@parallels.com>
	Sat, 3 Nov 2012 00:42:32 +0000 (11:42 +1100)
committer	Stephen Rothwell <sfr@canb.auug.org.au>
	Thu, 8 Nov 2012 04:08:41 +0000 (15:08 +1100)
include/linux/slab.h		patch \| blob \| history
mm/memcontrol.c		patch \| blob \| history