memcg: destroy memcg caches

author Glauber Costa <glommer@parallels.com>

Fri, 9 Nov 2012 03:04:17 +0000 (14:04 +1100)

committer Stephen Rothwell <sfr@canb.auug.org.au>

Mon, 12 Nov 2012 04:17:03 +0000 (15:17 +1100)
author Glauber Costa <glommer@parallels.com>
Fri, 9 Nov 2012 03:04:17 +0000 (14:04 +1100)
committer Stephen Rothwell <sfr@canb.auug.org.au>
Mon, 12 Nov 2012 04:17:03 +0000 (15:17 +1100)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h

index d77d88d40d74c3687b5b32fa55b7c3330afa458b..7d59852f1dcefd7409f310e8e84cf1b5b0490c7b 100644 (file)
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -446,6 +446,8 @@ void memcg_update_array_size(int num_groups);
  struct kmem_cache *
  __memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
  
+void mem_cgroup_destroy_cache(struct kmem_cache *cachep);
+
  /**
   * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
   * @gfp: the gfp allocation flags.
diff --git a/include/linux/slab.h b/include/linux/slab.h

index c0fcf28c15b20d05b0d93e0aeaedab6ece14c33a..b63152938123e9edf5d860642a98f253fd2762d0 100644 (file)
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -180,6 +180,7 @@ void kmem_cache_free(struct kmem_cache *, void *);
  #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
  #endif
  
+#include <linux/workqueue.h>
  /*
   * This is the main placeholder for memcg-related information in kmem caches.
   * struct kmem_cache will hold a pointer to it, so the memory cost while
@@ -197,6 +198,10 @@ void kmem_cache_free(struct kmem_cache *, void *);
   * @memcg: pointer to the memcg this cache belongs to
   * @list: list_head for the list of all caches in this memcg
   * @root_cache: pointer to the global, root cache, this cache was derived from
+ * @dead: set to true after the memcg dies; the cache may still be around.
+ * @nr_pages: number of pages that belong to this cache.
+ * @destroy: worker to be called whenever we are ready, or believe we may be
+ *           ready, to destroy this cache.
   */
  struct memcg_cache_params {
         bool is_root_cache;
@@ -206,6 +211,9 @@ struct memcg_cache_params {
                         struct mem_cgroup *memcg;
                         struct list_head list;
                         struct kmem_cache *root_cache;
+                       bool dead;
+                       atomic_t nr_pages;
+                       struct work_struct destroy;
                 };
         };
  };
diff --git a/mm/memcontrol.c b/mm/memcontrol.c

index 6990ab96bedc2f871ff808cc6db8288571acb2b9..2a2466f0bc0b96fa1739f018594985cd74c426d7 100644 (file)
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2756,6 +2756,19 @@ static inline bool memcg_can_account_kmem(struct mem_cgroup *memcg)
                 (memcg->kmem_account_flags & KMEM_ACCOUNTED_MASK);
  }
  
+/*
+ * Map the memcg_cache_params of a per-memcg cache to a kmem_cache pointer.
+ *
+ * The lookup goes through the root cache recorded in @p and indexes that
+ * root cache's memcg_caches[] array with memcg_cache_id(p->memcg).
+ * @p must not describe a root cache (enforced with VM_BUG_ON).
+ *
+ * This is a bit cumbersome, but it is rarely used and avoids a backpointer
+ * in the memcg_cache_params struct.
+ */
+static struct kmem_cache *memcg_params_to_cache(struct memcg_cache_params *p)
+{
+       struct kmem_cache *cachep;
+
+       VM_BUG_ON(p->is_root_cache);
+       cachep = p->root_cache;
+       return cachep->memcg_params->memcg_caches[memcg_cache_id(p->memcg)];
+}
+
  static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
  {
         struct res_counter *fail_res;
@@ -3037,6 +3050,31 @@ static inline void memcg_resume_kmem_account(void)
         current->memcg_kmem_skip_account--;
  }
  
+/*
+ * Work handler for memcg_cache_params.destroy.
+ *
+ * Recovers the params from the embedded work_struct, resolves the cache
+ * they describe, and destroys that cache only if its nr_pages counter is
+ * currently zero. When pages are still accounted, nothing is done here.
+ */
+static void kmem_cache_destroy_work_func(struct work_struct *w)
+{
+       struct kmem_cache *cachep;
+       struct memcg_cache_params *p;
+
+       p = container_of(w, struct memcg_cache_params, destroy);
+
+       cachep = memcg_params_to_cache(p);
+
+       /* Only tear the cache down once no pages are accounted to it. */
+       if (!atomic_read(&cachep->memcg_params->nr_pages))
+               kmem_cache_destroy(cachep);
+}
+
+/*
+ * Queue the destroy work of @cachep, but only if the cache has been marked
+ * dead; for a cache that is not dead this is a no-op.
+ *
+ * NOTE(review): the destroy work item is set up with INIT_WORK() in
+ * mem_cgroup_destroy_all_caches() after ->dead has been set to true.
+ * Confirm that no caller can observe dead == true and reach schedule_work()
+ * before that INIT_WORK() has completed.
+ */
+void mem_cgroup_destroy_cache(struct kmem_cache *cachep)
+{
+       if (!cachep->memcg_params->dead)
+               return;
+
+       /*
+        * We have to defer the actual destroying to a workqueue, because
+        * we might currently be in a context that cannot sleep.
+        */
+       schedule_work(&cachep->memcg_params->destroy);
+}
+
  static char *memcg_cache_name(struct mem_cgroup *memcg, struct kmem_cache *s)
  {
         char *name;
@@ -3110,6 +3148,7 @@ static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
  
         mem_cgroup_get(memcg);
         new_cachep->memcg_params->root_cache = cachep;
+       atomic_set(&new_cachep->memcg_params->nr_pages , 0);
  
         cachep->memcg_params->memcg_caches[idx] = new_cachep;
         /*
@@ -3128,6 +3167,25 @@ struct create_work {
         struct work_struct work;
  };
  
+/*
+ * Mark every cache on @memcg's memcg_slab_caches list as dead and schedule
+ * its destroy work. Does nothing when memcg_kmem_is_active() is false for
+ * @memcg. The list is walked with memcg->slab_caches_mutex held.
+ *
+ * NOTE(review): ->dead is set before INIT_WORK() runs, and INIT_WORK() is
+ * executed on every call. Verify that (a) nothing can see dead == true and
+ * schedule the work before it is initialised, and (b) this function cannot
+ * run again for the same cache while its destroy work is still pending.
+ */
+static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
+{
+       struct kmem_cache *cachep;
+       struct memcg_cache_params *params;
+
+       if (!memcg_kmem_is_active(memcg))
+               return;
+
+       mutex_lock(&memcg->slab_caches_mutex);
+       list_for_each_entry(params, &memcg->memcg_slab_caches, list) {
+               cachep = memcg_params_to_cache(params);
+               cachep->memcg_params->dead = true;
+               INIT_WORK(&cachep->memcg_params->destroy,
+                         kmem_cache_destroy_work_func);
+               schedule_work(&cachep->memcg_params->destroy);
+       }
+       mutex_unlock(&memcg->slab_caches_mutex);
+}
+
  static void memcg_create_cache_work_func(struct work_struct *w)
  {
         struct create_work *cw;
@@ -3343,6 +3401,10 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order)
         VM_BUG_ON(mem_cgroup_is_root(memcg));
         memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
  }
+#else
+/* Empty stub for builds without CONFIG_MEMCG_KMEM: there is nothing to destroy. */
+static inline void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
+{
+}
  #endif /* CONFIG_MEMCG_KMEM */
  
  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -5960,6 +6022,7 @@ static void mem_cgroup_pre_destroy(struct cgroup *cont)
         struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
  
         mem_cgroup_reparent_charges(memcg);
+       mem_cgroup_destroy_all_caches(memcg);
  }
  
  static void mem_cgroup_destroy(struct cgroup *cont)
diff --git a/mm/slab.c b/mm/slab.c

index 317df1574b9ba1b0269910bc651561a664fcadb0..90538b77e114e0d10bedc13d1c3352bd8b25cf58 100644 (file)
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1936,6 +1936,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
                 if (page->pfmemalloc)
                         SetPageSlabPfmemalloc(page + i);
         }
+       memcg_bind_pages(cachep, cachep->gfporder);
  
         if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
                 kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid);
@@ -1972,6 +1973,8 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
                 __ClearPageSlab(page);
                 page++;
         }
+
+       memcg_release_pages(cachep, cachep->gfporder);
         if (current->reclaim_state)
                 current->reclaim_state->reclaimed_slab += nr_freed;
         free_memcg_kmem_pages((unsigned long)addr, cachep->gfporder);
diff --git a/mm/slab.h b/mm/slab.h

index fb1c4c4d49654bfe3e4b5feae3544d4e74916e55..3ef41e1ae559f161d861b9bc1ab5f6fa224336dd 100644 (file)
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -109,6 +109,21 @@ static inline bool cache_match_memcg(struct kmem_cache *cachep,
                                 (cachep->memcg_params->memcg == memcg);
  }
  
+/*
+ * Account a freshly allocated slab of 2^@order pages to cache @s by adding
+ * to its memcg_params->nr_pages counter. Root caches are not counted.
+ */
+static inline void memcg_bind_pages(struct kmem_cache *s, int order)
+{
+       if (!is_root_cache(s))
+               atomic_add(1 << order, &s->memcg_params->nr_pages);
+}
+
+/*
+ * Drop 2^@order pages from the nr_pages counter of cache @s. Root caches
+ * are ignored. When the counter reaches zero, mem_cgroup_destroy_cache()
+ * is called; that function only acts on caches already marked dead.
+ */
+static inline void memcg_release_pages(struct kmem_cache *s, int order)
+{
+       if (is_root_cache(s))
+               return;
+
+       if (atomic_sub_and_test((1 << order), &s->memcg_params->nr_pages))
+               mem_cgroup_destroy_cache(s);
+}
+
  static inline bool slab_equal_or_root(struct kmem_cache *s,
                                         struct kmem_cache *p)
  {
@@ -127,6 +142,14 @@ static inline bool cache_match_memcg(struct kmem_cache *cachep,
         return true;
  }
  
+/*
+ * Empty variant of memcg_bind_pages(): no page accounting is done.
+ * NOTE(review): presumably the !CONFIG_MEMCG_KMEM branch; the enclosing
+ * #if is outside this hunk, so confirm against the full header.
+ */
+static inline void memcg_bind_pages(struct kmem_cache *s, int order)
+{
+}
+
+/*
+ * Empty variant of memcg_release_pages(): no page accounting is done.
+ * NOTE(review): presumably the !CONFIG_MEMCG_KMEM branch; the enclosing
+ * #if is outside this hunk, so confirm against the full header.
+ */
+static inline void memcg_release_pages(struct kmem_cache *s, int order)
+{
+}
+
  static inline bool slab_equal_or_root(struct kmem_cache *s,
                                       struct kmem_cache *p)
  {
diff --git a/mm/slub.c b/mm/slub.c

index 3c3a79cd54fe78a87f2876e87dfb86a8af59064c..c3271b1f8ed11e1eb3e739ed66e332b2fb1cfc46 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1346,6 +1346,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
         void *start;
         void *last;
         void *p;
+       int order;
  
         BUG_ON(flags & GFP_SLAB_BUG_MASK);
  
@@ -1354,7 +1355,9 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
         if (!page)
                 goto out;
  
+       order = compound_order(page);
         inc_slabs_node(s, page_to_nid(page), page->objects);
+       memcg_bind_pages(s, order);
         page->slab_cache = s;
         __SetPageSlab(page);
         if (page->pfmemalloc)
@@ -1363,7 +1366,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
         start = page_address(page);
  
         if (unlikely(s->flags & SLAB_POISON))
-               memset(start, POISON_INUSE, PAGE_SIZE << compound_order(page));
+               memset(start, POISON_INUSE, PAGE_SIZE << order);
  
         last = start;
         for_each_object(p, s, start, page->objects) {
@@ -1404,6 +1407,8 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
  
         __ClearPageSlabPfmemalloc(page);
         __ClearPageSlab(page);
+
+       memcg_release_pages(s, order);
         reset_page_mapcount(page);
         if (current->reclaim_state)
                 current->reclaim_state->reclaimed_slab += pages;
author	Glauber Costa <glommer@parallels.com>
	Fri, 9 Nov 2012 03:04:17 +0000 (14:04 +1100)
committer	Stephen Rothwell <sfr@canb.auug.org.au>
	Mon, 12 Nov 2012 04:17:03 +0000 (15:17 +1100)
include/linux/memcontrol.h		patch \| blob \| history
include/linux/slab.h		patch \| blob \| history
mm/memcontrol.c		patch \| blob \| history
mm/slab.c		patch \| blob \| history
mm/slab.h		patch \| blob \| history
mm/slub.c		patch \| blob \| history