/* internal only representation about the status of kmem accounting. */
enum {
KMEM_ACCOUNTED_ACTIVE = 0, /* accounted by this cgroup itself */
+ KMEM_ACCOUNTED_ACTIVATED, /* static key enabled. */
KMEM_ACCOUNTED_DEAD, /* dead memcg with pending kmem charges */
};
-#define KMEM_ACCOUNTED_MASK (1 << KMEM_ACCOUNTED_ACTIVE)
+/* We account when limit is on, but only after call sites are patched */
+#define KMEM_ACCOUNTED_MASK \
+ ((1 << KMEM_ACCOUNTED_ACTIVE) | (1 << KMEM_ACCOUNTED_ACTIVATED))
#ifdef CONFIG_MEMCG_KMEM
static inline void memcg_kmem_set_active(struct mem_cgroup *memcg)
return test_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags);
}
+static void memcg_kmem_set_activated(struct mem_cgroup *memcg)
+{
+ set_bit(KMEM_ACCOUNTED_ACTIVATED, &memcg->kmem_account_flags);
+}
+
static void memcg_kmem_mark_dead(struct mem_cgroup *memcg)
{
if (test_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags))
}
#endif
+#ifdef CONFIG_MEMCG_KMEM
+struct static_key memcg_kmem_enabled_key;
+
+static void disarm_kmem_keys(struct mem_cgroup *memcg)
+{
+ if (memcg_kmem_is_active(memcg))
+ static_key_slow_dec(&memcg_kmem_enabled_key);
+}
+#else
+static void disarm_kmem_keys(struct mem_cgroup *memcg)
+{
+}
+#endif /* CONFIG_MEMCG_KMEM */
+
+static void disarm_static_keys(struct mem_cgroup *memcg)
+{
+ disarm_sock_keys(memcg);
+ disarm_kmem_keys(memcg);
+}
+
static void drain_all_stock_async(struct mem_cgroup *memcg);
static struct mem_cgroup_per_zone *
{
int ret = -EINVAL;
#ifdef CONFIG_MEMCG_KMEM
+ bool must_inc_static_branch = false;
+
struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
/*
* For simplicity, we won't allow this to be disabled. It also can't
ret = res_counter_set_limit(&memcg->kmem, val);
VM_BUG_ON(ret);
- memcg_kmem_set_active(memcg);
+ /*
+ * After this point, kmem_accounted (that we test atomically in
+ * the beginning of this conditional), is no longer 0. This
+ * guarantees only one process will set the following boolean
+ * to true. We don't need test_and_set because we're protected
+ * by the set_limit_mutex anyway.
+ */
+ memcg_kmem_set_activated(memcg);
+ must_inc_static_branch = true;
/*
* kmem charges can outlive the cgroup. In the case of slab
* pages, for instance, a page contain objects from various
out:
mutex_unlock(&set_limit_mutex);
cgroup_unlock();
+
+ /*
+ * We are by now familiar with the fact that we can't inc the static
+ * branch inside cgroup_lock. See disarm functions for details. A
+ * worker here is overkill, but also wrong: After the limit is set, we
+ * must start accounting right away. Since this operation can't fail,
+ * we can safely defer it to here - no rollback will be needed.
+ *
+ * The boolean used to control this is also safe, because
+ * KMEM_ACCOUNTED_ACTIVATED guarantees that only one process will be
+ * able to set it to true;
+ */
+ if (must_inc_static_branch) {
+ static_key_slow_inc(&memcg_kmem_enabled_key);
+ /*
+ * setting the active bit after the inc will guarantee no one
+ * starts accounting before all call sites are patched
+ */
+ memcg_kmem_set_active(memcg);
+ }
+
#endif
return ret;
}
return;
memcg->kmem_account_flags = parent->kmem_account_flags;
#ifdef CONFIG_MEMCG_KMEM
- if (memcg_kmem_is_active(memcg))
+ /*
+ * When that happen, we need to disable the static branch only on those
+ * memcgs that enabled it. To achieve this, we would be forced to
+ * complicate the code by keeping track of which memcgs were the ones
+ * that actually enabled limits, and which ones got it from its
+ * parents.
+ *
+ * It is a lot simpler just to do static_key_slow_inc() on every child
+ * that is accounted.
+ */
+ if (memcg_kmem_is_active(memcg)) {
mem_cgroup_get(memcg);
+ static_key_slow_inc(&memcg_kmem_enabled_key);
+ }
#endif
}
* to move this code around, and make sure it is outside
* the cgroup_lock.
*/
- disarm_sock_keys(memcg);
+ disarm_static_keys(memcg);
if (size < PAGE_SIZE)
kfree(memcg);
else