cgroup: keep zombies associated with their original cgroups

author Tejun Heo <tj@kernel.org>

Thu, 15 Oct 2015 20:41:53 +0000 (16:41 -0400)

committer Tejun Heo <tj@kernel.org>

Thu, 15 Oct 2015 20:41:53 +0000 (16:41 -0400)
author Tejun Heo <tj@kernel.org>
Thu, 15 Oct 2015 20:41:53 +0000 (16:41 -0400)
committer Tejun Heo <tj@kernel.org>
Thu, 15 Oct 2015 20:41:53 +0000 (16:41 -0400)
diff --git a/Documentation/cgroups/unified-hierarchy.txt b/Documentation/cgroups/unified-hierarchy.txt

index 176b940f8327936c97efa50e96188b26d773041e..6932453d37a2ba0c0ed96c3848e996ecde56273c 100644 (file)
--- a/Documentation/cgroups/unified-hierarchy.txt
+++ b/Documentation/cgroups/unified-hierarchy.txt
@@ -374,6 +374,10 @@ supported and the interface files "release_agent" and
  
  - The "cgroup.clone_children" file is removed.
  
+- /proc/PID/cgroup keeps reporting the cgroup that a zombie belonged
+  to before exiting.  If the cgroup is removed before the zombie is
+  reaped, " (deleted)" is appended to the path.
+
  
  5-3. Controller File Conventions
  
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h

index 62413c3e2f4bc2abb1fd51e6a6d3571fac1d72fc..6a1ab64ee5f91d53495739ecd0a4f9ec50b43565 100644 (file)
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -435,9 +435,7 @@ struct cgroup_subsys {
         int (*can_fork)(struct task_struct *task, void **priv_p);
         void (*cancel_fork)(struct task_struct *task, void *priv);
         void (*fork)(struct task_struct *task, void *priv);
-       void (*exit)(struct cgroup_subsys_state *css,
-                    struct cgroup_subsys_state *old_css,
-                    struct task_struct *task);
+       void (*exit)(struct task_struct *task);
         void (*bind)(struct cgroup_subsys_state *root_css);
  
         int early_init;
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h

index 46020735bcbb80ddd6797871b061f2981c73e56b..22e3754f89c511374af4ca8ac5a518786dcd6d88 100644 (file)
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -102,6 +102,7 @@ extern void cgroup_cancel_fork(struct task_struct *p,
  extern void cgroup_post_fork(struct task_struct *p,
                              void *old_ss_priv[CGROUP_CANFORK_COUNT]);
  void cgroup_exit(struct task_struct *p);
+void cgroup_free(struct task_struct *p);
  
  int cgroup_init_early(void);
  int cgroup_init(void);
@@ -547,6 +548,7 @@ static inline void cgroup_cancel_fork(struct task_struct *p,
  static inline void cgroup_post_fork(struct task_struct *p,
                                     void *ss_priv[CGROUP_CANFORK_COUNT]) {}
  static inline void cgroup_exit(struct task_struct *p) {}
+static inline void cgroup_free(struct task_struct *p) {}
  
  static inline int cgroup_init_early(void) { return 0; }
  static inline int cgroup_init(void) { return 0; }
diff --git a/kernel/cgroup.c b/kernel/cgroup.c

index ba7b3284c2e4cb1b33fe5d891e8dbacb5b6bc1c4..918658497625389c21691357c83ec284622e783d 100644 (file)
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -5379,14 +5379,34 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
                         seq_printf(m, "%sname=%s", count ? "," : "",
                                    root->name);
                 seq_putc(m, ':');
+
                 cgrp = task_cgroup_from_root(tsk, root);
-               path = cgroup_path(cgrp, buf, PATH_MAX);
-               if (!path) {
-                       retval = -ENAMETOOLONG;
-                       goto out_unlock;
+
+               /*
+                * On traditional hierarchies, all zombie tasks show up as
+                * belonging to the root cgroup.  On the default hierarchy,
+                * while a zombie doesn't show up in "cgroup.procs" and
+                * thus can't be migrated, its /proc/PID/cgroup keeps
+                * reporting the cgroup it belonged to before exiting.  If
+                * the cgroup is removed before the zombie is reaped,
+                * " (deleted)" is appended to the cgroup path.
+                */
+               if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) {
+                       path = cgroup_path(cgrp, buf, PATH_MAX);
+                       if (!path) {
+                               retval = -ENAMETOOLONG;
+                               goto out_unlock;
+                       }
+               } else {
+                       path = "/";
                 }
+
                 seq_puts(m, path);
-               seq_putc(m, '\n');
+
+               if (cgroup_on_dfl(cgrp) && cgroup_is_dead(cgrp))
+                       seq_puts(m, " (deleted)\n");
+               else
+                       seq_putc(m, '\n');
         }
  
         retval = 0;
@@ -5593,7 +5613,6 @@ void cgroup_exit(struct task_struct *tsk)
  {
         struct cgroup_subsys *ss;
         struct css_set *cset;
-       bool put_cset = false;
         int i;
  
         /*
@@ -5606,22 +5625,20 @@ void cgroup_exit(struct task_struct *tsk)
                 spin_lock_bh(&css_set_lock);
                 css_set_move_task(tsk, cset, NULL, false);
                 spin_unlock_bh(&css_set_lock);
-               put_cset = true;
+       } else {
+               get_css_set(cset);
         }
  
-       /* Reassign the task to the init_css_set. */
-       RCU_INIT_POINTER(tsk->cgroups, &init_css_set);
-
         /* see cgroup_post_fork() for details */
-       for_each_subsys_which(ss, i, &have_exit_callback) {
-               struct cgroup_subsys_state *old_css = cset->subsys[i];
-               struct cgroup_subsys_state *css = task_css(tsk, i);
+       for_each_subsys_which(ss, i, &have_exit_callback)
+               ss->exit(tsk);
+}
  
-               ss->exit(css, old_css, tsk);
-       }
+void cgroup_free(struct task_struct *task)
+{
+       struct css_set *cset = task_css_set(task);
  
-       if (put_cset)
-               put_css_set(cset);
+       put_css_set(cset);
  }
  
  static void check_for_release(struct cgroup *cgrp)
diff --git a/kernel/cgroup_pids.c b/kernel/cgroup_pids.c

index 806cd7693ac88b0307173ab7bdaf1a8bc44ab173..45f0856a61fe784d025a671d4f0b6705fa54c327 100644 (file)
--- a/kernel/cgroup_pids.c
+++ b/kernel/cgroup_pids.c
@@ -266,11 +266,9 @@ static void pids_fork(struct task_struct *task, void *priv)
         css_put(old_css);
  }
  
-static void pids_exit(struct cgroup_subsys_state *css,
-                     struct cgroup_subsys_state *old_css,
-                     struct task_struct *task)
+static void pids_exit(struct task_struct *task)
  {
-       struct pids_cgroup *pids = css_pids(old_css);
+       struct pids_cgroup *pids = css_pids(task_css(task, pids_cgrp_id));
  
         pids_uncharge(pids, 1);
  }
diff --git a/kernel/events/core.c b/kernel/events/core.c

index f548f69c4299dd1ee44bfdc1f84d79d655d0d6d7..e9874949c78734d2c662845aedb69d79ccd5b839 100644 (file)
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -9293,25 +9293,9 @@ static void perf_cgroup_attach(struct cgroup_subsys_state *css,
                 task_function_call(task, __perf_cgroup_move, task);
  }
  
-static void perf_cgroup_exit(struct cgroup_subsys_state *css,
-                            struct cgroup_subsys_state *old_css,
-                            struct task_struct *task)
-{
-       /*
-        * cgroup_exit() is called in the copy_process() failure path.
-        * Ignore this case since the task hasn't ran yet, this avoids
-        * trying to poke a half freed task state from generic code.
-        */
-       if (!(task->flags & PF_EXITING))
-               return;
-
-       task_function_call(task, __perf_cgroup_move, task);
-}
-
  struct cgroup_subsys perf_event_cgrp_subsys = {
         .css_alloc      = perf_cgroup_css_alloc,
         .css_free       = perf_cgroup_css_free,
-       .exit           = perf_cgroup_exit,
         .attach         = perf_cgroup_attach,
  };
  #endif /* CONFIG_CGROUP_PERF */
diff --git a/kernel/fork.c b/kernel/fork.c

index 7d5f0f118a6348f81f08f10dd7dbb499f89dd243..118743bb596498edb919f09b92cbca956f479595 100644 (file)
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -251,6 +251,7 @@ void __put_task_struct(struct task_struct *tsk)
         WARN_ON(atomic_read(&tsk->usage));
         WARN_ON(tsk == current);
  
+       cgroup_free(tsk);
         task_numa_free(tsk);
         security_task_free(tsk);
         exit_creds(tsk);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index 3595403921bd5be10c3e5e591bf04916e654423d..2cad9ba9103682a14fc596f8ee813b52a70b928c 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8163,21 +8163,6 @@ static void cpu_cgroup_attach(struct cgroup_subsys_state *css,
                 sched_move_task(task);
  }
  
-static void cpu_cgroup_exit(struct cgroup_subsys_state *css,
-                           struct cgroup_subsys_state *old_css,
-                           struct task_struct *task)
-{
-       /*
-        * cgroup_exit() is called in the copy_process() failure path.
-        * Ignore this case since the task hasn't ran yet, this avoids
-        * trying to poke a half freed task state from generic code.
-        */
-       if (!(task->flags & PF_EXITING))
-               return;
-
-       sched_move_task(task);
-}
-
  #ifdef CONFIG_FAIR_GROUP_SCHED
  static int cpu_shares_write_u64(struct cgroup_subsys_state *css,
                                 struct cftype *cftype, u64 shareval)
@@ -8509,7 +8494,6 @@ struct cgroup_subsys cpu_cgrp_subsys = {
         .fork           = cpu_cgroup_fork,
         .can_attach     = cpu_cgroup_can_attach,
         .attach         = cpu_cgroup_attach,
-       .exit           = cpu_cgroup_exit,
         .legacy_cftypes = cpu_files,
         .early_init     = 1,
  };
author	Tejun Heo <tj@kernel.org>
	Thu, 15 Oct 2015 20:41:53 +0000 (16:41 -0400)
committer	Tejun Heo <tj@kernel.org>
	Thu, 15 Oct 2015 20:41:53 +0000 (16:41 -0400)
Documentation/cgroups/unified-hierarchy.txt		patch \| blob \| history
include/linux/cgroup-defs.h		patch \| blob \| history
include/linux/cgroup.h		patch \| blob \| history
kernel/cgroup.c		patch \| blob \| history
kernel/cgroup_pids.c		patch \| blob \| history
kernel/events/core.c		patch \| blob \| history
kernel/fork.c		patch \| blob \| history
kernel/sched/core.c		patch \| blob \| history