mm, oom: normalize oom scores to oom_score_adj scale only for userspace

author David Rientjes <rientjes@google.com>

Tue, 29 May 2012 22:06:47 +0000 (15:06 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Tue, 29 May 2012 23:22:24 +0000 (16:22 -0700)
author David Rientjes <rientjes@google.com>
Tue, 29 May 2012 22:06:47 +0000 (15:06 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 29 May 2012 23:22:24 +0000 (16:22 -0700)
diff --git a/fs/proc/base.c b/fs/proc/base.c

index d2d3108a611c8cf96b6d1aa275270a3929556ccf..d7d711876b6a00e8bf3ba659db8f59d69d6862dc 100644 (file)
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -411,12 +411,13 @@ static const struct file_operations proc_lstats_operations = {
  
  static int proc_oom_score(struct task_struct *task, char *buffer)
  {
+       unsigned long totalpages = totalram_pages + total_swap_pages;
         unsigned long points = 0;
  
         read_lock(&tasklist_lock);
         if (pid_alive(task))
-               points = oom_badness(task, NULL, NULL,
-                                       totalram_pages + total_swap_pages);
+               points = oom_badness(task, NULL, NULL, totalpages) *
+                                               1000 / totalpages;
         read_unlock(&tasklist_lock);
         return sprintf(buffer, "%lu\n", points);
  }
diff --git a/include/linux/oom.h b/include/linux/oom.h

index 3d7647536b0304ba40013aa2399d551b6779b4cf..e4c29bc72e70297af00eb276538840e72b163eda 100644 (file)
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -43,8 +43,9 @@ enum oom_constraint {
  extern void compare_swap_oom_score_adj(int old_val, int new_val);
  extern int test_set_oom_score_adj(int new_val);
  
-extern unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
-                       const nodemask_t *nodemask, unsigned long totalpages);
+extern unsigned long oom_badness(struct task_struct *p,
+               struct mem_cgroup *memcg, const nodemask_t *nodemask,
+               unsigned long totalpages);
  extern int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
  extern void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
  
diff --git a/mm/oom_kill.c b/mm/oom_kill.c

index 9f09a1fde9f9954616473e1452c9e7e67c0778b8..ed0e19677360fa55f62e3944208e82cab7eeacc8 100644 (file)
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -180,10 +180,10 @@ static bool oom_unkillable_task(struct task_struct *p,
   * predictable as possible.  The goal is to return the highest value for the
   * task consuming the most memory to avoid subsequent oom failures.
   */
-unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
-                     const nodemask_t *nodemask, unsigned long totalpages)
+unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
+                         const nodemask_t *nodemask, unsigned long totalpages)
  {
-       long points;
+       unsigned long points;
  
         if (oom_unkillable_task(p, memcg, nodemask))
                 return 0;
@@ -197,22 +197,12 @@ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
                 return 0;
         }
  
-       /*
-        * The memory controller may have a limit of 0 bytes, so avoid a divide
-        * by zero, if necessary.
-        */
-       if (!totalpages)
-               totalpages = 1;
-
         /*
          * The baseline for the badness score is the proportion of RAM that each
          * task's rss, pagetable and swap space use.
          */
-       points = get_mm_rss(p->mm) + p->mm->nr_ptes;
-       points += get_mm_counter(p->mm, MM_SWAPENTS);
-
-       points *= 1000;
-       points /= totalpages;
+       points = get_mm_rss(p->mm) + p->mm->nr_ptes +
+                get_mm_counter(p->mm, MM_SWAPENTS);
         task_unlock(p);
  
         /*
@@ -220,23 +210,20 @@ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
          * implementation used by LSMs.
          */
         if (has_capability_noaudit(p, CAP_SYS_ADMIN))
-               points -= 30;
+               points -= 30 * totalpages / 1000;
  
         /*
          * /proc/pid/oom_score_adj ranges from -1000 to +1000 such that it may
          * either completely disable oom killing or always prefer a certain
          * task.
          */
-       points += p->signal->oom_score_adj;
+       points += p->signal->oom_score_adj * totalpages / 1000;
  
         /*
-        * Never return 0 for an eligible task that may be killed since it's
-        * possible that no single user task uses more than 0.1% of memory and
-        * no single admin tasks uses more than 3.0%.
+        * Never return 0 for an eligible task regardless of the root bonus and
+        * oom_score_adj (oom_score_adj can't be OOM_SCORE_ADJ_MIN here).
          */
-       if (points <= 0)
-               return 1;
-       return (points < 1000) ? points : 1000;
+       return points ? points : 1;
  }
  
  /*
@@ -314,7 +301,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
  {
         struct task_struct *g, *p;
         struct task_struct *chosen = NULL;
-       *ppoints = 0;
+       unsigned long chosen_points = 0;
  
         do_each_thread(g, p) {
                 unsigned int points;
@@ -354,7 +341,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
                          */
                         if (p == current) {
                                 chosen = p;
-                               *ppoints = 1000;
+                               chosen_points = ULONG_MAX;
                         } else if (!force_kill) {
                                 /*
                                  * If this task is not being ptraced on exit,
@@ -367,12 +354,13 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
                 }
  
                 points = oom_badness(p, memcg, nodemask, totalpages);
-               if (points > *ppoints) {
+               if (points > chosen_points) {
                         chosen = p;
-                       *ppoints = points;
+                       chosen_points = points;
                 }
         } while_each_thread(g, p);
  
+       *ppoints = chosen_points * 1000 / totalpages;
         return chosen;
  }
  
@@ -572,7 +560,7 @@ void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
         }
  
         check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL);
-       limit = mem_cgroup_get_limit(memcg) >> PAGE_SHIFT;
+       limit = mem_cgroup_get_limit(memcg) >> PAGE_SHIFT ? : 1;
         read_lock(&tasklist_lock);
         p = select_bad_process(&points, limit, memcg, NULL, false);
         if (p && PTR_ERR(p) != -1UL)
author	David Rientjes <rientjes@google.com>
	Tue, 29 May 2012 22:06:47 +0000 (15:06 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 29 May 2012 23:22:24 +0000 (16:22 -0700)
fs/proc/base.c		patch | blob | history
include/linux/oom.h		patch | blob | history
mm/oom_kill.c		patch | blob | history