git.karo-electronics.de Git - mv-sheeva.git/commitdiff
oom: replace PF_OOM_ORIGIN with toggling oom_score_adj
author David Rientjes <rientjes@google.com>
Wed, 25 May 2011 00:11:40 +0000 (17:11 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 25 May 2011 15:39:10 +0000 (08:39 -0700)
There's a kernel-wide shortage of per-process flags, so it's always
helpful to trim one when possible without incurring a significant penalty.
 It's even more important when you're planning on adding a per-process
flag yourself, which I plan to do shortly for transparent hugepages.

PF_OOM_ORIGIN is used by ksm and swapoff to prefer current since it has a
tendency to allocate large amounts of memory and should be preferred for
killing over other tasks.  We'd rather immediately kill the task making
the errant syscall than penalize an innocent task.

This patch removes PF_OOM_ORIGIN since its behavior is equivalent to
setting the process's oom_score_adj to OOM_SCORE_ADJ_MAX.

The process's old oom_score_adj is stored and then set to
OOM_SCORE_ADJ_MAX during the time it used to have PF_OOM_ORIGIN.  The old
value is then reinstated when the process should no longer be considered a
high priority for oom killing.

Signed-off-by: David Rientjes <rientjes@google.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Izik Eidus <ieidus@redhat.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/linux/oom.h
include/linux/sched.h
mm/ksm.c
mm/oom_kill.c
mm/swapfile.c

index 5e3aa8311c5ed40f2598ccd814162bef5173030e..4952fb874ad3dadd631b3e10f0ae35eb0df59135 100644 (file)
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -40,6 +40,8 @@ enum oom_constraint {
        CONSTRAINT_MEMCG,
 };
 
+extern int test_set_oom_score_adj(int new_val);
+
 extern unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
                        const nodemask_t *nodemask, unsigned long totalpages);
 extern int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
index aaf71e08222ca70e0984e374bfe2e4ee66a4ce7c..44b8faaac7c0cf2ba7debe1d47338a6d1baf3e1c 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1753,7 +1753,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
 #define PF_FROZEN      0x00010000      /* frozen for system suspend */
 #define PF_FSTRANS     0x00020000      /* inside a filesystem transaction */
 #define PF_KSWAPD      0x00040000      /* I am kswapd */
-#define PF_OOM_ORIGIN  0x00080000      /* Allocating much memory to others */
 #define PF_LESS_THROTTLE 0x00100000    /* Throttle me less: I clean memory */
 #define PF_KTHREAD     0x00200000      /* I am a kernel thread */
 #define PF_RANDOMIZE   0x00400000      /* randomize virtual address space */
index 942dfc73a2ff89c3c7c96b3f9b1838c99ec16d02..d708b3ef2260282a3d6e5784a60c1fb003339f8e 100644 (file)
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -35,6 +35,7 @@
 #include <linux/ksm.h>
 #include <linux/hash.h>
 #include <linux/freezer.h>
+#include <linux/oom.h>
 
 #include <asm/tlbflush.h>
 #include "internal.h"
@@ -1894,9 +1895,11 @@ static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
        if (ksm_run != flags) {
                ksm_run = flags;
                if (flags & KSM_RUN_UNMERGE) {
-                       current->flags |= PF_OOM_ORIGIN;
+                       int oom_score_adj;
+
+                       oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX);
                        err = unmerge_and_remove_all_rmap_items();
-                       current->flags &= ~PF_OOM_ORIGIN;
+                       test_set_oom_score_adj(oom_score_adj);
                        if (err) {
                                ksm_run = KSM_RUN_STOP;
                                count = err;
index f52e85c80e8d554fcae1a7ad40e0617c0bb1318f..e4b0991ca3516b3fc3590f40da1ede9f8a4ba637 100644 (file)
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -38,6 +38,33 @@ int sysctl_oom_kill_allocating_task;
 int sysctl_oom_dump_tasks = 1;
 static DEFINE_SPINLOCK(zone_scan_lock);
 
+/**
+ * test_set_oom_score_adj() - set current's oom_score_adj and return old value
+ * @new_val: new oom_score_adj value
+ *
+ * Sets the oom_score_adj value for current to @new_val with proper
+ * synchronization and returns the old value.  Usually used to temporarily
+ * set a value, save the old value in the caller, and then reinstate it later.
+ */
+int test_set_oom_score_adj(int new_val)
+{
+       struct sighand_struct *sighand = current->sighand;
+       int old_val;
+
+       spin_lock_irq(&sighand->siglock);
+       old_val = current->signal->oom_score_adj;
+       if (new_val != old_val) {
+               if (new_val == OOM_SCORE_ADJ_MIN)
+                       atomic_inc(&current->mm->oom_disable_count);
+               else if (old_val == OOM_SCORE_ADJ_MIN)
+                       atomic_dec(&current->mm->oom_disable_count);
+               current->signal->oom_score_adj = new_val;
+       }
+       spin_unlock_irq(&sighand->siglock);
+
+       return old_val;
+}
+
 #ifdef CONFIG_NUMA
 /**
  * has_intersects_mems_allowed() - check task eligiblity for kill
@@ -154,15 +181,6 @@ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
                return 0;
        }
 
-       /*
-        * When the PF_OOM_ORIGIN bit is set, it indicates the task should have
-        * priority for oom killing.
-        */
-       if (p->flags & PF_OOM_ORIGIN) {
-               task_unlock(p);
-               return 1000;
-       }
-
        /*
         * The memory controller may have a limit of 0 bytes, so avoid a divide
         * by zero, if necessary.
index 8c6b3ce38f09aa0e4f824ce3f70e6e9254a87724..d537d29e9b7bb5d9364b531e17f972618a5cad4b 100644 (file)
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -31,6 +31,7 @@
 #include <linux/syscalls.h>
 #include <linux/memcontrol.h>
 #include <linux/poll.h>
+#include <linux/oom.h>
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
@@ -1555,6 +1556,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
        struct address_space *mapping;
        struct inode *inode;
        char *pathname;
+       int oom_score_adj;
        int i, type, prev;
        int err;
 
@@ -1613,9 +1615,9 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
        p->flags &= ~SWP_WRITEOK;
        spin_unlock(&swap_lock);
 
-       current->flags |= PF_OOM_ORIGIN;
+       oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX);
        err = try_to_unuse(type);
-       current->flags &= ~PF_OOM_ORIGIN;
+       test_set_oom_score_adj(oom_score_adj);
 
        if (err) {
                /*