From cd64eede7810dd8b709ee3efb4f9b696b91b4061 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 28 Sep 2011 10:50:03 +1000 Subject: [PATCH] oom: fix race while temporarily setting current's oom_score_adj test_set_oom_score_adj() was introduced in 72788c385604 ("oom: replace PF_OOM_ORIGIN with toggling oom_score_adj") to temporarily elevate current's oom_score_adj for ksm and swapoff without requiring an additional per-process flag. Using that function to both set oom_score_adj to OOM_SCORE_ADJ_MAX and then reinstate the previous value is racy since it's possible that userspace can set the value to something else itself before the old value is reinstated. That results in userspace setting current's oom_score_adj to a different value and then the kernel immediately setting it back to its previous value without notification. To fix this, a new compare_swap_oom_score_adj() function is introduced with the same semantics as the compare and swap CAS instruction, or CMPXCHG on x86. It is used to reinstate the previous value of oom_score_adj if and only if the present value is the same as the old value. Signed-off-by: David Rientjes Cc: Oleg Nesterov Cc: Ying Han Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton <> --- include/linux/oom.h | 1 + mm/ksm.c | 3 ++- mm/oom_kill.c | 19 +++++++++++++++++++ mm/swapfile.c | 2 +- 4 files changed, 23 insertions(+), 2 deletions(-) diff --git a/include/linux/oom.h b/include/linux/oom.h index 13b7b02e599a..6f9d04a85336 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -40,6 +40,7 @@ enum oom_constraint { CONSTRAINT_MEMCG, }; +extern void compare_swap_oom_score_adj(int old_val, int new_val); extern int test_set_oom_score_adj(int new_val); extern unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem, diff --git a/mm/ksm.c b/mm/ksm.c index 9a68b0cf0a1c..310544a379ae 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -1905,7 +1905,8 @@ static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr, oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX); err = unmerge_and_remove_all_rmap_items(); - test_set_oom_score_adj(oom_score_adj); + compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX, + oom_score_adj); if (err) { ksm_run = KSM_RUN_STOP; count = err; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index d25a3da9a8a7..3847a0c7e794 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -38,6 +38,25 @@ int sysctl_oom_kill_allocating_task; int sysctl_oom_dump_tasks = 1; static DEFINE_SPINLOCK(zone_scan_lock); +/* + * compare_swap_oom_score_adj() - compare and swap current's oom_score_adj + * @old_val: old oom_score_adj for compare + * @new_val: new oom_score_adj for swap + * + * Sets the oom_score_adj value for current to @new_val iff its present value is + * @old_val. Usually used to reinstate a previous value to prevent racing with + * userspacing tuning the value in the interim. + */ +void compare_swap_oom_score_adj(int old_val, int new_val) +{ + struct sighand_struct *sighand = current->sighand; + + spin_lock_irq(&sighand->siglock); + if (current->signal->oom_score_adj == old_val) + current->signal->oom_score_adj = new_val; + spin_unlock_irq(&sighand->siglock); +} + /** * test_set_oom_score_adj() - set current's oom_score_adj and return old value * @new_val: new oom_score_adj value diff --git a/mm/swapfile.c b/mm/swapfile.c index 5a666c9967ea..9b71e6dc0650 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1637,7 +1637,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX); err = try_to_unuse(type, false, 0); /* force all pages to be unused */ - test_set_oom_score_adj(oom_score_adj); + compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX, oom_score_adj); if (err) { /* -- 2.39.5