#include "locking/rtmutex_common.h"
/*
- * Basic futex operation and ordering guarantees:
+ * READ this before attempting to hack on futexes!
+ *
+ * Basic futex operation and ordering guarantees
+ * =============================================
*
* The waiter reads the futex value in user space and calls
* futex_wait(). This function computes the hash bucket and acquires
* sys_futex(WAIT, futex, val);
* futex_wait(futex, val);
*
- * waiters++;
+ * waiters++; (a)
* mb(); (A) <-- paired with -.
* |
* lock(hash_bucket(futex)); |
* unlock(hash_bucket(futex));
* schedule(); if (waiters)
* lock(hash_bucket(futex));
- * wake_waiters(futex);
- * unlock(hash_bucket(futex));
+ * else wake_waiters(futex);
+ * waiters--; (b) unlock(hash_bucket(futex));
*
- * Where (A) orders the waiters increment and the futex value read -- this
- * is guaranteed by the head counter in the hb spinlock; and where (B)
- * orders the write to futex and the waiters read -- this is done by the
- * barriers in get_futex_key_refs(), through either ihold or atomic_inc,
- * depending on the futex type.
+ * Where (A) orders the waiters increment and the futex value read through
+ * atomic operations (see hb_waiters_inc) and where (B) orders the write
+ * to futex and the waiters read -- this is done by the barriers in
+ * get_futex_key_refs(), through either ihold or atomic_inc, depending on the
+ * futex type.
*
* This yields the following case (where X:=waiters, Y:=futex):
*
* Which guarantees that x==0 && y==0 is impossible; which translates back into
* the guarantee that we cannot both miss the futex variable change and the
* enqueue.
+ *
+ * Note that a new waiter is accounted for in (a) even when it is possible that
+ * the wait call can return error, in which case we backtrack from it in (b).
+ * Refer to the comment in queue_lock().
+ *
+ * Similarly, in order to account for waiters being requeued on another
+ * address we always increment the waiters for the destination bucket before
+ * acquiring the lock. It then decrements them again after releasing it -
+ * the code that actually moves the futex(es) between hash buckets (requeue_futex)
+ * will do the additional required waiter count housekeeping. This is done for
+ * double_lock_hb() and double_unlock_hb(), respectively.
*/
#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
static int
lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
- union futex_key *key, struct futex_pi_state **ps)
+ union futex_key *key, struct futex_pi_state **ps,
+ struct task_struct *task)
{
struct futex_pi_state *pi_state = NULL;
struct futex_q *this, *next;
return -EINVAL;
}
+ /*
+ * Protect against a corrupted uval. If uval
+ * is 0x80000000 then pid is 0 and the waiter
+ * bit is set. So the deadlock check in the
+ * calling code has failed and we did not fall
+ * into the check above due to !pid.
+ */
+ if (task && pi_state->owner == task)
+ return -EDEADLK;
+
atomic_inc(&pi_state->refcount);
*ps = pi_state;
if (!p)
return -ESRCH;
+ if (!p->mm) {
+ put_task_struct(p);
+ return -EPERM;
+ }
+
/*
* We need to look at the task state flags to figure out,
* whether the task is exiting. To protect against the do_exit
* We dont have the lock. Look up the PI state (or create it if
* we are the first waiter):
*/
- ret = lookup_pi_state(uval, hb, key, ps);
+ ret = lookup_pi_state(uval, hb, key, ps, task);
if (unlikely(ret)) {
switch (ret) {
*
* Return:
* 0 - failed to acquire the lock atomically;
- * 1 - acquired the lock;
+ * >0 - acquired the lock, return value is vpid of the top_waiter
* <0 - error
*/
static int futex_proxy_trylock_atomic(u32 __user *pifutex,
{
struct futex_q *top_waiter = NULL;
u32 curval;
- int ret;
+ int ret, vpid;
if (get_futex_value_locked(&curval, pifutex))
return -EFAULT;
* the contended case or if set_waiters is 1. The pi_state is returned
* in ps in contended cases.
*/
+ vpid = task_pid_vnr(top_waiter->task);
ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
set_waiters);
- if (ret == 1)
+ if (ret == 1) {
requeue_pi_wake_futex(top_waiter, key2, hb2);
-
+ return vpid;
+ }
return ret;
}
struct futex_pi_state *pi_state = NULL;
struct futex_hash_bucket *hb1, *hb2;
struct futex_q *this, *next;
- u32 curval2;
if (requeue_pi) {
/*
* At this point the top_waiter has either taken uaddr2 or is
* waiting on it. If the former, then the pi_state will not
* exist yet, look it up one more time to ensure we have a
- * reference to it.
+ * reference to it. If the lock was taken, ret contains the
+ * vpid of the top waiter task.
*/
- if (ret == 1) {
+ if (ret > 0) {
WARN_ON(pi_state);
drop_count++;
task_count++;
- ret = get_futex_value_locked(&curval2, uaddr2);
- if (!ret)
- ret = lookup_pi_state(curval2, hb2, &key2,
- &pi_state);
+ /*
+ * If we acquired the lock, then the user
+ * space value of uaddr2 should be vpid. It
+ * cannot be changed by the top waiter as it
+ * is blocked on hb2 lock if it tries to do
+ * so. If something fiddled with it behind our
+ * back the pi state lookup might unearth
+ * it. So we rather use the known value than
+ * rereading and handing potential crap to
+ * lookup_pi_state.
+ */
+ ret = lookup_pi_state(ret, hb2, &key2, &pi_state, NULL);
}
switch (ret) {