git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
Merge branch 'ptrace' of git://git.kernel.org/pub/scm/linux/kernel/git/oleg/misc
author: Linus Torvalds <torvalds@linux-foundation.org>
Fri, 20 May 2011 20:33:21 +0000 (13:33 -0700)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Fri, 20 May 2011 20:33:21 +0000 (13:33 -0700)
* 'ptrace' of git://git.kernel.org/pub/scm/linux/kernel/git/oleg/misc: (41 commits)
  signal: trivial, fix the "timespec declared inside parameter list" warning
  job control: reorganize wait_task_stopped()
  ptrace: fix signal->wait_chldexit usage in task_clear_group_stop_trapping()
  signal: sys_sigprocmask() needs retarget_shared_pending()
  signal: cleanup sys_sigprocmask()
  signal: rename signandsets() to sigandnsets()
  signal: do_sigtimedwait() needs retarget_shared_pending()
  signal: introduce do_sigtimedwait() to factor out compat/native code
  signal: sys_rt_sigtimedwait: simplify the timeout logic
  signal: cleanup sys_rt_sigprocmask()
  x86: signal: sys_rt_sigreturn() should use set_current_blocked()
  x86: signal: handle_signal() should use set_current_blocked()
  signal: sigprocmask() should do retarget_shared_pending()
  signal: sigprocmask: narrow the scope of ->siglock
  signal: retarget_shared_pending: optimize while_each_thread() loop
  signal: retarget_shared_pending: consider shared/unblocked signals only
  signal: introduce retarget_shared_pending()
  ptrace: ptrace_check_attach() should not do s/STOPPED/TRACED/
  signal: Turn SIGNAL_STOP_DEQUEUED into GROUP_STOP_DEQUEUED
  signal: do_signal_stop: Remove the unneeded task_clear_group_stop_pending()
  ...

arch/x86/kernel/signal.c
fs/exec.c
include/linux/sched.h
include/linux/signal.h
include/linux/tracehook.h
kernel/compat.c
kernel/exit.c
kernel/ptrace.c
kernel/signal.c

index 4fd173cd8e5734f451b3c2b113d58c4ee5da2a32..40a24932a8a152878abfbf8ff6210a28892e5e2d 100644 (file)
@@ -601,10 +601,7 @@ long sys_rt_sigreturn(struct pt_regs *regs)
                goto badframe;
 
        sigdelsetmask(&set, ~_BLOCKABLE);
-       spin_lock_irq(&current->sighand->siglock);
-       current->blocked = set;
-       recalc_sigpending();
-       spin_unlock_irq(&current->sighand->siglock);
+       set_current_blocked(&set);
 
        if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
                goto badframe;
@@ -682,6 +679,7 @@ static int
 handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
              sigset_t *oldset, struct pt_regs *regs)
 {
+       sigset_t blocked;
        int ret;
 
        /* Are we from a system call? */
@@ -741,12 +739,10 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
         */
        regs->flags &= ~X86_EFLAGS_TF;
 
-       spin_lock_irq(&current->sighand->siglock);
-       sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
+       sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask);
        if (!(ka->sa.sa_flags & SA_NODEFER))
-               sigaddset(&current->blocked, sig);
-       recalc_sigpending();
-       spin_unlock_irq(&current->sighand->siglock);
+               sigaddset(&blocked, sig);
+       set_current_blocked(&blocked);
 
        tracehook_signal_handler(sig, info, ka, regs,
                                 test_thread_flag(TIF_SINGLESTEP));
index 5e62d26a4fecec227d81700b0b9fd6542b715ad6..8328beb9016f909dc9cef5b169fb8fc33a7fcb8f 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1659,6 +1659,7 @@ static int zap_process(struct task_struct *start, int exit_code)
 
        t = start;
        do {
+               task_clear_group_stop_pending(t);
                if (t != current && t->mm) {
                        sigaddset(&t->pending.signal, SIGKILL);
                        signal_wake_up(t, 1);
index 12211e1666e2510b6389c47db75110b6ec8232ac..885c4f242ad72cfb2a4ef6af06bbc1011255178a 100644 (file)
@@ -653,9 +653,8 @@ struct signal_struct {
  * Bits in flags field of signal_struct.
  */
 #define SIGNAL_STOP_STOPPED    0x00000001 /* job control stop in effect */
-#define SIGNAL_STOP_DEQUEUED   0x00000002 /* stop signal dequeued */
-#define SIGNAL_STOP_CONTINUED  0x00000004 /* SIGCONT since WCONTINUED reap */
-#define SIGNAL_GROUP_EXIT      0x00000008 /* group exit in progress */
+#define SIGNAL_STOP_CONTINUED  0x00000002 /* SIGCONT since WCONTINUED reap */
+#define SIGNAL_GROUP_EXIT      0x00000004 /* group exit in progress */
 /*
  * Pending notifications to parent.
  */
@@ -1251,6 +1250,7 @@ struct task_struct {
        int exit_state;
        int exit_code, exit_signal;
        int pdeath_signal;  /*  The signal sent when the parent dies  */
+       unsigned int group_stop;        /* GROUP_STOP_*, siglock protected */
        /* ??? */
        unsigned int personality;
        unsigned did_exec:1;
@@ -1771,6 +1771,17 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
 #define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
 #define used_math() tsk_used_math(current)
 
+/*
+ * task->group_stop flags
+ */
+#define GROUP_STOP_SIGMASK     0xffff    /* signr of the last group stop */
+#define GROUP_STOP_PENDING     (1 << 16) /* task should stop for group stop */
+#define GROUP_STOP_CONSUME     (1 << 17) /* consume group stop count */
+#define GROUP_STOP_TRAPPING    (1 << 18) /* switching from STOPPED to TRACED */
+#define GROUP_STOP_DEQUEUED    (1 << 19) /* stop signal dequeued */
+
+extern void task_clear_group_stop_pending(struct task_struct *task);
+
 #ifdef CONFIG_PREEMPT_RCU
 
 #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */
index 29a68ac7af834a80a71212f1e7ea0a050df30c0d..a822300a253b0d2be69b477be3b078448ec25017 100644 (file)
@@ -125,13 +125,13 @@ _SIG_SET_BINOP(sigorsets, _sig_or)
 #define _sig_and(x,y)  ((x) & (y))
 _SIG_SET_BINOP(sigandsets, _sig_and)
 
-#define _sig_nand(x,y) ((x) & ~(y))
-_SIG_SET_BINOP(signandsets, _sig_nand)
+#define _sig_andn(x,y) ((x) & ~(y))
+_SIG_SET_BINOP(sigandnsets, _sig_andn)
 
 #undef _SIG_SET_BINOP
 #undef _sig_or
 #undef _sig_and
-#undef _sig_nand
+#undef _sig_andn
 
 #define _SIG_SET_OP(name, op)                                          \
 static inline void name(sigset_t *set)                                 \
@@ -236,6 +236,9 @@ static inline int valid_signal(unsigned long sig)
        return sig <= _NSIG ? 1 : 0;
 }
 
+struct timespec;
+struct pt_regs;
+
 extern int next_signal(struct sigpending *pending, sigset_t *mask);
 extern int do_send_sig_info(int sig, struct siginfo *info,
                                struct task_struct *p, bool group);
@@ -244,10 +247,12 @@ extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *);
 extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig,
                                 siginfo_t *info);
 extern long do_sigpending(void __user *, unsigned long);
+extern int do_sigtimedwait(const sigset_t *, siginfo_t *,
+                               const struct timespec *);
 extern int sigprocmask(int, sigset_t *, sigset_t *);
+extern void set_current_blocked(const sigset_t *);
 extern int show_unhandled_signals;
 
-struct pt_regs;
 extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie);
 extern void exit_signals(struct task_struct *tsk);
 
index ebcfa4ebdbf8ddec4c48cf070882bfebf83b41b1..e95f5236611f3efcd0d9c9db4375b54c47cadc05 100644 (file)
@@ -468,33 +468,6 @@ static inline int tracehook_get_signal(struct task_struct *task,
        return 0;
 }
 
-/**
- * tracehook_notify_jctl - report about job control stop/continue
- * @notify:            zero, %CLD_STOPPED or %CLD_CONTINUED
- * @why:               %CLD_STOPPED or %CLD_CONTINUED
- *
- * This is called when we might call do_notify_parent_cldstop().
- *
- * @notify is zero if we would not ordinarily send a %SIGCHLD,
- * or is the %CLD_STOPPED or %CLD_CONTINUED .si_code for %SIGCHLD.
- *
- * @why is %CLD_STOPPED when about to stop for job control;
- * we are already in %TASK_STOPPED state, about to call schedule().
- * It might also be that we have just exited (check %PF_EXITING),
- * but need to report that a group-wide stop is complete.
- *
- * @why is %CLD_CONTINUED when waking up after job control stop and
- * ready to make a delayed @notify report.
- *
- * Return the %CLD_* value for %SIGCHLD, or zero to generate no signal.
- *
- * Called with the siglock held.
- */
-static inline int tracehook_notify_jctl(int notify, int why)
-{
-       return notify ?: (current->ptrace & PT_PTRACED) ? why : 0;
-}
-
 /**
  * tracehook_finish_jctl - report about return from job control stop
  *
index 38b1d2c1cbe80bd88371a09412b5e52abe86afc3..9214dcd087b7369fb754e6391015918db6d6ed3f 100644 (file)
@@ -890,10 +890,9 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese,
 {
        compat_sigset_t s32;
        sigset_t s;
-       int sig;
        struct timespec t;
        siginfo_t info;
-       long ret, timeout = 0;
+       long ret;
 
        if (sigsetsize != sizeof(sigset_t))
                return -EINVAL;
@@ -901,51 +900,19 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese,
        if (copy_from_user(&s32, uthese, sizeof(compat_sigset_t)))
                return -EFAULT;
        sigset_from_compat(&s, &s32);
-       sigdelsetmask(&s,sigmask(SIGKILL)|sigmask(SIGSTOP));
-       signotset(&s);
 
        if (uts) {
-               if (get_compat_timespec (&t, uts))
+               if (get_compat_timespec(&t, uts))
                        return -EFAULT;
-               if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0
-                               || t.tv_sec < 0)
-                       return -EINVAL;
        }
 
-       spin_lock_irq(&current->sighand->siglock);
-       sig = dequeue_signal(current, &s, &info);
-       if (!sig) {
-               timeout = MAX_SCHEDULE_TIMEOUT;
-               if (uts)
-                       timeout = timespec_to_jiffies(&t)
-                               +(t.tv_sec || t.tv_nsec);
-               if (timeout) {
-                       current->real_blocked = current->blocked;
-                       sigandsets(&current->blocked, &current->blocked, &s);
-
-                       recalc_sigpending();
-                       spin_unlock_irq(&current->sighand->siglock);
-
-                       timeout = schedule_timeout_interruptible(timeout);
-
-                       spin_lock_irq(&current->sighand->siglock);
-                       sig = dequeue_signal(current, &s, &info);
-                       current->blocked = current->real_blocked;
-                       siginitset(&current->real_blocked, 0);
-                       recalc_sigpending();
-               }
-       }
-       spin_unlock_irq(&current->sighand->siglock);
+       ret = do_sigtimedwait(&s, &info, uts ? &t : NULL);
 
-       if (sig) {
-               ret = sig;
-               if (uinfo) {
-                       if (copy_siginfo_to_user32(uinfo, &info))
-                               ret = -EFAULT;
-               }
-       }else {
-               ret = timeout?-EINTR:-EAGAIN;
+       if (ret > 0 && uinfo) {
+               if (copy_siginfo_to_user32(uinfo, &info))
+                       ret = -EFAULT;
        }
+
        return ret;
 
 }
index 8dd87418154205940341f98641191fe1c1d6ea70..20a406471525af2087cf914d8569d401306b3a5b 100644 (file)
@@ -1377,11 +1377,23 @@ static int *task_stopped_code(struct task_struct *p, bool ptrace)
        return NULL;
 }
 
-/*
- * Handle sys_wait4 work for one task in state TASK_STOPPED.  We hold
- * read_lock(&tasklist_lock) on entry.  If we return zero, we still hold
- * the lock and this task is uninteresting.  If we return nonzero, we have
- * released the lock and the system call should return.
+/**
+ * wait_task_stopped - Wait for %TASK_STOPPED or %TASK_TRACED
+ * @wo: wait options
+ * @ptrace: is the wait for ptrace
+ * @p: task to wait for
+ *
+ * Handle sys_wait4() work for @p in state %TASK_STOPPED or %TASK_TRACED.
+ *
+ * CONTEXT:
+ * read_lock(&tasklist_lock), which is released if return value is
+ * non-zero.  Also, grabs and releases @p->sighand->siglock.
+ *
+ * RETURNS:
+ * 0 if wait condition didn't exist and search for other wait conditions
+ * should continue.  Non-zero return, -errno on failure and @p's pid on
+ * success, implies that tasklist_lock is released and wait condition
+ * search should terminate.
  */
 static int wait_task_stopped(struct wait_opts *wo,
                                int ptrace, struct task_struct *p)
@@ -1397,6 +1409,9 @@ static int wait_task_stopped(struct wait_opts *wo,
        if (!ptrace && !(wo->wo_flags & WUNTRACED))
                return 0;
 
+       if (!task_stopped_code(p, ptrace))
+               return 0;
+
        exit_code = 0;
        spin_lock_irq(&p->sighand->siglock);
 
@@ -1538,33 +1553,84 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace,
                return 0;
        }
 
-       if (likely(!ptrace) && unlikely(task_ptrace(p))) {
+       /* dead body doesn't have much to contribute */
+       if (p->exit_state == EXIT_DEAD)
+               return 0;
+
+       /* slay zombie? */
+       if (p->exit_state == EXIT_ZOMBIE) {
+               /*
+                * A zombie ptracee is only visible to its ptracer.
+                * Notification and reaping will be cascaded to the real
+                * parent when the ptracer detaches.
+                */
+               if (likely(!ptrace) && unlikely(task_ptrace(p))) {
+                       /* it will become visible, clear notask_error */
+                       wo->notask_error = 0;
+                       return 0;
+               }
+
+               /* we don't reap group leaders with subthreads */
+               if (!delay_group_leader(p))
+                       return wait_task_zombie(wo, p);
+
                /*
-                * This child is hidden by ptrace.
-                * We aren't allowed to see it now, but eventually we will.
+                * Allow access to stopped/continued state via zombie by
+                * falling through.  Clearing of notask_error is complex.
+                *
+                * When !@ptrace:
+                *
+                * If WEXITED is set, notask_error should naturally be
+                * cleared.  If not, subset of WSTOPPED|WCONTINUED is set,
+                * so, if there are live subthreads, there are events to
+                * wait for.  If all subthreads are dead, it's still safe
+                * to clear - this function will be called again in finite
+                * amount time once all the subthreads are released and
+                * will then return without clearing.
+                *
+                * When @ptrace:
+                *
+                * Stopped state is per-task and thus can't change once the
+                * target task dies.  Only continued and exited can happen.
+                * Clear notask_error if WCONTINUED | WEXITED.
+                */
+               if (likely(!ptrace) || (wo->wo_flags & (WCONTINUED | WEXITED)))
+                       wo->notask_error = 0;
+       } else {
+               /*
+                * If @p is ptraced by a task in its real parent's group,
+                * hide group stop/continued state when looking at @p as
+                * the real parent; otherwise, a single stop can be
+                * reported twice as group and ptrace stops.
+                *
+                * If a ptracer wants to distinguish the two events for its
+                * own children, it should create a separate process which
+                * takes the role of real parent.
+                */
+               if (likely(!ptrace) && task_ptrace(p) &&
+                   same_thread_group(p->parent, p->real_parent))
+                       return 0;
+
+               /*
+                * @p is alive and it's gonna stop, continue or exit, so
+                * there always is something to wait for.
                 */
                wo->notask_error = 0;
-               return 0;
        }
 
-       if (p->exit_state == EXIT_DEAD)
-               return 0;
-
        /*
-        * We don't reap group leaders with subthreads.
+        * Wait for stopped.  Depending on @ptrace, different stopped state
+        * is used and the two don't interact with each other.
         */
-       if (p->exit_state == EXIT_ZOMBIE && !delay_group_leader(p))
-               return wait_task_zombie(wo, p);
+       ret = wait_task_stopped(wo, ptrace, p);
+       if (ret)
+               return ret;
 
        /*
-        * It's stopped or running now, so it might
-        * later continue, exit, or stop again.
+        * Wait for continued.  There's only one continued state and the
+        * ptracer can consume it which can confuse the real parent.  Don't
+        * use WCONTINUED from ptracer.  You don't need or want it.
         */
-       wo->notask_error = 0;
-
-       if (task_stopped_code(p, ptrace))
-               return wait_task_stopped(wo, ptrace, p);
-
        return wait_task_continued(wo, p);
 }
 
index dc7ab65f3b36cb0b71c8365e13d0e73a9601f6ee..7a81fc0713442590c6643e411c77da127bc2520e 100644 (file)
@@ -38,35 +38,33 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
        child->parent = new_parent;
 }
 
-/*
- * Turn a tracing stop into a normal stop now, since with no tracer there
- * would be no way to wake it up with SIGCONT or SIGKILL.  If there was a
- * signal sent that would resume the child, but didn't because it was in
- * TASK_TRACED, resume it now.
- * Requires that irqs be disabled.
- */
-static void ptrace_untrace(struct task_struct *child)
-{
-       spin_lock(&child->sighand->siglock);
-       if (task_is_traced(child)) {
-               /*
-                * If the group stop is completed or in progress,
-                * this thread was already counted as stopped.
-                */
-               if (child->signal->flags & SIGNAL_STOP_STOPPED ||
-                   child->signal->group_stop_count)
-                       __set_task_state(child, TASK_STOPPED);
-               else
-                       signal_wake_up(child, 1);
-       }
-       spin_unlock(&child->sighand->siglock);
-}
-
-/*
- * unptrace a task: move it back to its original parent and
- * remove it from the ptrace list.
+/**
+ * __ptrace_unlink - unlink ptracee and restore its execution state
+ * @child: ptracee to be unlinked
  *
- * Must be called with the tasklist lock write-held.
+ * Remove @child from the ptrace list, move it back to the original parent,
+ * and restore the execution state so that it conforms to the group stop
+ * state.
+ *
+ * Unlinking can happen via two paths - explicit PTRACE_DETACH or ptracer
+ * exiting.  For PTRACE_DETACH, unless the ptracee has been killed between
+ * ptrace_check_attach() and here, it's guaranteed to be in TASK_TRACED.
+ * If the ptracer is exiting, the ptracee can be in any state.
+ *
+ * After detach, the ptracee should be in a state which conforms to the
+ * group stop.  If the group is stopped or in the process of stopping, the
+ * ptracee should be put into TASK_STOPPED; otherwise, it should be woken
+ * up from TASK_TRACED.
+ *
+ * If the ptracee is in TASK_TRACED and needs to be moved to TASK_STOPPED,
+ * it goes through TRACED -> RUNNING -> STOPPED transition which is similar
+ * to but in the opposite direction of what happens while attaching to a
+ * stopped task.  However, in this direction, the intermediate RUNNING
+ * state is not hidden even from the current ptracer and if it immediately
+ * re-attaches and performs a WNOHANG wait(2), it may fail.
+ *
+ * CONTEXT:
+ * write_lock_irq(tasklist_lock)
  */
 void __ptrace_unlink(struct task_struct *child)
 {
@@ -76,8 +74,27 @@ void __ptrace_unlink(struct task_struct *child)
        child->parent = child->real_parent;
        list_del_init(&child->ptrace_entry);
 
-       if (task_is_traced(child))
-               ptrace_untrace(child);
+       spin_lock(&child->sighand->siglock);
+
+       /*
+        * Reinstate GROUP_STOP_PENDING if group stop is in effect and
+        * @child isn't dead.
+        */
+       if (!(child->flags & PF_EXITING) &&
+           (child->signal->flags & SIGNAL_STOP_STOPPED ||
+            child->signal->group_stop_count))
+               child->group_stop |= GROUP_STOP_PENDING;
+
+       /*
+        * If transition to TASK_STOPPED is pending or in TASK_TRACED, kick
+        * @child in the butt.  Note that @resume should be used iff @child
+        * is in TASK_TRACED; otherwise, we might unduly disrupt
+        * TASK_KILLABLE sleeps.
+        */
+       if (child->group_stop & GROUP_STOP_PENDING || task_is_traced(child))
+               signal_wake_up(child, task_is_traced(child));
+
+       spin_unlock(&child->sighand->siglock);
 }
 
 /*
@@ -96,16 +113,14 @@ int ptrace_check_attach(struct task_struct *child, int kill)
         */
        read_lock(&tasklist_lock);
        if ((child->ptrace & PT_PTRACED) && child->parent == current) {
-               ret = 0;
                /*
                 * child->sighand can't be NULL, release_task()
                 * does ptrace_unlink() before __exit_signal().
                 */
                spin_lock_irq(&child->sighand->siglock);
-               if (task_is_stopped(child))
-                       child->state = TASK_TRACED;
-               else if (!task_is_traced(child) && !kill)
-                       ret = -ESRCH;
+               WARN_ON_ONCE(task_is_stopped(child));
+               if (task_is_traced(child) || kill)
+                       ret = 0;
                spin_unlock_irq(&child->sighand->siglock);
        }
        read_unlock(&tasklist_lock);
@@ -169,6 +184,7 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode)
 
 static int ptrace_attach(struct task_struct *task)
 {
+       bool wait_trap = false;
        int retval;
 
        audit_ptrace(task);
@@ -208,12 +224,42 @@ static int ptrace_attach(struct task_struct *task)
        __ptrace_link(task, current);
        send_sig_info(SIGSTOP, SEND_SIG_FORCED, task);
 
+       spin_lock(&task->sighand->siglock);
+
+       /*
+        * If the task is already STOPPED, set GROUP_STOP_PENDING and
+        * TRAPPING, and kick it so that it transits to TRACED.  TRAPPING
+        * will be cleared if the child completes the transition or any
+        * event which clears the group stop states happens.  We'll wait
+        * for the transition to complete before returning from this
+        * function.
+        *
+        * This hides STOPPED -> RUNNING -> TRACED transition from the
+        * attaching thread but a different thread in the same group can
+        * still observe the transient RUNNING state.  IOW, if another
+        * thread's WNOHANG wait(2) on the stopped tracee races against
+        * ATTACH, the wait(2) may fail due to the transient RUNNING.
+        *
+        * The following task_is_stopped() test is safe as both transitions
+        * in and out of STOPPED are protected by siglock.
+        */
+       if (task_is_stopped(task)) {
+               task->group_stop |= GROUP_STOP_PENDING | GROUP_STOP_TRAPPING;
+               signal_wake_up(task, 1);
+               wait_trap = true;
+       }
+
+       spin_unlock(&task->sighand->siglock);
+
        retval = 0;
 unlock_tasklist:
        write_unlock_irq(&tasklist_lock);
 unlock_creds:
        mutex_unlock(&task->signal->cred_guard_mutex);
 out:
+       if (wait_trap)
+               wait_event(current->signal->wait_chldexit,
+                          !(task->group_stop & GROUP_STOP_TRAPPING));
        return retval;
 }
 
@@ -316,8 +362,6 @@ static int ptrace_detach(struct task_struct *child, unsigned int data)
        if (child->ptrace) {
                child->exit_code = data;
                dead = __ptrace_detach(current, child);
-               if (!child->exit_state)
-                       wake_up_state(child, TASK_TRACED | TASK_STOPPED);
        }
        write_unlock_irq(&tasklist_lock);
 
index 7165af5f1b116fed3b5c5751d83e7e355e27ddab..ad5e818baacc43fb5e4046926bf52dde55acacfa 100644 (file)
@@ -124,7 +124,7 @@ static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked)
 
 static int recalc_sigpending_tsk(struct task_struct *t)
 {
-       if (t->signal->group_stop_count > 0 ||
+       if ((t->group_stop & GROUP_STOP_PENDING) ||
            PENDING(&t->pending, &t->blocked) ||
            PENDING(&t->signal->shared_pending, &t->blocked)) {
                set_tsk_thread_flag(t, TIF_SIGPENDING);
@@ -223,6 +223,83 @@ static inline void print_dropped_signal(int sig)
                                current->comm, current->pid, sig);
 }
 
+/**
+ * task_clear_group_stop_trapping - clear group stop trapping bit
+ * @task: target task
+ *
+ * If GROUP_STOP_TRAPPING is set, a ptracer is waiting for us.  Clear it
+ * and wake up the ptracer.  Note that we don't need any further locking.
+ * @task->siglock guarantees that @task->parent points to the ptracer.
+ *
+ * CONTEXT:
+ * Must be called with @task->sighand->siglock held.
+ */
+static void task_clear_group_stop_trapping(struct task_struct *task)
+{
+       if (unlikely(task->group_stop & GROUP_STOP_TRAPPING)) {
+               task->group_stop &= ~GROUP_STOP_TRAPPING;
+               __wake_up_sync_key(&task->parent->signal->wait_chldexit,
+                                  TASK_UNINTERRUPTIBLE, 1, task);
+       }
+}
+
+/**
+ * task_clear_group_stop_pending - clear pending group stop
+ * @task: target task
+ *
+ * Clear group stop states for @task.
+ *
+ * CONTEXT:
+ * Must be called with @task->sighand->siglock held.
+ */
+void task_clear_group_stop_pending(struct task_struct *task)
+{
+       task->group_stop &= ~(GROUP_STOP_PENDING | GROUP_STOP_CONSUME |
+                             GROUP_STOP_DEQUEUED);
+}
+
+/**
+ * task_participate_group_stop - participate in a group stop
+ * @task: task participating in a group stop
+ *
+ * @task has GROUP_STOP_PENDING set and is participating in a group stop.
+ * Group stop states are cleared and the group stop count is consumed if
+ * %GROUP_STOP_CONSUME was set.  If the consumption completes the group
+ * stop, the appropriate %SIGNAL_* flags are set.
+ *
+ * CONTEXT:
+ * Must be called with @task->sighand->siglock held.
+ *
+ * RETURNS:
+ * %true if group stop completion should be notified to the parent, %false
+ * otherwise.
+ */
+static bool task_participate_group_stop(struct task_struct *task)
+{
+       struct signal_struct *sig = task->signal;
+       bool consume = task->group_stop & GROUP_STOP_CONSUME;
+
+       WARN_ON_ONCE(!(task->group_stop & GROUP_STOP_PENDING));
+
+       task_clear_group_stop_pending(task);
+
+       if (!consume)
+               return false;
+
+       if (!WARN_ON_ONCE(sig->group_stop_count == 0))
+               sig->group_stop_count--;
+
+       /*
+        * Tell the caller to notify completion iff we are entering into a
+        * fresh group stop.  Read comment in do_signal_stop() for details.
+        */
+       if (!sig->group_stop_count && !(sig->flags & SIGNAL_STOP_STOPPED)) {
+               sig->flags = SIGNAL_STOP_STOPPED;
+               return true;
+       }
+       return false;
+}
+
 /*
  * allocate a new signal queue record
  * - this may be called without locks if and only if t == current, otherwise an
@@ -527,7 +604,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
                 * is to alert stop-signal processing code when another
                 * processor has come along and cleared the flag.
                 */
-               tsk->signal->flags |= SIGNAL_STOP_DEQUEUED;
+               current->group_stop |= GROUP_STOP_DEQUEUED;
        }
        if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) {
                /*
@@ -592,7 +669,7 @@ static int rm_from_queue_full(sigset_t *mask, struct sigpending *s)
        if (sigisemptyset(&m))
                return 0;
 
-       signandsets(&s->signal, &s->signal, mask);
+       sigandnsets(&s->signal, &s->signal, mask);
        list_for_each_entry_safe(q, n, &s->list, list) {
                if (sigismember(mask, q->info.si_signo)) {
                        list_del_init(&q->list);
@@ -727,34 +804,14 @@ static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns)
        } else if (sig == SIGCONT) {
                unsigned int why;
                /*
-                * Remove all stop signals from all queues,
-                * and wake all threads.
+                * Remove all stop signals from all queues, wake all threads.
                 */
                rm_from_queue(SIG_KERNEL_STOP_MASK, &signal->shared_pending);
                t = p;
                do {
-                       unsigned int state;
+                       task_clear_group_stop_pending(t);
                        rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending);
-                       /*
-                        * If there is a handler for SIGCONT, we must make
-                        * sure that no thread returns to user mode before
-                        * we post the signal, in case it was the only
-                        * thread eligible to run the signal handler--then
-                        * it must not do anything between resuming and
-                        * running the handler.  With the TIF_SIGPENDING
-                        * flag set, the thread will pause and acquire the
-                        * siglock that we hold now and until we've queued
-                        * the pending signal.
-                        *
-                        * Wake up the stopped thread _after_ setting
-                        * TIF_SIGPENDING
-                        */
-                       state = __TASK_STOPPED;
-                       if (sig_user_defined(t, SIGCONT) && !sigismember(&t->blocked, SIGCONT)) {
-                               set_tsk_thread_flag(t, TIF_SIGPENDING);
-                               state |= TASK_INTERRUPTIBLE;
-                       }
-                       wake_up_state(t, state);
+                       wake_up_state(t, __TASK_STOPPED);
                } while_each_thread(p, t);
 
                /*
@@ -780,13 +837,6 @@ static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns)
                        signal->flags = why | SIGNAL_STOP_CONTINUED;
                        signal->group_stop_count = 0;
                        signal->group_exit_code = 0;
-               } else {
-                       /*
-                        * We are not stopped, but there could be a stop
-                        * signal in the middle of being processed after
-                        * being removed from the queue.  Clear that too.
-                        */
-                       signal->flags &= ~SIGNAL_STOP_DEQUEUED;
                }
        }
 
@@ -875,6 +925,7 @@ static void complete_signal(int sig, struct task_struct *p, int group)
                        signal->group_stop_count = 0;
                        t = p;
                        do {
+                               task_clear_group_stop_pending(t);
                                sigaddset(&t->pending.signal, SIGKILL);
                                signal_wake_up(t, 1);
                        } while_each_thread(p, t);
@@ -1109,6 +1160,7 @@ int zap_other_threads(struct task_struct *p)
        p->signal->group_stop_count = 0;
 
        while_each_thread(p, t) {
+               task_clear_group_stop_pending(t);
                count++;
 
                /* Don't bother with already dead threads */
@@ -1536,16 +1588,30 @@ int do_notify_parent(struct task_struct *tsk, int sig)
        return ret;
 }
 
-static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
+/**
+ * do_notify_parent_cldstop - notify parent of stopped/continued state change
+ * @tsk: task reporting the state change
+ * @for_ptracer: the notification is for ptracer
+ * @why: CLD_{CONTINUED|STOPPED|TRAPPED} to report
+ *
+ * Notify @tsk's parent that the stopped/continued state has changed.  If
+ * @for_ptracer is %false, @tsk's group leader notifies to its real parent.
+ * If %true, @tsk reports to @tsk->parent which should be the ptracer.
+ *
+ * CONTEXT:
+ * Must be called with tasklist_lock at least read locked.
+ */
+static void do_notify_parent_cldstop(struct task_struct *tsk,
+                                    bool for_ptracer, int why)
 {
        struct siginfo info;
        unsigned long flags;
        struct task_struct *parent;
        struct sighand_struct *sighand;
 
-       if (task_ptrace(tsk))
+       if (for_ptracer) {
                parent = tsk->parent;
-       else {
+       } else {
                tsk = tsk->group_leader;
                parent = tsk->real_parent;
        }
@@ -1620,6 +1686,15 @@ static int sigkill_pending(struct task_struct *tsk)
                sigismember(&tsk->signal->shared_pending.signal, SIGKILL);
 }
 
+/*
+ * Test whether the target task of the usual cldstop notification - the
+ * real_parent of @child - is in the same group as the ptracer.
+ */
+static bool real_parent_is_ptracer(struct task_struct *child)
+{
+       return same_thread_group(child->parent, child->real_parent);
+}
+
 /*
  * This must be called with current->sighand->siglock held.
  *
@@ -1631,10 +1706,12 @@ static int sigkill_pending(struct task_struct *tsk)
  * If we actually decide not to stop at all because the tracer
  * is gone, we keep current->exit_code unless clear_code.
  */
-static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
+static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
        __releases(&current->sighand->siglock)
        __acquires(&current->sighand->siglock)
 {
+       bool gstop_done = false;
+
        if (arch_ptrace_stop_needed(exit_code, info)) {
                /*
                 * The arch code has something special to do before a
@@ -1655,21 +1732,49 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
        }
 
        /*
-        * If there is a group stop in progress,
-        * we must participate in the bookkeeping.
+        * If @why is CLD_STOPPED, we're trapping to participate in a group
+        * stop.  Do the bookkeeping.  Note that if SIGCONT was delivered
+        * while siglock was released for the arch hook, PENDING could be
+        * clear now.  We act as if SIGCONT is received after TASK_TRACED
+        * is entered - ignore it.
         */
-       if (current->signal->group_stop_count > 0)
-               --current->signal->group_stop_count;
+       if (why == CLD_STOPPED && (current->group_stop & GROUP_STOP_PENDING))
+               gstop_done = task_participate_group_stop(current);
 
        current->last_siginfo = info;
        current->exit_code = exit_code;
 
-       /* Let the debugger run.  */
-       __set_current_state(TASK_TRACED);
+       /*
+        * TRACED should be visible before TRAPPING is cleared; otherwise,
+        * the tracer might fail do_wait().
+        */
+       set_current_state(TASK_TRACED);
+
+       /*
+        * We're committing to trapping.  Clearing GROUP_STOP_TRAPPING and
+        * transition to TASK_TRACED should be atomic with respect to
+        * siglock.  This should be done after the arch hook as siglock is
+        * released and regrabbed across it.
+        */
+       task_clear_group_stop_trapping(current);
+
        spin_unlock_irq(&current->sighand->siglock);
        read_lock(&tasklist_lock);
        if (may_ptrace_stop()) {
-               do_notify_parent_cldstop(current, CLD_TRAPPED);
+               /*
+                * Notify parents of the stop.
+                *
+                * While ptraced, there are two parents - the ptracer and
+                * the real_parent of the group_leader.  The ptracer should
+                * know about every stop while the real parent is only
+                * interested in the completion of group stop.  The states
+                * for the two don't interact with each other.  Notify
+                * separately unless they're gonna be duplicates.
+                */
+               do_notify_parent_cldstop(current, true, why);
+               if (gstop_done && !real_parent_is_ptracer(current))
+                       do_notify_parent_cldstop(current, false, why);
+
                /*
                 * Don't want to allow preemption here, because
                 * sys_ptrace() needs this task to be inactive.
@@ -1684,7 +1789,16 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
                /*
                 * By the time we got the lock, our tracer went away.
                 * Don't drop the lock yet, another tracer may come.
+                *
+                * If @gstop_done, the ptracer went away between group stop
+                * completion and here.  During detach, it would have set
+                * GROUP_STOP_PENDING on us and we'll re-enter TASK_STOPPED
+                * in do_signal_stop() on return, so notifying the real
+                * parent of the group stop completion is enough.
                 */
+               if (gstop_done)
+                       do_notify_parent_cldstop(current, false, why);
+
                __set_current_state(TASK_RUNNING);
                if (clear_code)
                        current->exit_code = 0;
@@ -1728,7 +1842,7 @@ void ptrace_notify(int exit_code)
 
        /* Let the debugger run.  */
        spin_lock_irq(&current->sighand->siglock);
-       ptrace_stop(exit_code, 1, &info);
+       ptrace_stop(exit_code, CLD_TRAPPED, 1, &info);
        spin_unlock_irq(&current->sighand->siglock);
 }
 
@@ -1741,66 +1855,115 @@ void ptrace_notify(int exit_code)
 static int do_signal_stop(int signr)
 {
        struct signal_struct *sig = current->signal;
-       int notify;
 
-       if (!sig->group_stop_count) {
+       if (!(current->group_stop & GROUP_STOP_PENDING)) {
+               unsigned int gstop = GROUP_STOP_PENDING | GROUP_STOP_CONSUME;
                struct task_struct *t;
 
-               if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) ||
+               /* signr will be recorded in task->group_stop for retries */
+               WARN_ON_ONCE(signr & ~GROUP_STOP_SIGMASK);
+
+               if (!likely(current->group_stop & GROUP_STOP_DEQUEUED) ||
                    unlikely(signal_group_exit(sig)))
                        return 0;
                /*
-                * There is no group stop already in progress.
-                * We must initiate one now.
+                * There is no group stop already in progress.  We must
+                * initiate one now.
+                *
+                * While ptraced, a task may be resumed while group stop is
+                * still in effect and then receive a stop signal and
+                * initiate another group stop.  This deviates from the
+                * usual behavior as two consecutive stop signals can't
+                * cause two group stops when !ptraced.  That is why we
+                * also check !task_is_stopped(t) below.
+                *
+                * The condition can be distinguished by testing whether
+                * SIGNAL_STOP_STOPPED is already set.  Don't generate
+                * group_exit_code in such case.
+                *
+                * This is not necessary for SIGNAL_STOP_CONTINUED because
+                * an intervening stop signal is required to cause two
+                * continued events regardless of ptrace.
                 */
-               sig->group_exit_code = signr;
+               if (!(sig->flags & SIGNAL_STOP_STOPPED))
+                       sig->group_exit_code = signr;
+               else
+                       WARN_ON_ONCE(!task_ptrace(current));
 
+               current->group_stop &= ~GROUP_STOP_SIGMASK;
+               current->group_stop |= signr | gstop;
                sig->group_stop_count = 1;
-               for (t = next_thread(current); t != current; t = next_thread(t))
+               for (t = next_thread(current); t != current;
+                    t = next_thread(t)) {
+                       t->group_stop &= ~GROUP_STOP_SIGMASK;
                        /*
                         * Setting state to TASK_STOPPED for a group
                         * stop is always done with the siglock held,
                         * so this check has no races.
                         */
-                       if (!(t->flags & PF_EXITING) &&
-                           !task_is_stopped_or_traced(t)) {
+                       if (!(t->flags & PF_EXITING) && !task_is_stopped(t)) {
+                               t->group_stop |= signr | gstop;
                                sig->group_stop_count++;
                                signal_wake_up(t, 0);
                        }
+               }
        }
-       /*
-        * If there are no other threads in the group, or if there is
-        * a group stop in progress and we are the last to stop, report
-        * to the parent.  When ptraced, every thread reports itself.
-        */
-       notify = sig->group_stop_count == 1 ? CLD_STOPPED : 0;
-       notify = tracehook_notify_jctl(notify, CLD_STOPPED);
-       /*
-        * tracehook_notify_jctl() can drop and reacquire siglock, so
-        * we keep ->group_stop_count != 0 before the call. If SIGCONT
-        * or SIGKILL comes in between ->group_stop_count == 0.
-        */
-       if (sig->group_stop_count) {
-               if (!--sig->group_stop_count)
-                       sig->flags = SIGNAL_STOP_STOPPED;
-               current->exit_code = sig->group_exit_code;
+retry:
+       if (likely(!task_ptrace(current))) {
+               int notify = 0;
+
+               /*
+                * If there are no other threads in the group, or if there
+                * is a group stop in progress and we are the last to stop,
+                * report to the parent.
+                */
+               if (task_participate_group_stop(current))
+                       notify = CLD_STOPPED;
+
                __set_current_state(TASK_STOPPED);
+               spin_unlock_irq(&current->sighand->siglock);
+
+               /*
+                * Notify the parent of the group stop completion.  Because
+                * we're not holding either the siglock or tasklist_lock
+                * here, ptracer may attach inbetween; however, this is for
+                * group stop and should always be delivered to the real
+                * parent of the group leader.  The new ptracer will get
+                * its notification when this task transitions into
+                * TASK_TRACED.
+                */
+               if (notify) {
+                       read_lock(&tasklist_lock);
+                       do_notify_parent_cldstop(current, false, notify);
+                       read_unlock(&tasklist_lock);
+               }
+
+               /* Now we don't run again until woken by SIGCONT or SIGKILL */
+               schedule();
+
+               spin_lock_irq(&current->sighand->siglock);
+       } else {
+               ptrace_stop(current->group_stop & GROUP_STOP_SIGMASK,
+                           CLD_STOPPED, 0, NULL);
+               current->exit_code = 0;
        }
-       spin_unlock_irq(&current->sighand->siglock);
 
-       if (notify) {
-               read_lock(&tasklist_lock);
-               do_notify_parent_cldstop(current, notify);
-               read_unlock(&tasklist_lock);
+       /*
+        * GROUP_STOP_PENDING could be set if another group stop has
+        * started since being woken up or ptrace wants us to transit
+        * between TASK_STOPPED and TRACED.  Retry group stop.
+        */
+       if (current->group_stop & GROUP_STOP_PENDING) {
+               WARN_ON_ONCE(!(current->group_stop & GROUP_STOP_SIGMASK));
+               goto retry;
        }
 
-       /* Now we don't run again until woken by SIGCONT or SIGKILL */
-       do {
-               schedule();
-       } while (try_to_freeze());
+       /* PTRACE_ATTACH might have raced with task killing, clear trapping */
+       task_clear_group_stop_trapping(current);
+
+       spin_unlock_irq(&current->sighand->siglock);
 
        tracehook_finish_jctl();
-       current->exit_code = 0;
 
        return 1;
 }
@@ -1814,7 +1977,7 @@ static int ptrace_signal(int signr, siginfo_t *info,
        ptrace_signal_deliver(regs, cookie);
 
        /* Let the debugger run.  */
-       ptrace_stop(signr, 0, info);
+       ptrace_stop(signr, CLD_TRAPPED, 0, info);
 
        /* We're back.  Did the debugger cancel the sig?  */
        signr = current->exit_code;
@@ -1869,18 +2032,36 @@ relock:
         * the CLD_ si_code into SIGNAL_CLD_MASK bits.
         */
        if (unlikely(signal->flags & SIGNAL_CLD_MASK)) {
-               int why = (signal->flags & SIGNAL_STOP_CONTINUED)
-                               ? CLD_CONTINUED : CLD_STOPPED;
+               struct task_struct *leader;
+               int why;
+
+               if (signal->flags & SIGNAL_CLD_CONTINUED)
+                       why = CLD_CONTINUED;
+               else
+                       why = CLD_STOPPED;
+
                signal->flags &= ~SIGNAL_CLD_MASK;
 
-               why = tracehook_notify_jctl(why, CLD_CONTINUED);
                spin_unlock_irq(&sighand->siglock);
 
-               if (why) {
-                       read_lock(&tasklist_lock);
-                       do_notify_parent_cldstop(current->group_leader, why);
-                       read_unlock(&tasklist_lock);
-               }
+               /*
+                * Notify the parent that we're continuing.  This event is
+                * always per-process and doesn't make whole lot of sense
+                * for ptracers, who shouldn't consume the state via
+                * wait(2) either, but, for backward compatibility, notify
+                * the ptracer of the group leader too unless it's gonna be
+                * a duplicate.
+                */
+               read_lock(&tasklist_lock);
+
+               do_notify_parent_cldstop(current, false, why);
+
+               leader = current->group_leader;
+               if (task_ptrace(leader) && !real_parent_is_ptracer(leader))
+                       do_notify_parent_cldstop(leader, true, why);
+
+               read_unlock(&tasklist_lock);
+
                goto relock;
        }
 
@@ -1897,8 +2078,8 @@ relock:
                if (unlikely(signr != 0))
                        ka = return_ka;
                else {
-                       if (unlikely(signal->group_stop_count > 0) &&
-                           do_signal_stop(0))
+                       if (unlikely(current->group_stop &
+                                    GROUP_STOP_PENDING) && do_signal_stop(0))
                                goto relock;
 
                        signr = dequeue_signal(current, &current->blocked,
@@ -2017,10 +2198,42 @@ relock:
        return signr;
 }
 
+/*
+ * It could be that complete_signal() picked us to notify about the
+ * group-wide signal. Other threads should be notified now to take
+ * the shared signals in @which since we will not.
+ */
+static void retarget_shared_pending(struct task_struct *tsk, sigset_t *which)
+{
+       sigset_t retarget;
+       struct task_struct *t;
+
+       sigandsets(&retarget, &tsk->signal->shared_pending.signal, which);
+       if (sigisemptyset(&retarget))
+               return;
+
+       t = tsk;
+       while_each_thread(tsk, t) {
+               if (t->flags & PF_EXITING)
+                       continue;
+
+               if (!has_pending_signals(&retarget, &t->blocked))
+                       continue;
+               /* Remove the signals this thread can handle. */
+               sigandsets(&retarget, &retarget, &t->blocked);
+
+               if (!signal_pending(t))
+                       signal_wake_up(t, 0);
+
+               if (sigisemptyset(&retarget))
+                       break;
+       }
+}
+
 void exit_signals(struct task_struct *tsk)
 {
        int group_stop = 0;
-       struct task_struct *t;
+       sigset_t unblocked;
 
        if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) {
                tsk->flags |= PF_EXITING;
@@ -2036,26 +2249,23 @@ void exit_signals(struct task_struct *tsk)
        if (!signal_pending(tsk))
                goto out;
 
-       /*
-        * It could be that __group_complete_signal() choose us to
-        * notify about group-wide signal. Another thread should be
-        * woken now to take the signal since we will not.
-        */
-       for (t = tsk; (t = next_thread(t)) != tsk; )
-               if (!signal_pending(t) && !(t->flags & PF_EXITING))
-                       recalc_sigpending_and_wake(t);
+       unblocked = tsk->blocked;
+       signotset(&unblocked);
+       retarget_shared_pending(tsk, &unblocked);
 
-       if (unlikely(tsk->signal->group_stop_count) &&
-                       !--tsk->signal->group_stop_count) {
-               tsk->signal->flags = SIGNAL_STOP_STOPPED;
-               group_stop = tracehook_notify_jctl(CLD_STOPPED, CLD_STOPPED);
-       }
+       if (unlikely(tsk->group_stop & GROUP_STOP_PENDING) &&
+           task_participate_group_stop(tsk))
+               group_stop = CLD_STOPPED;
 out:
        spin_unlock_irq(&tsk->sighand->siglock);
 
+       /*
+        * If group stop has completed, deliver the notification.  This
+        * should always go to the real parent of the group leader.
+        */
        if (unlikely(group_stop)) {
                read_lock(&tasklist_lock);
-               do_notify_parent_cldstop(tsk, group_stop);
+               do_notify_parent_cldstop(tsk, false, group_stop);
                read_unlock(&tasklist_lock);
        }
 }
@@ -2089,11 +2299,33 @@ long do_no_restart_syscall(struct restart_block *param)
        return -EINTR;
 }
 
-/*
- * We don't need to get the kernel lock - this is all local to this
- * particular thread.. (and that's good, because this is _heavily_
- * used by various programs)
+static void __set_task_blocked(struct task_struct *tsk, const sigset_t *newset)
+{
+       if (signal_pending(tsk) && !thread_group_empty(tsk)) {
+               sigset_t newblocked;
+               /* A set of now blocked but previously unblocked signals. */
+               sigandnsets(&newblocked, newset, &current->blocked);
+               retarget_shared_pending(tsk, &newblocked);
+       }
+       tsk->blocked = *newset;
+       recalc_sigpending();
+}
+
+/**
+ * set_current_blocked - change current->blocked mask
+ * @newset: new mask
+ *
+ * It is wrong to change ->blocked directly, this helper should be used
+ * to ensure the process can't miss a shared signal we are going to block.
  */
+void set_current_blocked(const sigset_t *newset)
+{
+       struct task_struct *tsk = current;
+
+       spin_lock_irq(&tsk->sighand->siglock);
+       __set_task_blocked(tsk, newset);
+       spin_unlock_irq(&tsk->sighand->siglock);
+}
 
 /*
  * This is also useful for kernel threads that want to temporarily
@@ -2105,30 +2337,29 @@ long do_no_restart_syscall(struct restart_block *param)
  */
 int sigprocmask(int how, sigset_t *set, sigset_t *oldset)
 {
-       int error;
+       struct task_struct *tsk = current;
+       sigset_t newset;
 
-       spin_lock_irq(&current->sighand->siglock);
+       /* Lockless, only current can change ->blocked, never from irq */
        if (oldset)
-               *oldset = current->blocked;
+               *oldset = tsk->blocked;
 
-       error = 0;
        switch (how) {
        case SIG_BLOCK:
-               sigorsets(&current->blocked, &current->blocked, set);
+               sigorsets(&newset, &tsk->blocked, set);
                break;
        case SIG_UNBLOCK:
-               signandsets(&current->blocked, &current->blocked, set);
+               sigandnsets(&newset, &tsk->blocked, set);
                break;
        case SIG_SETMASK:
-               current->blocked = *set;
+               newset = *set;
                break;
        default:
-               error = -EINVAL;
+               return -EINVAL;
        }
-       recalc_sigpending();
-       spin_unlock_irq(&current->sighand->siglock);
 
-       return error;
+       set_current_blocked(&newset);
+       return 0;
 }
 
 /**
@@ -2138,40 +2369,34 @@ int sigprocmask(int how, sigset_t *set, sigset_t *oldset)
  *  @oset: previous value of signal mask if non-null
  *  @sigsetsize: size of sigset_t type
  */
-SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, set,
+SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, nset,
                sigset_t __user *, oset, size_t, sigsetsize)
 {
-       int error = -EINVAL;
        sigset_t old_set, new_set;
+       int error;
 
        /* XXX: Don't preclude handling different sized sigset_t's.  */
        if (sigsetsize != sizeof(sigset_t))
-               goto out;
+               return -EINVAL;
 
-       if (set) {
-               error = -EFAULT;
-               if (copy_from_user(&new_set, set, sizeof(*set)))
-                       goto out;
+       old_set = current->blocked;
+
+       if (nset) {
+               if (copy_from_user(&new_set, nset, sizeof(sigset_t)))
+                       return -EFAULT;
                sigdelsetmask(&new_set, sigmask(SIGKILL)|sigmask(SIGSTOP));
 
-               error = sigprocmask(how, &new_set, &old_set);
+               error = sigprocmask(how, &new_set, NULL);
                if (error)
-                       goto out;
-               if (oset)
-                       goto set_old;
-       } else if (oset) {
-               spin_lock_irq(&current->sighand->siglock);
-               old_set = current->blocked;
-               spin_unlock_irq(&current->sighand->siglock);
+                       return error;
+       }
 
-       set_old:
-               error = -EFAULT;
-               if (copy_to_user(oset, &old_set, sizeof(*oset)))
-                       goto out;
+       if (oset) {
+               if (copy_to_user(oset, &old_set, sizeof(sigset_t)))
+                       return -EFAULT;
        }
-       error = 0;
-out:
-       return error;
+
+       return 0;
 }
 
 long do_sigpending(void __user *set, unsigned long sigsetsize)
@@ -2283,6 +2508,66 @@ int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from)
 
 #endif
 
+/**
+ *  do_sigtimedwait - wait for queued signals specified in @which
+ *  @which: queued signals to wait for
+ *  @info: if non-null, the signal's siginfo is returned here
+ *  @ts: upper bound on process time suspension
+ */
+int do_sigtimedwait(const sigset_t *which, siginfo_t *info,
+                       const struct timespec *ts)
+{
+       struct task_struct *tsk = current;
+       long timeout = MAX_SCHEDULE_TIMEOUT;
+       sigset_t mask = *which;
+       int sig;
+
+       if (ts) {
+               if (!timespec_valid(ts))
+                       return -EINVAL;
+               timeout = timespec_to_jiffies(ts);
+               /*
+                * We can be close to the next tick, add another one
+                * to ensure we will wait at least the time asked for.
+                */
+               if (ts->tv_sec || ts->tv_nsec)
+                       timeout++;
+       }
+
+       /*
+        * Invert the set of allowed signals to get those we want to block.
+        */
+       sigdelsetmask(&mask, sigmask(SIGKILL) | sigmask(SIGSTOP));
+       signotset(&mask);
+
+       spin_lock_irq(&tsk->sighand->siglock);
+       sig = dequeue_signal(tsk, &mask, info);
+       if (!sig && timeout) {
+               /*
+                * None ready, temporarily unblock those we're interested
+                * while we are sleeping in so that we'll be awakened when
+                * they arrive. Unblocking is always fine, we can avoid
+                * set_current_blocked().
+                */
+               tsk->real_blocked = tsk->blocked;
+               sigandsets(&tsk->blocked, &tsk->blocked, &mask);
+               recalc_sigpending();
+               spin_unlock_irq(&tsk->sighand->siglock);
+
+               timeout = schedule_timeout_interruptible(timeout);
+
+               spin_lock_irq(&tsk->sighand->siglock);
+               __set_task_blocked(tsk, &tsk->real_blocked);
+               siginitset(&tsk->real_blocked, 0);
+               sig = dequeue_signal(tsk, &mask, info);
+       }
+       spin_unlock_irq(&tsk->sighand->siglock);
+
+       if (sig)
+               return sig;
+       return timeout ? -EINTR : -EAGAIN;
+}
+
 /**
  *  sys_rt_sigtimedwait - synchronously wait for queued signals specified
  *                     in @uthese
@@ -2295,11 +2580,10 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese,
                siginfo_t __user *, uinfo, const struct timespec __user *, uts,
                size_t, sigsetsize)
 {
-       int ret, sig;
        sigset_t these;
        struct timespec ts;
        siginfo_t info;
-       long timeout = 0;
+       int ret;
 
        /* XXX: Don't preclude handling different sized sigset_t's.  */
        if (sigsetsize != sizeof(sigset_t))
@@ -2308,61 +2592,16 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese,
        if (copy_from_user(&these, uthese, sizeof(these)))
                return -EFAULT;
 
-       /*
-        * Invert the set of allowed signals to get those we
-        * want to block.
-        */
-       sigdelsetmask(&these, sigmask(SIGKILL)|sigmask(SIGSTOP));
-       signotset(&these);
-
        if (uts) {
                if (copy_from_user(&ts, uts, sizeof(ts)))
                        return -EFAULT;
-               if (ts.tv_nsec >= 1000000000L || ts.tv_nsec < 0
-                   || ts.tv_sec < 0)
-                       return -EINVAL;
        }
 
-       spin_lock_irq(&current->sighand->siglock);
-       sig = dequeue_signal(current, &these, &info);
-       if (!sig) {
-               timeout = MAX_SCHEDULE_TIMEOUT;
-               if (uts)
-                       timeout = (timespec_to_jiffies(&ts)
-                                  + (ts.tv_sec || ts.tv_nsec));
-
-               if (timeout) {
-                       /*
-                        * None ready -- temporarily unblock those we're
-                        * interested while we are sleeping in so that we'll
-                        * be awakened when they arrive.
-                        */
-                       current->real_blocked = current->blocked;
-                       sigandsets(&current->blocked, &current->blocked, &these);
-                       recalc_sigpending();
-                       spin_unlock_irq(&current->sighand->siglock);
-
-                       timeout = schedule_timeout_interruptible(timeout);
-
-                       spin_lock_irq(&current->sighand->siglock);
-                       sig = dequeue_signal(current, &these, &info);
-                       current->blocked = current->real_blocked;
-                       siginitset(&current->real_blocked, 0);
-                       recalc_sigpending();
-               }
-       }
-       spin_unlock_irq(&current->sighand->siglock);
+       ret = do_sigtimedwait(&these, &info, uts ? &ts : NULL);
 
-       if (sig) {
-               ret = sig;
-               if (uinfo) {
-                       if (copy_siginfo_to_user(uinfo, &info))
-                               ret = -EFAULT;
-               }
-       } else {
-               ret = -EAGAIN;
-               if (timeout)
-                       ret = -EINTR;
+       if (ret > 0 && uinfo) {
+               if (copy_siginfo_to_user(uinfo, &info))
+                       ret = -EFAULT;
        }
 
        return ret;
@@ -2650,60 +2889,51 @@ SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, set)
 /**
  *  sys_sigprocmask - examine and change blocked signals
  *  @how: whether to add, remove, or set signals
- *  @set: signals to add or remove (if non-null)
+ *  @nset: signals to add or remove (if non-null)
  *  @oset: previous value of signal mask if non-null
  *
  * Some platforms have their own version with special arguments;
  * others support only sys_rt_sigprocmask.
  */
 
-SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, set,
+SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, nset,
                old_sigset_t __user *, oset)
 {
-       int error;
        old_sigset_t old_set, new_set;
+       sigset_t new_blocked;
 
-       if (set) {
-               error = -EFAULT;
-               if (copy_from_user(&new_set, set, sizeof(*set)))
-                       goto out;
+       old_set = current->blocked.sig[0];
+
+       if (nset) {
+               if (copy_from_user(&new_set, nset, sizeof(*nset)))
+                       return -EFAULT;
                new_set &= ~(sigmask(SIGKILL) | sigmask(SIGSTOP));
 
-               spin_lock_irq(&current->sighand->siglock);
-               old_set = current->blocked.sig[0];
+               new_blocked = current->blocked;
 
-               error = 0;
                switch (how) {
-               default:
-                       error = -EINVAL;
-                       break;
                case SIG_BLOCK:
-                       sigaddsetmask(&current->blocked, new_set);
+                       sigaddsetmask(&new_blocked, new_set);
                        break;
                case SIG_UNBLOCK:
-                       sigdelsetmask(&current->blocked, new_set);
+                       sigdelsetmask(&new_blocked, new_set);
                        break;
                case SIG_SETMASK:
-                       current->blocked.sig[0] = new_set;
+                       new_blocked.sig[0] = new_set;
                        break;
+               default:
+                       return -EINVAL;
                }
 
-               recalc_sigpending();
-               spin_unlock_irq(&current->sighand->siglock);
-               if (error)
-                       goto out;
-               if (oset)
-                       goto set_old;
-       } else if (oset) {
-               old_set = current->blocked.sig[0];
-       set_old:
-               error = -EFAULT;
+               set_current_blocked(&new_blocked);
+       }
+
+       if (oset) {
                if (copy_to_user(oset, &old_set, sizeof(*oset)))
-                       goto out;
+                       return -EFAULT;
        }
-       error = 0;
-out:
-       return error;
+
+       return 0;
 }
 #endif /* __ARCH_WANT_SYS_SIGPROCMASK */