kernel/signal.c

   1 /*
   2  *  linux/kernel/signal.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  *
   6  *  1997-11-02  Modified for POSIX.1b signals by Richard Henderson
   7  *
   8  *  2003-06-02  Jim Houston - Concurrent Computer Corp.
   9  *              Changes to use preallocated sigqueue structures
  10  *              to allow signals to be sent reliably.
  11  */
  12
  13 #include <linux/slab.h>
  14 #include <linux/module.h>
  15 #include <linux/init.h>
  16 #include <linux/sched.h>
  17 #include <linux/fs.h>
  18 #include <linux/tty.h>
  19 #include <linux/binfmts.h>
  20 #include <linux/security.h>
  21 #include <linux/syscalls.h>
  22 #include <linux/ptrace.h>
  23 #include <linux/signal.h>
  24 #include <linux/signalfd.h>
  25 #include <linux/tracehook.h>
  26 #include <linux/capability.h>
  27 #include <linux/freezer.h>
  28 #include <linux/pid_namespace.h>
  29 #include <linux/nsproxy.h>
  30 #include <trace/events/sched.h>
  31
  32 #include <asm/param.h>
  33 #include <asm/uaccess.h>
  34 #include <asm/unistd.h>
  35 #include <asm/siginfo.h>
  36 #include "audit.h"      /* audit_signal_info() */
  37
   38 /*
   39  * SLAB caches for signal bits.
       *
       * sigqueue_cachep is the cache that __sigqueue_alloc() allocates
       * struct sigqueue entries from and that __sigqueue_free() returns
       * them to.
   40  */
   41
   42 static struct kmem_cache *sigqueue_cachep;
  43
  44 static void __user *sig_handler(struct task_struct *t, int sig)
  45 {
  46         return t->sighand->action[sig - 1].sa.sa_handler;
  47 }
  48
  49 static int sig_handler_ignored(void __user *handler, int sig)
  50 {
  51         /* Is it explicitly or implicitly ignored? */
  52         return handler == SIG_IGN ||
  53                 (handler == SIG_DFL && sig_kernel_ignore(sig));
  54 }
  55
  56 static int sig_task_ignored(struct task_struct *t, int sig,
  57                 int from_ancestor_ns)
  58 {
  59         void __user *handler;
  60
  61         handler = sig_handler(t, sig);
  62
  63         if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) &&
  64                         handler == SIG_DFL && !from_ancestor_ns)
  65                 return 1;
  66
  67         return sig_handler_ignored(handler, sig);
  68 }
  69
  70 static int sig_ignored(struct task_struct *t, int sig, int from_ancestor_ns)
  71 {
  72         /*
  73          * Blocked signals are never ignored, since the
  74          * signal handler may change by the time it is
  75          * unblocked.
  76          */
  77         if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig))
  78                 return 0;
  79
  80         if (!sig_task_ignored(t, sig, from_ancestor_ns))
  81                 return 0;
  82
  83         /*
  84          * Tracers may want to know about even ignored signals.
  85          */
  86         return !tracehook_consider_ignored_signal(t, sig);
  87 }
  88
  89 /*
  90  * Re-calculate pending state from the set of locally pending
  91  * signals, globally pending signals, and blocked signals.
  92  */
  93 static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked)
  94 {
  95         unsigned long ready;
  96         long i;
  97
  98         switch (_NSIG_WORDS) {
  99         default:
 100                 for (i = _NSIG_WORDS, ready = 0; --i >= 0 ;)
 101                         ready |= signal->sig[i] &~ blocked->sig[i];
 102                 break;
 103
 104         case 4: ready  = signal->sig[3] &~ blocked->sig[3];
 105                 ready |= signal->sig[2] &~ blocked->sig[2];
 106                 ready |= signal->sig[1] &~ blocked->sig[1];
 107                 ready |= signal->sig[0] &~ blocked->sig[0];
 108                 break;
 109
 110         case 2: ready  = signal->sig[1] &~ blocked->sig[1];
 111                 ready |= signal->sig[0] &~ blocked->sig[0];
 112                 break;
 113
 114         case 1: ready  = signal->sig[0] &~ blocked->sig[0];
 115         }
 116         return ready != 0;
 117 }
 118
 119 #define PENDING(p,b) has_pending_signals(&(p)->signal, (b))
 120
 121 static int recalc_sigpending_tsk(struct task_struct *t)
 122 {
 123         if (t->signal->group_stop_count > 0 ||
 124             PENDING(&t->pending, &t->blocked) ||
 125             PENDING(&t->signal->shared_pending, &t->blocked)) {
 126                 set_tsk_thread_flag(t, TIF_SIGPENDING);
 127                 return 1;
 128         }
 129         /*
 130          * We must never clear the flag in another thread, or in current
 131          * when it's possible the current syscall is returning -ERESTART*.
 132          * So we don't clear it here, and only callers who know they should do.
 133          */
 134         return 0;
 135 }
 136
 137 /*
 138  * After recalculating TIF_SIGPENDING, we need to make sure the task wakes up.
 139  * This is superfluous when called on current, the wakeup is a harmless no-op.
 140  */
 141 void recalc_sigpending_and_wake(struct task_struct *t)
 142 {
 143         if (recalc_sigpending_tsk(t))
 144                 signal_wake_up(t, 0);
 145 }
 146
 147 void recalc_sigpending(void)
 148 {
 149         if (unlikely(tracehook_force_sigpending()))
 150                 set_thread_flag(TIF_SIGPENDING);
 151         else if (!recalc_sigpending_tsk(current) && !freezing(current))
 152                 clear_thread_flag(TIF_SIGPENDING);
 153
 154 }
 155
 156 /* Given the mask, find the first available signal that should be serviced. */
 157
 158 int next_signal(struct sigpending *pending, sigset_t *mask)
 159 {
 160         unsigned long i, *s, *m, x;
 161         int sig = 0;
 162
 163         s = pending->signal.sig;
 164         m = mask->sig;
 165         switch (_NSIG_WORDS) {
 166         default:
 167                 for (i = 0; i < _NSIG_WORDS; ++i, ++s, ++m)
 168                         if ((x = *s &~ *m) != 0) {
 169                                 sig = ffz(~x) + i*_NSIG_BPW + 1;
 170                                 break;
 171                         }
 172                 break;
 173
 174         case 2: if ((x = s[0] &~ m[0]) != 0)
 175                         sig = 1;
 176                 else if ((x = s[1] &~ m[1]) != 0)
 177                         sig = _NSIG_BPW + 1;
 178                 else
 179                         break;
 180                 sig += ffz(~x);
 181                 break;
 182
 183         case 1: if ((x = *s &~ *m) != 0)
 184                         sig = ffz(~x) + 1;
 185                 break;
 186         }
 187
 188         return sig;
 189 }
 190
 191 /*
 192  * allocate a new signal queue record
 193  * - this may be called without locks if and only if t == current, otherwise an
 194  *   appopriate lock must be held to stop the target task from exiting
 195  */
 196 static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
 197                                          int override_rlimit)
 198 {
 199         struct sigqueue *q = NULL;
 200         struct user_struct *user;
 201
 202         /*
 203          * We won't get problems with the target's UID changing under us
 204          * because changing it requires RCU be used, and if t != current, the
 205          * caller must be holding the RCU readlock (by way of a spinlock) and
 206          * we use RCU protection here
 207          */
 208         user = get_uid(__task_cred(t)->user);
 209         atomic_inc(&user->sigpending);
 210         if (override_rlimit ||
 211             atomic_read(&user->sigpending) <=
 212                         t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur)
 213                 q = kmem_cache_alloc(sigqueue_cachep, flags);
 214         if (unlikely(q == NULL)) {
 215                 atomic_dec(&user->sigpending);
 216                 free_uid(user);
 217         } else {
 218                 INIT_LIST_HEAD(&q->list);
 219                 q->flags = 0;
 220                 q->user = user;
 221         }
 222
 223         return q;
 224 }
 225
 226 static void __sigqueue_free(struct sigqueue *q)
 227 {
 228         if (q->flags & SIGQUEUE_PREALLOC)
 229                 return;
 230         atomic_dec(&q->user->sigpending);
 231         free_uid(q->user);
 232         kmem_cache_free(sigqueue_cachep, q);
 233 }
 234
 235 void flush_sigqueue(struct sigpending *queue)
 236 {
 237         struct sigqueue *q;
 238
 239         sigemptyset(&queue->signal);
 240         while (!list_empty(&queue->list)) {
 241                 q = list_entry(queue->list.next, struct sigqueue , list);
 242                 list_del_init(&q->list);
 243                 __sigqueue_free(q);
 244         }
 245 }
 246
 247 /*
 248  * Flush all pending signals for a task.
 249  */
 250 void __flush_signals(struct task_struct *t)
 251 {
 252         clear_tsk_thread_flag(t, TIF_SIGPENDING);
 253         flush_sigqueue(&t->pending);
 254         flush_sigqueue(&t->signal->shared_pending);
 255 }
 256
 257 void flush_signals(struct task_struct *t)
 258 {
 259         unsigned long flags;
 260
 261         spin_lock_irqsave(&t->sighand->siglock, flags);
 262         __flush_signals(t);
 263         spin_unlock_irqrestore(&t->sighand->siglock, flags);
 264 }
 265
 266 static void __flush_itimer_signals(struct sigpending *pending)
 267 {
 268         sigset_t signal, retain;
 269         struct sigqueue *q, *n;
 270
 271         signal = pending->signal;
 272         sigemptyset(&retain);
 273
 274         list_for_each_entry_safe(q, n, &pending->list, list) {
 275                 int sig = q->info.si_signo;
 276
 277                 if (likely(q->info.si_code != SI_TIMER)) {
 278                         sigaddset(&retain, sig);
 279                 } else {
 280                         sigdelset(&signal, sig);
 281                         list_del_init(&q->list);
 282                         __sigqueue_free(q);
 283                 }
 284         }
 285
 286         sigorsets(&pending->signal, &signal, &retain);
 287 }
 288
 289 void flush_itimer_signals(void)
 290 {
 291         struct task_struct *tsk = current;
 292         unsigned long flags;
 293
 294         spin_lock_irqsave(&tsk->sighand->siglock, flags);
 295         __flush_itimer_signals(&tsk->pending);
 296         __flush_itimer_signals(&tsk->signal->shared_pending);
 297         spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
 298 }
 299
 300 void ignore_signals(struct task_struct *t)
 301 {
 302         int i;
 303
 304         for (i = 0; i < _NSIG; ++i)
 305                 t->sighand->action[i].sa.sa_handler = SIG_IGN;
 306
 307         flush_signals(t);
 308 }
 309
 310 /*
 311  * Flush all handlers for a task.
 312  */
 313
 314 void
 315 flush_signal_handlers(struct task_struct *t, int force_default)
 316 {
 317         int i;
 318         struct k_sigaction *ka = &t->sighand->action[0];
 319         for (i = _NSIG ; i != 0 ; i--) {
 320                 if (force_default || ka->sa.sa_handler != SIG_IGN)
 321                         ka->sa.sa_handler = SIG_DFL;
 322                 ka->sa.sa_flags = 0;
 323                 sigemptyset(&ka->sa.sa_mask);
 324                 ka++;
 325         }
 326 }
 327
 328 int unhandled_signal(struct task_struct *tsk, int sig)
 329 {
 330         void __user *handler = tsk->sighand->action[sig-1].sa.sa_handler;
 331         if (is_global_init(tsk))
 332                 return 1;
 333         if (handler != SIG_IGN && handler != SIG_DFL)
 334                 return 0;
 335         return !tracehook_consider_fatal_signal(tsk, sig);
 336 }
 337
 338
 339 /* Notify the system that a driver wants to block all signals for this
 340  * process, and wants to be notified if any signals at all were to be
 341  * sent/acted upon.  If the notifier routine returns non-zero, then the
 342  * signal will be acted upon after all.  If the notifier routine returns 0,
 343  * then then signal will be blocked.  Only one block per process is
 344  * allowed.  priv is a pointer to private data that the notifier routine
 345  * can use to determine if the signal should be blocked or not.  */
 346
 347 void
 348 block_all_signals(int (*notifier)(void *priv), void *priv, sigset_t *mask)
 349 {
 350         unsigned long flags;
 351
 352         spin_lock_irqsave(&current->sighand->siglock, flags);
 353         current->notifier_mask = mask;
 354         current->notifier_data = priv;
 355         current->notifier = notifier;
 356         spin_unlock_irqrestore(&current->sighand->siglock, flags);
 357 }
 358
 359 /* Notify the system that blocking has ended. */
 360
 361 void
 362 unblock_all_signals(void)
 363 {
 364         unsigned long flags;
 365
 366         spin_lock_irqsave(&current->sighand->siglock, flags);
 367         current->notifier = NULL;
 368         current->notifier_data = NULL;
 369         recalc_sigpending();
 370         spin_unlock_irqrestore(&current->sighand->siglock, flags);
 371 }
 372
 373 static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
 374 {
 375         struct sigqueue *q, *first = NULL;
 376
 377         /*
 378          * Collect the siginfo appropriate to this signal.  Check if
 379          * there is another siginfo for the same signal.
 380         */
 381         list_for_each_entry(q, &list->list, list) {
 382                 if (q->info.si_signo == sig) {
 383                         if (first)
 384                                 goto still_pending;
 385                         first = q;
 386                 }
 387         }
 388
 389         sigdelset(&list->signal, sig);
 390
 391         if (first) {
 392 still_pending:
 393                 list_del_init(&first->list);
 394                 copy_siginfo(info, &first->info);
 395                 __sigqueue_free(first);
 396         } else {
 397                 /* Ok, it wasn't in the queue.  This must be
 398                    a fast-pathed signal or we must have been
 399                    out of queue space.  So zero out the info.
 400                  */
 401                 info->si_signo = sig;
 402                 info->si_errno = 0;
 403                 info->si_code = 0;
 404                 info->si_pid = 0;
 405                 info->si_uid = 0;
 406         }
 407 }
 408
 409 static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
 410                         siginfo_t *info)
 411 {
 412         int sig = next_signal(pending, mask);
 413
 414         if (sig) {
 415                 if (current->notifier) {
 416                         if (sigismember(current->notifier_mask, sig)) {
 417                                 if (!(current->notifier)(current->notifier_data)) {
 418                                         clear_thread_flag(TIF_SIGPENDING);
 419                                         return 0;
 420                                 }
 421                         }
 422                 }
 423
 424                 collect_signal(sig, pending, info);
 425         }
 426
 427         return sig;
 428 }
 429
  430 /*
  431  * Dequeue the next signal for @tsk that is not in @mask and copy its
  432  * siginfo into @info.
       *
       * The task's private queue (tsk->pending) is tried first; the shared
       * queue (tsk->signal->shared_pending) is only tried if the private one
       * yields nothing.  The queue entry itself is unlinked and handed to
       * __sigqueue_free() by collect_signal(), so the caller gets only the
       * siginfo copy.
       *
       * Returns the signal number, or 0 if no signal was dequeued.
  433  *
  434  * All callers have to hold the siglock.
       * Note that the siglock is dropped and retaken around
       * do_schedule_next_timer() on the timer path below.
  435  */
  436 int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
  437 {
  438         int signr;
  439
  440         /* We only dequeue private signals from ourselves, we don't let
  441          * signalfd steal them
  442          */
  443         signr = __dequeue_signal(&tsk->pending, mask, info);
  444         if (!signr) {
  445                 signr = __dequeue_signal(&tsk->signal->shared_pending,
  446                                          mask, info);
  447                 /*
  448                  * itimer signal ?
  449                  *
  450                  * itimers are process shared and we restart periodic
  451                  * itimers in the signal delivery path to prevent DoS
  452                  * attacks in the high resolution timer case. This is
  453                  * compliant with the old way of self restarting
  454                  * itimers, as the SIGALRM is a legacy signal and only
  455                  * queued once. Changing the restart behaviour to
  456                  * restart the timer in the signal dequeue path is
  457                  * reducing the timer noise on heavy loaded !highres
  458                  * systems too.
  459                  */
  460                 if (unlikely(signr == SIGALRM)) {
  461                         struct hrtimer *tmr = &tsk->signal->real_timer;
  462
                              /*
                               * Only re-arm a timer that is not already queued
                               * and that has a non-zero interval.
                               */
  463                         if (!hrtimer_is_queued(tmr) &&
  464                             tsk->signal->it_real_incr.tv64 != 0) {
  465                                 hrtimer_forward(tmr, tmr->base->get_time(),
  466                                                 tsk->signal->it_real_incr);
  467                                 hrtimer_restart(tmr);
  468                         }
  469                 }
  470         }
  471
              /*
               * Re-evaluate TIF_SIGPENDING for current now that a signal may
               * have been taken off a queue; this runs even when nothing was
               * dequeued.
               */
  472         recalc_sigpending();
  473         if (!signr)
  474                 return 0;
  475
  476         if (unlikely(sig_kernel_stop(signr))) {
  477                 /*
  478                  * Set a marker that we have dequeued a stop signal.  Our
  479                  * caller might release the siglock and then the pending
  480                  * stop signal it is about to process is no longer in the
  481                  * pending bitmasks, but must still be cleared by a SIGCONT
  482                  * (and overruled by a SIGKILL).  So those cases clear this
  483                  * shared flag after we've set it.  Note that this flag may
  484                  * remain set after the signal we return is ignored or
  485                  * handled.  That doesn't matter because its only purpose
  486                  * is to alert stop-signal processing code when another
  487                  * processor has come along and cleared the flag.
  488                  */
  489                 tsk->signal->flags |= SIGNAL_STOP_DEQUEUED;
  490         }
  491         if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) {
  492                 /*
  493                  * Release the siglock to ensure proper locking order
  494                  * of timer locks outside of siglocks.  Note, we leave
  495                  * irqs disabled here, since the posix-timers code is
  496                  * about to disable them again anyway.
  497                  */
  498                 spin_unlock(&tsk->sighand->siglock);
  499                 do_schedule_next_timer(info);
  500                 spin_lock(&tsk->sighand->siglock);
  501         }
  502         return signr;
  503 }
 504
 505 /*
 506  * Tell a process that it has a new active signal..
 507  *
 508  * NOTE! we rely on the previous spin_lock to
 509  * lock interrupts for us! We can only be called with
 510  * "siglock" held, and the local interrupt must
 511  * have been disabled when that got acquired!
 512  *
 513  * No need to set need_resched since signal event passing
 514  * goes through ->blocked
 515  */
 516 void signal_wake_up(struct task_struct *t, int resume)
 517 {
 518         unsigned int mask;
 519
 520         set_tsk_thread_flag(t, TIF_SIGPENDING);
 521
 522         /*
 523          * For SIGKILL, we want to wake it up in the stopped/traced/killable
 524          * case. We don't check t->state here because there is a race with it
 525          * executing another processor and just now entering stopped state.
 526          * By using wake_up_state, we ensure the process will wake up and
 527          * handle its death signal.
 528          */
 529         mask = TASK_INTERRUPTIBLE;
 530         if (resume)
 531                 mask |= TASK_WAKEKILL;
 532         if (!wake_up_state(t, mask))
 533                 kick_process(t);
 534 }
 535
 536 /*
 537  * Remove signals in mask from the pending set and queue.
 538  * Returns 1 if any signals were found.
 539  *
 540  * All callers must be holding the siglock.
 541  *
 542  * This version takes a sigset mask and looks at all signals,
 543  * not just those in the first mask word.
 544  */
 545 static int rm_from_queue_full(sigset_t *mask, struct sigpending *s)
 546 {
 547         struct sigqueue *q, *n;
 548         sigset_t m;
 549
 550         sigandsets(&m, mask, &s->signal);
 551         if (sigisemptyset(&m))
 552                 return 0;
 553
 554         signandsets(&s->signal, &s->signal, mask);
 555         list_for_each_entry_safe(q, n, &s->list, list) {
 556                 if (sigismember(mask, q->info.si_signo)) {
 557                         list_del_init(&q->list);
 558                         __sigqueue_free(q);
 559                 }
 560         }
 561         return 1;
 562 }
 563 /*
 564  * Remove signals in mask from the pending set and queue.
 565  * Returns 1 if any signals were found.
 566  *
 567  * All callers must be holding the siglock.
 568  */
 569 static int rm_from_queue(unsigned long mask, struct sigpending *s)
 570 {
 571         struct sigqueue *q, *n;
 572
 573         if (!sigtestsetmask(&s->signal, mask))
 574                 return 0;
 575
 576         sigdelsetmask(&s->signal, mask);
 577         list_for_each_entry_safe(q, n, &s->list, list) {
 578                 if (q->info.si_signo < SIGRTMIN &&
 579                     (mask & sigmask(q->info.si_signo))) {
 580                         list_del_init(&q->list);
 581                         __sigqueue_free(q);
 582                 }
 583         }
 584         return 1;
 585 }
 586
 587 /*
 588  * Bad permissions for sending the signal
 589  * - the caller must hold at least the RCU read lock
 590  */
 591 static int check_kill_permission(int sig, struct siginfo *info,
 592                                  struct task_struct *t)
 593 {
 594         const struct cred *cred = current_cred(), *tcred;
 595         struct pid *sid;
 596         int error;
 597
 598         if (!valid_signal(sig))
 599                 return -EINVAL;
 600
 601         if (info != SEND_SIG_NOINFO && (is_si_special(info) || SI_FROMKERNEL(info)))
 602                 return 0;
 603
 604         error = audit_signal_info(sig, t); /* Let audit system see the signal */
 605         if (error)
 606                 return error;
 607
 608         tcred = __task_cred(t);
 609         if ((cred->euid ^ tcred->suid) &&
 610             (cred->euid ^ tcred->uid) &&
 611             (cred->uid  ^ tcred->suid) &&
 612             (cred->uid  ^ tcred->uid) &&
 613             !capable(CAP_KILL)) {
 614                 switch (sig) {
 615                 case SIGCONT:
 616                         sid = task_session(t);
 617                         /*
 618                          * We don't return the error if sid == NULL. The
 619                          * task was unhashed, the caller must notice this.
 620                          */
 621                         if (!sid || sid == task_session(current))
 622                                 break;
 623                 default:
 624                         return -EPERM;
 625                 }
 626         }
 627
 628         return security_task_kill(t, info, sig, 0);
 629 }
 630
  631 /*
  632  * Handle magic process-wide effects of stop/continue signals. Unlike
  633  * the signal actions, these happen immediately at signal-generation
  634  * time regardless of blocking, ignoring, or handling.  This does the
  635  * actual continuing for SIGCONT, but not the actual stopping for stop
  636  * signals. The process stop is done as a signal action for SIG_DFL.
  637  *
       * @sig is the signal being generated, @p the target task and
       * @from_ancestor_ns is passed through to sig_ignored().
       *
       * In short: a stop signal discards any pending SIGCONT, and SIGCONT
       * discards any pending stop signal and wakes every thread of the group.
       * Nothing of this is done once SIGNAL_GROUP_EXIT is set.
       *
  638  * Returns true if the signal should be actually delivered, otherwise
  639  * it should be dropped.
  640  */
  641 static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns)
  642 {
  643         struct signal_struct *signal = p->signal;
  644         struct task_struct *t;
  645
  646         if (unlikely(signal->flags & SIGNAL_GROUP_EXIT)) {
  647                 /*
  648                  * The process is in the middle of dying, nothing to do.
  649                  */
  650         } else if (sig_kernel_stop(sig)) {
  651                 /*
  652                  * This is a stop signal.  Remove SIGCONT from all queues.
  653                  */
  654                 rm_from_queue(sigmask(SIGCONT), &signal->shared_pending);
  655                 t = p;
  656                 do {
  657                         rm_from_queue(sigmask(SIGCONT), &t->pending);
  658                 } while_each_thread(p, t);
  659         } else if (sig == SIGCONT) {
  660                 unsigned int why;
  661                 /*
  662                  * Remove all stop signals from all queues,
  663                  * and wake all threads.
  664                  */
  665                 rm_from_queue(SIG_KERNEL_STOP_MASK, &signal->shared_pending);
  666                 t = p;
  667                 do {
  668                         unsigned int state;
  669                         rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending);
  670                         /*
  671                          * If there is a handler for SIGCONT, we must make
  672                          * sure that no thread returns to user mode before
  673                          * we post the signal, in case it was the only
  674                          * thread eligible to run the signal handler--then
  675                          * it must not do anything between resuming and
  676                          * running the handler.  With the TIF_SIGPENDING
  677                          * flag set, the thread will pause and acquire the
  678                          * siglock that we hold now and until we've queued
  679                          * the pending signal.
  680                          *
  681                          * Wake up the stopped thread _after_ setting
  682                          * TIF_SIGPENDING
  683                          */
  684                         state = __TASK_STOPPED;
  685                         if (sig_user_defined(t, SIGCONT) && !sigismember(&t->blocked, SIGCONT)) {
  686                                 set_tsk_thread_flag(t, TIF_SIGPENDING);
  687                                 state |= TASK_INTERRUPTIBLE;
  688                         }
  689                         wake_up_state(t, state);
  690                 } while_each_thread(p, t);
  691
  692                 /*
  693                  * Notify the parent with CLD_CONTINUED if we were stopped.
  694                  *
  695                  * If we were in the middle of a group stop, we pretend it
  696                  * was already finished, and then continued. Since SIGCHLD
  697                  * doesn't queue we report only CLD_STOPPED, as if the next
  698                  * CLD_CONTINUED was dropped.
  699                  */
  700                 why = 0;
  701                 if (signal->flags & SIGNAL_STOP_STOPPED)
  702                         why |= SIGNAL_CLD_CONTINUED;
  703                 else if (signal->group_stop_count)
  704                         why |= SIGNAL_CLD_STOPPED;
  705
  706                 if (why) {
  707                         /*
  708                          * The first thread which returns from do_signal_stop()
  709                          * will take ->siglock, notice SIGNAL_CLD_MASK, and
  710                          * notify its parent. See get_signal_to_deliver().
  711                          */
                              /*
                               * Plain assignment: every other bit in
                               * signal->flags is discarded here.
                               */
  712                         signal->flags = why | SIGNAL_STOP_CONTINUED;
  713                         signal->group_stop_count = 0;
  714                         signal->group_exit_code = 0;
  715                 } else {
  716                         /*
  717                          * We are not stopped, but there could be a stop
  718                          * signal in the middle of being processed after
  719                          * being removed from the queue.  Clear that too.
  720                          */
  721                         signal->flags &= ~SIGNAL_STOP_DEQUEUED;
  722                 }
  723         }
  724
              /*
               * The side effects above happen unconditionally; whether the
               * signal itself gets queued depends only on sig_ignored().
               */
  725         return !sig_ignored(p, sig, from_ancestor_ns);
  726 }
 727
 728 /*
 729  * Test if P wants to take SIG.  After we've checked all threads with this,
 730  * it's equivalent to finding no threads not blocking SIG.  Any threads not
 731  * blocking SIG were ruled out because they are not running and already
 732  * have pending signals.  Such threads will dequeue from the shared queue
 733  * as soon as they're available, so putting the signal on the shared queue
 734  * will be equivalent to sending it to one such thread.
 735  */
 736 static inline int wants_signal(int sig, struct task_struct *p)
 737 {
 738         if (sigismember(&p->blocked, sig))
 739                 return 0;
 740         if (p->flags & PF_EXITING)
 741                 return 0;
 742         if (sig == SIGKILL)
 743                 return 1;
 744         if (task_is_stopped_or_traced(p))
 745                 return 0;
 746         return task_curr(p) || !signal_pending(p);
 747 }
  748
      /*
       * Finish generating @sig for @p: choose a thread to wake up for it and,
       * if the signal is fatal and not a coredump signal, start taking the
       * whole thread group down right here.
       *
       * @group non-zero allows another thread of the group to be chosen when
       * @p itself does not want the signal; with @group zero only @p is
       * considered.
       *
       * NOTE(review): the callers are not visible in this chunk; presumably
       * this runs with the siglock held like the rest of the send path.
       */
  749 static void complete_signal(int sig, struct task_struct *p, int group)
  750 {
  751         struct signal_struct *signal = p->signal;
  752         struct task_struct *t;
  753
  754         /*
  755          * Now find a thread we can wake up to take the signal off the queue.
  756          *
  757          * If the main thread wants the signal, it gets first crack.
  758          * Probably the least surprising to the average bear.
  759          */
  760         if (wants_signal(sig, p))
  761                 t = p;
  762         else if (!group || thread_group_empty(p))
  763                 /*
  764                  * There is just one thread and it does not need to be woken.
  765                  * It will dequeue unblocked signals before it runs again.
                       * (This branch is also taken when @group is zero.)
  766                  */
  767                 return;
  768         else {
  769                 /*
  770                  * Otherwise try to find a suitable thread.
                       * The search starts at signal->curr_target and walks the
                       * group once; the thread found becomes the new
                       * curr_target.
  771                  */
  772                 t = signal->curr_target;
  773                 while (!wants_signal(sig, t)) {
  774                         t = next_thread(t);
  775                         if (t == signal->curr_target)
  776                                 /*
  777                                  * No thread needs to be woken.
  778                                  * Any eligible threads will see
  779                                  * the signal in the queue soon.
  780                                  */
  781                                 return;
  782                 }
  783                 signal->curr_target = t;
  784         }
  785
  786         /*
  787          * Found a killable thread.  If the signal will be fatal,
  788          * then start taking the whole group down immediately.
  789          */
  790         if (sig_fatal(p, sig) &&
  791             !(signal->flags & (SIGNAL_UNKILLABLE | SIGNAL_GROUP_EXIT)) &&
  792             !sigismember(&t->real_blocked, sig) &&
  793             (sig == SIGKILL ||
  794              !tracehook_consider_fatal_signal(t, sig))) {
  795                 /*
  796                  * This signal will be fatal to the whole group.
  797                  */
  798                 if (!sig_kernel_coredump(sig)) {
  799                         /*
  800                          * Start a group exit and wake everybody up.
  801                          * This way we don't have other threads
  802                          * running and doing things after a slower
  803                          * thread has the fatal signal pending.
  804                          */
                              /*
                               * Plain assignment: all other signal->flags
                               * bits are dropped.
                               */
  805                         signal->flags = SIGNAL_GROUP_EXIT;
  806                         signal->group_exit_code = sig;
  807                         signal->group_stop_count = 0;
  808                         t = p;
  809                         do {
  810                                 sigaddset(&t->pending.signal, SIGKILL);
  811                                 signal_wake_up(t, 1);
  812                         } while_each_thread(p, t);
  813                         return;
  814                 }
                      /*
                       * Fatal coredump signals fall through to the ordinary
                       * wakeup of the chosen thread below.
                       */
  815         }
  816
  817         /*
  818          * The signal is already in the shared-pending queue.
  819          * Tell the chosen thread to wake up and dequeue it.
  820          */
  821         signal_wake_up(t, sig == SIGKILL);
  822         return;
  823 }
 824
 825 static inline int legacy_queue(struct sigpending *signals, int sig)
 826 {
 827         return (sig < SIGRTMIN) && sigismember(&signals->signal, sig);
 828 }
 829
/*
 * Mark @sig pending for @t and, when possible, queue a siginfo for it.
 *
 * @sig:              signal number to send
 * @info:             siginfo to queue, or one of the special values
 *                    SEND_SIG_NOINFO / SEND_SIG_PRIV / SEND_SIG_FORCED
 * @t:                target task; t->sighand->siglock must be held
 * @group:            nonzero to use the shared (thread-group) pending set,
 *                    zero to use @t's private pending set
 * @from_ancestor_ns: passed on to prepare_signal(); when set, si_pid of a
 *                    copied siginfo is reset to 0
 *
 * Returns 0 when the signal was made pending, when prepare_signal()
 * rejected it, or when a non-realtime signal was already pending.
 * Returns -EAGAIN only when no sigqueue could be allocated for a realtime
 * signal whose (non-special) siginfo has si_code != SI_USER.
 */
static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
                        int group, int from_ancestor_ns)
{
        struct sigpending *pending;
        struct sigqueue *q;
        int override_rlimit;

        trace_sched_signal_send(sig, t);

        assert_spin_locked(&t->sighand->siglock);

        if (!prepare_signal(sig, t, from_ancestor_ns))
                return 0;

        pending = group ? &t->signal->shared_pending : &t->pending;
        /*
         * Short-circuit ignored signals and support queuing
         * exactly one non-rt signal, so that we can get more
         * detailed information about the cause of the signal.
         */
        if (legacy_queue(pending, sig))
                return 0;
        /*
         * fast-pathed signals for kernel-internal things like SIGSTOP
         * or SIGKILL.
         */
        if (info == SEND_SIG_FORCED)
                goto out_set;

        /* Real-time signals must be queued if sent by sigqueue, or
           some other real-time mechanism.  It is implementation
           defined whether kill() does so.  We attempt to do so, on
           the principle of least surprise, but since kill is not
           allowed to fail with EAGAIN when low on memory we just
           make sure at least one signal gets delivered and don't
           pass on the info struct.  */

        /*
         * override_rlimit is only ever set for non-rt signals, and then
         * only for special @info values or si_code >= 0.
         */
        if (sig < SIGRTMIN)
                override_rlimit = (is_si_special(info) || info->si_code >= 0);
        else
                override_rlimit = 0;

        q = __sigqueue_alloc(t, GFP_ATOMIC | __GFP_NOTRACK_FALSE_POSITIVE,
                override_rlimit);
        if (q) {
                list_add_tail(&q->list, &pending->list);
                /* Fill in the queued siginfo according to the kind of @info. */
                switch ((unsigned long) info) {
                case (unsigned long) SEND_SIG_NOINFO:
                        /* Synthesize an SI_USER siginfo describing current. */
                        q->info.si_signo = sig;
                        q->info.si_errno = 0;
                        q->info.si_code = SI_USER;
                        q->info.si_pid = task_tgid_nr_ns(current,
                                                        task_active_pid_ns(t));
                        q->info.si_uid = current_uid();
                        break;
                case (unsigned long) SEND_SIG_PRIV:
                        /* Synthesize an SI_KERNEL siginfo with pid/uid 0. */
                        q->info.si_signo = sig;
                        q->info.si_errno = 0;
                        q->info.si_code = SI_KERNEL;
                        q->info.si_pid = 0;
                        q->info.si_uid = 0;
                        break;
                default:
                        /* A real siginfo was supplied: copy it. */
                        copy_siginfo(&q->info, info);
                        if (from_ancestor_ns)
                                q->info.si_pid = 0;
                        break;
                }
        } else if (!is_si_special(info)) {
                if (sig >= SIGRTMIN && info->si_code != SI_USER)
                /*
                 * Queue overflow, abort.  We may abort if the signal was rt
                 * and sent by user using something other than kill().
                 */
                        return -EAGAIN;
        }

        /*
         * Reached with or without a queued siginfo: the pending bit is set
         * either way, so the signal is still delivered.
         */
out_set:
        signalfd_notify(t, sig);
        sigaddset(&pending->signal, sig);
        complete_signal(sig, t, group);
        return 0;
}
 913
 914 static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
 915                         int group)
 916 {
 917         int from_ancestor_ns = 0;
 918
 919 #ifdef CONFIG_PID_NS
 920         if (!is_si_special(info) && SI_FROMUSER(info) &&
 921                         task_pid_nr_ns(current, task_active_pid_ns(t)) <= 0)
 922                 from_ancestor_ns = 1;
 923 #endif
 924
 925         return __send_signal(sig, info, t, group, from_ancestor_ns);
 926 }
 927
/* Value parsed from the "print-fatal-signals=" boot option (see setup_print_fatal_signals()). */
int print_fatal_signals;
 929
 930 static void print_fatal_signal(struct pt_regs *regs, int signr)
 931 {
 932         printk("%s/%d: potentially unexpected fatal signal %d.\n",
 933                 current->comm, task_pid_nr(current), signr);
 934
 935 #if defined(__i386__) && !defined(__arch_um__)
 936         printk("code at %08lx: ", regs->ip);
 937         {
 938                 int i;
 939                 for (i = 0; i < 16; i++) {
 940                         unsigned char insn;
 941
 942                         if (get_user(insn, (unsigned char *)(regs->ip + i)))
 943                                 break;
 944                         printk("%02x ", insn);
 945                 }
 946         }
 947 #endif
 948         printk("\n");
 949         preempt_disable();
 950         show_regs(regs);
 951         preempt_enable();
 952 }
 953
 954 static int __init setup_print_fatal_signals(char *str)
 955 {
 956         get_option (&str, &print_fatal_signals);
 957
 958         return 1;
 959 }
 960
 961 __setup("print-fatal-signals=", setup_print_fatal_signals);
 962
 963 int
 964 __group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
 965 {
 966         return send_signal(sig, info, p, 1);
 967 }
 968
 969 static int
 970 specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t)
 971 {
 972         return send_signal(sig, info, t, 0);
 973 }
 974
 975 int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p,
 976                         bool group)
 977 {
 978         unsigned long flags;
 979         int ret = -ESRCH;
 980
 981         if (lock_task_sighand(p, &flags)) {
 982                 ret = send_signal(sig, info, p, group);
 983                 unlock_task_sighand(p, &flags);
 984         }
 985
 986         return ret;
 987 }
 988
 989 /*
 990  * Force a signal that the process can't ignore: if necessary
 991  * we unblock the signal and change any SIG_IGN to SIG_DFL.
 992  *
 993  * Note: If we unblock the signal, we always reset it to SIG_DFL,
 994  * since we do not want to have a signal handler that was blocked
 995  * be invoked when user space had explicitly blocked it.
 996  *
 997  * We don't want to have recursive SIGSEGV's etc, for example,
 998  * that is why we also clear SIGNAL_UNKILLABLE.
 999  */
1000 int
1001 force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
1002 {
1003         unsigned long int flags;
1004         int ret, blocked, ignored;
1005         struct k_sigaction *action;
1006
1007         spin_lock_irqsave(&t->sighand->siglock, flags);
1008         action = &t->sighand->action[sig-1];
1009         ignored = action->sa.sa_handler == SIG_IGN;
1010         blocked = sigismember(&t->blocked, sig);
1011         if (blocked || ignored) {
1012                 action->sa.sa_handler = SIG_DFL;
1013                 if (blocked) {
1014                         sigdelset(&t->blocked, sig);
1015                         recalc_sigpending_and_wake(t);
1016                 }
1017         }
1018         if (action->sa.sa_handler == SIG_DFL)
1019                 t->signal->flags &= ~SIGNAL_UNKILLABLE;
1020         ret = specific_send_sig_info(sig, info, t);
1021         spin_unlock_irqrestore(&t->sighand->siglock, flags);
1022
1023         return ret;
1024 }
1025
1026 void
1027 force_sig_specific(int sig, struct task_struct *t)
1028 {
1029         force_sig_info(sig, SEND_SIG_FORCED, t);
1030 }
1031
1032 /*
1033  * Nuke all other threads in the group.
1034  */
1035 void zap_other_threads(struct task_struct *p)
1036 {
1037         struct task_struct *t;
1038
1039         p->signal->group_stop_count = 0;
1040
1041         for (t = next_thread(p); t != p; t = next_thread(t)) {
1042                 /*
1043                  * Don't bother with already dead threads
1044                  */
1045                 if (t->exit_state)
1046                         continue;
1047
1048                 /* SIGKILL will be handled before any pending SIGSTOP */
1049                 sigaddset(&t->pending.signal, SIGKILL);
1050                 signal_wake_up(t, 1);
1051         }
1052 }
1053
/*
 * Lock the sighand of @tsk.
 *
 * Returns the locked sighand_struct, with the interrupt state saved in
 * *@flags, or NULL if @tsk->sighand was NULL (no lock is held then).
 */
struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long *flags)
{
        struct sighand_struct *sighand;

        rcu_read_lock();
        for (;;) {
                sighand = rcu_dereference(tsk->sighand);
                if (unlikely(sighand == NULL))
                        break;

                spin_lock_irqsave(&sighand->siglock, *flags);
                /*
                 * Re-check under the lock: if tsk->sighand no longer matches
                 * the one we locked, drop the lock and try again.
                 */
                if (likely(sighand == tsk->sighand))
                        break;
                spin_unlock_irqrestore(&sighand->siglock, *flags);
        }
        rcu_read_unlock();

        return sighand;
}
1073
1074 /*
1075  * send signal info to all the members of a group
1076  * - the caller must hold the RCU read lock at least
1077  */
1078 int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
1079 {
1080         int ret = check_kill_permission(sig, info, p);
1081
1082         if (!ret && sig)
1083                 ret = do_send_sig_info(sig, info, p, true);
1084
1085         return ret;
1086 }
1087
1088 /*
1089  * __kill_pgrp_info() sends a signal to a process group: this is what the tty
1090  * control characters do (^C, ^Z etc)
1091  * - the caller must hold at least a readlock on tasklist_lock
1092  */
1093 int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp)
1094 {
1095         struct task_struct *p = NULL;
1096         int retval, success;
1097
1098         success = 0;
1099         retval = -ESRCH;
1100         do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
1101                 int err = group_send_sig_info(sig, info, p);
1102                 success |= !err;
1103                 retval = err;
1104         } while_each_pid_task(pgrp, PIDTYPE_PGID, p);
1105         return success ? 0 : retval;
1106 }
1107
1108 int kill_pid_info(int sig, struct siginfo *info, struct pid *pid)
1109 {
1110         int error = -ESRCH;
1111         struct task_struct *p;
1112
1113         rcu_read_lock();
1114 retry:
1115         p = pid_task(pid, PIDTYPE_PID);
1116         if (p) {
1117                 error = group_send_sig_info(sig, info, p);
1118                 if (unlikely(error == -ESRCH))
1119                         /*
1120                          * The task was unhashed in between, try again.
1121                          * If it is dead, pid_task() will return NULL,
1122                          * if we race with de_thread() it will find the
1123                          * new leader.
1124                          */
1125                         goto retry;
1126         }
1127         rcu_read_unlock();
1128
1129         return error;
1130 }
1131
1132 int
1133 kill_proc_info(int sig, struct siginfo *info, pid_t pid)
1134 {
1135         int error;
1136         rcu_read_lock();
1137         error = kill_pid_info(sig, info, find_vpid(pid));
1138         rcu_read_unlock();
1139         return error;
1140 }
1141
1142 /* like kill_pid_info(), but doesn't use uid/euid of "current" */
1143 int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid,
1144                       uid_t uid, uid_t euid, u32 secid)
1145 {
1146         int ret = -EINVAL;
1147         struct task_struct *p;
1148         const struct cred *pcred;
1149
1150         if (!valid_signal(sig))
1151                 return ret;
1152
1153         read_lock(&tasklist_lock);
1154         p = pid_task(pid, PIDTYPE_PID);
1155         if (!p) {
1156                 ret = -ESRCH;
1157                 goto out_unlock;
1158         }
1159         pcred = __task_cred(p);
1160         if ((info == SEND_SIG_NOINFO ||
1161              (!is_si_special(info) && SI_FROMUSER(info))) &&
1162             euid != pcred->suid && euid != pcred->uid &&
1163             uid  != pcred->suid && uid  != pcred->uid) {
1164                 ret = -EPERM;
1165                 goto out_unlock;
1166         }
1167         ret = security_task_kill(p, info, sig, secid);
1168         if (ret)
1169                 goto out_unlock;
1170         if (sig && p->sighand) {
1171                 unsigned long flags;
1172                 spin_lock_irqsave(&p->sighand->siglock, flags);
1173                 ret = __send_signal(sig, info, p, 1, 0);
1174                 spin_unlock_irqrestore(&p->sighand->siglock, flags);
1175         }
1176 out_unlock:
1177         read_unlock(&tasklist_lock);
1178         return ret;
1179 }
1180 EXPORT_SYMBOL_GPL(kill_pid_info_as_uid);
1181
1182 /*
1183  * kill_something_info() interprets pid in interesting ways just like kill(2).
1184  *
1185  * POSIX specifies that kill(-1,sig) is unspecified, but what we have
1186  * is probably wrong.  Should make it like BSD or SYSV.
1187  */
1188
1189 static int kill_something_info(int sig, struct siginfo *info, pid_t pid)
1190 {
1191         int ret;
1192
1193         if (pid > 0) {
1194                 rcu_read_lock();
1195                 ret = kill_pid_info(sig, info, find_vpid(pid));
1196                 rcu_read_unlock();
1197                 return ret;
1198         }
1199
1200         read_lock(&tasklist_lock);
1201         if (pid != -1) {
1202                 ret = __kill_pgrp_info(sig, info,
1203                                 pid ? find_vpid(-pid) : task_pgrp(current));
1204         } else {
1205                 int retval = 0, count = 0;
1206                 struct task_struct * p;
1207
1208                 for_each_process(p) {
1209                         if (task_pid_vnr(p) > 1 &&
1210                                         !same_thread_group(p, current)) {
1211                                 int err = group_send_sig_info(sig, info, p);
1212                                 ++count;
1213                                 if (err != -EPERM)
1214                                         retval = err;
1215                         }
1216                 }
1217                 ret = count ? retval : -ESRCH;
1218         }
1219         read_unlock(&tasklist_lock);
1220
1221         return ret;
1222 }
1223
1224 /*
1225  * These are for backward compatibility with the rest of the kernel source.
1226  */
1227
1228 int
1229 send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
1230 {
1231         /*
1232          * Make sure legacy kernel users don't send in bad values
1233          * (normal paths check this in check_kill_permission).
1234          */
1235         if (!valid_signal(sig))
1236                 return -EINVAL;
1237
1238         return do_send_sig_info(sig, info, p, false);
1239 }
1240
/* Map a "priv" flag to a pseudo-siginfo: SEND_SIG_PRIV if set, else SEND_SIG_NOINFO. */
#define __si_special(priv) \
        ((priv) ? SEND_SIG_PRIV : SEND_SIG_NOINFO)
1243
/*
 * Send @sig to @p without a caller-supplied siginfo; @priv selects
 * SEND_SIG_PRIV (nonzero) or SEND_SIG_NOINFO (zero).
 */
int send_sig(int sig, struct task_struct *p, int priv)
{
        struct siginfo *info = __si_special(priv);

        return send_sig_info(sig, info, p);
}
1249
1250 void
1251 force_sig(int sig, struct task_struct *p)
1252 {
1253         force_sig_info(sig, SEND_SIG_PRIV, p);
1254 }
1255
1256 /*
1257  * When things go south during signal handling, we
1258  * will force a SIGSEGV. And if the signal that caused
1259  * the problem was already a SIGSEGV, we'll want to
1260  * make sure we don't even try to deliver the signal..
1261  */
1262 int
1263 force_sigsegv(int sig, struct task_struct *p)
1264 {
1265         if (sig == SIGSEGV) {
1266                 unsigned long flags;
1267                 spin_lock_irqsave(&p->sighand->siglock, flags);
1268                 p->sighand->action[sig - 1].sa.sa_handler = SIG_DFL;
1269                 spin_unlock_irqrestore(&p->sighand->siglock, flags);
1270         }
1271         force_sig(SIGSEGV, p);
1272         return 0;
1273 }
1274
1275 int kill_pgrp(struct pid *pid, int sig, int priv)
1276 {
1277         int ret;
1278
1279         read_lock(&tasklist_lock);
1280         ret = __kill_pgrp_info(sig, __si_special(priv), pid);
1281         read_unlock(&tasklist_lock);
1282
1283         return ret;
1284 }
1285 EXPORT_SYMBOL(kill_pgrp);
1286
/*
 * Signal the task identified by @pid with @sig.  @priv selects
 * SEND_SIG_PRIV (nonzero) or SEND_SIG_NOINFO (zero).
 * Returns the result of kill_pid_info().
 */
int kill_pid(struct pid *pid, int sig, int priv)
{
        struct siginfo *info = __si_special(priv);

        return kill_pid_info(sig, info, pid);
}
EXPORT_SYMBOL(kill_pid);
1292
1293 /*
1294  * These functions support sending signals using preallocated sigqueue
1295  * structures.  This is needed "because realtime applications cannot
1296  * afford to lose notifications of asynchronous events, like timer
1297  * expirations or I/O completions".  In the case of Posix Timers
1298  * we allocate the sigqueue structure from the timer_create.  If this
1299  * allocation fails we are able to report the failure to the application
1300  * with an EAGAIN error.
1301  */
1302
1303 struct sigqueue *sigqueue_alloc(void)
1304 {
1305         struct sigqueue *q;
1306
1307         if ((q = __sigqueue_alloc(current, GFP_KERNEL, 0)))
1308                 q->flags |= SIGQUEUE_PREALLOC;
1309         return(q);
1310 }
1311
1312 void sigqueue_free(struct sigqueue *q)
1313 {
1314         unsigned long flags;
1315         spinlock_t *lock = &current->sighand->siglock;
1316
1317         BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
1318         /*
1319          * We must hold ->siglock while testing q->list
1320          * to serialize with collect_signal() or with
1321          * __exit_signal()->flush_sigqueue().
1322          */
1323         spin_lock_irqsave(lock, flags);
1324         q->flags &= ~SIGQUEUE_PREALLOC;
1325         /*
1326          * If it is queued it will be freed when dequeued,
1327          * like the "regular" sigqueue.
1328          */
1329         if (!list_empty(&q->list))
1330                 q = NULL;
1331         spin_unlock_irqrestore(lock, flags);
1332
1333         if (q)
1334                 __sigqueue_free(q);
1335 }
1336
1337 int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
1338 {
1339         int sig = q->info.si_signo;
1340         struct sigpending *pending;
1341         unsigned long flags;
1342         int ret;
1343
1344         BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
1345
1346         ret = -1;
1347         if (!likely(lock_task_sighand(t, &flags)))
1348                 goto ret;
1349
1350         ret = 1; /* the signal is ignored */
1351         if (!prepare_signal(sig, t, 0))
1352                 goto out;
1353
1354         ret = 0;
1355         if (unlikely(!list_empty(&q->list))) {
1356                 /*
1357                  * If an SI_TIMER entry is already queue just increment
1358                  * the overrun count.
1359                  */
1360                 BUG_ON(q->info.si_code != SI_TIMER);
1361                 q->info.si_overrun++;
1362                 goto out;
1363         }
1364         q->info.si_overrun = 0;
1365
1366         signalfd_notify(t, sig);
1367         pending = group ? &t->signal->shared_pending : &t->pending;
1368         list_add_tail(&q->list, &pending->list);
1369         sigaddset(&pending->signal, sig);
1370         complete_signal(sig, t, group);
1371 out:
1372         unlock_task_sighand(t, &flags);
1373 ret:
1374         return ret;
1375 }
1376
/*
 * Let a parent know about the death of a child.
 * For a stopped/continued status change, use do_notify_parent_cldstop instead.
 *
 * @tsk: the exiting task
 * @sig: signal to send to tsk->parent; must not be -1
 *
 * Returns -1 if our parent ignored us and so we've switched to
 * self-reaping, or else @sig.
 */
int do_notify_parent(struct task_struct *tsk, int sig)
{
        struct siginfo info;
        unsigned long flags;
        struct sighand_struct *psig;
        int ret = sig;

        BUG_ON(sig == -1);

        /* do_notify_parent_cldstop should have been called instead.  */
        BUG_ON(task_is_stopped_or_traced(tsk));

        /*
         * Unless it is ptraced, @tsk must be a group leader with an
         * empty thread group.
         */
        BUG_ON(!task_ptrace(tsk) &&
               (tsk->group_leader != tsk || !thread_group_empty(tsk)));

        info.si_signo = sig;
        info.si_errno = 0;
        /*
         * We are under tasklist_lock here, so our parent is tied to
         * us and cannot exit and release its namespace.
         *
         * The only thing it can do is switch its nsproxy with
         * sys_unshare, but unsharing pid namespaces is not allowed, so
         * we'll always see the relevant namespace.
         *
         * write_lock() currently calls preempt_disable(), which is the
         * same as rcu_read_lock(), but according to Oleg it is not
         * correct to rely on this.
         */
        rcu_read_lock();
        info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns);
        info.si_uid = __task_cred(tsk)->uid;
        rcu_read_unlock();

        /* Report the task's own times added to those kept in tsk->signal. */
        info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime,
                                tsk->signal->utime));
        info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime,
                                tsk->signal->stime));

        /*
         * Decode exit_code: bit 0x80 set means CLD_DUMPED, a nonzero low
         * 7 bits means CLD_KILLED with those bits as the status, and
         * otherwise CLD_EXITED with the status taken from the bits above
         * the low byte.
         */
        info.si_status = tsk->exit_code & 0x7f;
        if (tsk->exit_code & 0x80)
                info.si_code = CLD_DUMPED;
        else if (tsk->exit_code & 0x7f)
                info.si_code = CLD_KILLED;
        else {
                info.si_code = CLD_EXITED;
                info.si_status = tsk->exit_code >> 8;
        }

        psig = tsk->parent->sighand;
        spin_lock_irqsave(&psig->siglock, flags);
        if (!task_ptrace(tsk) && sig == SIGCHLD &&
            (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN ||
             (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT))) {
                /*
                 * We are exiting and our parent doesn't care.  POSIX.1
                 * defines special semantics for setting SIGCHLD to SIG_IGN
                 * or setting the SA_NOCLDWAIT flag: we should be reaped
                 * automatically and not left for our parent's wait4 call.
                 * Rather than having the parent do it as a magic kind of
                 * signal handler, we just set this to tell do_exit that we
                 * can be cleaned up without becoming a zombie.  Note that
                 * we still call __wake_up_parent in this case, because a
                 * blocked sys_wait4 might now return -ECHILD.
                 *
                 * Whether we send SIGCHLD or not for SA_NOCLDWAIT
                 * is implementation-defined: we do (if you don't want
                 * it, just use SIG_IGN instead).
                 */
                ret = tsk->exit_signal = -1;
                if (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN)
                        sig = -1;
        }
        /* sig is -1 here when the parent ignores SIGCHLD: nothing is sent. */
        if (valid_signal(sig) && sig > 0)
                __group_send_sig_info(sig, &info, tsk->parent);
        __wake_up_parent(tsk, tsk->parent);
        spin_unlock_irqrestore(&psig->siglock, flags);

        return ret;
}
1464
1465 static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
1466 {
1467         struct siginfo info;
1468         unsigned long flags;
1469         struct task_struct *parent;
1470         struct sighand_struct *sighand;
1471
1472         if (task_ptrace(tsk))
1473                 parent = tsk->parent;
1474         else {
1475                 tsk = tsk->group_leader;
1476                 parent = tsk->real_parent;
1477         }
1478
1479         info.si_signo = SIGCHLD;
1480         info.si_errno = 0;
1481         /*
1482          * see comment in do_notify_parent() abot the following 3 lines
1483          */
1484         rcu_read_lock();
1485         info.si_pid = task_pid_nr_ns(tsk, parent->nsproxy->pid_ns);
1486         info.si_uid = __task_cred(tsk)->uid;
1487         rcu_read_unlock();
1488
1489         info.si_utime = cputime_to_clock_t(tsk->utime);
1490         info.si_stime = cputime_to_clock_t(tsk->stime);
1491
1492         info.si_code = why;
1493         switch (why) {
1494         case CLD_CONTINUED:
1495                 info.si_status = SIGCONT;
1496                 break;
1497         case CLD_STOPPED:
1498                 info.si_status = tsk->signal->group_exit_code & 0x7f;
1499                 break;
1500         case CLD_TRAPPED:
1501                 info.si_status = tsk->exit_code & 0x7f;
1502                 break;
1503         default:
1504                 BUG();
1505         }
1506
1507         sighand = parent->sighand;
1508         spin_lock_irqsave(&sighand->siglock, flags);
1509         if (sighand->action[SIGCHLD-1].sa.sa_handler != SIG_IGN &&
1510             !(sighand->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP))
1511                 __group_send_sig_info(SIGCHLD, &info, parent);
1512         /*
1513          * Even if SIGCHLD is not generated, we must wake up wait4 calls.
1514          */
1515         __wake_up_parent(tsk, parent);
1516         spin_unlock_irqrestore(&sighand->siglock, flags);
1517 }
1518
1519 static inline int may_ptrace_stop(void)
1520 {
1521         if (!likely(task_ptrace(current)))
1522                 return 0;
1523         /*
1524          * Are we in the middle of do_coredump?
1525          * If so and our tracer is also part of the coredump stopping
1526          * is a deadlock situation, and pointless because our tracer
1527          * is dead so don't allow us to stop.
1528          * If SIGKILL was already sent before the caller unlocked
1529          * ->siglock we must see ->core_state != NULL. Otherwise it
1530          * is safe to enter schedule().
1531          */
1532         if (unlikely(current->mm->core_state) &&
1533             unlikely(current->mm == current->parent->mm))
1534                 return 0;
1535
1536         return 1;
1537 }
1538
1539 /*
1540  * Return nonzero if there is a SIGKILL that should be waking us up.
1541  * Called with the siglock held.
1542  */
1543 static int sigkill_pending(struct task_struct *tsk)
1544 {
1545         return  sigismember(&tsk->pending.signal, SIGKILL) ||
1546                 sigismember(&tsk->signal->shared_pending.signal, SIGKILL);
1547 }
1548
/*
 * This must be called with current->sighand->siglock held.
 *
 * This should be the path for all ptrace stops.
 * We always set current->last_siginfo while stopped here.
 * That makes it a way to test a stopped process for
 * being ptrace-stopped vs being job-control-stopped.
 *
 * If we actually decide not to stop at all because the tracer
 * is gone, we keep current->exit_code unless clear_code.
 *
 * @exit_code:  value stored in current->exit_code for the stop
 * @clear_code: if nonzero, current->exit_code is reset to 0 when no stop
 *              takes place
 * @info:       siginfo published through current->last_siginfo while
 *              stopped
 *
 * The siglock is dropped and re-taken inside; it is held again on return.
 */
static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
{
        if (arch_ptrace_stop_needed(exit_code, info)) {
                /*
                 * The arch code has something special to do before a
                 * ptrace stop.  This is allowed to block, e.g. for faults
                 * on user stack pages.  We can't keep the siglock while
                 * calling arch_ptrace_stop, so we must release it now.
                 * To preserve proper semantics, we must do this before
                 * any signal bookkeeping like checking group_stop_count.
                 * Meanwhile, a SIGKILL could come in before we retake the
                 * siglock.  That must prevent us from sleeping in TASK_TRACED.
                 * So after regaining the lock, we must check for SIGKILL.
                 */
                spin_unlock_irq(&current->sighand->siglock);
                arch_ptrace_stop(exit_code, info);
                spin_lock_irq(&current->sighand->siglock);
                if (sigkill_pending(current))
                        return;
        }

        /*
         * If there is a group stop in progress,
         * we must participate in the bookkeeping.
         */
        if (current->signal->group_stop_count > 0)
                --current->signal->group_stop_count;

        current->last_siginfo = info;
        current->exit_code = exit_code;

        /* Let the debugger run.  */
        __set_current_state(TASK_TRACED);
        spin_unlock_irq(&current->sighand->siglock);
        read_lock(&tasklist_lock);
        if (may_ptrace_stop()) {
                do_notify_parent_cldstop(current, CLD_TRAPPED);
                /*
                 * Don't want to allow preemption here, because
                 * sys_ptrace() needs this task to be inactive.
                 *
                 * XXX: implement read_unlock_no_resched().
                 */
                preempt_disable();
                read_unlock(&tasklist_lock);
                preempt_enable_no_resched();
                schedule();
        } else {
                /*
                 * By the time we got the lock, our tracer went away.
                 * Don't drop the lock yet, another tracer may come.
                 */
                __set_current_state(TASK_RUNNING);
                if (clear_code)
                        current->exit_code = 0;
                read_unlock(&tasklist_lock);
        }

        /*
         * While in TASK_TRACED, we were considered "frozen enough".
         * Now that we woke up, it's crucial if we're supposed to be
         * frozen that we freeze now before running anything substantial.
         */
        try_to_freeze();

        /*
         * We are back.  Now reacquire the siglock before touching
         * last_siginfo, so that we are sure to have synchronized with
         * any signal-sending on another CPU that wants to examine it.
         */
        spin_lock_irq(&current->sighand->siglock);
        current->last_siginfo = NULL;

        /*
         * Queued signals ignored us while we were stopped for tracing.
         * So check for any that we should take before resuming user mode.
         * This sets TIF_SIGPENDING, but never clears it.
         */
        recalc_sigpending_tsk(current);
}
1640
1641 void ptrace_notify(int exit_code)
1642 {
1643         siginfo_t info;
1644
1645         BUG_ON((exit_code & (0x7f | ~0xffff)) != SIGTRAP);
1646
1647         memset(&info, 0, sizeof info);
1648         info.si_signo = SIGTRAP;
1649         info.si_code = exit_code;
1650         info.si_pid = task_pid_vnr(current);
1651         info.si_uid = current_uid();
1652
1653         /* Let the debugger run.  */
1654         spin_lock_irq(&current->sighand->siglock);
1655         ptrace_stop(exit_code, 1, &info);
1656         spin_unlock_irq(&current->sighand->siglock);
1657 }
1658
1659 /*
1660  * This performs the stopping for SIGSTOP and other stop signals.
1661  * We have to stop all threads in the thread group.
1662  * Returns nonzero if we've actually stopped and released the siglock.
1663  * Returns zero if we didn't stop and still hold the siglock.
1664  */
1665 static int do_signal_stop(int signr)
1666 {
1667         struct signal_struct *sig = current->signal;
1668         int notify;
1669
1670         if (!sig->group_stop_count) {
1671                 struct task_struct *t;
1672
1673                 if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) ||
1674                     unlikely(signal_group_exit(sig)))
1675                         return 0;
1676                 /*
1677                  * There is no group stop already in progress.
1678                  * We must initiate one now.
1679                  */
1680                 sig->group_exit_code = signr;
1681
1682                 sig->group_stop_count = 1;
1683                 for (t = next_thread(current); t != current; t = next_thread(t))
1684                         /*
1685                          * Setting state to TASK_STOPPED for a group
1686                          * stop is always done with the siglock held,
1687                          * so this check has no races.
1688                          */
1689                         if (!(t->flags & PF_EXITING) &&
1690                             !task_is_stopped_or_traced(t)) {
1691                                 sig->group_stop_count++;
1692                                 signal_wake_up(t, 0);
1693                         }
1694         }
1695         /*
1696          * If there are no other threads in the group, or if there is
1697          * a group stop in progress and we are the last to stop, report
1698          * to the parent.  When ptraced, every thread reports itself.
1699          */
1700         notify = sig->group_stop_count == 1 ? CLD_STOPPED : 0;
1701         notify = tracehook_notify_jctl(notify, CLD_STOPPED);
1702         /*
1703          * tracehook_notify_jctl() can drop and reacquire siglock, so
1704          * we keep ->group_stop_count != 0 before the call. If SIGCONT
1705          * or SIGKILL comes in between ->group_stop_count == 0.
1706          */
1707         if (sig->group_stop_count) {
1708                 if (!--sig->group_stop_count)
1709                         sig->flags = SIGNAL_STOP_STOPPED;
1710                 current->exit_code = sig->group_exit_code;
1711                 __set_current_state(TASK_STOPPED);
1712         }
1713         spin_unlock_irq(&current->sighand->siglock);
1714
1715         if (notify) {
1716                 read_lock(&tasklist_lock);
1717                 do_notify_parent_cldstop(current, notify);
1718                 read_unlock(&tasklist_lock);
1719         }
1720
1721         /* Now we don't run again until woken by SIGCONT or SIGKILL */
1722         do {
1723                 schedule();
1724         } while (try_to_freeze());
1725
1726         tracehook_finish_jctl();
1727         current->exit_code = 0;
1728
1729         return 1;
1730 }
1731
1732 static int ptrace_signal(int signr, siginfo_t *info,
1733                          struct pt_regs *regs, void *cookie)
1734 {
1735         if (!task_ptrace(current))
1736                 return signr;
1737
1738         ptrace_signal_deliver(regs, cookie);
1739
1740         /* Let the debugger run.  */
1741         ptrace_stop(signr, 0, info);
1742
1743         /* We're back.  Did the debugger cancel the sig?  */
1744         signr = current->exit_code;
1745         if (signr == 0)
1746                 return signr;
1747
1748         current->exit_code = 0;
1749
1750         /* Update the siginfo structure if the signal has
1751            changed.  If the debugger wanted something
1752            specific in the siginfo structure then it should
1753            have updated *info via PTRACE_SETSIGINFO.  */
1754         if (signr != info->si_signo) {
1755                 info->si_signo = signr;
1756                 info->si_errno = 0;
1757                 info->si_code = SI_USER;
1758                 info->si_pid = task_pid_vnr(current->parent);
1759                 info->si_uid = task_uid(current->parent);
1760         }
1761
1762         /* If the (new) signal is now blocked, requeue it.  */
1763         if (sigismember(&current->blocked, signr)) {
1764                 specific_send_sig_info(signr, info, current);
1765                 signr = 0;
1766         }
1767
1768         return signr;
1769 }
1770
1771 int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka,
1772                           struct pt_regs *regs, void *cookie)
1773 {
1774         struct sighand_struct *sighand = current->sighand;
1775         struct signal_struct *signal = current->signal;
1776         int signr;
1777
1778 relock:
1779         /*
1780          * We'll jump back here after any time we were stopped in TASK_STOPPED.
1781          * While in TASK_STOPPED, we were considered "frozen enough".
1782          * Now that we woke up, it's crucial if we're supposed to be
1783          * frozen that we freeze now before running anything substantial.
1784          */
1785         try_to_freeze();
1786
1787         spin_lock_irq(&sighand->siglock);
1788         /*
1789          * Every stopped thread goes here after wakeup. Check to see if
1790          * we should notify the parent, prepare_signal(SIGCONT) encodes
1791          * the CLD_ si_code into SIGNAL_CLD_MASK bits.
1792          */
1793         if (unlikely(signal->flags & SIGNAL_CLD_MASK)) {
1794                 int why = (signal->flags & SIGNAL_STOP_CONTINUED)
1795                                 ? CLD_CONTINUED : CLD_STOPPED;
1796                 signal->flags &= ~SIGNAL_CLD_MASK;
1797
1798                 why = tracehook_notify_jctl(why, CLD_CONTINUED);
1799                 spin_unlock_irq(&sighand->siglock);
1800
1801                 if (why) {
1802                         read_lock(&tasklist_lock);
1803                         do_notify_parent_cldstop(current->group_leader, why);
1804                         read_unlock(&tasklist_lock);
1805                 }
1806                 goto relock;
1807         }
1808
1809         for (;;) {
1810                 struct k_sigaction *ka;
1811
1812                 if (unlikely(signal->group_stop_count > 0) &&
1813                     do_signal_stop(0))
1814                         goto relock;
1815
1816                 /*
1817                  * Tracing can induce an artifical signal and choose sigaction.
1818                  * The return value in @signr determines the default action,
1819                  * but @info->si_signo is the signal number we will report.
1820                  */
1821                 signr = tracehook_get_signal(current, regs, info, return_ka);
1822                 if (unlikely(signr < 0))
1823                         goto relock;
1824                 if (unlikely(signr != 0))
1825                         ka = return_ka;
1826                 else {
1827                         signr = dequeue_signal(current, &current->blocked,
1828                                                info);
1829
1830                         if (!signr)
1831                                 break; /* will return 0 */
1832
1833                         if (signr != SIGKILL) {
1834                                 signr = ptrace_signal(signr, info,
1835                                                       regs, cookie);
1836                                 if (!signr)
1837                                         continue;
1838                         }
1839
1840                         ka = &sighand->action[signr-1];
1841                 }
1842
1843                 if (ka->sa.sa_handler == SIG_IGN) /* Do nothing.  */
1844                         continue;
1845                 if (ka->sa.sa_handler != SIG_DFL) {
1846                         /* Run the handler.  */
1847                         *return_ka = *ka;
1848
1849                         if (ka->sa.sa_flags & SA_ONESHOT)
1850                                 ka->sa.sa_handler = SIG_DFL;
1851
1852                         break; /* will return non-zero "signr" value */
1853                 }
1854
1855                 /*
1856                  * Now we are doing the default action for this signal.
1857                  */
1858                 if (sig_kernel_ignore(signr)) /* Default is nothing. */
1859                         continue;
1860
1861                 /*
1862                  * Global init gets no signals it doesn't want.
1863                  * Container-init gets no signals it doesn't want from same
1864                  * container.
1865                  *
1866                  * Note that if global/container-init sees a sig_kernel_only()
1867                  * signal here, the signal must have been generated internally
1868                  * or must have come from an ancestor namespace. In either
1869                  * case, the signal cannot be dropped.
1870                  */
1871                 if (unlikely(signal->flags & SIGNAL_UNKILLABLE) &&
1872                                 !sig_kernel_only(signr))
1873                         continue;
1874
1875                 if (sig_kernel_stop(signr)) {
1876                         /*
1877                          * The default action is to stop all threads in
1878                          * the thread group.  The job control signals
1879                          * do nothing in an orphaned pgrp, but SIGSTOP
1880                          * always works.  Note that siglock needs to be
1881                          * dropped during the call to is_orphaned_pgrp()
1882                          * because of lock ordering with tasklist_lock.
1883                          * This allows an intervening SIGCONT to be posted.
1884                          * We need to check for that and bail out if necessary.
1885                          */
1886                         if (signr != SIGSTOP) {
1887                                 spin_unlock_irq(&sighand->siglock);
1888
1889                                 /* signals can be posted during this window */
1890
1891                                 if (is_current_pgrp_orphaned())
1892                                         goto relock;
1893
1894                                 spin_lock_irq(&sighand->siglock);
1895                         }
1896
1897                         if (likely(do_signal_stop(info->si_signo))) {
1898                                 /* It released the siglock.  */
1899                                 goto relock;
1900                         }
1901
1902                         /*
1903                          * We didn't actually stop, due to a race
1904                          * with SIGCONT or something like that.
1905                          */
1906                         continue;
1907                 }
1908
1909                 spin_unlock_irq(&sighand->siglock);
1910
1911                 /*
1912                  * Anything else is fatal, maybe with a core dump.
1913                  */
1914                 current->flags |= PF_SIGNALED;
1915
1916                 if (sig_kernel_coredump(signr)) {
1917                         if (print_fatal_signals)
1918                                 print_fatal_signal(regs, info->si_signo);
1919                         /*
1920                          * If it was able to dump core, this kills all
1921                          * other threads in the group and synchronizes with
1922                          * their demise.  If we lost the race with another
1923                          * thread getting here, it set group_exit_code
1924                          * first and our do_group_exit call below will use
1925                          * that value and ignore the one we pass it.
1926                          */
1927                         do_coredump(info->si_signo, info->si_signo, regs);
1928                 }
1929
1930                 /*
1931                  * Death signals, no core dump.
1932                  */
1933                 do_group_exit(info->si_signo);
1934                 /* NOTREACHED */
1935         }
1936         spin_unlock_irq(&sighand->siglock);
1937         return signr;
1938 }
1939
1940 void exit_signals(struct task_struct *tsk)
1941 {
1942         int group_stop = 0;
1943         struct task_struct *t;
1944
1945         if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) {
1946                 tsk->flags |= PF_EXITING;
1947                 return;
1948         }
1949
1950         spin_lock_irq(&tsk->sighand->siglock);
1951         /*
1952          * From now this task is not visible for group-wide signals,
1953          * see wants_signal(), do_signal_stop().
1954          */
1955         tsk->flags |= PF_EXITING;
1956         if (!signal_pending(tsk))
1957                 goto out;
1958
1959         /* It could be that __group_complete_signal() choose us to
1960          * notify about group-wide signal. Another thread should be
1961          * woken now to take the signal since we will not.
1962          */
1963         for (t = tsk; (t = next_thread(t)) != tsk; )
1964                 if (!signal_pending(t) && !(t->flags & PF_EXITING))
1965                         recalc_sigpending_and_wake(t);
1966
1967         if (unlikely(tsk->signal->group_stop_count) &&
1968                         !--tsk->signal->group_stop_count) {
1969                 tsk->signal->flags = SIGNAL_STOP_STOPPED;
1970                 group_stop = tracehook_notify_jctl(CLD_STOPPED, CLD_STOPPED);
1971         }
1972 out:
1973         spin_unlock_irq(&tsk->sighand->siglock);
1974
1975         if (unlikely(group_stop)) {
1976                 read_lock(&tasklist_lock);
1977                 do_notify_parent_cldstop(tsk, group_stop);
1978                 read_unlock(&tasklist_lock);
1979         }
1980 }
1981
/*
 * Signal helpers exported for use outside this file.  dequeue_signal is
 * the only one exported with EXPORT_SYMBOL_GPL.
 */
EXPORT_SYMBOL(recalc_sigpending);
EXPORT_SYMBOL_GPL(dequeue_signal);
EXPORT_SYMBOL(flush_signals);
EXPORT_SYMBOL(force_sig);
EXPORT_SYMBOL(send_sig);
EXPORT_SYMBOL(send_sig_info);
EXPORT_SYMBOL(sigprocmask);
EXPORT_SYMBOL(block_all_signals);
EXPORT_SYMBOL(unblock_all_signals);
1991
1992
1993 /*
1994  * System call entry points.
1995  */
1996
1997 SYSCALL_DEFINE0(restart_syscall)
1998 {
1999         struct restart_block *restart = &current_thread_info()->restart_block;
2000         return restart->fn(restart);
2001 }
2002
/*
 * Restart handler that never restarts anything: @param is ignored and the
 * call always reports -EINTR.
 */
long do_no_restart_syscall(struct restart_block *param)
{
        return -EINTR;
}
2007
/*
 * We don't need to take the kernel lock here: everything touched is local
 * to this particular thread (and that's good, because this is _heavily_
 * used by various programs).
 */
2013
2014 /*
2015  * This is also useful for kernel threads that want to temporarily
2016  * (or permanently) block certain signals.
2017  *
2018  * NOTE! Unlike the user-mode sys_sigprocmask(), the kernel
2019  * interface happily blocks "unblockable" signals like SIGKILL
2020  * and friends.
2021  */
2022 int sigprocmask(int how, sigset_t *set, sigset_t *oldset)
2023 {
2024         int error;
2025
2026         spin_lock_irq(&current->sighand->siglock);
2027         if (oldset)
2028                 *oldset = current->blocked;
2029
2030         error = 0;
2031         switch (how) {
2032         case SIG_BLOCK:
2033                 sigorsets(&current->blocked, &current->blocked, set);
2034                 break;
2035         case SIG_UNBLOCK:
2036                 signandsets(&current->blocked, &current->blocked, set);
2037                 break;
2038         case SIG_SETMASK:
2039                 current->blocked = *set;
2040                 break;
2041         default:
2042                 error = -EINVAL;
2043         }
2044         recalc_sigpending();
2045         spin_unlock_irq(&current->sighand->siglock);
2046
2047         return error;
2048 }
2049
2050 SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, set,
2051                 sigset_t __user *, oset, size_t, sigsetsize)
2052 {
2053         int error = -EINVAL;
2054         sigset_t old_set, new_set;
2055
2056         /* XXX: Don't preclude handling different sized sigset_t's.  */
2057         if (sigsetsize != sizeof(sigset_t))
2058                 goto out;
2059
2060         if (set) {
2061                 error = -EFAULT;
2062                 if (copy_from_user(&new_set, set, sizeof(*set)))
2063                         goto out;
2064                 sigdelsetmask(&new_set, sigmask(SIGKILL)|sigmask(SIGSTOP));
2065
2066                 error = sigprocmask(how, &new_set, &old_set);
2067                 if (error)
2068                         goto out;
2069                 if (oset)
2070                         goto set_old;
2071         } else if (oset) {
2072                 spin_lock_irq(&current->sighand->siglock);
2073                 old_set = current->blocked;
2074                 spin_unlock_irq(&current->sighand->siglock);
2075
2076         set_old:
2077                 error = -EFAULT;
2078                 if (copy_to_user(oset, &old_set, sizeof(*oset)))
2079                         goto out;
2080         }
2081         error = 0;
2082 out:
2083         return error;
2084 }
2085
2086 long do_sigpending(void __user *set, unsigned long sigsetsize)
2087 {
2088         long error = -EINVAL;
2089         sigset_t pending;
2090
2091         if (sigsetsize > sizeof(sigset_t))
2092                 goto out;
2093
2094         spin_lock_irq(&current->sighand->siglock);
2095         sigorsets(&pending, &current->pending.signal,
2096                   &current->signal->shared_pending.signal);
2097         spin_unlock_irq(&current->sighand->siglock);
2098
2099         /* Outside the lock because only this thread touches it.  */
2100         sigandsets(&pending, &current->blocked, &pending);
2101
2102         error = -EFAULT;
2103         if (!copy_to_user(set, &pending, sigsetsize))
2104                 error = 0;
2105
2106 out:
2107         return error;
2108 }
2109
/*
 * rt_sigpending: thin syscall wrapper that forwards @set and @sigsetsize
 * to do_sigpending() and returns its result.
 */
SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, set, size_t, sigsetsize)
{
        return do_sigpending(set, sigsetsize);
}
2114
2115 #ifndef HAVE_ARCH_COPY_SIGINFO_TO_USER
2116
2117 int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from)
2118 {
2119         int err;
2120
2121         if (!access_ok (VERIFY_WRITE, to, sizeof(siginfo_t)))
2122                 return -EFAULT;
2123         if (from->si_code < 0)
2124                 return __copy_to_user(to, from, sizeof(siginfo_t))
2125                         ? -EFAULT : 0;
2126         /*
2127          * If you change siginfo_t structure, please be sure
2128          * this code is fixed accordingly.
2129          * Please remember to update the signalfd_copyinfo() function
2130          * inside fs/signalfd.c too, in case siginfo_t changes.
2131          * It should never copy any pad contained in the structure
2132          * to avoid security leaks, but must copy the generic
2133          * 3 ints plus the relevant union member.
2134          */
2135         err = __put_user(from->si_signo, &to->si_signo);
2136         err |= __put_user(from->si_errno, &to->si_errno);
2137         err |= __put_user((short)from->si_code, &to->si_code);
2138         switch (from->si_code & __SI_MASK) {
2139         case __SI_KILL:
2140                 err |= __put_user(from->si_pid, &to->si_pid);
2141                 err |= __put_user(from->si_uid, &to->si_uid);
2142                 break;
2143         case __SI_TIMER:
2144                  err |= __put_user(from->si_tid, &to->si_tid);
2145                  err |= __put_user(from->si_overrun, &to->si_overrun);
2146                  err |= __put_user(from->si_ptr, &to->si_ptr);
2147                 break;
2148         case __SI_POLL:
2149                 err |= __put_user(from->si_band, &to->si_band);
2150                 err |= __put_user(from->si_fd, &to->si_fd);
2151                 break;
2152         case __SI_FAULT:
2153                 err |= __put_user(from->si_addr, &to->si_addr);
2154 #ifdef __ARCH_SI_TRAPNO
2155                 err |= __put_user(from->si_trapno, &to->si_trapno);
2156 #endif
2157                 break;
2158         case __SI_CHLD:
2159                 err |= __put_user(from->si_pid, &to->si_pid);
2160                 err |= __put_user(from->si_uid, &to->si_uid);
2161                 err |= __put_user(from->si_status, &to->si_status);
2162                 err |= __put_user(from->si_utime, &to->si_utime);
2163                 err |= __put_user(from->si_stime, &to->si_stime);
2164                 break;
2165         case __SI_RT: /* This is not generated by the kernel as of now. */
2166         case __SI_MESGQ: /* But this is */
2167                 err |= __put_user(from->si_pid, &to->si_pid);
2168                 err |= __put_user(from->si_uid, &to->si_uid);
2169                 err |= __put_user(from->si_ptr, &to->si_ptr);
2170                 break;
2171         default: /* this is just in case for now ... */
2172                 err |= __put_user(from->si_pid, &to->si_pid);
2173                 err |= __put_user(from->si_uid, &to->si_uid);
2174                 break;
2175         }
2176         return err;
2177 }
2178
2179 #endif
2180
2181 SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese,
2182                 siginfo_t __user *, uinfo, const struct timespec __user *, uts,
2183                 size_t, sigsetsize)
2184 {
2185         int ret, sig;
2186         sigset_t these;
2187         struct timespec ts;
2188         siginfo_t info;
2189         long timeout = 0;
2190
2191         /* XXX: Don't preclude handling different sized sigset_t's.  */
2192         if (sigsetsize != sizeof(sigset_t))
2193                 return -EINVAL;
2194
2195         if (copy_from_user(&these, uthese, sizeof(these)))
2196                 return -EFAULT;
2197
2198         /*
2199          * Invert the set of allowed signals to get those we
2200          * want to block.
2201          */
2202         sigdelsetmask(&these, sigmask(SIGKILL)|sigmask(SIGSTOP));
2203         signotset(&these);
2204
2205         if (uts) {
2206                 if (copy_from_user(&ts, uts, sizeof(ts)))
2207                         return -EFAULT;
2208                 if (ts.tv_nsec >= 1000000000L || ts.tv_nsec < 0
2209                     || ts.tv_sec < 0)
2210                         return -EINVAL;
2211         }
2212
2213         spin_lock_irq(&current->sighand->siglock);
2214         sig = dequeue_signal(current, &these, &info);
2215         if (!sig) {
2216                 timeout = MAX_SCHEDULE_TIMEOUT;
2217                 if (uts)
2218                         timeout = (timespec_to_jiffies(&ts)
2219                                    + (ts.tv_sec || ts.tv_nsec));
2220
2221                 if (timeout) {
2222                         /* None ready -- temporarily unblock those we're
2223                          * interested while we are sleeping in so that we'll
2224                          * be awakened when they arrive.  */
2225                         current->real_blocked = current->blocked;
2226                         sigandsets(&current->blocked, &current->blocked, &these);
2227                         recalc_sigpending();
2228                         spin_unlock_irq(&current->sighand->siglock);
2229
2230                         timeout = schedule_timeout_interruptible(timeout);
2231
2232                         spin_lock_irq(&current->sighand->siglock);
2233                         sig = dequeue_signal(current, &these, &info);
2234                         current->blocked = current->real_blocked;
2235                         siginitset(&current->real_blocked, 0);
2236                         recalc_sigpending();
2237                 }
2238         }
2239         spin_unlock_irq(&current->sighand->siglock);
2240
2241         if (sig) {
2242                 ret = sig;
2243                 if (uinfo) {
2244                         if (copy_siginfo_to_user(uinfo, &info))
2245                                 ret = -EFAULT;
2246                 }
2247         } else {
2248                 ret = -EAGAIN;
2249                 if (timeout)
2250                         ret = -EINTR;
2251         }
2252
2253         return ret;
2254 }
2255
2256 SYSCALL_DEFINE2(kill, pid_t, pid, int, sig)
2257 {
2258         struct siginfo info;
2259
2260         info.si_signo = sig;
2261         info.si_errno = 0;
2262         info.si_code = SI_USER;
2263         info.si_pid = task_tgid_vnr(current);
2264         info.si_uid = current_uid();
2265
2266         return kill_something_info(sig, &info, pid);
2267 }
2268
2269 static int
2270 do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info)
2271 {
2272         struct task_struct *p;
2273         int error = -ESRCH;
2274
2275         rcu_read_lock();
2276         p = find_task_by_vpid(pid);
2277         if (p && (tgid <= 0 || task_tgid_vnr(p) == tgid)) {
2278                 error = check_kill_permission(sig, info, p);
2279                 /*
2280                  * The null signal is a permissions and process existence
2281                  * probe.  No signal is actually delivered.
2282                  */
2283                 if (!error && sig) {
2284                         error = do_send_sig_info(sig, info, p, false);
2285                         /*
2286                          * If lock_task_sighand() failed we pretend the task
2287                          * dies after receiving the signal. The window is tiny,
2288                          * and the signal is private anyway.
2289                          */
2290                         if (unlikely(error == -ESRCH))
2291                                 error = 0;
2292                 }
2293         }
2294         rcu_read_unlock();
2295
2296         return error;
2297 }
2298
2299 static int do_tkill(pid_t tgid, pid_t pid, int sig)
2300 {
2301         struct siginfo info;
2302
2303         info.si_signo = sig;
2304         info.si_errno = 0;
2305         info.si_code = SI_TKILL;
2306         info.si_pid = task_tgid_vnr(current);
2307         info.si_uid = current_uid();
2308
2309         return do_send_specific(tgid, pid, sig, &info);
2310 }
2311
2312 /**
2313  *  sys_tgkill - send signal to one specific thread
2314  *  @tgid: the thread group ID of the thread
2315  *  @pid: the PID of the thread
2316  *  @sig: signal to be sent
2317  *
2318  *  This syscall also checks the @tgid and returns -ESRCH even if the PID
2319  *  exists but it's not belonging to the target process anymore. This
2320  *  method solves the problem of threads exiting and PIDs getting reused.
2321  */
2322 SYSCALL_DEFINE3(tgkill, pid_t, tgid, pid_t, pid, int, sig)
2323 {
2324         /* This is only valid for single tasks */
2325         if (pid <= 0 || tgid <= 0)
2326                 return -EINVAL;
2327
2328         return do_tkill(tgid, pid, sig);
2329 }
2330
2331 /*
2332  *  Send a signal to only one task, even if it's a CLONE_THREAD task.
2333  */
2334 SYSCALL_DEFINE2(tkill, pid_t, pid, int, sig)
2335 {
2336         /* This is only valid for single tasks */
2337         if (pid <= 0)
2338                 return -EINVAL;
2339
2340         return do_tkill(0, pid, sig);
2341 }
2342
2343 SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig,
2344                 siginfo_t __user *, uinfo)
2345 {
2346         siginfo_t info;
2347
2348         if (copy_from_user(&info, uinfo, sizeof(siginfo_t)))
2349                 return -EFAULT;
2350
2351         /* Not even root can pretend to send signals from the kernel.
2352            Nor can they impersonate a kill(), which adds source info.  */
2353         if (info.si_code >= 0)
2354                 return -EPERM;
2355         info.si_signo = sig;
2356
2357         /* POSIX.1b doesn't mention process groups.  */
2358         return kill_proc_info(sig, &info, pid);
2359 }
2360
2361 long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info)
2362 {
2363         /* This is only valid for single tasks */
2364         if (pid <= 0 || tgid <= 0)
2365                 return -EINVAL;
2366
2367         /* Not even root can pretend to send signals from the kernel.
2368            Nor can they impersonate a kill(), which adds source info.  */
2369         if (info->si_code >= 0)
2370                 return -EPERM;
2371         info->si_signo = sig;
2372
2373         return do_send_specific(tgid, pid, sig, info);
2374 }
2375
2376 SYSCALL_DEFINE4(rt_tgsigqueueinfo, pid_t, tgid, pid_t, pid, int, sig,
2377                 siginfo_t __user *, uinfo)
2378 {
2379         siginfo_t info;
2380
2381         if (copy_from_user(&info, uinfo, sizeof(siginfo_t)))
2382                 return -EFAULT;
2383
2384         return do_rt_tgsigqueueinfo(tgid, pid, sig, &info);
2385 }
2386
2387 int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
2388 {
2389         struct task_struct *t = current;
2390         struct k_sigaction *k;
2391         sigset_t mask;
2392
2393         if (!valid_signal(sig) || sig < 1 || (act && sig_kernel_only(sig)))
2394                 return -EINVAL;
2395
2396         k = &t->sighand->action[sig-1];
2397
2398         spin_lock_irq(&current->sighand->siglock);
2399         if (oact)
2400                 *oact = *k;
2401
2402         if (act) {
2403                 sigdelsetmask(&act->sa.sa_mask,
2404                               sigmask(SIGKILL) | sigmask(SIGSTOP));
2405                 *k = *act;
2406                 /*
2407                  * POSIX 3.3.1.3:
2408                  *  "Setting a signal action to SIG_IGN for a signal that is
2409                  *   pending shall cause the pending signal to be discarded,
2410                  *   whether or not it is blocked."
2411                  *
2412                  *  "Setting a signal action to SIG_DFL for a signal that is
2413                  *   pending and whose default action is to ignore the signal
2414                  *   (for example, SIGCHLD), shall cause the pending signal to
2415                  *   be discarded, whether or not it is blocked"
2416                  */
2417                 if (sig_handler_ignored(sig_handler(t, sig), sig)) {
2418                         sigemptyset(&mask);
2419                         sigaddset(&mask, sig);
2420                         rm_from_queue_full(&mask, &t->signal->shared_pending);
2421                         do {
2422                                 rm_from_queue_full(&mask, &t->pending);
2423                                 t = next_thread(t);
2424                         } while (t != current);
2425                 }
2426         }
2427
2428         spin_unlock_irq(&current->sighand->siglock);
2429         return 0;
2430 }
2431
2432 int
2433 do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long sp)
2434 {
2435         stack_t oss;
2436         int error;
2437
2438         oss.ss_sp = (void __user *) current->sas_ss_sp;
2439         oss.ss_size = current->sas_ss_size;
2440         oss.ss_flags = sas_ss_flags(sp);
2441
2442         if (uss) {
2443                 void __user *ss_sp;
2444                 size_t ss_size;
2445                 int ss_flags;
2446
2447                 error = -EFAULT;
2448                 if (!access_ok(VERIFY_READ, uss, sizeof(*uss)))
2449                         goto out;
2450                 error = __get_user(ss_sp, &uss->ss_sp) |
2451                         __get_user(ss_flags, &uss->ss_flags) |
2452                         __get_user(ss_size, &uss->ss_size);
2453                 if (error)
2454                         goto out;
2455
2456                 error = -EPERM;
2457                 if (on_sig_stack(sp))
2458                         goto out;
2459
2460                 error = -EINVAL;
2461                 /*
2462                  *
2463                  * Note - this code used to test ss_flags incorrectly
2464                  *        old code may have been written using ss_flags==0
2465                  *        to mean ss_flags==SS_ONSTACK (as this was the only
2466                  *        way that worked) - this fix preserves that older
2467                  *        mechanism
2468                  */
2469                 if (ss_flags != SS_DISABLE && ss_flags != SS_ONSTACK && ss_flags != 0)
2470                         goto out;
2471
2472                 if (ss_flags == SS_DISABLE) {
2473                         ss_size = 0;
2474                         ss_sp = NULL;
2475                 } else {
2476                         error = -ENOMEM;
2477                         if (ss_size < MINSIGSTKSZ)
2478                                 goto out;
2479                 }
2480
2481                 current->sas_ss_sp = (unsigned long) ss_sp;
2482                 current->sas_ss_size = ss_size;
2483         }
2484
2485         error = 0;
2486         if (uoss) {
2487                 error = -EFAULT;
2488                 if (!access_ok(VERIFY_WRITE, uoss, sizeof(*uoss)))
2489                         goto out;
2490                 error = __put_user(oss.ss_sp, &uoss->ss_sp) |
2491                         __put_user(oss.ss_size, &uoss->ss_size) |
2492                         __put_user(oss.ss_flags, &uoss->ss_flags);
2493         }
2494
2495 out:
2496         return error;
2497 }
2498
2499 #ifdef __ARCH_WANT_SYS_SIGPENDING
2500
2501 SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, set)
2502 {
2503         return do_sigpending(set, sizeof(*set));
2504 }
2505
2506 #endif
2507
2508 #ifdef __ARCH_WANT_SYS_SIGPROCMASK
/* Some platforms have their own version with special arguments; others
   support only sys_rt_sigprocmask.  */
2511
2512 SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, set,
2513                 old_sigset_t __user *, oset)
2514 {
2515         int error;
2516         old_sigset_t old_set, new_set;
2517
2518         if (set) {
2519                 error = -EFAULT;
2520                 if (copy_from_user(&new_set, set, sizeof(*set)))
2521                         goto out;
2522                 new_set &= ~(sigmask(SIGKILL) | sigmask(SIGSTOP));
2523
2524                 spin_lock_irq(&current->sighand->siglock);
2525                 old_set = current->blocked.sig[0];
2526
2527                 error = 0;
2528                 switch (how) {
2529                 default:
2530                         error = -EINVAL;
2531                         break;
2532                 case SIG_BLOCK:
2533                         sigaddsetmask(&current->blocked, new_set);
2534                         break;
2535                 case SIG_UNBLOCK:
2536                         sigdelsetmask(&current->blocked, new_set);
2537                         break;
2538                 case SIG_SETMASK:
2539                         current->blocked.sig[0] = new_set;
2540                         break;
2541                 }
2542
2543                 recalc_sigpending();
2544                 spin_unlock_irq(&current->sighand->siglock);
2545                 if (error)
2546                         goto out;
2547                 if (oset)
2548                         goto set_old;
2549         } else if (oset) {
2550                 old_set = current->blocked.sig[0];
2551         set_old:
2552                 error = -EFAULT;
2553                 if (copy_to_user(oset, &old_set, sizeof(*oset)))
2554                         goto out;
2555         }
2556         error = 0;
2557 out:
2558         return error;
2559 }
2560 #endif /* __ARCH_WANT_SYS_SIGPROCMASK */
2561
2562 #ifdef __ARCH_WANT_SYS_RT_SIGACTION
2563 SYSCALL_DEFINE4(rt_sigaction, int, sig,
2564                 const struct sigaction __user *, act,
2565                 struct sigaction __user *, oact,
2566                 size_t, sigsetsize)
2567 {
2568         struct k_sigaction new_sa, old_sa;
2569         int ret = -EINVAL;
2570
2571         /* XXX: Don't preclude handling different sized sigset_t's.  */
2572         if (sigsetsize != sizeof(sigset_t))
2573                 goto out;
2574
2575         if (act) {
2576                 if (copy_from_user(&new_sa.sa, act, sizeof(new_sa.sa)))
2577                         return -EFAULT;
2578         }
2579
2580         ret = do_sigaction(sig, act ? &new_sa : NULL, oact ? &old_sa : NULL);
2581
2582         if (!ret && oact) {
2583                 if (copy_to_user(oact, &old_sa.sa, sizeof(old_sa.sa)))
2584                         return -EFAULT;
2585         }
2586 out:
2587         return ret;
2588 }
2589 #endif /* __ARCH_WANT_SYS_RT_SIGACTION */
2590
2591 #ifdef __ARCH_WANT_SYS_SGETMASK
2592
2593 /*
2594  * For backwards compatibility.  Functionality superseded by sigprocmask.
2595  */
2596 SYSCALL_DEFINE0(sgetmask)
2597 {
2598         /* SMP safe */
2599         return current->blocked.sig[0];
2600 }
2601
2602 SYSCALL_DEFINE1(ssetmask, int, newmask)
2603 {
2604         int old;
2605
2606         spin_lock_irq(&current->sighand->siglock);
2607         old = current->blocked.sig[0];
2608
2609         siginitset(&current->blocked, newmask & ~(sigmask(SIGKILL)|
2610                                                   sigmask(SIGSTOP)));
2611         recalc_sigpending();
2612         spin_unlock_irq(&current->sighand->siglock);
2613
2614         return old;
2615 }
#endif /* __ARCH_WANT_SYS_SGETMASK */
2617
2618 #ifdef __ARCH_WANT_SYS_SIGNAL
2619 /*
2620  * For backwards compatibility.  Functionality superseded by sigaction.
2621  */
2622 SYSCALL_DEFINE2(signal, int, sig, __sighandler_t, handler)
2623 {
2624         struct k_sigaction new_sa, old_sa;
2625         int ret;
2626
2627         new_sa.sa.sa_handler = handler;
2628         new_sa.sa.sa_flags = SA_ONESHOT | SA_NOMASK;
2629         sigemptyset(&new_sa.sa.sa_mask);
2630
2631         ret = do_sigaction(sig, &new_sa, &old_sa);
2632
2633         return ret ? ret : (unsigned long)old_sa.sa.sa_handler;
2634 }
2635 #endif /* __ARCH_WANT_SYS_SIGNAL */
2636
2637 #ifdef __ARCH_WANT_SYS_PAUSE
2638
2639 SYSCALL_DEFINE0(pause)
2640 {
2641         current->state = TASK_INTERRUPTIBLE;
2642         schedule();
2643         return -ERESTARTNOHAND;
2644 }
2645
2646 #endif
2647
2648 #ifdef __ARCH_WANT_SYS_RT_SIGSUSPEND
2649 SYSCALL_DEFINE2(rt_sigsuspend, sigset_t __user *, unewset, size_t, sigsetsize)
2650 {
2651         sigset_t newset;
2652
2653         /* XXX: Don't preclude handling different sized sigset_t's.  */
2654         if (sigsetsize != sizeof(sigset_t))
2655                 return -EINVAL;
2656
2657         if (copy_from_user(&newset, unewset, sizeof(newset)))
2658                 return -EFAULT;
2659         sigdelsetmask(&newset, sigmask(SIGKILL)|sigmask(SIGSTOP));
2660
2661         spin_lock_irq(&current->sighand->siglock);
2662         current->saved_sigmask = current->blocked;
2663         current->blocked = newset;
2664         recalc_sigpending();
2665         spin_unlock_irq(&current->sighand->siglock);
2666
2667         current->state = TASK_INTERRUPTIBLE;
2668         schedule();
2669         set_restore_sigmask();
2670         return -ERESTARTNOHAND;
2671 }
2672 #endif /* __ARCH_WANT_SYS_RT_SIGSUSPEND */
2673
/*
 * Fallback arch_vma_name(): reports no name for any vma.
 *
 * The symbol is weak, so another definition of arch_vma_name() linked
 * into the image takes precedence over this one.
 */
__attribute__((weak))
const char *arch_vma_name(struct vm_area_struct *vma)
{
        return NULL;
}
2678
/*
 * signals_init - set up the slab cache kept in sigqueue_cachep.
 *
 * The cache is created with KMEM_CACHE(sigqueue, SLAB_PANIC) and the
 * result is stored without a NULL check.
 * NOTE(review): presumably SLAB_PANIC makes a creation failure fatal,
 * which is why no error path exists here - confirm.
 */
void __init signals_init(void)
{
        sigqueue_cachep = KMEM_CACHE(sigqueue, SLAB_PANIC);
}