Merge branch 'ptrace' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc into...
author Oleg Nesterov <oleg@redhat.com>
Thu, 7 Apr 2011 18:44:11 +0000 (20:44 +0200)
committer Oleg Nesterov <oleg@redhat.com>
Thu, 7 Apr 2011 18:44:11 +0000 (20:44 +0200)
include/linux/sched.h
kernel/exit.c
kernel/ptrace.c
kernel/signal.c

diff --combined include/linux/sched.h
@@@ -99,7 -99,6 +99,7 @@@ struct robust_list_head
  struct bio_list;
  struct fs_struct;
  struct perf_event_context;
 +struct blk_plug;
  
  /*
   * List of flags we want to share for kernel threads,
@@@ -517,7 -516,7 +517,7 @@@ struct thread_group_cputimer 
  struct autogroup;
  
  /*
 - * NOTE! "signal_struct" does not have it's own
 + * NOTE! "signal_struct" does not have its own
   * locking, because a shared signal_struct always
   * implies a shared sighand_struct, so locking
   * sighand_struct is always a proper superset of
@@@ -653,9 -652,8 +653,8 @@@ struct signal_struct 
   * Bits in flags field of signal_struct.
   */
  #define SIGNAL_STOP_STOPPED   0x00000001 /* job control stop in effect */
- #define SIGNAL_STOP_DEQUEUED  0x00000002 /* stop signal dequeued */
- #define SIGNAL_STOP_CONTINUED 0x00000004 /* SIGCONT since WCONTINUED reap */
- #define SIGNAL_GROUP_EXIT     0x00000008 /* group exit in progress */
+ #define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */
+ #define SIGNAL_GROUP_EXIT     0x00000004 /* group exit in progress */
  /*
   * Pending notifications to parent.
   */
@@@ -1261,6 -1259,7 +1260,7 @@@ struct task_struct 
        int exit_state;
        int exit_code, exit_signal;
        int pdeath_signal;  /*  The signal sent when the parent dies  */
+       unsigned int group_stop;        /* GROUP_STOP_*, siglock protected */
        /* ??? */
        unsigned int personality;
        unsigned did_exec:1;
  /* stacked block device info */
        struct bio_list *bio_list;
  
 +#ifdef CONFIG_BLOCK
 +/* stack plugging */
 +      struct blk_plug *plug;
 +#endif
 +
  /* VM state */
        struct reclaim_state *reclaim_state;
  
        struct memcg_batch_info {
                int do_batch;   /* incremented when batch uncharge started */
                struct mem_cgroup *memcg; /* target memcg of uncharge */
 -              unsigned long bytes;            /* uncharged usage */
 -              unsigned long memsw_bytes; /* uncharged mem+swap usage */
 +              unsigned long nr_pages; /* uncharged usage */
 +              unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
        } memcg_batch;
  #endif
  };
@@@ -1777,6 -1771,17 +1777,17 @@@ extern void thread_group_times(struct t
  #define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
  #define used_math() tsk_used_math(current)
  
+ /*
+  * task->group_stop flags
+  */
+ #define GROUP_STOP_SIGMASK    0xffff    /* signr of the last group stop */
+ #define GROUP_STOP_PENDING    (1 << 16) /* task should stop for group stop */
+ #define GROUP_STOP_CONSUME    (1 << 17) /* consume group stop count */
+ #define GROUP_STOP_TRAPPING   (1 << 18) /* switching from STOPPED to TRACED */
+ #define GROUP_STOP_DEQUEUED   (1 << 19) /* stop signal dequeued */
+ extern void task_clear_group_stop_pending(struct task_struct *task);
+
  #ifdef CONFIG_PREEMPT_RCU
  
  #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */
diff --combined kernel/exit.c
@@@ -908,7 -908,6 +908,7 @@@ NORET_TYPE void do_exit(long code
        profile_task_exit(tsk);
  
        WARN_ON(atomic_read(&tsk->fs_excl));
 +      WARN_ON(blk_needs_flush_plug(tsk));
  
        if (unlikely(in_interrupt()))
                panic("Aiee, killing interrupt handler!");
@@@ -1538,33 -1537,83 +1538,83 @@@ static int wait_consider_task(struct wa
                return 0;
        }
  
-       if (likely(!ptrace) && unlikely(task_ptrace(p))) {
+       /* dead body doesn't have much to contribute */
+       if (p->exit_state == EXIT_DEAD)
+               return 0;
+       /* slay zombie? */
+       if (p->exit_state == EXIT_ZOMBIE) {
                /*
-                * This child is hidden by ptrace.
-                * We aren't allowed to see it now, but eventually we will.
+                * A zombie ptracee is only visible to its ptracer.
+                * Notification and reaping will be cascaded to the real
+                * parent when the ptracer detaches.
                 */
-               wo->notask_error = 0;
-               return 0;
-       }
+               if (likely(!ptrace) && unlikely(task_ptrace(p))) {
+                       /* it will become visible, clear notask_error */
+                       wo->notask_error = 0;
+                       return 0;
+               }
  
-       if (p->exit_state == EXIT_DEAD)
-               return 0;
+               /* we don't reap group leaders with subthreads */
+               if (!delay_group_leader(p))
+                       return wait_task_zombie(wo, p);
  
-       /*
-        * We don't reap group leaders with subthreads.
-        */
-       if (p->exit_state == EXIT_ZOMBIE && !delay_group_leader(p))
-               return wait_task_zombie(wo, p);
+               /*
+                * Allow access to stopped/continued state via zombie by
+                * falling through.  Clearing of notask_error is complex.
+                *
+                * When !@ptrace:
+                *
+                * If WEXITED is set, notask_error should naturally be
+                * cleared.  If not, subset of WSTOPPED|WCONTINUED is set,
+                * so, if there are live subthreads, there are events to
+                * wait for.  If all subthreads are dead, it's still safe
+                * to clear - this function will be called again in a finite
+                * amount of time once all the subthreads are released and
+                * will then return without clearing.
+                *
+                * When @ptrace:
+                *
+                * Stopped state is per-task and thus can't change once the
+                * target task dies.  Only continued and exited can happen.
+                * Clear notask_error if WCONTINUED | WEXITED.
+                */
+               if (likely(!ptrace) || (wo->wo_flags & (WCONTINUED | WEXITED)))
+                       wo->notask_error = 0;
+       } else {
+               /*
+                * If @p is ptraced by a task in its real parent's group,
+                * hide group stop/continued state when looking at @p as
+                * the real parent; otherwise, a single stop can be
+                * reported twice as group and ptrace stops.
+                *
+                * If a ptracer wants to distinguish the two events for its
+                * own children, it should create a separate process which
+                * takes the role of real parent.
+                */
+               if (likely(!ptrace) && task_ptrace(p) &&
+                   same_thread_group(p->parent, p->real_parent))
+                       return 0;
+               /*
+                * @p is alive and it's gonna stop, continue or exit, so
+                * there always is something to wait for.
+                */
+               wo->notask_error = 0;
+       }
  
        /*
-        * It's stopped or running now, so it might
-        * later continue, exit, or stop again.
+        * Wait for stopped.  Depending on @ptrace, different stopped state
+        * is used and the two don't interact with each other.
         */
-       wo->notask_error = 0;
        if (task_stopped_code(p, ptrace))
                return wait_task_stopped(wo, ptrace, p);
  
+       /*
+        * Wait for continued.  There's only one continued state and the
+        * ptracer can consume it which can confuse the real parent.  Don't
+        * use WCONTINUED from ptracer.  You don't need or want it.
+        */
        return wait_task_continued(wo, p);
  }
  
diff --combined kernel/ptrace.c
@@@ -37,35 -37,33 +37,33 @@@ void __ptrace_link(struct task_struct *
        child->parent = new_parent;
  }
  
- /*
-  * Turn a tracing stop into a normal stop now, since with no tracer there
-  * would be no way to wake it up with SIGCONT or SIGKILL.  If there was a
-  * signal sent that would resume the child, but didn't because it was in
-  * TASK_TRACED, resume it now.
-  * Requires that irqs be disabled.
-  */
- static void ptrace_untrace(struct task_struct *child)
- {
-       spin_lock(&child->sighand->siglock);
-       if (task_is_traced(child)) {
-               /*
-                * If the group stop is completed or in progress,
-                * this thread was already counted as stopped.
-                */
-               if (child->signal->flags & SIGNAL_STOP_STOPPED ||
-                   child->signal->group_stop_count)
-                       __set_task_state(child, TASK_STOPPED);
-               else
-                       signal_wake_up(child, 1);
-       }
-       spin_unlock(&child->sighand->siglock);
- }
- /*
-  * unptrace a task: move it back to its original parent and
-  * remove it from the ptrace list.
+ /**
+  * __ptrace_unlink - unlink ptracee and restore its execution state
+  * @child: ptracee to be unlinked
   *
-  * Must be called with the tasklist lock write-held.
+  * Remove @child from the ptrace list, move it back to the original parent,
+  * and restore the execution state so that it conforms to the group stop
+  * state.
+  *
+  * Unlinking can happen via two paths - explicit PTRACE_DETACH or ptracer
+  * exiting.  For PTRACE_DETACH, unless the ptracee has been killed between
+  * ptrace_check_attach() and here, it's guaranteed to be in TASK_TRACED.
+  * If the ptracer is exiting, the ptracee can be in any state.
+  *
+  * After detach, the ptracee should be in a state which conforms to the
+  * group stop.  If the group is stopped or in the process of stopping, the
+  * ptracee should be put into TASK_STOPPED; otherwise, it should be woken
+  * up from TASK_TRACED.
+  *
+  * If the ptracee is in TASK_TRACED and needs to be moved to TASK_STOPPED,
+  * it goes through TRACED -> RUNNING -> STOPPED transition which is similar
+  * to but in the opposite direction of what happens while attaching to a
+  * stopped task.  However, in this direction, the intermediate RUNNING
+  * state is not hidden even from the current ptracer and if it immediately
+  * re-attaches and performs a WNOHANG wait(2), it may fail.
+  *
+  * CONTEXT:
+  * write_lock_irq(tasklist_lock)
   */
  void __ptrace_unlink(struct task_struct *child)
  {
        child->parent = child->real_parent;
        list_del_init(&child->ptrace_entry);
  
-       if (task_is_traced(child))
-               ptrace_untrace(child);
+       spin_lock(&child->sighand->siglock);
+       /*
+        * Reinstate GROUP_STOP_PENDING if group stop is in effect and
+        * @child isn't dead.
+        */
+       if (!(child->flags & PF_EXITING) &&
+           (child->signal->flags & SIGNAL_STOP_STOPPED ||
+            child->signal->group_stop_count))
+               child->group_stop |= GROUP_STOP_PENDING;
+       /*
+        * If transition to TASK_STOPPED is pending or in TASK_TRACED, kick
+        * @child in the butt.  Note that @resume should be used iff @child
+        * is in TASK_TRACED; otherwise, we might unduly disrupt
+        * TASK_KILLABLE sleeps.
+        */
+       if (child->group_stop & GROUP_STOP_PENDING || task_is_traced(child))
+               signal_wake_up(child, task_is_traced(child));
+       spin_unlock(&child->sighand->siglock);
  }
  
  /*
@@@ -95,16 -112,14 +112,14 @@@ int ptrace_check_attach(struct task_str
         */
        read_lock(&tasklist_lock);
        if ((child->ptrace & PT_PTRACED) && child->parent == current) {
-               ret = 0;
                /*
                 * child->sighand can't be NULL, release_task()
                 * does ptrace_unlink() before __exit_signal().
                 */
                spin_lock_irq(&child->sighand->siglock);
-               if (task_is_stopped(child))
-                       child->state = TASK_TRACED;
-               else if (!task_is_traced(child) && !kill)
-                       ret = -ESRCH;
+               WARN_ON_ONCE(task_is_stopped(child));
+               if (task_is_traced(child) || kill)
+                       ret = 0;
                spin_unlock_irq(&child->sighand->siglock);
        }
        read_unlock(&tasklist_lock);
@@@ -134,24 -149,21 +149,24 @@@ int __ptrace_may_access(struct task_str
                return 0;
        rcu_read_lock();
        tcred = __task_cred(task);
 -      if ((cred->uid != tcred->euid ||
 -           cred->uid != tcred->suid ||
 -           cred->uid != tcred->uid  ||
 -           cred->gid != tcred->egid ||
 -           cred->gid != tcred->sgid ||
 -           cred->gid != tcred->gid) &&
 -          !capable(CAP_SYS_PTRACE)) {
 -              rcu_read_unlock();
 -              return -EPERM;
 -      }
 +      if (cred->user->user_ns == tcred->user->user_ns &&
 +          (cred->uid == tcred->euid &&
 +           cred->uid == tcred->suid &&
 +           cred->uid == tcred->uid  &&
 +           cred->gid == tcred->egid &&
 +           cred->gid == tcred->sgid &&
 +           cred->gid == tcred->gid))
 +              goto ok;
 +      if (ns_capable(tcred->user->user_ns, CAP_SYS_PTRACE))
 +              goto ok;
 +      rcu_read_unlock();
 +      return -EPERM;
 +ok:
        rcu_read_unlock();
        smp_rmb();
        if (task->mm)
                dumpable = get_dumpable(task->mm);
 -      if (!dumpable && !capable(CAP_SYS_PTRACE))
 +      if (!dumpable && !task_ns_capable(task, CAP_SYS_PTRACE))
                return -EPERM;
  
        return security_ptrace_access_check(task, mode);
@@@ -168,6 -180,7 +183,7 @@@ bool ptrace_may_access(struct task_stru
  
  static int ptrace_attach(struct task_struct *task)
  {
+       bool wait_trap = false;
        int retval;
  
        audit_ptrace(task);
                goto unlock_tasklist;
  
        task->ptrace = PT_PTRACED;
 -      if (capable(CAP_SYS_PTRACE))
 +      if (task_ns_capable(task, CAP_SYS_PTRACE))
                task->ptrace |= PT_PTRACE_CAP;
  
        __ptrace_link(task, current);
        send_sig_info(SIGSTOP, SEND_SIG_FORCED, task);
  
+       spin_lock(&task->sighand->siglock);
+       /*
+        * If the task is already STOPPED, set GROUP_STOP_PENDING and
+        * TRAPPING, and kick it so that it transits to TRACED.  TRAPPING
+        * will be cleared if the child completes the transition or any
+        * event which clears the group stop states happens.  We'll wait
+        * for the transition to complete before returning from this
+        * function.
+        *
+        * This hides STOPPED -> RUNNING -> TRACED transition from the
+        * attaching thread but a different thread in the same group can
+        * still observe the transient RUNNING state.  IOW, if another
+        * thread's WNOHANG wait(2) on the stopped tracee races against
+        * ATTACH, the wait(2) may fail due to the transient RUNNING.
+        *
+        * The following task_is_stopped() test is safe as both transitions
+        * in and out of STOPPED are protected by siglock.
+        */
+       if (task_is_stopped(task)) {
+               task->group_stop |= GROUP_STOP_PENDING | GROUP_STOP_TRAPPING;
+               signal_wake_up(task, 1);
+               wait_trap = true;
+       }
+       spin_unlock(&task->sighand->siglock);
        retval = 0;
  unlock_tasklist:
        write_unlock_irq(&tasklist_lock);
  unlock_creds:
        mutex_unlock(&task->signal->cred_guard_mutex);
  out:
+       if (wait_trap)
+               wait_event(current->signal->wait_chldexit,
+                          !(task->group_stop & GROUP_STOP_TRAPPING));
        return retval;
  }
  
@@@ -315,8 -358,6 +361,6 @@@ static int ptrace_detach(struct task_st
        if (child->ptrace) {
                child->exit_code = data;
                dead = __ptrace_detach(current, child);
-               if (!child->exit_state)
-                       wake_up_state(child, TASK_TRACED | TASK_STOPPED);
        }
        write_unlock_irq(&tasklist_lock);
  
diff --combined kernel/signal.c
@@@ -124,7 -124,7 +124,7 @@@ static inline int has_pending_signals(s
  
  static int recalc_sigpending_tsk(struct task_struct *t)
  {
-       if (t->signal->group_stop_count > 0 ||
+       if ((t->group_stop & GROUP_STOP_PENDING) ||
            PENDING(&t->pending, &t->blocked) ||
            PENDING(&t->signal->shared_pending, &t->blocked)) {
                set_tsk_thread_flag(t, TIF_SIGPENDING);
@@@ -223,10 -223,87 +223,87 @@@ static inline void print_dropped_signal
                                current->comm, current->pid, sig);
  }
  
+ /**
+  * task_clear_group_stop_trapping - clear group stop trapping bit
+  * @task: target task
+  *
+  * If GROUP_STOP_TRAPPING is set, a ptracer is waiting for us.  Clear it
+  * and wake up the ptracer.  Note that we don't need any further locking.
+  * @task->siglock guarantees that @task->parent points to the ptracer.
+  *
+  * CONTEXT:
+  * Must be called with @task->sighand->siglock held.
+  */
+ static void task_clear_group_stop_trapping(struct task_struct *task)
+ {
+       if (unlikely(task->group_stop & GROUP_STOP_TRAPPING)) {
+               task->group_stop &= ~GROUP_STOP_TRAPPING;
+               __wake_up_sync(&task->parent->signal->wait_chldexit,
+                              TASK_UNINTERRUPTIBLE, 1);
+       }
+ }
+
+ /**
+  * task_clear_group_stop_pending - clear pending group stop
+  * @task: target task
+  *
+  * Clear group stop states for @task.
+  *
+  * CONTEXT:
+  * Must be called with @task->sighand->siglock held.
+  */
+ void task_clear_group_stop_pending(struct task_struct *task)
+ {
+       task->group_stop &= ~(GROUP_STOP_PENDING | GROUP_STOP_CONSUME |
+                             GROUP_STOP_DEQUEUED);
+ }
+
+ /**
+  * task_participate_group_stop - participate in a group stop
+  * @task: task participating in a group stop
+  *
+  * @task has GROUP_STOP_PENDING set and is participating in a group stop.
+  * Group stop states are cleared and the group stop count is consumed if
+  * %GROUP_STOP_CONSUME was set.  If the consumption completes the group
+  * stop, the appropriate %SIGNAL_* flags are set.
+  *
+  * CONTEXT:
+  * Must be called with @task->sighand->siglock held.
+  *
+  * RETURNS:
+  * %true if group stop completion should be notified to the parent, %false
+  * otherwise.
+  */
+ static bool task_participate_group_stop(struct task_struct *task)
+ {
+       struct signal_struct *sig = task->signal;
+       bool consume = task->group_stop & GROUP_STOP_CONSUME;
+       WARN_ON_ONCE(!(task->group_stop & GROUP_STOP_PENDING));
+       task_clear_group_stop_pending(task);
+       if (!consume)
+               return false;
+       if (!WARN_ON_ONCE(sig->group_stop_count == 0))
+               sig->group_stop_count--;
+       /*
+        * Tell the caller to notify completion iff we are entering into a
+        * fresh group stop.  Read comment in do_signal_stop() for details.
+        */
+       if (!sig->group_stop_count && !(sig->flags & SIGNAL_STOP_STOPPED)) {
+               sig->flags = SIGNAL_STOP_STOPPED;
+               return true;
+       }
+       return false;
+ }
+
  /*
   * allocate a new signal queue record
   * - this may be called without locks if and only if t == current, otherwise an
 - *   appopriate lock must be held to stop the target task from exiting
 + *   appropriate lock must be held to stop the target task from exiting
   */
  static struct sigqueue *
  __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit)
@@@ -375,15 -452,15 +452,15 @@@ int unhandled_signal(struct task_struc
        return !tracehook_consider_fatal_signal(tsk, sig);
  }
  
 -
 -/* Notify the system that a driver wants to block all signals for this
 +/*
 + * Notify the system that a driver wants to block all signals for this
   * process, and wants to be notified if any signals at all were to be
   * sent/acted upon.  If the notifier routine returns non-zero, then the
   * signal will be acted upon after all.  If the notifier routine returns 0,
   * then then signal will be blocked.  Only one block per process is
   * allowed.  priv is a pointer to private data that the notifier routine
 - * can use to determine if the signal should be blocked or not.  */
 -
 + * can use to determine if the signal should be blocked or not.
 + */
  void
  block_all_signals(int (*notifier)(void *priv), void *priv, sigset_t *mask)
  {
@@@ -434,10 -511,9 +511,10 @@@ still_pending
                copy_siginfo(info, &first->info);
                __sigqueue_free(first);
        } else {
 -              /* Ok, it wasn't in the queue.  This must be
 -                 a fast-pathed signal or we must have been
 -                 out of queue space.  So zero out the info.
 +              /*
 +               * Ok, it wasn't in the queue.  This must be
 +               * a fast-pathed signal or we must have been
 +               * out of queue space.  So zero out the info.
                 */
                info->si_signo = sig;
                info->si_errno = 0;
@@@ -469,7 -545,7 +546,7 @@@ static int __dequeue_signal(struct sigp
  }
  
  /*
 - * Dequeue a signal and return the element to the caller, which is 
 + * Dequeue a signal and return the element to the caller, which is
   * expected to free it.
   *
   * All callers have to hold the siglock.
@@@ -491,7 -567,7 +568,7 @@@ int dequeue_signal(struct task_struct *
                 * itimers are process shared and we restart periodic
                 * itimers in the signal delivery path to prevent DoS
                 * attacks in the high resolution timer case. This is
 -               * compliant with the old way of self restarting
 +               * compliant with the old way of self-restarting
                 * itimers, as the SIGALRM is a legacy signal and only
                 * queued once. Changing the restart behaviour to
                 * restart the timer in the signal dequeue path is
                 * is to alert stop-signal processing code when another
                 * processor has come along and cleared the flag.
                 */
-               tsk->signal->flags |= SIGNAL_STOP_DEQUEUED;
+               current->group_stop |= GROUP_STOP_DEQUEUED;
        }
        if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) {
                /*
@@@ -636,27 -712,6 +713,27 @@@ static inline bool si_fromuser(const st
                (!is_si_special(info) && SI_FROMUSER(info));
  }
  
 +/*
 + * called with RCU read lock from check_kill_permission()
 + */
 +static int kill_ok_by_cred(struct task_struct *t)
 +{
 +      const struct cred *cred = current_cred();
 +      const struct cred *tcred = __task_cred(t);
 +
 +      if (cred->user->user_ns == tcred->user->user_ns &&
 +          (cred->euid == tcred->suid ||
 +           cred->euid == tcred->uid ||
 +           cred->uid  == tcred->suid ||
 +           cred->uid  == tcred->uid))
 +              return 1;
 +
 +      if (ns_capable(tcred->user->user_ns, CAP_KILL))
 +              return 1;
 +
 +      return 0;
 +}
 +
  /*
   * Bad permissions for sending the signal
   * - the caller must hold the RCU read lock
  static int check_kill_permission(int sig, struct siginfo *info,
                                 struct task_struct *t)
  {
 -      const struct cred *cred, *tcred;
        struct pid *sid;
        int error;
  
        if (error)
                return error;
  
 -      cred = current_cred();
 -      tcred = __task_cred(t);
        if (!same_thread_group(current, t) &&
 -          (cred->euid ^ tcred->suid) &&
 -          (cred->euid ^ tcred->uid) &&
 -          (cred->uid  ^ tcred->suid) &&
 -          (cred->uid  ^ tcred->uid) &&
 -          !capable(CAP_KILL)) {
 +          !kill_ok_by_cred(t)) {
                switch (sig) {
                case SIGCONT:
                        sid = task_session(t);
@@@ -727,34 -789,14 +804,14 @@@ static int prepare_signal(int sig, stru
        } else if (sig == SIGCONT) {
                unsigned int why;
                /*
-                * Remove all stop signals from all queues,
-                * and wake all threads.
+                * Remove all stop signals from all queues, wake all threads.
                 */
                rm_from_queue(SIG_KERNEL_STOP_MASK, &signal->shared_pending);
                t = p;
                do {
-                       unsigned int state;
+                       task_clear_group_stop_pending(t);
                        rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending);
-                       /*
-                        * If there is a handler for SIGCONT, we must make
-                        * sure that no thread returns to user mode before
-                        * we post the signal, in case it was the only
-                        * thread eligible to run the signal handler--then
-                        * it must not do anything between resuming and
-                        * running the handler.  With the TIF_SIGPENDING
-                        * flag set, the thread will pause and acquire the
-                        * siglock that we hold now and until we've queued
-                        * the pending signal.
-                        *
-                        * Wake up the stopped thread _after_ setting
-                        * TIF_SIGPENDING
-                        */
-                       state = __TASK_STOPPED;
-                       if (sig_user_defined(t, SIGCONT) && !sigismember(&t->blocked, SIGCONT)) {
-                               set_tsk_thread_flag(t, TIF_SIGPENDING);
-                               state |= TASK_INTERRUPTIBLE;
-                       }
-                       wake_up_state(t, state);
+                       wake_up_state(t, __TASK_STOPPED);
                } while_each_thread(p, t);
  
                /*
                        signal->flags = why | SIGNAL_STOP_CONTINUED;
                        signal->group_stop_count = 0;
                        signal->group_exit_code = 0;
-               } else {
-                       /*
-                        * We are not stopped, but there could be a stop
-                        * signal in the middle of being processed after
-                        * being removed from the queue.  Clear that too.
-                        */
-                       signal->flags &= ~SIGNAL_STOP_DEQUEUED;
                }
        }
  
@@@ -875,6 -910,7 +925,7 @@@ static void complete_signal(int sig, st
                        signal->group_stop_count = 0;
                        t = p;
                        do {
+                               task_clear_group_stop_pending(t);
                                sigaddset(&t->pending.signal, SIGKILL);
                                signal_wake_up(t, 1);
                        } while_each_thread(p, t);
@@@ -924,15 -960,14 +975,15 @@@ static int __send_signal(int sig, struc
        if (info == SEND_SIG_FORCED)
                goto out_set;
  
 -      /* Real-time signals must be queued if sent by sigqueue, or
 -         some other real-time mechanism.  It is implementation
 -         defined whether kill() does so.  We attempt to do so, on
 -         the principle of least surprise, but since kill is not
 -         allowed to fail with EAGAIN when low on memory we just
 -         make sure at least one signal gets delivered and don't
 -         pass on the info struct.  */
 -
 +      /*
 +       * Real-time signals must be queued if sent by sigqueue, or
 +       * some other real-time mechanism.  It is implementation
 +       * defined whether kill() does so.  We attempt to do so, on
 +       * the principle of least surprise, but since kill is not
 +       * allowed to fail with EAGAIN when low on memory we just
 +       * make sure at least one signal gets delivered and don't
 +       * pass on the info struct.
 +       */
        if (sig < SIGRTMIN)
                override_rlimit = (is_si_special(info) || info->si_code >= 0);
        else
@@@ -1109,6 -1144,7 +1160,7 @@@ int zap_other_threads(struct task_struc
        p->signal->group_stop_count = 0;
  
        while_each_thread(p, t) {
+               task_clear_group_stop_pending(t);
                count++;
  
                /* Don't bother with already dead threads */
@@@ -1203,7 -1239,8 +1255,7 @@@ retry
        return error;
  }
  
 -int
 -kill_proc_info(int sig, struct siginfo *info, pid_t pid)
 +int kill_proc_info(int sig, struct siginfo *info, pid_t pid)
  {
        int error;
        rcu_read_lock();
@@@ -1300,7 -1337,8 +1352,7 @@@ static int kill_something_info(int sig
   * These are for backward compatibility with the rest of the kernel source.
   */
  
 -int
 -send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
 +int send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
  {
        /*
         * Make sure legacy kernel users don't send in bad values
@@@ -1368,7 -1406,7 +1420,7 @@@ EXPORT_SYMBOL(kill_pid)
   * These functions support sending signals using preallocated sigqueue
   * structures.  This is needed "because realtime applications cannot
   * afford to lose notifications of asynchronous events, like timer
 - * expirations or I/O completions".  In the case of Posix Timers
 + * expirations or I/O completions".  In the case of POSIX Timers
   * we allocate the sigqueue structure from the timer_create.  If this
   * allocation fails we are able to report the failure to the application
   * with an EAGAIN error.
@@@ -1536,16 -1574,30 +1588,30 @@@ int do_notify_parent(struct task_struc
        return ret;
  }
  
- static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
+ /**
+  * do_notify_parent_cldstop - notify parent of stopped/continued state change
+  * @tsk: task reporting the state change
+  * @for_ptracer: the notification is for ptracer
+  * @why: CLD_{CONTINUED|STOPPED|TRAPPED} to report
+  *
+  * Notify @tsk's parent that the stopped/continued state has changed.  If
+  * @for_ptracer is %false, @tsk's group leader notifies to its real parent.
+  * If %true, @tsk reports to @tsk->parent which should be the ptracer.
+  *
+  * CONTEXT:
+  * Must be called with tasklist_lock at least read locked.
+  */
+ static void do_notify_parent_cldstop(struct task_struct *tsk,
+                                    bool for_ptracer, int why)
  {
        struct siginfo info;
        unsigned long flags;
        struct task_struct *parent;
        struct sighand_struct *sighand;
  
-       if (task_ptrace(tsk))
+       if (for_ptracer) {
                parent = tsk->parent;
-       else {
+       } else {
                tsk = tsk->group_leader;
                parent = tsk->real_parent;
        }
        info.si_signo = SIGCHLD;
        info.si_errno = 0;
        /*
 -       * see comment in do_notify_parent() abot the following 3 lines
 +       * see comment in do_notify_parent() about the following 4 lines
         */
        rcu_read_lock();
        info.si_pid = task_pid_nr_ns(tsk, parent->nsproxy->pid_ns);
@@@ -1611,7 -1663,7 +1677,7 @@@ static inline int may_ptrace_stop(void
  }
  
  /*
 - * Return nonzero if there is a SIGKILL that should be waking us up.
 + * Return non-zero if there is a SIGKILL that should be waking us up.
   * Called with the siglock held.
   */
  static int sigkill_pending(struct task_struct *tsk)
                sigismember(&tsk->signal->shared_pending.signal, SIGKILL);
  }
  
+ /*
+  * Test whether the target task of the usual cldstop notification - the
+  * real_parent of @child - is in the same group as the ptracer.
+  */
+ static bool real_parent_is_ptracer(struct task_struct *child)
+ {
+       return same_thread_group(child->parent, child->real_parent);
+ }
+
  /*
   * This must be called with current->sighand->siglock held.
   *
   * If we actually decide not to stop at all because the tracer
   * is gone, we keep current->exit_code unless clear_code.
   */
- static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
+ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
        __releases(&current->sighand->siglock)
        __acquires(&current->sighand->siglock)
  {
+       bool gstop_done = false;
+
        if (arch_ptrace_stop_needed(exit_code, info)) {
                /*
                 * The arch code has something special to do before a
        }
  
        /*
-        * If there is a group stop in progress,
-        * we must participate in the bookkeeping.
+        * If @why is CLD_STOPPED, we're trapping to participate in a group
+        * stop.  Do the bookkeeping.  Note that if SIGCONT was delivered
+        * while siglock was released for the arch hook, PENDING could be
+        * clear now.  We act as if SIGCONT is received after TASK_TRACED
+        * is entered - ignore it.
         */
-       if (current->signal->group_stop_count > 0)
-               --current->signal->group_stop_count;
+       if (why == CLD_STOPPED && (current->group_stop & GROUP_STOP_PENDING))
+               gstop_done = task_participate_group_stop(current);
  
        current->last_siginfo = info;
        current->exit_code = exit_code;
  
-       /* Let the debugger run.  */
-       __set_current_state(TASK_TRACED);
+       /*
+        * TRACED should be visible before TRAPPING is cleared; otherwise,
+        * the tracer might fail do_wait().
+        */
+       set_current_state(TASK_TRACED);
+       /*
+        * We're committing to trapping.  Clearing GROUP_STOP_TRAPPING and
+        * transition to TASK_TRACED should be atomic with respect to
+        * siglock.  This should be done after the arch hook as siglock is
+        * released and regrabbed across it.
+        */
+       task_clear_group_stop_trapping(current);
        spin_unlock_irq(&current->sighand->siglock);
        read_lock(&tasklist_lock);
        if (may_ptrace_stop()) {
-               do_notify_parent_cldstop(current, CLD_TRAPPED);
+               /*
+                * Notify parents of the stop.
+                *
+                * While ptraced, there are two parents - the ptracer and
+                * the real_parent of the group_leader.  The ptracer should
+                * know about every stop while the real parent is only
+                * interested in the completion of group stop.  The states
+                * for the two don't interact with each other.  Notify
+                * separately unless they're gonna be duplicates.
+                */
+               do_notify_parent_cldstop(current, true, why);
+               if (gstop_done && !real_parent_is_ptracer(current))
+                       do_notify_parent_cldstop(current, false, why);
                /*
                 * Don't want to allow preemption here, because
                 * sys_ptrace() needs this task to be inactive.
                /*
                 * By the time we got the lock, our tracer went away.
                 * Don't drop the lock yet, another tracer may come.
+                *
+                * If @gstop_done, the ptracer went away between group stop
+                * completion and here.  During detach, it would have set
+                * GROUP_STOP_PENDING on us and we'll re-enter TASK_STOPPED
+                * in do_signal_stop() on return, so notifying the real
+                * parent of the group stop completion is enough.
                 */
+               if (gstop_done)
+                       do_notify_parent_cldstop(current, false, why);
                __set_current_state(TASK_RUNNING);
                if (clear_code)
                        current->exit_code = 0;
@@@ -1728,79 -1828,128 +1842,128 @@@ void ptrace_notify(int exit_code
  
        /* Let the debugger run.  */
        spin_lock_irq(&current->sighand->siglock);
-       ptrace_stop(exit_code, 1, &info);
+       ptrace_stop(exit_code, CLD_TRAPPED, 1, &info);
        spin_unlock_irq(&current->sighand->siglock);
  }
  
  /*
   * This performs the stopping for SIGSTOP and other stop signals.
   * We have to stop all threads in the thread group.
 - * Returns nonzero if we've actually stopped and released the siglock.
 + * Returns non-zero if we've actually stopped and released the siglock.
   * Returns zero if we didn't stop and still hold the siglock.
   */
  static int do_signal_stop(int signr)
  {
        struct signal_struct *sig = current->signal;
-       int notify;
  
-       if (!sig->group_stop_count) {
+       if (!(current->group_stop & GROUP_STOP_PENDING)) {
+               unsigned int gstop = GROUP_STOP_PENDING | GROUP_STOP_CONSUME;
                struct task_struct *t;
  
-               if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) ||
+               /* signr will be recorded in task->group_stop for retries */
+               WARN_ON_ONCE(signr & ~GROUP_STOP_SIGMASK);
+               if (!likely(current->group_stop & GROUP_STOP_DEQUEUED) ||
                    unlikely(signal_group_exit(sig)))
                        return 0;
                /*
-                * There is no group stop already in progress.
-                * We must initiate one now.
+                * There is no group stop already in progress.  We must
+                * initiate one now.
+                *
+                * While ptraced, a task may be resumed while group stop is
+                * still in effect and then receive a stop signal and
+                * initiate another group stop.  This deviates from the
+                * usual behavior as two consecutive stop signals can't
+                * cause two group stops when !ptraced.  That is why we
+                * also check !task_is_stopped(t) below.
+                *
+                * The condition can be distinguished by testing whether
+                * SIGNAL_STOP_STOPPED is already set.  Don't generate
+                * group_exit_code in such case.
+                *
+                * This is not necessary for SIGNAL_STOP_CONTINUED because
+                * an intervening stop signal is required to cause two
+                * continued events regardless of ptrace.
                 */
-               sig->group_exit_code = signr;
+               if (!(sig->flags & SIGNAL_STOP_STOPPED))
+                       sig->group_exit_code = signr;
+               else
+                       WARN_ON_ONCE(!task_ptrace(current));
  
+               current->group_stop &= ~GROUP_STOP_SIGMASK;
+               current->group_stop |= signr | gstop;
                sig->group_stop_count = 1;
-               for (t = next_thread(current); t != current; t = next_thread(t))
+               for (t = next_thread(current); t != current;
+                    t = next_thread(t)) {
+                       t->group_stop &= ~GROUP_STOP_SIGMASK;
                        /*
                         * Setting state to TASK_STOPPED for a group
                         * stop is always done with the siglock held,
                         * so this check has no races.
                         */
-                       if (!(t->flags & PF_EXITING) &&
-                           !task_is_stopped_or_traced(t)) {
+                       if (!(t->flags & PF_EXITING) && !task_is_stopped(t)) {
+                               t->group_stop |= signr | gstop;
                                sig->group_stop_count++;
                                signal_wake_up(t, 0);
                        }
+               }
        }
-       /*
-        * If there are no other threads in the group, or if there is
-        * a group stop in progress and we are the last to stop, report
-        * to the parent.  When ptraced, every thread reports itself.
-        */
-       notify = sig->group_stop_count == 1 ? CLD_STOPPED : 0;
-       notify = tracehook_notify_jctl(notify, CLD_STOPPED);
-       /*
-        * tracehook_notify_jctl() can drop and reacquire siglock, so
-        * we keep ->group_stop_count != 0 before the call. If SIGCONT
-        * or SIGKILL comes in between ->group_stop_count == 0.
-        */
-       if (sig->group_stop_count) {
-               if (!--sig->group_stop_count)
-                       sig->flags = SIGNAL_STOP_STOPPED;
-               current->exit_code = sig->group_exit_code;
+ retry:
+       if (likely(!task_ptrace(current))) {
+               int notify = 0;
+               /*
+                * If there are no other threads in the group, or if there
+                * is a group stop in progress and we are the last to stop,
+                * report to the parent.
+                */
+               if (task_participate_group_stop(current))
+                       notify = CLD_STOPPED;
                __set_current_state(TASK_STOPPED);
+               spin_unlock_irq(&current->sighand->siglock);
+               /*
+                * Notify the parent of the group stop completion.  Because
+                * we're not holding either the siglock or tasklist_lock
+                * here, ptracer may attach in between; however, this is for
+                * group stop and should always be delivered to the real
+                * parent of the group leader.  The new ptracer will get
+                * its notification when this task transitions into
+                * TASK_TRACED.
+                */
+               if (notify) {
+                       read_lock(&tasklist_lock);
+                       do_notify_parent_cldstop(current, false, notify);
+                       read_unlock(&tasklist_lock);
+               }
+               /* Now we don't run again until woken by SIGCONT or SIGKILL */
+               schedule();
+               spin_lock_irq(&current->sighand->siglock);
+       } else {
+               ptrace_stop(current->group_stop & GROUP_STOP_SIGMASK,
+                           CLD_STOPPED, 0, NULL);
+               current->exit_code = 0;
        }
-       spin_unlock_irq(&current->sighand->siglock);
  
-       if (notify) {
-               read_lock(&tasklist_lock);
-               do_notify_parent_cldstop(current, notify);
-               read_unlock(&tasklist_lock);
+       /*
+        * GROUP_STOP_PENDING could be set if another group stop has
+        * started since being woken up or ptrace wants us to transit
+        * between TASK_STOPPED and TRACED.  Retry group stop.
+        */
+       if (current->group_stop & GROUP_STOP_PENDING) {
+               WARN_ON_ONCE(!(current->group_stop & GROUP_STOP_SIGMASK));
+               goto retry;
        }
  
-       /* Now we don't run again until woken by SIGCONT or SIGKILL */
-       do {
-               schedule();
-       } while (try_to_freeze());
+       /* PTRACE_ATTACH might have raced with task killing, clear trapping */
+       task_clear_group_stop_trapping(current);
+       spin_unlock_irq(&current->sighand->siglock);
  
        tracehook_finish_jctl();
-       current->exit_code = 0;
  
        return 1;
  }
@@@ -1814,7 -1963,7 +1977,7 @@@ static int ptrace_signal(int signr, sig
        ptrace_signal_deliver(regs, cookie);
  
        /* Let the debugger run.  */
-       ptrace_stop(signr, 0, info);
+       ptrace_stop(signr, CLD_TRAPPED, 0, info);
  
        /* We're back.  Did the debugger cancel the sig?  */
        signr = current->exit_code;
  
        current->exit_code = 0;
  
 -      /* Update the siginfo structure if the signal has
 -         changed.  If the debugger wanted something
 -         specific in the siginfo structure then it should
 -         have updated *info via PTRACE_SETSIGINFO.  */
 +      /*
 +       * Update the siginfo structure if the signal has
 +       * changed.  If the debugger wanted something
 +       * specific in the siginfo structure then it should
 +       * have updated *info via PTRACE_SETSIGINFO.
 +       */
        if (signr != info->si_signo) {
                info->si_signo = signr;
                info->si_errno = 0;
@@@ -1869,18 -2016,36 +2032,36 @@@ relock
         * the CLD_ si_code into SIGNAL_CLD_MASK bits.
         */
        if (unlikely(signal->flags & SIGNAL_CLD_MASK)) {
-               int why = (signal->flags & SIGNAL_STOP_CONTINUED)
-                               ? CLD_CONTINUED : CLD_STOPPED;
+               struct task_struct *leader;
+               int why;
+               if (signal->flags & SIGNAL_CLD_CONTINUED)
+                       why = CLD_CONTINUED;
+               else
+                       why = CLD_STOPPED;
                signal->flags &= ~SIGNAL_CLD_MASK;
  
-               why = tracehook_notify_jctl(why, CLD_CONTINUED);
                spin_unlock_irq(&sighand->siglock);
  
-               if (why) {
-                       read_lock(&tasklist_lock);
-                       do_notify_parent_cldstop(current->group_leader, why);
-                       read_unlock(&tasklist_lock);
-               }
+               /*
+                * Notify the parent that we're continuing.  This event is
+                * always per-process and doesn't make a whole lot of sense
+                * for ptracers, who shouldn't consume the state via
+                * wait(2) either, but, for backward compatibility, notify
+                * the ptracer of the group leader too unless it's gonna be
+                * a duplicate.
+                */
+               read_lock(&tasklist_lock);
+               do_notify_parent_cldstop(current, false, why);
+               leader = current->group_leader;
+               if (task_ptrace(leader) && !real_parent_is_ptracer(leader))
+                       do_notify_parent_cldstop(leader, true, why);
+               read_unlock(&tasklist_lock);
                goto relock;
        }
  
                if (unlikely(signr != 0))
                        ka = return_ka;
                else {
-                       if (unlikely(signal->group_stop_count > 0) &&
-                           do_signal_stop(0))
+                       if (unlikely(current->group_stop &
+                                    GROUP_STOP_PENDING) && do_signal_stop(0))
                                goto relock;
  
                        signr = dequeue_signal(current, &current->blocked,
@@@ -2036,8 -2201,7 +2217,8 @@@ void exit_signals(struct task_struct *t
        if (!signal_pending(tsk))
                goto out;
  
 -      /* It could be that __group_complete_signal() choose us to
 +      /*
 +       * It could be that __group_complete_signal() chose us to
         * notify about group-wide signal. Another thread should be
         * woken now to take the signal since we will not.
         */
                if (!signal_pending(t) && !(t->flags & PF_EXITING))
                        recalc_sigpending_and_wake(t);
  
-       if (unlikely(tsk->signal->group_stop_count) &&
-                       !--tsk->signal->group_stop_count) {
-               tsk->signal->flags = SIGNAL_STOP_STOPPED;
-               group_stop = tracehook_notify_jctl(CLD_STOPPED, CLD_STOPPED);
-       }
+       if (unlikely(tsk->group_stop & GROUP_STOP_PENDING) &&
+           task_participate_group_stop(tsk))
+               group_stop = CLD_STOPPED;
  out:
        spin_unlock_irq(&tsk->sighand->siglock);
  
+       /*
+        * If group stop has completed, deliver the notification.  This
+        * should always go to the real parent of the group leader.
+        */
        if (unlikely(group_stop)) {
                read_lock(&tasklist_lock);
-               do_notify_parent_cldstop(tsk, group_stop);
+               do_notify_parent_cldstop(tsk, false, group_stop);
                read_unlock(&tasklist_lock);
        }
  }
@@@ -2075,9 -2241,6 +2258,9 @@@ EXPORT_SYMBOL(unblock_all_signals)
   * System call entry points.
   */
  
 +/**
 + *  sys_restart_syscall - restart a system call
 + */
  SYSCALL_DEFINE0(restart_syscall)
  {
        struct restart_block *restart = &current_thread_info()->restart_block;
@@@ -2131,13 -2294,6 +2314,13 @@@ int sigprocmask(int how, sigset_t *set
        return error;
  }
  
 +/**
 + *  sys_rt_sigprocmask - change the list of currently blocked signals
 + *  @how: whether to add, remove, or set signals
 + *  @set: signals to add, remove, or set, according to @how
 + *  @oset: previous value of signal mask if non-null
 + *  @sigsetsize: size of sigset_t type
 + */
  SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, set,
                sigset_t __user *, oset, size_t, sigsetsize)
  {
@@@ -2196,14 -2352,8 +2379,14 @@@ long do_sigpending(void __user *set, un
  
  out:
        return error;
 -}     
 +}
  
 +/**
 + *  sys_rt_sigpending - examine a pending signal that has been raised
 + *                    while blocked
 + *  @set: stores pending signals
 + *  @sigsetsize: size of sigset_t type or larger
 + */
  SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, set, size_t, sigsetsize)
  {
        return do_sigpending(set, sigsetsize);
@@@ -2252,9 -2402,9 +2435,9 @@@ int copy_siginfo_to_user(siginfo_t __us
                err |= __put_user(from->si_trapno, &to->si_trapno);
  #endif
  #ifdef BUS_MCEERR_AO
 -              /* 
 +              /*
                 * Other callers might not initialize the si_lsb field,
 -               * so check explicitely for the right codes here.
 +               * so check explicitly for the right codes here.
                 */
                if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO)
                        err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb);
  
  #endif
  
 +/**
 + *  sys_rt_sigtimedwait - synchronously wait for queued signals specified
 + *                    in @uthese
 + *  @uthese: queued signals to wait for
 + *  @uinfo: if non-null, the signal's siginfo is returned here
 + *  @uts: upper bound on process time suspension
 + *  @sigsetsize: size of sigset_t type
 + */
  SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese,
                siginfo_t __user *, uinfo, const struct timespec __user *, uts,
                size_t, sigsetsize)
  
        if (copy_from_user(&these, uthese, sizeof(these)))
                return -EFAULT;
 -              
 +
        /*
         * Invert the set of allowed signals to get those we
         * want to block.
                                   + (ts.tv_sec || ts.tv_nsec));
  
                if (timeout) {
 -                      /* None ready -- temporarily unblock those we're
 +                      /*
 +                       * None ready -- temporarily unblock those we're
                         * interested while we are sleeping in so that we'll
 -                       * be awakened when they arrive.  */
 +                       * be awakened when they arrive.
 +                       */
                        current->real_blocked = current->blocked;
                        sigandsets(&current->blocked, &current->blocked, &these);
                        recalc_sigpending();
        return ret;
  }
  
 +/**
 + *  sys_kill - send a signal to a process
 + *  @pid: the PID of the process
 + *  @sig: signal to be sent
 + */
  SYSCALL_DEFINE2(kill, pid_t, pid, int, sig)
  {
        struct siginfo info;
@@@ -2448,11 -2583,7 +2631,11 @@@ SYSCALL_DEFINE3(tgkill, pid_t, tgid, pi
        return do_tkill(tgid, pid, sig);
  }
  
 -/*
 +/**
 + *  sys_tkill - send signal to one specific task
 + *  @pid: the PID of the task
 + *  @sig: signal to be sent
 + *
   *  Send a signal to only one task, even if it's a CLONE_THREAD task.
   */
  SYSCALL_DEFINE2(tkill, pid_t, pid, int, sig)
        return do_tkill(0, pid, sig);
  }
  
 +/**
 + *  sys_rt_sigqueueinfo - send signal information to a process
 + *  @pid: the PID of the process
 + *  @sig: signal to be sent
 + *  @uinfo: signal info to be sent
 + */
  SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig,
                siginfo_t __user *, uinfo)
  {
        /* Not even root can pretend to send signals from the kernel.
         * Nor can they impersonate a kill()/tgkill(), which adds source info.
         */
 -      if (info.si_code != SI_QUEUE) {
 +      if (info.si_code >= 0 || info.si_code == SI_TKILL) {
                /* We used to allow any < 0 si_code */
                WARN_ON_ONCE(info.si_code < 0);
                return -EPERM;
@@@ -2501,7 -2626,7 +2684,7 @@@ long do_rt_tgsigqueueinfo(pid_t tgid, p
        /* Not even root can pretend to send signals from the kernel.
         * Nor can they impersonate a kill()/tgkill(), which adds source info.
         */
 -      if (info->si_code != SI_QUEUE) {
 +      if (info->si_code >= 0 || info->si_code == SI_TKILL) {
                /* We used to allow any < 0 si_code */
                WARN_ON_ONCE(info->si_code < 0);
                return -EPERM;
@@@ -2597,11 -2722,12 +2780,11 @@@ do_sigaltstack (const stack_t __user *u
  
                error = -EINVAL;
                /*
 -               *
 -               * Note - this code used to test ss_flags incorrectly
 +               * Note - this code used to test ss_flags incorrectly:
                 *        old code may have been written using ss_flags==0
                 *        to mean ss_flags==SS_ONSTACK (as this was the only
                 *        way that worked) - this fix preserves that older
 -               *        mechanism
 +               *        mechanism.
                 */
                if (ss_flags != SS_DISABLE && ss_flags != SS_ONSTACK && ss_flags != 0)
                        goto out;
  
  #ifdef __ARCH_WANT_SYS_SIGPENDING
  
 +/**
 + *  sys_sigpending - examine pending signals
 + *  @set: where mask of pending signal is returned
 + */
  SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, set)
  {
        return do_sigpending(set, sizeof(*set));
  #endif
  
  #ifdef __ARCH_WANT_SYS_SIGPROCMASK
 -/* Some platforms have their own version with special arguments others
 -   support only sys_rt_sigprocmask.  */
 +/**
 + *  sys_sigprocmask - examine and change blocked signals
 + *  @how: whether to add, remove, or set signals
 + *  @set: signals to add or remove (if non-null)
 + *  @oset: previous value of signal mask if non-null
 + *
 + * Some platforms have their own version with special arguments;
 + * others support only sys_rt_sigprocmask.
 + */
  
  SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, set,
                old_sigset_t __user *, oset)
  #endif /* __ARCH_WANT_SYS_SIGPROCMASK */
  
  #ifdef __ARCH_WANT_SYS_RT_SIGACTION
 +/**
 + *  sys_rt_sigaction - alter an action taken by a process
 + *  @sig: signal whose action is to be altered
 + *  @act: new action for the signal (if non-null)
 + *  @oact: previous action is returned here (if non-null)
 + *  @sigsetsize: size of sigset_t type
 + */
  SYSCALL_DEFINE4(rt_sigaction, int, sig,
                const struct sigaction __user *, act,
                struct sigaction __user *, oact,
@@@ -2801,12 -2909,6 +2984,12 @@@ SYSCALL_DEFINE0(pause
  #endif
  
  #ifdef __ARCH_WANT_SYS_RT_SIGSUSPEND
 +/**
 + *  sys_rt_sigsuspend - replace the signal mask with the @unewset value
 + *    and suspend until a signal is received
 + *  @unewset: new signal mask value
 + *  @sigsetsize: size of sigset_t type
 + */
  SYSCALL_DEFINE2(rt_sigsuspend, sigset_t __user *, unewset, size_t, sigsetsize)
  {
        sigset_t newset;