Merge branch 'ptrace' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc into...
author Oleg Nesterov <oleg@redhat.com>
Thu, 7 Apr 2011 18:44:11 +0000 (20:44 +0200)
committer Oleg Nesterov <oleg@redhat.com>
Thu, 7 Apr 2011 18:44:11 +0000 (20:44 +0200)
include/linux/sched.h
kernel/exit.c
kernel/ptrace.c
kernel/signal.c

diff --combined include/linux/sched.h
@@@ -99,7 -99,6 +99,7 @@@ struct robust_list_head
  struct bio_list;
  struct fs_struct;
  struct perf_event_context;
 +struct blk_plug;
  
  /*
   * List of flags we want to share for kernel threads,
@@@ -517,7 -516,7 +517,7 @@@ struct thread_group_cputimer 
  struct autogroup;
  
  /*
 - * NOTE! "signal_struct" does not have it's own
 + * NOTE! "signal_struct" does not have its own
   * locking, because a shared signal_struct always
   * implies a shared sighand_struct, so locking
   * sighand_struct is always a proper superset of
@@@ -653,9 -652,8 +653,8 @@@ struct signal_struct 
   * Bits in flags field of signal_struct.
   */
  #define SIGNAL_STOP_STOPPED   0x00000001 /* job control stop in effect */
- #define SIGNAL_STOP_DEQUEUED  0x00000002 /* stop signal dequeued */
- #define SIGNAL_STOP_CONTINUED 0x00000004 /* SIGCONT since WCONTINUED reap */
- #define SIGNAL_GROUP_EXIT     0x00000008 /* group exit in progress */
+ #define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */
+ #define SIGNAL_GROUP_EXIT     0x00000004 /* group exit in progress */
  /*
   * Pending notifications to parent.
   */
@@@ -1261,6 -1259,7 +1260,7 @@@ struct task_struct 
        int exit_state;
        int exit_code, exit_signal;
        int pdeath_signal;  /*  The signal sent when the parent dies  */
+       unsigned int group_stop;        /* GROUP_STOP_*, siglock protected */
        /* ??? */
        unsigned int personality;
        unsigned did_exec:1;
  /* stacked block device info */
        struct bio_list *bio_list;
  
 +#ifdef CONFIG_BLOCK
 +/* stack plugging */
 +      struct blk_plug *plug;
 +#endif
 +
  /* VM state */
        struct reclaim_state *reclaim_state;
  
        struct memcg_batch_info {
                int do_batch;   /* incremented when batch uncharge started */
                struct mem_cgroup *memcg; /* target memcg of uncharge */
 -              unsigned long bytes;            /* uncharged usage */
 -              unsigned long memsw_bytes; /* uncharged mem+swap usage */
 +              unsigned long nr_pages; /* uncharged usage */
 +              unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
        } memcg_batch;
  #endif
  };
@@@ -1777,6 -1771,17 +1777,17 @@@ extern void thread_group_times(struct t
  #define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
  #define used_math() tsk_used_math(current)
  
+ /*
+  * task->group_stop flags
+  */
+ #define GROUP_STOP_SIGMASK    0xffff    /* signr of the last group stop */
+ #define GROUP_STOP_PENDING    (1 << 16) /* task should stop for group stop */
+ #define GROUP_STOP_CONSUME    (1 << 17) /* consume group stop count */
+ #define GROUP_STOP_TRAPPING   (1 << 18) /* switching from STOPPED to TRACED */
+ #define GROUP_STOP_DEQUEUED   (1 << 19) /* stop signal dequeued */
+ extern void task_clear_group_stop_pending(struct task_struct *task);
+
  #ifdef CONFIG_PREEMPT_RCU
  
  #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */
diff --combined kernel/exit.c
@@@ -908,7 -908,6 +908,7 @@@ NORET_TYPE void do_exit(long code
        profile_task_exit(tsk);
  
        WARN_ON(atomic_read(&tsk->fs_excl));
 +      WARN_ON(blk_needs_flush_plug(tsk));
  
        if (unlikely(in_interrupt()))
                panic("Aiee, killing interrupt handler!");
@@@ -1538,33 -1537,83 +1538,83 @@@ static int wait_consider_task(struct wa
                return 0;
        }
  
-       if (likely(!ptrace) && unlikely(task_ptrace(p))) {
+       /* dead body doesn't have much to contribute */
+       if (p->exit_state == EXIT_DEAD)
+               return 0;
+       /* slay zombie? */
+       if (p->exit_state == EXIT_ZOMBIE) {
                /*
-                * This child is hidden by ptrace.
-                * We aren't allowed to see it now, but eventually we will.
+                * A zombie ptracee is only visible to its ptracer.
+                * Notification and reaping will be cascaded to the real
+                * parent when the ptracer detaches.
                 */
-               wo->notask_error = 0;
-               return 0;
-       }
+               if (likely(!ptrace) && unlikely(task_ptrace(p))) {
+                       /* it will become visible, clear notask_error */
+                       wo->notask_error = 0;
+                       return 0;
+               }
  
-       if (p->exit_state == EXIT_DEAD)
-               return 0;
+               /* we don't reap group leaders with subthreads */
+               if (!delay_group_leader(p))
+                       return wait_task_zombie(wo, p);
  
-       /*
-        * We don't reap group leaders with subthreads.
-        */
-       if (p->exit_state == EXIT_ZOMBIE && !delay_group_leader(p))
-               return wait_task_zombie(wo, p);
+               /*
+                * Allow access to stopped/continued state via zombie by
+                * falling through.  Clearing of notask_error is complex.
+                *
+                * When !@ptrace:
+                *
+                * If WEXITED is set, notask_error should naturally be
+                * cleared.  If not, subset of WSTOPPED|WCONTINUED is set,
+                * so, if there are live subthreads, there are events to
+                * wait for.  If all subthreads are dead, it's still safe
+                * to clear - this function will be called again in a finite
+                * amount of time once all the subthreads are released and
+                * will then return without clearing.
+                *
+                * When @ptrace:
+                *
+                * Stopped state is per-task and thus can't change once the
+                * target task dies.  Only continued and exited can happen.
+                * Clear notask_error if WCONTINUED | WEXITED.
+                */
+               if (likely(!ptrace) || (wo->wo_flags & (WCONTINUED | WEXITED)))
+                       wo->notask_error = 0;
+       } else {
+               /*
+                * If @p is ptraced by a task in its real parent's group,
+                * hide group stop/continued state when looking at @p as
+                * the real parent; otherwise, a single stop can be
+                * reported twice as group and ptrace stops.
+                *
+                * If a ptracer wants to distinguish the two events for its
+                * own children, it should create a separate process which
+                * takes the role of real parent.
+                */
+               if (likely(!ptrace) && task_ptrace(p) &&
+                   same_thread_group(p->parent, p->real_parent))
+                       return 0;
+               /*
+                * @p is alive and it's gonna stop, continue or exit, so
+                * there always is something to wait for.
+                */
+               wo->notask_error = 0;
+       }
  
        /*
-        * It's stopped or running now, so it might
-        * later continue, exit, or stop again.
+        * Wait for stopped.  Depending on @ptrace, different stopped state
+        * is used and the two don't interact with each other.
         */
-       wo->notask_error = 0;
        if (task_stopped_code(p, ptrace))
                return wait_task_stopped(wo, ptrace, p);
  
+       /*
+        * Wait for continued.  There's only one continued state and the
+        * ptracer can consume it which can confuse the real parent.  Don't
+        * use WCONTINUED from ptracer.  You don't need or want it.
+        */
        return wait_task_continued(wo, p);
  }
  
diff --combined kernel/ptrace.c
@@@ -37,35 -37,33 +37,33 @@@ void __ptrace_link(struct task_struct *
        child->parent = new_parent;
  }
  
- /*
-  * Turn a tracing stop into a normal stop now, since with no tracer there
-  * would be no way to wake it up with SIGCONT or SIGKILL.  If there was a
-  * signal sent that would resume the child, but didn't because it was in
-  * TASK_TRACED, resume it now.
-  * Requires that irqs be disabled.
-  */
- static void ptrace_untrace(struct task_struct *child)
- {
-       spin_lock(&child->sighand->siglock);
-       if (task_is_traced(child)) {
-               /*
-                * If the group stop is completed or in progress,
-                * this thread was already counted as stopped.
-                */
-               if (child->signal->flags & SIGNAL_STOP_STOPPED ||
-                   child->signal->group_stop_count)
-                       __set_task_state(child, TASK_STOPPED);
-               else
-                       signal_wake_up(child, 1);
-       }
-       spin_unlock(&child->sighand->siglock);
- }
- /*
-  * unptrace a task: move it back to its original parent and
-  * remove it from the ptrace list.
+ /**
+  * __ptrace_unlink - unlink ptracee and restore its execution state
+  * @child: ptracee to be unlinked
   *
-  * Must be called with the tasklist lock write-held.
+  * Remove @child from the ptrace list, move it back to the original parent,
+  * and restore the execution state so that it conforms to the group stop
+  * state.
+  *
+  * Unlinking can happen via two paths - explicit PTRACE_DETACH or ptracer
+  * exiting.  For PTRACE_DETACH, unless the ptracee has been killed between
+  * ptrace_check_attach() and here, it's guaranteed to be in TASK_TRACED.
+  * If the ptracer is exiting, the ptracee can be in any state.
+  *
+  * After detach, the ptracee should be in a state which conforms to the
+  * group stop.  If the group is stopped or in the process of stopping, the
+  * ptracee should be put into TASK_STOPPED; otherwise, it should be woken
+  * up from TASK_TRACED.
+  *
+  * If the ptracee is in TASK_TRACED and needs to be moved to TASK_STOPPED,
+  * it goes through TRACED -> RUNNING -> STOPPED transition which is similar
+  * to but in the opposite direction of what happens while attaching to a
+  * stopped task.  However, in this direction, the intermediate RUNNING
+  * state is not hidden even from the current ptracer and if it immediately
+  * re-attaches and performs a WNOHANG wait(2), it may fail.
+  *
+  * CONTEXT:
+  * write_lock_irq(tasklist_lock)
   */
  void __ptrace_unlink(struct task_struct *child)
  {
        child->parent = child->real_parent;
        list_del_init(&child->ptrace_entry);
  
-       if (task_is_traced(child))
-               ptrace_untrace(child);
+       spin_lock(&child->sighand->siglock);
+       /*
+        * Reinstate GROUP_STOP_PENDING if group stop is in effect and
+        * @child isn't dead.
+        */
+       if (!(child->flags & PF_EXITING) &&
+           (child->signal->flags & SIGNAL_STOP_STOPPED ||
+            child->signal->group_stop_count))
+               child->group_stop |= GROUP_STOP_PENDING;
+       /*
+        * If transition to TASK_STOPPED is pending or in TASK_TRACED, kick
+        * @child in the butt.  Note that @resume should be used iff @child
+        * is in TASK_TRACED; otherwise, we might unduly disrupt
+        * TASK_KILLABLE sleeps.
+        */
+       if (child->group_stop & GROUP_STOP_PENDING || task_is_traced(child))
+               signal_wake_up(child, task_is_traced(child));
+       spin_unlock(&child->sighand->siglock);
  }
  
  /*
@@@ -95,16 -112,14 +112,14 @@@ int ptrace_check_attach(struct task_str
         */
        read_lock(&tasklist_lock);
        if ((child->ptrace & PT_PTRACED) && child->parent == current) {
-               ret = 0;
                /*
                 * child->sighand can't be NULL, release_task()
                 * does ptrace_unlink() before __exit_signal().
                 */
                spin_lock_irq(&child->sighand->siglock);
-               if (task_is_stopped(child))
-                       child->state = TASK_TRACED;
-               else if (!task_is_traced(child) && !kill)
-                       ret = -ESRCH;
+               WARN_ON_ONCE(task_is_stopped(child));
+               if (task_is_traced(child) || kill)
+                       ret = 0;
                spin_unlock_irq(&child->sighand->siglock);
        }
        read_unlock(&tasklist_lock);
@@@ -134,24 -149,21 +149,24 @@@ int __ptrace_may_access(struct task_str
                return 0;
        rcu_read_lock();
        tcred = __task_cred(task);
 -      if ((cred->uid != tcred->euid ||
 -           cred->uid != tcred->suid ||
 -           cred->uid != tcred->uid  ||
 -           cred->gid != tcred->egid ||
 -           cred->gid != tcred->sgid ||
 -           cred->gid != tcred->gid) &&
 -          !capable(CAP_SYS_PTRACE)) {
 -              rcu_read_unlock();
 -              return -EPERM;
 -      }
 +      if (cred->user->user_ns == tcred->user->user_ns &&
 +          (cred->uid == tcred->euid &&
 +           cred->uid == tcred->suid &&
 +           cred->uid == tcred->uid  &&
 +           cred->gid == tcred->egid &&
 +           cred->gid == tcred->sgid &&
 +           cred->gid == tcred->gid))
 +              goto ok;
 +      if (ns_capable(tcred->user->user_ns, CAP_SYS_PTRACE))
 +              goto ok;
 +      rcu_read_unlock();
 +      return -EPERM;
 +ok:
        rcu_read_unlock();
        smp_rmb();
        if (task->mm)
                dumpable = get_dumpable(task->mm);
 -      if (!dumpable && !capable(CAP_SYS_PTRACE))
 +      if (!dumpable && !task_ns_capable(task, CAP_SYS_PTRACE))
                return -EPERM;
  
        return security_ptrace_access_check(task, mode);
@@@ -168,6 -180,7 +183,7 @@@ bool ptrace_may_access(struct task_stru
  
  static int ptrace_attach(struct task_struct *task)
  {
+       bool wait_trap = false;
        int retval;
  
        audit_ptrace(task);
                goto unlock_tasklist;
  
        task->ptrace = PT_PTRACED;
 -      if (capable(CAP_SYS_PTRACE))
 +      if (task_ns_capable(task, CAP_SYS_PTRACE))
                task->ptrace |= PT_PTRACE_CAP;
  
        __ptrace_link(task, current);
        send_sig_info(SIGSTOP, SEND_SIG_FORCED, task);
  
+       spin_lock(&task->sighand->siglock);
+       /*
+        * If the task is already STOPPED, set GROUP_STOP_PENDING and
+        * TRAPPING, and kick it so that it transits to TRACED.  TRAPPING
+        * will be cleared if the child completes the transition or any
+        * event which clears the group stop states happens.  We'll wait
+        * for the transition to complete before returning from this
+        * function.
+        *
+        * This hides STOPPED -> RUNNING -> TRACED transition from the
+        * attaching thread but a different thread in the same group can
+        * still observe the transient RUNNING state.  IOW, if another
+        * thread's WNOHANG wait(2) on the stopped tracee races against
+        * ATTACH, the wait(2) may fail due to the transient RUNNING.
+        *
+        * The following task_is_stopped() test is safe as both transitions
+        * in and out of STOPPED are protected by siglock.
+        */
+       if (task_is_stopped(task)) {
+               task->group_stop |= GROUP_STOP_PENDING | GROUP_STOP_TRAPPING;
+               signal_wake_up(task, 1);
+               wait_trap = true;
+       }
+       spin_unlock(&task->sighand->siglock);
        retval = 0;
  unlock_tasklist:
        write_unlock_irq(&tasklist_lock);
  unlock_creds:
        mutex_unlock(&task->signal->cred_guard_mutex);
  out:
+       if (wait_trap)
+               wait_event(current->signal->wait_chldexit,
+                          !(task->group_stop & GROUP_STOP_TRAPPING));
        return retval;
  }
  
@@@ -315,8 -358,6 +361,6 @@@ static int ptrace_detach(struct task_st
        if (child->ptrace) {
                child->exit_code = data;
                dead = __ptrace_detach(current, child);
-               if (!child->exit_state)
-                       wake_up_state(child, TASK_TRACED | TASK_STOPPED);
        }
        write_unlock_irq(&tasklist_lock);
  
diff --combined kernel/signal.c
@@@ -124,7 -124,7 +124,7 @@@ static inline int has_pending_signals(s
  
  static int recalc_sigpending_tsk(struct task_struct *t)
  {
-       if (t->signal->group_stop_count > 0 ||
+       if ((t->group_stop & GROUP_STOP_PENDING) ||
            PENDING(&t->pending, &t->blocked) ||
            PENDING(&t->signal->shared_pending, &t->blocked)) {
                set_tsk_thread_flag(t, TIF_SIGPENDING);
@@@ -223,10 -223,87 +223,87 @@@ static inline void print_dropped_signal
                                current->comm, current->pid, sig);
  }
  
+ /**
+  * task_clear_group_stop_trapping - clear group stop trapping bit
+  * @task: target task
+  *
+  * If GROUP_STOP_TRAPPING is set, a ptracer is waiting for us.  Clear it
+  * and wake up the ptracer.  Note that we don't need any further locking.
+  * @task->siglock guarantees that @task->parent points to the ptracer.
+  *
+  * CONTEXT:
+  * Must be called with @task->sighand->siglock held.
+  */
+ static void task_clear_group_stop_trapping(struct task_struct *task)
+ {
+       if (unlikely(task->group_stop & GROUP_STOP_TRAPPING)) {
+               task->group_stop &= ~GROUP_STOP_TRAPPING;
+               __wake_up_sync(&task->parent->signal->wait_chldexit,
+                              TASK_UNINTERRUPTIBLE, 1);
+       }
+ }
+
+ /**
+  * task_clear_group_stop_pending - clear pending group stop
+  * @task: target task
+  *
+  * Clear group stop states for @task.
+  *
+  * CONTEXT:
+  * Must be called with @task->sighand->siglock held.
+  */
+ void task_clear_group_stop_pending(struct task_struct *task)
+ {
+       task->group_stop &= ~(GROUP_STOP_PENDING | GROUP_STOP_CONSUME |
+                             GROUP_STOP_DEQUEUED);
+ }
+
+ /**
+  * task_participate_group_stop - participate in a group stop
+  * @task: task participating in a group stop
+  *
+  * @task has GROUP_STOP_PENDING set and is participating in a group stop.
+  * Group stop states are cleared and the group stop count is consumed if
+  * %GROUP_STOP_CONSUME was set.  If the consumption completes the group
+  * stop, the appropriate %SIGNAL_* flags are set.
+  *
+  * CONTEXT:
+  * Must be called with @task->sighand->siglock held.
+  *
+  * RETURNS:
+  * %true if group stop completion should be notified to the parent, %false
+  * otherwise.
+  */
+ static bool task_participate_group_stop(struct task_struct *task)
+ {
+       struct signal_struct *sig = task->signal;
+       bool consume = task->group_stop & GROUP_STOP_CONSUME;
+       WARN_ON_ONCE(!(task->group_stop & GROUP_STOP_PENDING));
+       task_clear_group_stop_pending(task);
+       if (!consume)
+               return false;
+       if (!WARN_ON_ONCE(sig->group_stop_count == 0))
+               sig->group_stop_count--;
+       /*
+        * Tell the caller to notify completion iff we are entering into a
+        * fresh group stop.  Read comment in do_signal_stop() for details.
+        */
+       if (!sig->group_stop_count && !(sig->flags & SIGNAL_STOP_STOPPED)) {
+               sig->flags = SIGNAL_STOP_STOPPED;
+               return true;
+       }
+       return false;
+ }
+
  /*
   * allocate a new signal queue record
   * - this may be called without locks if and only if t == current, otherwise an
 - *   appopriate lock must be held to stop the target task from exiting
 + *   appropriate lock must be held to stop the target task from exiting
   */
  static struct sigqueue *
  __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit)
@@@ -375,15 -452,15 +452,15 @@@ int unhandled_signal(struct task_struc
        return !tracehook_consider_fatal_signal(tsk, sig);
  }
  
 -
 -/* Notify the system that a driver wants to block all signals for this
 +/*
 + * Notify the system that a driver wants to block all signals for this
   * process, and wants to be notified if any signals at all were to be
   * sent/acted upon.  If the notifier routine returns non-zero, then the
   * signal will be acted upon after all.  If the notifier routine returns 0,
   * then then signal will be blocked.  Only one block per process is
   * allowed.  priv is a pointer to private data that the notifier routine
 - * can use to determine if the signal should be blocked or not.  */
 -
 + * can use to determine if the signal should be blocked or not.
 + */
  void
  block_all_signals(int (*notifier)(void *priv), void *priv, sigset_t *mask)
  {
@@@ -434,10 -511,9 +511,10 @@@ still_pending
                copy_siginfo(info, &first->info);
                __sigqueue_free(first);
        } else {
 -              /* Ok, it wasn't in the queue.  This must be
 -                 a fast-pathed signal or we must have been
 -                 out of queue space.  So zero out the info.
 +              /*
 +               * Ok, it wasn't in the queue.  This must be
 +               * a fast-pathed signal or we must have been
 +               * out of queue space.  So zero out the info.
                 */
                info->si_signo = sig;
                info->si_errno = 0;
@@@ -469,7 -545,7 +546,7 @@@ static int __dequeue_signal(struct sigp
  }
  
  /*
 - * Dequeue a signal and return the element to the caller, which is 
 + * Dequeue a signal and return the element to the caller, which is
   * expected to free it.
   *
   * All callers have to hold the siglock.
@@@ -491,7 -567,7 +568,7 @@@ int dequeue_signal(struct task_struct *
                 * itimers are process shared and we restart periodic
                 * itimers in the signal delivery path to prevent DoS
                 * attacks in the high resolution timer case. This is
 -               * compliant with the old way of self restarting
 +               * compliant with the old way of self-restarting
                 * itimers, as the SIGALRM is a legacy signal and only
                 * queued once. Changing the restart behaviour to
                 * restart the timer in the signal dequeue path is
                 * is to alert stop-signal processing code when another
                 * processor has come along and cleared the flag.
                 */
-               tsk->signal->flags |= SIGNAL_STOP_DEQUEUED;
+               current->group_stop |= GROUP_STOP_DEQUEUED;
        }
        if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) {
                /*
@@@ -636,27 -712,6 +713,27 @@@ static inline bool si_fromuser(const st
                (!is_si_special(info) && SI_FROMUSER(info));
  }
  
 +/*
 + * called with RCU read lock from check_kill_permission()
 + */
 +static int kill_ok_by_cred(struct task_struct *t)
 +{
 +      const struct cred *cred = current_cred();
 +      const struct cred *tcred = __task_cred(t);
 +
 +      if (cred->user->user_ns == tcred->user->user_ns &&
 +          (cred->euid == tcred->suid ||
 +           cred->euid == tcred->uid ||
 +           cred->uid  == tcred->suid ||
 +           cred->uid  == tcred->uid))
 +              return 1;
 +
 +      if (ns_capable(tcred->user->user_ns, CAP_KILL))
 +              return 1;
 +
 +      return 0;
 +}
 +
  /*
   * Bad permissions for sending the signal
   * - the caller must hold the RCU read lock
  static int check_kill_permission(int sig, struct siginfo *info,
                                 struct task_struct *t)
  {
 -      const struct cred *cred, *tcred;
        struct pid *sid;
        int error;
  
        if (error)
                return error;
  
 -      cred = current_cred();
 -      tcred = __task_cred(t);
        if (!same_thread_group(current, t) &&
 -          (cred->euid ^ tcred->suid) &&
 -          (cred->euid ^ tcred->uid) &&
 -          (cred->uid  ^ tcred->suid) &&
 -          (cred->uid  ^ tcred->uid) &&
 -          !capable(CAP_KILL)) {
 +          !kill_ok_by_cred(t)) {
                switch (sig) {
                case SIGCONT:
                        sid = task_session(t);
@@@ -727,34 -789,14 +804,14 @@@ static int prepare_signal(int sig, stru
        } else if (sig == SIGCONT) {
                unsigned int why;
                /*
-                * Remove all stop signals from all queues,
-                * and wake all threads.
+                * Remove all stop signals from all queues, wake all threads.
                 */
                rm_from_queue(SIG_KERNEL_STOP_MASK, &signal->shared_pending);
                t = p;
                do {
-                       unsigned int state;
+                       task_clear_group_stop_pending(t);
                        rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending);
-                       /*
-                        * If there is a handler for SIGCONT, we must make
-                        * sure that no thread returns to user mode before
-                        * we post the signal, in case it was the only
-                        * thread eligible to run the signal handler--then
-                        * it must not do anything between resuming and
-                        * running the handler.  With the TIF_SIGPENDING
-                        * flag set, the thread will pause and acquire the
-                        * siglock that we hold now and until we've queued
-                        * the pending signal.
-                        *
-                        * Wake up the stopped thread _after_ setting
-                        * TIF_SIGPENDING
-                        */
-                       state = __TASK_STOPPED;
-                       if (sig_user_defined(t, SIGCONT) && !sigismember(&t->blocked, SIGCONT)) {
-                               set_tsk_thread_flag(t, TIF_SIGPENDING);
-                               state |= TASK_INTERRUPTIBLE;
-                       }
-                       wake_up_state(t, state);
+                       wake_up_state(t, __TASK_STOPPED);
                } while_each_thread(p, t);
  
                /*
                        signal->flags = why | SIGNAL_STOP_CONTINUED;
                        signal->group_stop_count = 0;
                        signal->group_exit_code = 0;
-               } else {
-                       /*
-                        * We are not stopped, but there could be a stop
-                        * signal in the middle of being processed after
-                        * being removed from the queue.  Clear that too.
-                        */
-                       signal->flags &= ~SIGNAL_STOP_DEQUEUED;
                }
        }
  
@@@ -875,6 -910,7 +925,7 @@@ static void complete_signal(int sig, st
                        signal->group_stop_count = 0;
                        t = p;
                        do {
+                               task_clear_group_stop_pending(t);
                                sigaddset(&t->pending.signal, SIGKILL);
                                signal_wake_up(t, 1);
                        } while_each_thread(p, t);
@@@ -924,15 -960,14 +975,15 @@@ static int __send_signal(int sig, struc
        if (info == SEND_SIG_FORCED)
                goto out_set;
  
 -      /* Real-time signals must be queued if sent by sigqueue, or
 -         some other real-time mechanism.  It is implementation
 -         defined whether kill() does so.  We attempt to do so, on
 -         the principle of least surprise, but since kill is not
 -         allowed to fail with EAGAIN when low on memory we just
 -         make sure at least one signal gets delivered and don't
 -         pass on the info struct.  */
 -
 +      /*
 +       * Real-time signals must be queued if sent by sigqueue, or
 +       * some other real-time mechanism.  It is implementation
 +       * defined whether kill() does so.  We attempt to do so, on
 +       * the principle of least surprise, but since kill is not
 +       * allowed to fail with EAGAIN when low on memory we just
 +       * make sure at least one signal gets delivered and don't
 +       * pass on the info struct.
 +       */
        if (sig < SIGRTMIN)
                override_rlimit = (is_si_special(info) || info->si_code >= 0);
        else
@@@ -1109,6 -1144,7 +1160,7 @@@ int zap_other_threads(struct task_struc
        p->signal->group_stop_count = 0;
  
        while_each_thread(p, t) {
+               task_clear_group_stop_pending(t);
                count++;
  
                /* Don't bother with already dead threads */
@@@ -1203,7 -1239,8 +1255,7 @@@ retry
        return error;
  }
  
 -int
 -kill_proc_info(int sig, struct siginfo *info, pid_t pid)
 +int kill_proc_info(int sig, struct siginfo *info, pid_t pid)
  {
        int error;
        rcu_read_lock();
@@@ -1300,7 -1337,8 +1352,7 @@@ static int kill_something_info(int sig
   * These are for backward compatibility with the rest of the kernel source.
   */
  
 -int
 -send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
 +int send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
  {
        /*
         * Make sure legacy kernel users don't send in bad values
@@@ -1368,7 -1406,7 +1420,7 @@@ EXPORT_SYMBOL(kill_pid)
   * These functions support sending signals using preallocated sigqueue
   * structures.  This is needed "because realtime applications cannot
   * afford to lose notifications of asynchronous events, like timer
 - * expirations or I/O completions".  In the case of Posix Timers
 + * expirations or I/O completions".  In the case of POSIX Timers
   * we allocate the sigqueue structure from the timer_create.  If this
   * allocation fails we are able to report the failure to the application
   * with an EAGAIN error.
@@@ -1536,16 -1574,30 +1588,30 @@@ int do_notify_parent(struct task_struc
        return ret;
  }
  
- static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
+ /**
+  * do_notify_parent_cldstop - notify parent of stopped/continued state change
+  * @tsk: task reporting the state change
+  * @for_ptracer: the notification is for ptracer
+  * @why: CLD_{CONTINUED|STOPPED|TRAPPED} to report
+  *
+  * Notify @tsk's parent that the stopped/continued state has changed.  If
+  * @for_ptracer is %false, @tsk's group leader notifies to its real parent.
+  * If %true, @tsk reports to @tsk->parent which should be the ptracer.
+  *
+  * CONTEXT:
+  * Must be called with tasklist_lock at least read locked.
+  */
+ static void do_notify_parent_cldstop(struct task_struct *tsk,
+                                    bool for_ptracer, int why)
  {
        struct siginfo info;
        unsigned long flags;
        struct task_struct *parent;
        struct sighand_struct *sighand;
  
-       if (task_ptrace(tsk))
+       if (for_ptracer) {
                parent = tsk->parent;
-       else {
+       } else {
                tsk = tsk->group_leader;
                parent = tsk->real_parent;
        }
        info.si_signo = SIGCHLD;
        info.si_errno = 0;
        /*
 -       * see comment in do_notify_parent() abot the following 3 lines
 +       * see comment in do_notify_parent() about the following 4 lines
         */
        rcu_read_lock();
        info.si_pid = task_pid_nr_ns(tsk, parent->nsproxy->pid_ns);
@@@ -1611,7 -1663,7 +1677,7 @@@ static inline int may_ptrace_stop(void
  }
  
  /*
 - * Return nonzero if there is a SIGKILL that should be waking us up.
 + * Return non-zero if there is a SIGKILL that should be waking us up.
   * Called with the siglock held.
   */
  static int sigkill_pending(struct task_struct *tsk)
                sigismember(&tsk->signal->shared_pending.signal, SIGKILL);
  }
  
+ /*
+  * Test whether the target task of the usual cldstop notification - the
+  * real_parent of @child - is in the same group as the ptracer.
+  */
+ static bool real_parent_is_ptracer(struct task_struct *child)
+ {
+       return same_thread_group(child->parent, child->real_parent);
+ }
+
  /*
   * This must be called with current->sighand->siglock held.
   *
   * If we actually decide not to stop at all because the tracer
   * is gone, we keep current->exit_code unless clear_code.
   */
- static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
+ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
        __releases(&current->sighand->siglock)
        __acquires(&current->sighand->siglock)
  {
+       bool gstop_done = false;
+
        if (arch_ptrace_stop_needed(exit_code, info)) {
                /*
                 * The arch code has something special to do before a
        }
  
        /*
-        * If there is a group stop in progress,
-        * we must participate in the bookkeeping.
+        * If @why is CLD_STOPPED, we're trapping to participate in a group
+        * stop.  Do the bookkeeping.  Note that if SIGCONT was delivered
+        * while siglock was released for the arch hook, PENDING could be
+        * clear now.  We act as if SIGCONT is received after TASK_TRACED
+        * is entered - ignore it.
         */
-       if (current->signal->group_stop_count > 0)
-               --current->signal->group_stop_count;
+       if (why == CLD_STOPPED && (current->group_stop & GROUP_STOP_PENDING))
+               gstop_done = task_participate_group_stop(current);
  
        current->last_siginfo = info;
        current->exit_code = exit_code;
  
-       /* Let the debugger run.  */
-       __set_current_state(TASK_TRACED);
+       /*
+        * TRACED should be visible before TRAPPING is cleared; otherwise,
+        * the tracer might fail do_wait().
+        */
+       set_current_state(TASK_TRACED);
+       /*
+        * We're committing to trapping.  Clearing GROUP_STOP_TRAPPING and
+        * transition to TASK_TRACED should be atomic with respect to
+        * siglock.  This should be done after the arch hook as siglock is
+        * released and regrabbed across it.
+        */
+       task_clear_group_stop_trapping(current);
        spin_unlock_irq(&current->sighand->siglock);
        read_lock(&tasklist_lock);
        if (may_ptrace_stop()) {
-               do_notify_parent_cldstop(current, CLD_TRAPPED);
+               /*
+                * Notify parents of the stop.
+                *
+                * While ptraced, there are two parents - the ptracer and
+                * the real_parent of the group_leader.  The ptracer should
+                * know about every stop while the real parent is only
+                * interested in the completion of group stop.  The states
+                * for the two don't interact with each other.  Notify
+                * separately unless they're gonna be duplicates.
+                */
+               do_notify_parent_cldstop(current, true, why);
+               if (gstop_done && !real_parent_is_ptracer(current))
+                       do_notify_parent_cldstop(current, false, why);
                /*
                 * Don't want to allow preemption here, because
                 * sys_ptrace() needs this task to be inactive.
                /*
                 * By the time we got the lock, our tracer went away.
                 * Don't drop the lock yet, another tracer may come.
+                *
+                * If @gstop_done, the ptracer went away between group stop
+                * completion and here.  During detach, it would have set
+                * GROUP_STOP_PENDING on us and we'll re-enter TASK_STOPPED
+                * in do_signal_stop() on return, so notifying the real
+                * parent of the group stop completion is enough.
                 */
+               if (gstop_done)
+                       do_notify_parent_cldstop(current, false, why);
                __set_current_state(TASK_RUNNING);
                if (clear_code)
                        current->exit_code = 0;
@@@ -1728,79 -1828,128 +1842,128 @@@ void ptrace_notify(int exit_code
  
        /* Let the debugger run.  */
        spin_lock_irq(&current->sighand->siglock);
-       ptrace_stop(exit_code, 1, &info);
+       ptrace_stop(exit_code, CLD_TRAPPED, 1, &info);
        spin_unlock_irq(&current->sighand->siglock);
  }
  
  /*
   * This performs the stopping for SIGSTOP and other stop signals.
   * We have to stop all threads in the thread group.
 - * Returns nonzero if we've actually stopped and released the siglock.
 + * Returns non-zero if we've actually stopped and released the siglock.
   * Returns zero if we didn't stop and still hold the siglock.
   */
  static int do_signal_stop(int signr)
  {
        struct signal_struct *sig = current->signal;
-       int notify;
  
-       if (!sig->group_stop_count) {
+       if (!(current->group_stop & GROUP_STOP_PENDING)) {
+               unsigned int gstop = GROUP_STOP_PENDING | GROUP_STOP_CONSUME;
                struct task_struct *t;
  
-               if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) ||
+               /* signr will be recorded in task->group_stop for retries */
+               WARN_ON_ONCE(signr & ~GROUP_STOP_SIGMASK);
+               if (!likely(current->group_stop & GROUP_STOP_DEQUEUED) ||
                    unlikely(signal_group_exit(sig)))
                        return 0;
                /*
-                * There is no group stop already in progress.
-                * We must initiate one now.
+                * There is no group stop already in progress.  We must
+                * initiate one now.
+                *
+                * While ptraced, a task may be resumed while group stop is
+                * still in effect and then receive a stop signal and
+                * initiate another group stop.  This deviates from the
+                * usual behavior as two consecutive stop signals can't
+                * cause two group stops when !ptraced.  That is why we
+                * also check !task_is_stopped(t) below.
+                *
+                * The condition can be distinguished by testing whether
+                * SIGNAL_STOP_STOPPED is already set.  Don't generate
+                * group_exit_code in such case.
+                *
+                * This is not necessary for SIGNAL_STOP_CONTINUED because
+                * an intervening stop signal is required to cause two
+                * continued events regardless of ptrace.
                 */
-               sig->group_exit_code = signr;
+               if (!(sig->flags & SIGNAL_STOP_STOPPED))
+                       sig->group_exit_code = signr;
+               else
+                       WARN_ON_ONCE(!task_ptrace(current));
  
+               current->group_stop &= ~GROUP_STOP_SIGMASK;
+               current->group_stop |= signr | gstop;
                sig->group_stop_count = 1;
-               for (t = next_thread(current); t != current; t = next_thread(t))
+               for (t = next_thread(current); t != current;
+                    t = next_thread(t)) {
+                       t->group_stop &= ~GROUP_STOP_SIGMASK;
                        /*
                         * Setting state to TASK_STOPPED for a group
                         * stop is always done with the siglock held,
                         * so this check has no races.
                         */
-                       if (!(t->flags & PF_EXITING) &&
-                           !task_is_stopped_or_traced(t)) {
+                       if (!(t->flags & PF_EXITING) && !task_is_stopped(t)) {
+                               t->group_stop |= signr | gstop;
                                sig->group_stop_count++;
                                signal_wake_up(t, 0);
                        }
+               }
        }
-       /*
-        * If there are no other threads in the group, or if there is
-        * a group stop in progress and we are the last to stop, report
-        * to the parent.  When ptraced, every thread reports itself.
-        */
-       notify = sig->group_stop_count == 1 ? CLD_STOPPED : 0;
-       notify = tracehook_notify_jctl(notify, CLD_STOPPED);
-       /*
-        * tracehook_notify_jctl() can drop and reacquire siglock, so
-        * we keep ->group_stop_count != 0 before the call. If SIGCONT
-        * or SIGKILL comes in between ->group_stop_count == 0.
-        */
-       if (sig->group_stop_count) {
-               if (!--sig->group_stop_count)
-                       sig->flags = SIGNAL_STOP_STOPPED;
-               current->exit_code = sig->group_exit_code;
+ retry:
+       if (likely(!task_ptrace(current))) {
+               int notify = 0;
+               /*
+                * If there are no other threads in the group, or if there
+                * is a group stop in progress and we are the last to stop,
+                * report to the parent.
+                */
+               if (task_participate_group_stop(current))
+                       notify = CLD_STOPPED;
                __set_current_state(TASK_STOPPED);
+               spin_unlock_irq(&current->sighand->siglock);
+               /*
+                * Notify the parent of the group stop completion.  Because
+                * we're not holding either the siglock or tasklist_lock
+                * here, ptracer may attach in between; however, this is for
+                * group stop and should always be delivered to the real
+                * parent of the group leader.  The new ptracer will get
+                * its notification when this task transitions into
+                * TASK_TRACED.
+                */
+               if (notify) {
+                       read_lock(&tasklist_lock);
+                       do_notify_parent_cldstop(current, false, notify);
+                       read_unlock(&tasklist_lock);
+               }
+               /* Now we don't run again until woken by SIGCONT or SIGKILL */
+               schedule();
+               spin_lock_irq(&current->sighand->siglock);
+       } else {
+               ptrace_stop(current->group_stop & GROUP_STOP_SIGMASK,
+                           CLD_STOPPED, 0, NULL);
+               current->exit_code = 0;
        }
-       spin_unlock_irq(&current->sighand->siglock);
  
-       if (notify) {
-               read_lock(&tasklist_lock);
-               do_notify_parent_cldstop(current, notify);
-               read_unlock(&tasklist_lock);
+       /*
+        * GROUP_STOP_PENDING could be set if another group stop has
+        * started since being woken up or ptrace wants us to transit
+        * between TASK_STOPPED and TRACED.  Retry group stop.
+        */
+       if (current->group_stop & GROUP_STOP_PENDING) {
+               WARN_ON_ONCE(!(current->group_stop & GROUP_STOP_SIGMASK));
+               goto retry;
        }
  
-       /* Now we don't run again until woken by SIGCONT or SIGKILL */
-       do {
-               schedule();
-       } while (try_to_freeze());
+       /* PTRACE_ATTACH might have raced with task killing, clear trapping */
+       task_clear_group_stop_trapping(current);
+       spin_unlock_irq(&current->sighand->siglock);
  
        tracehook_finish_jctl();
-       current->exit_code = 0;
  
        return 1;
  }
@@@ -1814,7 -1963,7 +1977,7 @@@ static int ptrace_signal(int signr, sig
        ptrace_signal_deliver(regs, cookie);
  
        /* Let the debugger run.  */
-       ptrace_stop(signr, 0, info);
+       ptrace_stop(signr, CLD_TRAPPED, 0, info);
  
        /* We're back.  Did the debugger cancel the sig?  */
        signr = current->exit_code;
  
        current->exit_code = 0;
  
 -      /* Update the siginfo structure if the signal has
 -         changed.  If the debugger wanted something
 -         specific in the siginfo structure then it should
 -         have updated *info via PTRACE_SETSIGINFO.  */
 +      /*
 +       * Update the siginfo structure if the signal has
 +       * changed.  If the debugger wanted something
 +       * specific in the siginfo structure then it should
 +       * have updated *info via PTRACE_SETSIGINFO.
 +       */
        if (signr != info->si_signo) {
                info->si_signo = signr;
                info->si_errno = 0;
@@@ -1869,18 -2016,36 +2032,36 @@@ relock
         * the CLD_ si_code into SIGNAL_CLD_MASK bits.
         */
        if (unlikely(signal->flags & SIGNAL_CLD_MASK)) {
-               int why = (signal->flags & SIGNAL_STOP_CONTINUED)
-                               ? CLD_CONTINUED : CLD_STOPPED;
+               struct task_struct *leader;
+               int why;
+               if (signal->flags & SIGNAL_CLD_CONTINUED)
+                       why = CLD_CONTINUED;
+               else
+                       why = CLD_STOPPED;
                signal->flags &= ~SIGNAL_CLD_MASK;
  
-               why = tracehook_notify_jctl(why, CLD_CONTINUED);
                spin_unlock_irq(&sighand->siglock);
  
-               if (why) {
-                       read_lock(&tasklist_lock);
-                       do_notify_parent_cldstop(current->group_leader, why);
-                       read_unlock(&tasklist_lock);
-               }
+               /*
+                * Notify the parent that we're continuing.  This event is
+                * always per-process and doesn't make a whole lot of sense
+                * for ptracers, who shouldn't consume the state via
+                * wait(2) either, but, for backward compatibility, notify
+                * the ptracer of the group leader too unless it's gonna be
+                * a duplicate.
+                */
+               read_lock(&tasklist_lock);
+               do_notify_parent_cldstop(current, false, why);
+               leader = current->group_leader;
+               if (task_ptrace(leader) && !real_parent_is_ptracer(leader))
+                       do_notify_parent_cldstop(leader, true, why);
+               read_unlock(&tasklist_lock);
                goto relock;
        }
  
                if (unlikely(signr != 0))
                        ka = return_ka;
                else {
-                       if (unlikely(signal->group_stop_count > 0) &&
-                           do_signal_stop(0))
+                       if (unlikely(current->group_stop &
+                                    GROUP_STOP_PENDING) && do_signal_stop(0))
                                goto relock;
  
                        signr = dequeue_signal(current, &current->blocked,
@@@ -2036,8 -2201,7 +2217,8 @@@ void exit_signals(struct task_struct *t
        if (!signal_pending(tsk))
                goto out;
  
 -      /* It could be that __group_complete_signal() choose us to
 +      /*
 +       * It could be that __group_complete_signal() chose us to
         * notify about group-wide signal. Another thread should be
         * woken now to take the signal since we will not.
         */
                if (!signal_pending(t) && !(t->flags & PF_EXITING))
                        recalc_sigpending_and_wake(t);
  
-       if (unlikely(tsk->signal->group_stop_count) &&
-                       !--tsk->signal->group_stop_count) {
-               tsk->signal->flags = SIGNAL_STOP_STOPPED;
-               group_stop = tracehook_notify_jctl(CLD_STOPPED, CLD_STOPPED);
-       }
+       if (unlikely(tsk->group_stop & GROUP_STOP_PENDING) &&
+           task_participate_group_stop(tsk))
+               group_stop = CLD_STOPPED;
  out:
        spin_unlock_irq(&tsk->sighand->siglock);
  
+       /*
+        * If group stop has completed, deliver the notification.  This
+        * should always go to the real parent of the group leader.
+        */
        if (unlikely(group_stop)) {
                read_lock(&tasklist_lock);
-               do_notify_parent_cldstop(tsk, group_stop);
+               do_notify_parent_cldstop(tsk, false, group_stop);
                read_unlock(&tasklist_lock);
        }
  }
@@@ -2075,9 -2241,6 +2258,9 @@@ EXPORT_SYMBOL(unblock_all_signals)
   * System call entry points.
   */
  
 +/**
 + *  sys_restart_syscall - restart a system call
 + */
  SYSCALL_DEFINE0(restart_syscall)
  {
        struct restart_block *restart = &current_thread_info()->restart_block;
@@@ -2131,13 -2294,6 +2314,13 @@@ int sigprocmask(int how, sigset_t *set
        return error;
  }
  
 +/**
 + *  sys_rt_sigprocmask - change the list of currently blocked signals
 + *  @how: whether to add, remove, or set signals
 + *  @set: signals to add, remove, or set, according to @how
 + *  @oset: previous value of signal mask if non-null
 + *  @sigsetsize: size of sigset_t type
 + */
  SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, set,
                sigset_t __user *, oset, size_t, sigsetsize)
  {
@@@ -2196,14 -2352,8 +2379,14 @@@ long do_sigpending(void __user *set, un
  
  out:
        return error;
 -}     
 +}
  
 +/**
 + *  sys_rt_sigpending - examine a pending signal that has been raised
 + *                    while blocked
 + *  @set: stores pending signals
 + *  @sigsetsize: size of sigset_t type or larger
 + */
  SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, set, size_t, sigsetsize)
  {
        return do_sigpending(set, sigsetsize);
@@@ -2252,9 -2402,9 +2435,9 @@@ int copy_siginfo_to_user(siginfo_t __us
                err |= __put_user(from->si_trapno, &to->si_trapno);
  #endif
  #ifdef BUS_MCEERR_AO
 -              /* 
 +              /*
                 * Other callers might not initialize the si_lsb field,
 -               * so check explicitely for the right codes here.
 +               * so check explicitly for the right codes here.
                 */
                if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO)
                        err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb);
  
  #endif
  
 +/**
 + *  sys_rt_sigtimedwait - synchronously wait for queued signals specified
 + *                    in @uthese
 + *  @uthese: queued signals to wait for
 + *  @uinfo: if non-null, the signal's siginfo is returned here
 + *  @uts: upper bound on process time suspension
 + *  @sigsetsize: size of sigset_t type
 + */
  SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese,
                siginfo_t __user *, uinfo, const struct timespec __user *, uts,
                size_t, sigsetsize)
  
        if (copy_from_user(&these, uthese, sizeof(these)))
                return -EFAULT;
 -              
 +
        /*
         * Invert the set of allowed signals to get those we
         * want to block.
                                   + (ts.tv_sec || ts.tv_nsec));
  
                if (timeout) {
 -                      /* None ready -- temporarily unblock those we're
 +                      /*
 +                       * None ready -- temporarily unblock those we're
                         * interested while we are sleeping in so that we'll
 -                       * be awakened when they arrive.  */
 +                       * be awakened when they arrive.
 +                       */
                        current->real_blocked = current->blocked;
                        sigandsets(&current->blocked, &current->blocked, &these);
                        recalc_sigpending();
        return ret;
  }
  
 +/**
 + *  sys_kill - send a signal to a process
 + *  @pid: the PID of the process
 + *  @sig: signal to be sent
 + */
  SYSCALL_DEFINE2(kill, pid_t, pid, int, sig)
  {
        struct siginfo info;
@@@ -2448,11 -2583,7 +2631,11 @@@ SYSCALL_DEFINE3(tgkill, pid_t, tgid, pi
        return do_tkill(tgid, pid, sig);
  }
  
 -/*
 +/**
 + *  sys_tkill - send signal to one specific task
 + *  @pid: the PID of the task
 + *  @sig: signal to be sent
 + *
   *  Send a signal to only one task, even if it's a CLONE_THREAD task.
   */
  SYSCALL_DEFINE2(tkill, pid_t, pid, int, sig)
        return do_tkill(0, pid, sig);
  }
  
 +/**
 + *  sys_rt_sigqueueinfo - send signal information to a process
 + *  @pid: the PID of the process
 + *  @sig: signal to be sent
 + *  @uinfo: signal info to be sent
 + */
  SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig,
                siginfo_t __user *, uinfo)
  {
        /* Not even root can pretend to send signals from the kernel.
         * Nor can they impersonate a kill()/tgkill(), which adds source info.
         */
 -      if (info.si_code != SI_QUEUE) {
 +      if (info.si_code >= 0 || info.si_code == SI_TKILL) {
                /* We used to allow any < 0 si_code */
                WARN_ON_ONCE(info.si_code < 0);
                return -EPERM;
@@@ -2501,7 -2626,7 +2684,7 @@@ long do_rt_tgsigqueueinfo(pid_t tgid, p
        /* Not even root can pretend to send signals from the kernel.
         * Nor can they impersonate a kill()/tgkill(), which adds source info.
         */
 -      if (info->si_code != SI_QUEUE) {
 +      if (info->si_code >= 0 || info->si_code == SI_TKILL) {
                /* We used to allow any < 0 si_code */
                WARN_ON_ONCE(info->si_code < 0);
                return -EPERM;
@@@ -2597,11 -2722,12 +2780,11 @@@ do_sigaltstack (const stack_t __user *u
  
                error = -EINVAL;
                /*
 -               *
 -               * Note - this code used to test ss_flags incorrectly
 +               * Note - this code used to test ss_flags incorrectly:
                 *        old code may have been written using ss_flags==0
                 *        to mean ss_flags==SS_ONSTACK (as this was the only
                 *        way that worked) - this fix preserves that older
 -               *        mechanism
 +               *        mechanism.
                 */
                if (ss_flags != SS_DISABLE && ss_flags != SS_ONSTACK && ss_flags != 0)
                        goto out;
  
  #ifdef __ARCH_WANT_SYS_SIGPENDING
  
 +/**
 + *  sys_sigpending - examine pending signals
 + *  @set: where mask of pending signal is returned
 + */
  SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, set)
  {
        return do_sigpending(set, sizeof(*set));
  #endif
  
  #ifdef __ARCH_WANT_SYS_SIGPROCMASK
 -/* Some platforms have their own version with special arguments others
 -   support only sys_rt_sigprocmask.  */
 +/**
 + *  sys_sigprocmask - examine and change blocked signals
 + *  @how: whether to add, remove, or set signals
 + *  @set: signals to add or remove (if non-null)
 + *  @oset: previous value of signal mask if non-null
 + *
 + * Some platforms have their own version with special arguments;
 + * others support only sys_rt_sigprocmask.
 + */
  
  SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, set,
                old_sigset_t __user *, oset)
  #endif /* __ARCH_WANT_SYS_SIGPROCMASK */
  
  #ifdef __ARCH_WANT_SYS_RT_SIGACTION
 +/**
 + *  sys_rt_sigaction - alter an action taken by a process
 + *  @sig: signal whose action is to be altered
 + *  @act: new action for the signal (if non-null)
 + *  @oact: previous action is returned here (if non-null)
 + *  @sigsetsize: size of sigset_t type
 + */
  SYSCALL_DEFINE4(rt_sigaction, int, sig,
                const struct sigaction __user *, act,
                struct sigaction __user *, oact,
@@@ -2801,12 -2909,6 +2984,12 @@@ SYSCALL_DEFINE0(pause
  #endif
  
  #ifdef __ARCH_WANT_SYS_RT_SIGSUSPEND
 +/**
 + *  sys_rt_sigsuspend - replace the signal mask with the @unewset value
 + *    and suspend until a signal is received
 + *  @unewset: new signal mask value
 + *  @sigsetsize: size of sigset_t type
 + */
  SYSCALL_DEFINE2(rt_sigsuspend, sigset_t __user *, unewset, size_t, sigsetsize)
  {
        sigset_t newset;