* Copyright (C) 1991, 1992 Linus Torvalds
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/utsname.h>
#include <linux/notifier.h>
#include <linux/reboot.h>
#include <linux/prctl.h>
-#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#ifndef GET_FPEXC_CTL
# define GET_FPEXC_CTL(a,b) (-EINVAL)
#endif
+#ifndef GET_ENDIAN
+# define GET_ENDIAN(a,b) (-EINVAL)
+#endif
+#ifndef SET_ENDIAN
+# define SET_ENDIAN(a,b) (-EINVAL)
+#endif
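
These fallbacks keep the PR_GET_ENDIAN/PR_SET_ENDIAN cases in sys_prctl()
compiling on every architecture; an arch that supports per-process endian
switching is expected to provide real definitions before this point. A hedged
sketch of what such an override might look like (get_endian and set_endian
are illustrative names, not part of this patch):

    /* In an architecture's <asm/processor.h> (illustrative only): */
    #define GET_ENDIAN(tsk, adr)	get_endian((tsk), (adr))
    #define SET_ENDIAN(tsk, val)	set_endian((tsk), (val))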
/*
* this is where the system-wide overflow UID and GID are defined, for
unsigned long val, void *v)
{
int ret = NOTIFY_DONE;
- struct notifier_block *nb;
+ struct notifier_block *nb, *next_nb;
nb = rcu_dereference(*nl);
while (nb) {
+ next_nb = rcu_dereference(nb->next);
ret = nb->notifier_call(nb, val, v);
if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK)
break;
- nb = rcu_dereference(nb->next);
+ nb = next_nb;
}
return ret;
}
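
The next pointer is now fetched before the callback runs, so a handler that
removes its own notifier_block from the chain (subject to that chain's
locking rules) cannot leave the walker reading through freed memory. A
minimal hedged sketch of such a one-shot handler, using a hypothetical raw
chain whose callers provide their own locking:

    #include <linux/notifier.h>

    static RAW_NOTIFIER_HEAD(example_chain);	/* hypothetical chain */

    static int oneshot_event(struct notifier_block *nb,
			     unsigned long val, void *v)
    {
	    /* Handle the event once, then drop out of the chain.  Safe
	     * only because notifier_call_chain() cached our next pointer
	     * before calling us. */
	    raw_notifier_chain_unregister(&example_chain, nb);
	    return NOTIFY_OK;
    }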
}
EXPORT_SYMBOL_GPL(emergency_restart);
-void kernel_restart_prepare(char *cmd)
+static void kernel_restart_prepare(char *cmd)
{
blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
system_state = SYSTEM_RESTART;
* Move into place and start executing a preloaded standalone
* executable. If nothing was preloaded return an error.
*/
-void kernel_kexec(void)
+static void kernel_kexec(void)
{
#ifdef CONFIG_KEXEC
struct kimage *image;
machine_kexec(image);
#endif
}
-EXPORT_SYMBOL_GPL(kernel_kexec);
void kernel_shutdown_prepare(enum system_states state)
{
*/
if (tbuf) {
struct tms tmp;
+ struct task_struct *tsk = current;
+ struct task_struct *t;
cputime_t utime, stime, cutime, cstime;
-#ifdef CONFIG_SMP
- if (thread_group_empty(current)) {
- /*
- * Single thread case without the use of any locks.
- *
- * We may race with release_task if two threads are
- * executing. However, release task first adds up the
- * counters (__exit_signal) before removing the task
- * from the process tasklist (__unhash_process).
- * __exit_signal also acquires and releases the
- * siglock which results in the proper memory ordering
- * so that the list modifications are always visible
- * after the counters have been updated.
- *
- * If the counters have been updated by the second thread
- * but the thread has not yet been removed from the list
- * then the other branch will be executing which will
- * block on tasklist_lock until the exit handling of the
- * other task is finished.
- *
- * This also implies that the sighand->siglock cannot
- * be held by another processor. So we can also
- * skip acquiring that lock.
- */
- utime = cputime_add(current->signal->utime, current->utime);
- stime = cputime_add(current->signal->utime, current->stime);
- cutime = current->signal->cutime;
- cstime = current->signal->cstime;
- } else
-#endif
- {
+ spin_lock_irq(&tsk->sighand->siglock);
+ utime = tsk->signal->utime;
+ stime = tsk->signal->stime;
+ t = tsk;
+ do {
+ utime = cputime_add(utime, t->utime);
+ stime = cputime_add(stime, t->stime);
+ t = next_thread(t);
+ } while (t != tsk);
- /* Process with multiple threads */
- struct task_struct *tsk = current;
- struct task_struct *t;
+ cutime = tsk->signal->cutime;
+ cstime = tsk->signal->cstime;
+ spin_unlock_irq(&tsk->sighand->siglock);
- read_lock(&tasklist_lock);
- utime = tsk->signal->utime;
- stime = tsk->signal->stime;
- t = tsk;
- do {
- utime = cputime_add(utime, t->utime);
- stime = cputime_add(stime, t->stime);
- t = next_thread(t);
- } while (t != tsk);
-
- /*
- * While we have tasklist_lock read-locked, no dying thread
- * can be updating current->signal->[us]time. Instead,
- * we got their counts included in the live thread loop.
- * However, another thread can come in right now and
- * do a wait call that updates current->signal->c[us]time.
- * To make sure we always see that pair updated atomically,
- * we take the siglock around fetching them.
- */
- spin_lock_irq(&tsk->sighand->siglock);
- cutime = tsk->signal->cutime;
- cstime = tsk->signal->cstime;
- spin_unlock_irq(&tsk->sighand->siglock);
- read_unlock(&tasklist_lock);
- }
tmp.tms_utime = cputime_to_clock_t(utime);
tmp.tms_stime = cputime_to_clock_t(stime);
tmp.tms_cutime = cputime_to_clock_t(cutime);
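
With the rewrite, utime and stime are summed over every live thread in the
group under the siglock, so the figures times(2) returns include CPU consumed
by sibling threads. A hedged userspace sketch of reading them back:

    #include <sys/times.h>
    #include <unistd.h>
    #include <stdio.h>

    int main(void)
    {
	    struct tms t;
	    long hz = sysconf(_SC_CLK_TCK);

	    if (times(&t) == (clock_t)-1)
		    return 1;
	    /* utime/stime cover the whole thread group;
	     * cutime/cstime cover waited-for children. */
	    printf("user %.2fs sys %.2fs child-user %.2fs child-sys %.2fs\n",
		   (double)t.tms_utime / hz, (double)t.tms_stime / hz,
		   (double)t.tms_cutime / hz, (double)t.tms_cstime / hz);
	    return 0;
    }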
asmlinkage long sys_setsid(void)
{
struct task_struct *group_leader = current->group_leader;
- struct pid *pid;
+ pid_t session;
int err = -EPERM;
mutex_lock(&tty_mutex);
write_lock_irq(&tasklist_lock);
- pid = find_pid(PIDTYPE_PGID, group_leader->pid);
- if (pid)
+ /* Fail if I am already a session leader */
+ if (group_leader->signal->leader)
+ goto out;
+
+ session = group_leader->pid;
+ /* Fail if a process group id already exists that equals the
+ * proposed session id.
+ *
+	 * Don't check when the session id == 1: kernel threads use that
+	 * session id, so the lookup would always find a match, setsid
+	 * would always fail, and init could never become a session leader.
+ */
+ if (session > 1 && find_task_by_pid_type(PIDTYPE_PGID, session))
goto out;
group_leader->signal->leader = 1;
- __set_special_pids(group_leader->pid, group_leader->pid);
+ __set_special_pids(session, session);
group_leader->signal->tty = NULL;
group_leader->signal->tty_old_pgrp = 0;
err = process_group(group_leader);
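
Both rejection paths above reach userspace as EPERM: an existing session
leader trips the signal->leader check, and a process-group leader's pid
already names a process group, so the PIDTYPE_PGID lookup finds a match. The
classic hedged workaround is to fork first and call setsid() from the child,
which cannot be a group leader:

    #include <unistd.h>
    #include <stdlib.h>

    static void become_session_leader(void)
    {
	    pid_t pid = fork();

	    if (pid < 0)
		    exit(EXIT_FAILURE);
	    if (pid > 0)
		    exit(EXIT_SUCCESS);	/* parent exits */
	    /* The child is not a process-group leader, so setsid()
	     * succeeds and makes it session and group leader with no
	     * controlling tty. */
	    if (setsid() < 0)
		    exit(EXIT_FAILURE);
    }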
* fields when reaping, so a sample either gets all the additions of a
* given child after it's reaped, or none so this sample is before reaping.
*
- * tasklist_lock locking optimisation:
- * If we are current and single threaded, we do not need to take the tasklist
- * lock or the siglock. No one else can take our signal_struct away,
- * no one else can reap the children to update signal->c* counters, and
- * no one else can race with the signal-> fields.
- * If we do not take the tasklist_lock, the signal-> fields could be read
- * out of order while another thread was just exiting. So we place a
- * read memory barrier when we avoid the lock. On the writer side,
- * write memory barrier is implied in __exit_signal as __exit_signal releases
- * the siglock spinlock after updating the signal-> fields.
- *
- * We don't really need the siglock when we access the non c* fields
- * of the signal_struct (for RUSAGE_SELF) even in multithreaded
- * case, since we take the tasklist lock for read and the non c* signal->
- * fields are updated only in __exit_signal, which is called with
- * tasklist_lock taken for write, hence these two threads cannot execute
- * concurrently.
+ * Locking:
+ * We need to take the siglock for CHILDREN, SELF and BOTH
+ * for the cases: current multithreaded, non-current single threaded,
+ * and non-current multithreaded. Thread traversal is now safe with
+ * the siglock held.
+ * Strictly speaking, we do not need to take the siglock if we are current and
+ * single threaded, as no one else can take our signal_struct away, no one
+ * else can reap the children to update signal->c* counters, and no one else
+ * can race with the signal-> fields. If we do not take any lock, the
+ * signal-> fields could be read out of order while another thread was just
+ * exiting. So we should place a read memory barrier when we avoid the lock.
+ * On the writer side, write memory barrier is implied in __exit_signal
+ * as __exit_signal releases the siglock spinlock after updating the signal->
+ * fields. But we don't do this yet to keep things simple.
*
*/
struct task_struct *t;
unsigned long flags;
cputime_t utime, stime;
- int need_lock = 0;
memset((char *) r, 0, sizeof *r);
utime = stime = cputime_zero;
- if (p != current || !thread_group_empty(p))
- need_lock = 1;
-
- if (need_lock) {
- read_lock(&tasklist_lock);
- if (unlikely(!p->signal)) {
- read_unlock(&tasklist_lock);
- return;
- }
- } else
- /* See locking comments above */
- smp_rmb();
+ rcu_read_lock();
+ if (!lock_task_sighand(p, &flags)) {
+ rcu_read_unlock();
+ return;
+ }
switch (who) {
case RUSAGE_BOTH:
case RUSAGE_CHILDREN:
- spin_lock_irqsave(&p->sighand->siglock, flags);
utime = p->signal->cutime;
stime = p->signal->cstime;
r->ru_nvcsw = p->signal->cnvcsw;
r->ru_nivcsw = p->signal->cnivcsw;
r->ru_minflt = p->signal->cmin_flt;
r->ru_majflt = p->signal->cmaj_flt;
- spin_unlock_irqrestore(&p->sighand->siglock, flags);
if (who == RUSAGE_CHILDREN)
break;
BUG();
}
- if (need_lock)
- read_unlock(&tasklist_lock);
+ unlock_task_sighand(p, &flags);
+ rcu_read_unlock();
+
cputime_to_timeval(utime, &r->ru_utime);
cputime_to_timeval(stime, &r->ru_stime);
}
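
With lock_task_sighand() guarding both the c* fields and the thread walk, one
lock now covers every case the old comment enumerated, and the semantics
visible to userspace are unchanged. A hedged sketch of the two selectors
sys_getrusage() accepts (RUSAGE_BOTH stays kernel-internal, used on the wait
path):

    #include <sys/resource.h>
    #include <stdio.h>

    static void show(int who, const char *name)
    {
	    struct rusage ru;

	    if (getrusage(who, &ru) == 0)
		    printf("%s: user %ld.%06lds sys %ld.%06lds\n", name,
			   (long)ru.ru_utime.tv_sec, (long)ru.ru_utime.tv_usec,
			   (long)ru.ru_stime.tv_sec, (long)ru.ru_stime.tv_usec);
    }

    int main(void)
    {
	    show(RUSAGE_SELF, "self");		/* calling process */
	    show(RUSAGE_CHILDREN, "children");	/* reaped children */
	    return 0;
    }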
return -EFAULT;
return 0;
}
+ case PR_GET_ENDIAN:
+ error = GET_ENDIAN(current, arg2);
+ break;
+ case PR_SET_ENDIAN:
+ error = SET_ENDIAN(current, arg2);
+ break;
+
default:
error = -EINVAL;
break;
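
On architectures that still use the fallback macros above, both prctl calls
fail with -EINVAL; where real definitions exist, PR_GET_ENDIAN stores the
current mode through the pointer passed as arg2. A hedged userspace sketch of
querying it (constants come via <linux/prctl.h>):

    #include <sys/prctl.h>
    #include <stdio.h>
    #include <errno.h>
    #include <string.h>

    int main(void)
    {
	    int mode;

	    if (prctl(PR_GET_ENDIAN, (unsigned long)&mode) == -1) {
		    /* EINVAL on arches using the fallback macros */
		    fprintf(stderr, "PR_GET_ENDIAN: %s\n", strerror(errno));
		    return 1;
	    }
	    printf("endian mode: %d\n", mode);
	    return 0;
    }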