[PATCH] Optimize sys_times for a single thread process
authorChristoph Lameter <christoph@lameter.com>
Thu, 23 Jun 2005 07:10:05 +0000 (00:10 -0700)
committerLinus Torvalds <torvalds@ppc970.osdl.org>
Thu, 23 Jun 2005 16:45:30 +0000 (09:45 -0700)
Avoid taking the tasklist_lock in sys_times if the process is single
threaded.  In a NUMA system taking the tasklist_lock may cause a bouncing
cacheline if multiple independent processes continually call sys_times to
measure their performance.

Signed-off-by: Christoph Lameter <christoph@lameter.com>
Signed-off-by: Shai Fultheim <shai@scalex86.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
kernel/exit.c
kernel/sys.c

index c2bdf6f..3ebcd60 100644 (file)
@@ -72,6 +72,11 @@ repeat:
        BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
        __exit_signal(p);
        __exit_sighand(p);
+       /*
+        * Note that the fastpath in sys_times depends on __exit_signal having
+        * updated the counters before a task is removed from the tasklist of
+        * the process by __unhash_process.
+        */
        __unhash_process(p);
 
        /*
index 0a2c8cd..5a9d6b0 100644 (file)
@@ -894,35 +894,69 @@ asmlinkage long sys_times(struct tms __user * tbuf)
         */
        if (tbuf) {
                struct tms tmp;
-               struct task_struct *tsk = current;
-               struct task_struct *t;
                cputime_t utime, stime, cutime, cstime;
 
-               read_lock(&tasklist_lock);
-               utime = tsk->signal->utime;
-               stime = tsk->signal->stime;
-               t = tsk;
-               do {
-                       utime = cputime_add(utime, t->utime);
-                       stime = cputime_add(stime, t->stime);
-                       t = next_thread(t);
-               } while (t != tsk);
-
-               /*
-                * While we have tasklist_lock read-locked, no dying thread
-                * can be updating current->signal->[us]time.  Instead,
-                * we got their counts included in the live thread loop.
-                * However, another thread can come in right now and
-                * do a wait call that updates current->signal->c[us]time.
-                * To make sure we always see that pair updated atomically,
-                * we take the siglock around fetching them.
-                */
-               spin_lock_irq(&tsk->sighand->siglock);
-               cutime = tsk->signal->cutime;
-               cstime = tsk->signal->cstime;
-               spin_unlock_irq(&tsk->sighand->siglock);
-               read_unlock(&tasklist_lock);
+#ifdef CONFIG_SMP
+               if (thread_group_empty(current)) {
+                       /*
+                        * Single thread case without the use of any locks.
+                        *
+                        * We may race with release_task if two threads are
+                        * executing. However, release task first adds up the
+                        * counters (__exit_signal) before  removing the task
+                        * from the process tasklist (__unhash_process).
+                        * __exit_signal also acquires and releases the
+                        * siglock which results in the proper memory ordering
+                        * so that the list modifications are always visible
+                        * after the counters have been updated.
+                        *
+                        * If the counters have been updated by the second thread
+                        * but the thread has not yet been removed from the list
+                        * then the other branch will be executing which will
+                        * block on tasklist_lock until the exit handling of the
+                        * other task is finished.
+                        *
+                        * This also implies that the sighand->siglock cannot
+                        * be held by another processor. So we can also
+                        * skip acquiring that lock.
+                        */
+                       utime = cputime_add(current->signal->utime, current->utime);
+                       stime = cputime_add(current->signal->utime, current->stime);
+                       cutime = current->signal->cutime;
+                       cstime = current->signal->cstime;
+               } else
+#endif
+               {
+
+                       /* Process with multiple threads */
+                       struct task_struct *tsk = current;
+                       struct task_struct *t;
 
+                       read_lock(&tasklist_lock);
+                       utime = tsk->signal->utime;
+                       stime = tsk->signal->stime;
+                       t = tsk;
+                       do {
+                               utime = cputime_add(utime, t->utime);
+                               stime = cputime_add(stime, t->stime);
+                               t = next_thread(t);
+                       } while (t != tsk);
+
+                       /*
+                        * While we have tasklist_lock read-locked, no dying thread
+                        * can be updating current->signal->[us]time.  Instead,
+                        * we got their counts included in the live thread loop.
+                        * However, another thread can come in right now and
+                        * do a wait call that updates current->signal->c[us]time.
+                        * To make sure we always see that pair updated atomically,
+                        * we take the siglock around fetching them.
+                        */
+                       spin_lock_irq(&tsk->sighand->siglock);
+                       cutime = tsk->signal->cutime;
+                       cstime = tsk->signal->cstime;
+                       spin_unlock_irq(&tsk->sighand->siglock);
+                       read_unlock(&tasklist_lock);
+               }
                tmp.tms_utime = cputime_to_clock_t(utime);
                tmp.tms_stime = cputime_to_clock_t(stime);
                tmp.tms_cutime = cputime_to_clock_t(cutime);