timers: split process wide cpu clocks/timers
author Peter Zijlstra <a.p.zijlstra@chello.nl>
Thu, 5 Feb 2009 11:24:16 +0000 (12:24 +0100)
committer Ingo Molnar <mingo@elte.hu>
Thu, 5 Feb 2009 12:04:33 +0000 (13:04 +0100)
Change the process wide cpu timers/clocks so that we:

 1) don't mess up the kernel with too many threads,
 2) don't have a per-cpu allocation for each process,
 3) have no impact when not used.

In order to accomplish this we're going to split it into two parts:

 - clocks; which can take all the time they want since they run
           from user context -- i.e. sys_clock_gettime(CLOCK_PROCESS_CPUTIME_ID)

 - timers; which need constant time sampling but since they're
           explicitly used, the user can pay the overhead.

The clock readout will go back to a full sum of the thread group, while the
timers will run off a global 'clock' that only runs when needed, so only
programs that make use of the facility pay the price.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Reviewed-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
include/linux/init_task.h
include/linux/sched.h
kernel/itimer.c
kernel/posix-cpu-timers.c
kernel/sched_stats.h

index ea0ea1a..e752d97 100644 (file)
@@ -48,12 +48,11 @@ extern struct fs_struct init_fs;
        .posix_timers    = LIST_HEAD_INIT(sig.posix_timers),            \
        .cpu_timers     = INIT_CPU_TIMERS(sig.cpu_timers),              \
        .rlim           = INIT_RLIMITS,                                 \
-       .cputime        = { .totals = {                                 \
-               .utime = cputime_zero,                                  \
-               .stime = cputime_zero,                                  \
-               .sum_exec_runtime = 0,                                  \
-               .lock = __SPIN_LOCK_UNLOCKED(sig.cputime.totals.lock),  \
-       }, },                                                           \
+       .cputimer       = {                                             \
+               .cputime = INIT_CPUTIME,                                \
+               .running = 0,                                           \
+               .lock = __SPIN_LOCK_UNLOCKED(sig.cputimer.lock),        \
+       },                                                              \
 }
 
 extern struct nsproxy init_nsproxy;
index 2e0646a..082d761 100644 (file)
@@ -443,7 +443,6 @@ struct pacct_struct {
  * @utime:             time spent in user mode, in &cputime_t units
  * @stime:             time spent in kernel mode, in &cputime_t units
  * @sum_exec_runtime:  total time spent on the CPU, in nanoseconds
- * @lock:              lock for fields in this struct
  *
  * This structure groups together three kinds of CPU time that are
  * tracked for threads and thread groups.  Most things considering
@@ -454,23 +453,33 @@ struct task_cputime {
        cputime_t utime;
        cputime_t stime;
        unsigned long long sum_exec_runtime;
-       spinlock_t lock;
 };
 /* Alternate field names when used to cache expirations. */
 #define prof_exp       stime
 #define virt_exp       utime
 #define sched_exp      sum_exec_runtime
 
+#define INIT_CPUTIME   \
+       (struct task_cputime) {                                 \
+               .utime = cputime_zero,                          \
+               .stime = cputime_zero,                          \
+               .sum_exec_runtime = 0,                          \
+       }
+
 /**
- * struct thread_group_cputime - thread group interval timer counts
- * @totals:            thread group interval timers; substructure for
- *                     uniprocessor kernel, per-cpu for SMP kernel.
+ * struct thread_group_cputimer - thread group interval timer counts
+ * @cputime:           thread group interval timers.
+ * @running:           non-zero when there are timers running and
+ *                     @cputime receives updates.
+ * @lock:              lock for fields in this struct.
  *
  * This structure contains the version of task_cputime, above, that is
- * used for thread group CPU clock calculations.
+ * used for thread group CPU timer calculations.
  */
-struct thread_group_cputime {
-       struct task_cputime totals;
+struct thread_group_cputimer {
+       struct task_cputime cputime;
+       int running;
+       spinlock_t lock;
 };
 
 /*
@@ -519,10 +528,10 @@ struct signal_struct {
        cputime_t it_prof_incr, it_virt_incr;
 
        /*
-        * Thread group totals for process CPU clocks.
-        * See thread_group_cputime(), et al, for details.
+        * Thread group totals for process CPU timers.
+        * See thread_group_cputimer(), et al, for details.
         */
-       struct thread_group_cputime cputime;
+       struct thread_group_cputimer cputimer;
 
        /* Earliest-expiration cache. */
        struct task_cputime cputime_expires;
@@ -2191,27 +2200,26 @@ static inline int spin_needbreak(spinlock_t *lock)
 /*
  * Thread group CPU time accounting.
  */
+void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times);
 
 static inline
-void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
+void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
 {
-       struct task_cputime *totals = &tsk->signal->cputime.totals;
+       struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
        unsigned long flags;
 
-       spin_lock_irqsave(&totals->lock, flags);
-       *times = *totals;
-       spin_unlock_irqrestore(&totals->lock, flags);
+       WARN_ON(!cputimer->running);
+
+       spin_lock_irqsave(&cputimer->lock, flags);
+       *times = cputimer->cputime;
+       spin_unlock_irqrestore(&cputimer->lock, flags);
 }
 
 static inline void thread_group_cputime_init(struct signal_struct *sig)
 {
-       sig->cputime.totals = (struct task_cputime){
-               .utime = cputime_zero,
-               .stime = cputime_zero,
-               .sum_exec_runtime = 0,
-       };
-
-       spin_lock_init(&sig->cputime.totals.lock);
+       sig->cputimer.cputime = INIT_CPUTIME;
+       spin_lock_init(&sig->cputimer.lock);
+       sig->cputimer.running = 0;
 }
 
 static inline void thread_group_cputime_free(struct signal_struct *sig)
index 6a5fe93..58762f7 100644 (file)
@@ -62,7 +62,7 @@ int do_getitimer(int which, struct itimerval *value)
                        struct task_cputime cputime;
                        cputime_t utime;
 
-                       thread_group_cputime(tsk, &cputime);
+                       thread_group_cputimer(tsk, &cputime);
                        utime = cputime.utime;
                        if (cputime_le(cval, utime)) { /* about to fire */
                                cval = jiffies_to_cputime(1);
@@ -82,7 +82,7 @@ int do_getitimer(int which, struct itimerval *value)
                        struct task_cputime times;
                        cputime_t ptime;
 
-                       thread_group_cputime(tsk, &times);
+                       thread_group_cputimer(tsk, &times);
                        ptime = cputime_add(times.utime, times.stime);
                        if (cputime_le(cval, ptime)) { /* about to fire */
                                cval = jiffies_to_cputime(1);
Simple merge
Simple merge