Merge branch 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
author     Linus Torvalds <torvalds@linux-foundation.org>
           Thu, 28 Oct 2010 01:48:00 +0000 (18:48 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Thu, 28 Oct 2010 01:48:00 +0000 (18:48 -0700)
* 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (50 commits)
  perf python scripting: Add futex-contention script
  perf python scripting: Fixup cut'n'paste error in sctop script
  perf scripting: Shut up 'perf record' final status
  perf record: Remove newline character from perror() argument
  perf python scripting: Support fedora 11 (audit 1.7.17)
  perf python scripting: Improve the syscalls-by-pid script
  perf python scripting: print the syscall name on sctop
  perf python scripting: Improve the syscalls-counts script
  perf python scripting: Improve the failed-syscalls-by-pid script
  kprobes: Remove redundant text_mutex lock in optimize
  x86/oprofile: Fix uninitialized variable use in debug printk
  tracing: Fix 'faild' -> 'failed' typo
  perf probe: Fix format specified for Dwarf_Off parameter
  perf trace: Fix detection of script extension
  perf trace: Use $PERF_EXEC_PATH in canned report scripts
  perf tools: Document event modifiers
  perf tools: Remove direct slang.h include
  perf_events: Fix for transaction recovery in group_sched_in()
  perf_events: Revert: Fix transaction recovery in group_sched_in()
  perf, x86: Use NUMA aware allocations for PEBS/BTS/DS allocations
  ...

arch/x86/include/asm/msr-index.h
arch/x86/kernel/cpu/perf_event.c
include/linux/interrupt.h
kernel/kprobes.c
kernel/softirq.c
kernel/trace/ring_buffer.c

diff --combined arch/x86/include/asm/msr-index.h
  #define MSR_AMD64_IBSDCLINAD          0xc0011038
  #define MSR_AMD64_IBSDCPHYSAD         0xc0011039
  #define MSR_AMD64_IBSCTL              0xc001103a
+ #define MSR_AMD64_IBSBRTARGET         0xc001103b
  
  /* Fam 10h MSRs */
  #define MSR_FAM10H_MMIO_CONF_BASE     0xc0010058
  #define MSR_IA32_TSC                  0x00000010
  #define MSR_IA32_PLATFORM_ID          0x00000017
  #define MSR_IA32_EBL_CR_POWERON               0x0000002a
 +#define MSR_EBC_FREQUENCY_ID          0x0000002c
  #define MSR_IA32_FEATURE_CONTROL        0x0000003a
  
  #define FEATURE_CONTROL_LOCKED                                (1<<0)
diff --combined arch/x86/kernel/cpu/perf_event.c
@@@ -49,6 -49,7 +49,6 @@@ static unsigned lon
  copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
  {
        unsigned long offset, addr = (unsigned long)from;
 -      int type = in_nmi() ? KM_NMI : KM_IRQ0;
        unsigned long size, len = 0;
        struct page *page;
        void *map;
@@@ -62,9 -63,9 +62,9 @@@
                offset = addr & (PAGE_SIZE - 1);
                size = min(PAGE_SIZE - offset, n - len);
  
 -              map = kmap_atomic(page, type);
 +              map = kmap_atomic(page);
                memcpy(to, map+offset, size);
 -              kunmap_atomic(map, type);
 +              kunmap_atomic(map);
                put_page(page);
  
                len  += size;
@@@ -237,6 -238,7 +237,7 @@@ struct x86_pmu 
         * Intel DebugStore bits
         */
        int             bts, pebs;
+       int             bts_active, pebs_active;
        int             pebs_record_size;
        void            (*drain_pebs)(struct pt_regs *regs);
        struct event_constraint *pebs_constraints;
@@@ -380,7 -382,7 +381,7 @@@ static void release_pmc_hardware(void) 
  
  #endif
  
- static int reserve_ds_buffers(void);
+ static void reserve_ds_buffers(void);
  static void release_ds_buffers(void);
  
  static void hw_perf_event_destroy(struct perf_event *event)
@@@ -477,7 -479,7 +478,7 @@@ static int x86_setup_perfctr(struct per
        if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
            (hwc->sample_period == 1)) {
                /* BTS is not supported by this architecture. */
-               if (!x86_pmu.bts)
+               if (!x86_pmu.bts_active)
                        return -EOPNOTSUPP;
  
                /* BTS is currently only allowed for user-mode. */
@@@ -496,12 -498,13 +497,13 @@@ static int x86_pmu_hw_config(struct per
                int precise = 0;
  
                /* Support for constant skid */
-               if (x86_pmu.pebs)
+               if (x86_pmu.pebs_active) {
                        precise++;
  
-               /* Support for IP fixup */
-               if (x86_pmu.lbr_nr)
-                       precise++;
+                       /* Support for IP fixup */
+                       if (x86_pmu.lbr_nr)
+                               precise++;
+               }
  
                if (event->attr.precise_ip > precise)
                        return -EOPNOTSUPP;
@@@ -543,11 -546,8 +545,8 @@@ static int __x86_pmu_event_init(struct 
                if (atomic_read(&active_events) == 0) {
                        if (!reserve_pmc_hardware())
                                err = -EBUSY;
-                       else {
-                               err = reserve_ds_buffers();
-                               if (err)
-                                       release_pmc_hardware();
-                       }
+                       else
+                               reserve_ds_buffers();
                }
                if (!err)
                        atomic_inc(&active_events);
diff --combined include/linux/interrupt.h
@@@ -410,12 -410,13 +410,12 @@@ extern void open_softirq(int nr, void (
  extern void softirq_init(void);
  static inline void __raise_softirq_irqoff(unsigned int nr)
  {
-       trace_softirq_raise((struct softirq_action *)(unsigned long)nr, NULL);
+       trace_softirq_raise(nr);
        or_softirq_pending(1UL << nr);
  }
  
  extern void raise_softirq_irqoff(unsigned int nr);
  extern void raise_softirq(unsigned int nr);
 -extern void wakeup_softirqd(void);
  
  /* This is the worklist that queues up per-cpu softirq work.
   *
diff --combined kernel/kprobes.c
@@@ -74,7 -74,8 +74,8 @@@ static struct hlist_head kretprobe_inst
  /* NOTE: change this value only with kprobe_mutex held */
  static bool kprobes_all_disarmed;
  
- static DEFINE_MUTEX(kprobe_mutex);    /* Protects kprobe_table */
+ /* This protects kprobe_table and optimizing_list */
+ static DEFINE_MUTEX(kprobe_mutex);
  static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
  static struct {
        spinlock_t lock ____cacheline_aligned_in_smp;
@@@ -595,6 -596,7 +596,7 @@@ static __kprobes void try_to_optimize_k
  }
  
  #ifdef CONFIG_SYSCTL
+ /* This should be called with kprobe_mutex locked */
  static void __kprobes optimize_all_kprobes(void)
  {
        struct hlist_head *head;
                return;
  
        kprobes_allow_optimization = true;
-       mutex_lock(&text_mutex);
        for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
                head = &kprobe_table[i];
                hlist_for_each_entry_rcu(p, node, head, hlist)
                        if (!kprobe_disabled(p))
                                optimize_kprobe(p);
        }
-       mutex_unlock(&text_mutex);
        printk(KERN_INFO "Kprobes globally optimized\n");
  }
  
+ /* This should be called with kprobe_mutex locked */
  static void __kprobes unoptimize_all_kprobes(void)
  {
        struct hlist_head *head;
@@@ -2000,7 -2001,6 +2001,7 @@@ static ssize_t write_enabled_file_bool(
  static const struct file_operations fops_kp = {
        .read =         read_enabled_file_bool,
        .write =        write_enabled_file_bool,
 +      .llseek =       default_llseek,
  };
  
  static int __kprobes debugfs_kprobe_init(void)
diff --combined kernel/softirq.c
@@@ -67,7 -67,7 +67,7 @@@ char *softirq_to_name[NR_SOFTIRQS] = 
   * to the pending events, so lets the scheduler to balance
   * the softirq load for us.
   */
 -void wakeup_softirqd(void)
 +static void wakeup_softirqd(void)
  {
        /* Interrupts are disabled: no need to stop preemption */
        struct task_struct *tsk = __get_cpu_var(ksoftirqd);
@@@ -229,18 -229,20 +229,20 @@@ restart
  
        do {
                if (pending & 1) {
+                       unsigned int vec_nr = h - softirq_vec;
                        int prev_count = preempt_count();
-                       kstat_incr_softirqs_this_cpu(h - softirq_vec);
  
-                       trace_softirq_entry(h, softirq_vec);
+                       kstat_incr_softirqs_this_cpu(vec_nr);
+                       trace_softirq_entry(vec_nr);
                        h->action(h);
-                       trace_softirq_exit(h, softirq_vec);
+                       trace_softirq_exit(vec_nr);
                        if (unlikely(prev_count != preempt_count())) {
-                               printk(KERN_ERR "huh, entered softirq %td %s %p"
+                               printk(KERN_ERR "huh, entered softirq %u %s %p"
                                       "with preempt_count %08x,"
-                                      " exited with %08x?\n", h - softirq_vec,
-                                      softirq_to_name[h - softirq_vec],
-                                      h->action, prev_count, preempt_count());
+                                      " exited with %08x?\n", vec_nr,
+                                      softirq_to_name[vec_nr], h->action,
+                                      prev_count, preempt_count());
                                preempt_count() = prev_count;
                        }
  
diff --combined kernel/trace/ring_buffer.c
@@@ -224,6 -224,9 +224,9 @@@ enum 
        RB_LEN_TIME_STAMP = 16,
  };
  
+ #define skip_time_extend(event) \
+       ((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))
  static inline int rb_null_event(struct ring_buffer_event *event)
  {
        return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
@@@ -248,8 -251,12 +251,12 @@@ rb_event_data_length(struct ring_buffer
        return length + RB_EVNT_HDR_SIZE;
  }
  
- /* inline for ring buffer fast paths */
- static unsigned
+ /*
+  * Return the length of the given event. Will return
+  * the length of the time extend if the event is a
+  * time extend.
+  */
+ static inline unsigned
  rb_event_length(struct ring_buffer_event *event)
  {
        switch (event->type_len) {
        return 0;
  }
  
+ /*
+  * Return total length of time extend and data,
+  *   or just the event length for all other events.
+  */
+ static inline unsigned
+ rb_event_ts_length(struct ring_buffer_event *event)
+ {
+       unsigned len = 0;
+       if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
+               /* time extends include the data event after it */
+               len = RB_LEN_TIME_EXTEND;
+               event = skip_time_extend(event);
+       }
+       return len + rb_event_length(event);
+ }
  /**
   * ring_buffer_event_length - return the length of the event
   * @event: the event to get the length of
+  *
+  * Returns the size of the data load of a data event.
+  * If the event is something other than a data event, it
+  * returns the size of the event itself. With the exception
+  * of a TIME EXTEND, where it still returns the size of the
+  * data load of the data event after it.
   */
  unsigned ring_buffer_event_length(struct ring_buffer_event *event)
  {
-       unsigned length = rb_event_length(event);
+       unsigned length;
+       if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+               event = skip_time_extend(event);
+       length = rb_event_length(event);
        if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
                return length;
        length -= RB_EVNT_HDR_SIZE;
@@@ -294,6 -329,8 +329,8 @@@ EXPORT_SYMBOL_GPL(ring_buffer_event_len
  static void *
  rb_event_data(struct ring_buffer_event *event)
  {
+       if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+               event = skip_time_extend(event);
        BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
        /* If length is in len field, then array[0] has the data */
        if (event->type_len)
@@@ -404,9 -441,6 +441,6 @@@ static inline int test_time_stamp(u64 d
  /* Max payload is BUF_PAGE_SIZE - header (8bytes) */
  #define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))
  
- /* Max number of timestamps that can fit on a page */
- #define RB_TIMESTAMPS_PER_PAGE        (BUF_PAGE_SIZE / RB_LEN_TIME_EXTEND)
  int ring_buffer_print_page_header(struct trace_seq *s)
  {
        struct buffer_data_page field;
@@@ -1546,6 -1580,25 +1580,25 @@@ static void rb_inc_iter(struct ring_buf
        iter->head = 0;
  }
  
+ /* Slow path, do not inline */
+ static noinline struct ring_buffer_event *
+ rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
+ {
+       event->type_len = RINGBUF_TYPE_TIME_EXTEND;
+       /* Not the first event on the page? */
+       if (rb_event_index(event)) {
+               event->time_delta = delta & TS_MASK;
+               event->array[0] = delta >> TS_SHIFT;
+       } else {
+               /* nope, just zero it */
+               event->time_delta = 0;
+               event->array[0] = 0;
+       }
+       return skip_time_extend(event);
+ }
  /**
   * ring_buffer_update_event - update event type and data
   * @event: the even to update
   * data field.
   */
  static void
- rb_update_event(struct ring_buffer_event *event,
-                        unsigned type, unsigned length)
+ rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
+               struct ring_buffer_event *event, unsigned length,
+               int add_timestamp, u64 delta)
  {
-       event->type_len = type;
-       switch (type) {
-       case RINGBUF_TYPE_PADDING:
-       case RINGBUF_TYPE_TIME_EXTEND:
-       case RINGBUF_TYPE_TIME_STAMP:
-               break;
+       /* Only a commit updates the timestamp */
+       if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
+               delta = 0;
  
-       case 0:
-               length -= RB_EVNT_HDR_SIZE;
-               if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
-                       event->array[0] = length;
-               else
-                       event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
-               break;
-       default:
-               BUG();
+       /*
+        * If we need to add a timestamp, then we
+        * add it to the start of the reserved space.
+        */
+       if (unlikely(add_timestamp)) {
+               event = rb_add_time_stamp(event, delta);
+               length -= RB_LEN_TIME_EXTEND;
+               delta = 0;
        }
+       event->time_delta = delta;
+       length -= RB_EVNT_HDR_SIZE;
+       if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
+               event->type_len = 0;
+               event->array[0] = length;
+       } else
+               event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
  }
  
  /*
@@@ -1823,10 -1879,13 +1879,13 @@@ rb_reset_tail(struct ring_buffer_per_cp
        local_sub(length, &tail_page->write);
  }
  
- static struct ring_buffer_event *
+ /*
+  * This is the slow path, force gcc not to inline it.
+  */
+ static noinline struct ring_buffer_event *
  rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
             unsigned long length, unsigned long tail,
-            struct buffer_page *tail_page, u64 *ts)
+            struct buffer_page *tail_page, u64 ts)
  {
        struct buffer_page *commit_page = cpu_buffer->commit_page;
        struct ring_buffer *buffer = cpu_buffer->buffer;
                 * Nested commits always have zero deltas, so
                 * just reread the time stamp
                 */
-               *ts = rb_time_stamp(buffer);
-               next_page->page->time_stamp = *ts;
+               ts = rb_time_stamp(buffer);
+               next_page->page->time_stamp = ts;
        }
  
   out_again:
  
  static struct ring_buffer_event *
  __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
-                 unsigned type, unsigned long length, u64 *ts)
+                 unsigned long length, u64 ts,
+                 u64 delta, int add_timestamp)
  {
        struct buffer_page *tail_page;
        struct ring_buffer_event *event;
        unsigned long tail, write;
  
+       /*
+        * If the time delta since the last event is too big to
+        * hold in the time field of the event, then we append a
+        * TIME EXTEND event ahead of the data event.
+        */
+       if (unlikely(add_timestamp))
+               length += RB_LEN_TIME_EXTEND;
        tail_page = cpu_buffer->tail_page;
        write = local_add_return(length, &tail_page->write);
  
        tail = write - length;
  
        /* See if we shot pass the end of this buffer page */
-       if (write > BUF_PAGE_SIZE)
+       if (unlikely(write > BUF_PAGE_SIZE))
                return rb_move_tail(cpu_buffer, length, tail,
                                    tail_page, ts);
  
  
        event = __rb_page_index(tail_page, tail);
        kmemcheck_annotate_bitfield(event, bitfield);
-       rb_update_event(event, type, length);
+       rb_update_event(cpu_buffer, event, length, add_timestamp, delta);
  
-       /* The passed in type is zero for DATA */
-       if (likely(!type))
-               local_inc(&tail_page->entries);
+       local_inc(&tail_page->entries);
  
        /*
         * If this is the first commit on the page, then update
         * its timestamp.
         */
        if (!tail)
-               tail_page->page->time_stamp = *ts;
+               tail_page->page->time_stamp = ts;
  
        return event;
  }
@@@ -1977,7 -2043,7 +2043,7 @@@ rb_try_to_discard(struct ring_buffer_pe
        unsigned long addr;
  
        new_index = rb_event_index(event);
-       old_index = new_index + rb_event_length(event);
+       old_index = new_index + rb_event_ts_length(event);
        addr = (unsigned long)event;
        addr &= PAGE_MASK;
  
        return 0;
  }
  
- static int
- rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
-                 u64 *ts, u64 *delta)
- {
-       struct ring_buffer_event *event;
-       int ret;
-       WARN_ONCE(*delta > (1ULL << 59),
-                 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
-                 (unsigned long long)*delta,
-                 (unsigned long long)*ts,
-                 (unsigned long long)cpu_buffer->write_stamp);
-       /*
-        * The delta is too big, we to add a
-        * new timestamp.
-        */
-       event = __rb_reserve_next(cpu_buffer,
-                                 RINGBUF_TYPE_TIME_EXTEND,
-                                 RB_LEN_TIME_EXTEND,
-                                 ts);
-       if (!event)
-               return -EBUSY;
-       if (PTR_ERR(event) == -EAGAIN)
-               return -EAGAIN;
-       /* Only a commited time event can update the write stamp */
-       if (rb_event_is_commit(cpu_buffer, event)) {
-               /*
-                * If this is the first on the page, then it was
-                * updated with the page itself. Try to discard it
-                * and if we can't just make it zero.
-                */
-               if (rb_event_index(event)) {
-                       event->time_delta = *delta & TS_MASK;
-                       event->array[0] = *delta >> TS_SHIFT;
-               } else {
-                       /* try to discard, since we do not need this */
-                       if (!rb_try_to_discard(cpu_buffer, event)) {
-                               /* nope, just zero it */
-                               event->time_delta = 0;
-                               event->array[0] = 0;
-                       }
-               }
-               cpu_buffer->write_stamp = *ts;
-               /* let the caller know this was the commit */
-               ret = 1;
-       } else {
-               /* Try to discard the event */
-               if (!rb_try_to_discard(cpu_buffer, event)) {
-                       /* Darn, this is just wasted space */
-                       event->time_delta = 0;
-                       event->array[0] = 0;
-               }
-               ret = 0;
-       }
-       *delta = 0;
-       return ret;
- }
  static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
  {
        local_inc(&cpu_buffer->committing);
        local_inc(&cpu_buffer->commits);
  }
  
- static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
+ static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
  {
        unsigned long commits;
  
@@@ -2110,9 -2113,10 +2113,10 @@@ rb_reserve_next_event(struct ring_buffe
                      unsigned long length)
  {
        struct ring_buffer_event *event;
-       u64 ts, delta = 0;
-       int commit = 0;
+       u64 ts, delta;
        int nr_loops = 0;
+       int add_timestamp;
+       u64 diff;
  
        rb_start_commit(cpu_buffer);
  
  
        length = rb_calculate_event_length(length);
   again:
+       add_timestamp = 0;
+       delta = 0;
        /*
         * We allow for interrupts to reenter here and do a trace.
         * If one does, it will cause this original code to loop
                goto out_fail;
  
        ts = rb_time_stamp(cpu_buffer->buffer);
+       diff = ts - cpu_buffer->write_stamp;
  
-       /*
-        * Only the first commit can update the timestamp.
-        * Yes there is a race here. If an interrupt comes in
-        * just after the conditional and it traces too, then it
-        * will also check the deltas. More than one timestamp may
-        * also be made. But only the entry that did the actual
-        * commit will be something other than zero.
-        */
-       if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page &&
-                  rb_page_write(cpu_buffer->tail_page) ==
-                  rb_commit_index(cpu_buffer))) {
-               u64 diff;
-               diff = ts - cpu_buffer->write_stamp;
-               /* make sure this diff is calculated here */
-               barrier();
-               /* Did the write stamp get updated already? */
-               if (unlikely(ts < cpu_buffer->write_stamp))
-                       goto get_event;
+       /* make sure this diff is calculated here */
+       barrier();
  
+       /* Did the write stamp get updated already? */
+       if (likely(ts >= cpu_buffer->write_stamp)) {
                delta = diff;
                if (unlikely(test_time_stamp(delta))) {
-                       commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
-                       if (commit == -EBUSY)
-                               goto out_fail;
-                       if (commit == -EAGAIN)
-                               goto again;
-                       RB_WARN_ON(cpu_buffer, commit < 0);
+                       WARN_ONCE(delta > (1ULL << 59),
+                                 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
+                                 (unsigned long long)delta,
+                                 (unsigned long long)ts,
+                                 (unsigned long long)cpu_buffer->write_stamp);
+                       add_timestamp = 1;
                }
        }
  
-  get_event:
-       event = __rb_reserve_next(cpu_buffer, 0, length, &ts);
+       event = __rb_reserve_next(cpu_buffer, length, ts,
+                                 delta, add_timestamp);
        if (unlikely(PTR_ERR(event) == -EAGAIN))
                goto again;
  
        if (!event)
                goto out_fail;
  
-       if (!rb_event_is_commit(cpu_buffer, event))
-               delta = 0;
-       event->time_delta = delta;
        return event;
  
   out_fail:
  
  #define TRACE_RECURSIVE_DEPTH 16
  
- static int trace_recursive_lock(void)
+ /* Keep this code out of the fast path cache */
+ static noinline void trace_recursive_fail(void)
  {
-       current->trace_recursion++;
-       if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
-               return 0;
        /* Disable all tracing before we do anything else */
        tracing_off_permanent();
  
                    in_nmi());
  
        WARN_ON_ONCE(1);
+ }
+ static inline int trace_recursive_lock(void)
+ {
+       current->trace_recursion++;
+       if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
+               return 0;
+       trace_recursive_fail();
        return -1;
  }
  
- static void trace_recursive_unlock(void)
+ static inline void trace_recursive_unlock(void)
  {
        WARN_ON_ONCE(!current->trace_recursion);
  
@@@ -2308,12 -2298,28 +2298,28 @@@ static voi
  rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
                      struct ring_buffer_event *event)
  {
+       u64 delta;
        /*
         * The event first in the commit queue updates the
         * time stamp.
         */
-       if (rb_event_is_commit(cpu_buffer, event))
-               cpu_buffer->write_stamp += event->time_delta;
+       if (rb_event_is_commit(cpu_buffer, event)) {
+               /*
+                * A commit event that is first on a page
+                * updates the write timestamp with the page stamp
+                */
+               if (!rb_event_index(event))
+                       cpu_buffer->write_stamp =
+                               cpu_buffer->commit_page->page->time_stamp;
+               else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
+                       delta = event->array[0];
+                       delta <<= TS_SHIFT;
+                       delta += event->time_delta;
+                       cpu_buffer->write_stamp += delta;
+               } else
+                       cpu_buffer->write_stamp += event->time_delta;
+       }
  }
  
  static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
@@@ -2353,6 -2359,9 +2359,9 @@@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_co
  
  static inline void rb_event_discard(struct ring_buffer_event *event)
  {
+       if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+               event = skip_time_extend(event);
        /* array[0] holds the actual length for the discarded event */
        event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
        event->type_len = RINGBUF_TYPE_PADDING;
@@@ -3049,12 -3058,12 +3058,12 @@@ rb_buffer_peek(struct ring_buffer_per_c
  
   again:
        /*
-        * We repeat when a timestamp is encountered. It is possible
-        * to get multiple timestamps from an interrupt entering just
-        * as one timestamp is about to be written, or from discarded
-        * commits. The most that we can have is the number on a single page.
+        * We repeat when a time extend is encountered.
+        * Since the time extend is always attached to a data event,
+        * we should never loop more than once.
+        * (We never hit the following condition more than twice).
         */
-       if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
+       if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
                return NULL;
  
        reader = rb_get_reader_page(cpu_buffer);
@@@ -3130,14 -3139,12 +3139,12 @@@ rb_iter_peek(struct ring_buffer_iter *i
                return NULL;
  
        /*
-        * We repeat when a timestamp is encountered.
-        * We can get multiple timestamps by nested interrupts or also
-        * if filtering is on (discarding commits). Since discarding
-        * commits can be frequent we can get a lot of timestamps.
-        * But we limit them by not adding timestamps if they begin
-        * at the start of a page.
+        * We repeat when a time extend is encountered.
+        * Since the time extend is always attached to a data event,
+        * we should never loop more than once.
+        * (We never hit the following condition more than twice).
         */
-       if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
+       if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
                return NULL;
  
        if (rb_per_cpu_empty(cpu_buffer))
@@@ -3835,7 -3842,8 +3842,8 @@@ int ring_buffer_read_page(struct ring_b
                if (len > (commit - read))
                        len = (commit - read);
  
-               size = rb_event_length(event);
+               /* Always keep the time extend and data together */
+               size = rb_event_ts_length(event);
  
                if (len < size)
                        goto out_unlock;
                                break;
  
                        event = rb_reader_event(cpu_buffer);
-                       size = rb_event_length(event);
+                       /* Always keep the time extend and data together */
+                       size = rb_event_ts_length(event);
                } while (len > size);
  
                /* update bpage */
@@@ -3974,7 -3983,6 +3983,7 @@@ static const struct file_operations rb_
        .open           = tracing_open_generic,
        .read           = rb_simple_read,
        .write          = rb_simple_write,
 +      .llseek         = default_llseek,
  };