Merge branch 'perf/core' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux...
[pandora-kernel.git] / tools/perf/util/session.c
index ddf288f..8f83a18 100644
@@ -5,6 +5,7 @@
 #include <byteswap.h>
 #include <unistd.h>
 #include <sys/types.h>
+#include <sys/mman.h>
 
 #include "session.h"
 #include "sort.h"
@@ -14,6 +15,16 @@ static int perf_session__open(struct perf_session *self, bool force)
 {
        struct stat input_stat;
 
+       if (!strcmp(self->filename, "-")) {
+               self->fd_pipe = true;
+               self->fd = STDIN_FILENO;
+
+               if (perf_header__read(self, self->fd) < 0) {
+                       pr_err("incompatible file format");
+                       return -1;
+               }
+
+               return 0;
+       }
+
        self->fd = open(self->filename, O_RDONLY);
        if (self->fd < 0) {
                pr_err("failed to open file: %s", self->filename);
@@ -38,7 +49,7 @@ static int perf_session__open(struct perf_session *self, bool force)
                goto out_close;
        }
 
-       if (perf_header__read(&self->header, self->fd) < 0) {
+       if (perf_header__read(self, self->fd) < 0) {
                pr_err("incompatible file format");
                goto out_close;
        }
@@ -52,7 +63,21 @@ out_close:
        return -1;
 }
 
-struct perf_session *perf_session__new(const char *filename, int mode, bool force)
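+/* Refresh the cached sample_type after the header has been (re)read. */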
+void perf_session__update_sample_type(struct perf_session *self)
+{
+       self->sample_type = perf_header__sample_type(&self->header);
+}
+
+int perf_session__create_kernel_maps(struct perf_session *self)
+{
+       int ret = machine__create_kernel_maps(&self->host_machine);
+
+       if (ret >= 0)
+               ret = machines__create_guest_kernel_maps(&self->machines);
+       return ret;
+}
+
+struct perf_session *perf_session__new(const char *filename, int mode, bool force, bool repipe)
 {
        size_t len = filename ? strlen(filename) + 1 : 0;
        struct perf_session *self = zalloc(sizeof(*self) + len);
@@ -65,13 +90,15 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc
 
        memcpy(self->filename, filename, len);
        self->threads = RB_ROOT;
-       self->stats_by_id = RB_ROOT;
+       self->hists_tree = RB_ROOT;
        self->last_match = NULL;
        self->mmap_window = 32;
        self->cwd = NULL;
        self->cwdlen = 0;
-       self->unknown_events = 0;
-       map_groups__init(&self->kmaps);
+       self->machines = RB_ROOT;
+       self->repipe = repipe;
+       INIT_LIST_HEAD(&self->ordered_samples.samples_head);
+       machine__init(&self->host_machine, "", HOST_KERNEL_ID);
 
        if (mode == O_RDONLY) {
                if (perf_session__open(self, force) < 0)
@@ -85,7 +112,7 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc
                        goto out_delete;
        }
 
-       self->sample_type = perf_header__sample_type(&self->header);
+       perf_session__update_sample_type(self);
 out:
        return self;
 out_free:
@@ -142,8 +169,9 @@ struct map_symbol *perf_session__resolve_callchain(struct perf_session *self,
                        continue;
                }
 
+               al.filtered = false;
                thread__find_addr_location(thread, self, cpumode,
-                                          MAP__FUNCTION, ip, &al, NULL);
+                               MAP__FUNCTION, thread->pid, ip, &al, NULL);
                if (al.sym != NULL) {
                        if (sort__has_parent && !*parent &&
                            symbol__match_parent_regex(al.sym))
@@ -165,6 +193,18 @@ static int process_event_stub(event_t *event __used,
        return 0;
 }
 
+static int process_finished_round_stub(event_t *event __used,
+                                      struct perf_session *session __used,
+                                      struct perf_event_ops *ops __used)
+{
+       dump_printf(": unhandled!\n");
+       return 0;
+}
+
+static int process_finished_round(event_t *event,
+                                 struct perf_session *session,
+                                 struct perf_event_ops *ops);
+
 static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
 {
        if (handler->sample == NULL)
@@ -185,29 +225,20 @@ static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
                handler->throttle = process_event_stub;
        if (handler->unthrottle == NULL)
                handler->unthrottle = process_event_stub;
-}
-
-static const char *event__name[] = {
-       [0]                      = "TOTAL",
-       [PERF_RECORD_MMAP]       = "MMAP",
-       [PERF_RECORD_LOST]       = "LOST",
-       [PERF_RECORD_COMM]       = "COMM",
-       [PERF_RECORD_EXIT]       = "EXIT",
-       [PERF_RECORD_THROTTLE]   = "THROTTLE",
-       [PERF_RECORD_UNTHROTTLE] = "UNTHROTTLE",
-       [PERF_RECORD_FORK]       = "FORK",
-       [PERF_RECORD_READ]       = "READ",
-       [PERF_RECORD_SAMPLE]     = "SAMPLE",
-};
-
-unsigned long event__total[PERF_RECORD_MAX];
-
-void event__print_totals(void)
-{
-       int i;
-       for (i = 0; i < PERF_RECORD_MAX; ++i)
-               pr_info("%10s events: %10ld\n",
-                       event__name[i], event__total[i]);
+       if (handler->attr == NULL)
+               handler->attr = process_event_stub;
+       if (handler->event_type == NULL)
+               handler->event_type = process_event_stub;
+       if (handler->tracing_data == NULL)
+               handler->tracing_data = process_event_stub;
+       if (handler->build_id == NULL)
+               handler->build_id = process_event_stub;
+       if (handler->finished_round == NULL) {
+               if (handler->ordered_samples)
+                       handler->finished_round = process_finished_round;
+               else
+                       handler->finished_round = process_finished_round_stub;
+       }
 }
 
 void mem_bswap_64(void *src, int byte_size)
@@ -261,6 +292,37 @@ static void event__read_swap(event_t *self)
        self->read.id           = bswap_64(self->read.id);
 }
 
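+/*
+ * Swap the fixed perf_event_attr fields, then the trailing array of
+ * event ids as one block of u64s.
+ */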
+static void event__attr_swap(event_t *self)
+{
+       size_t size;
+
+       self->attr.attr.type            = bswap_32(self->attr.attr.type);
+       self->attr.attr.size            = bswap_32(self->attr.attr.size);
+       self->attr.attr.config          = bswap_64(self->attr.attr.config);
+       self->attr.attr.sample_period   = bswap_64(self->attr.attr.sample_period);
+       self->attr.attr.sample_type     = bswap_64(self->attr.attr.sample_type);
+       self->attr.attr.read_format     = bswap_64(self->attr.attr.read_format);
+       self->attr.attr.wakeup_events   = bswap_32(self->attr.attr.wakeup_events);
+       self->attr.attr.bp_type         = bswap_32(self->attr.attr.bp_type);
+       self->attr.attr.bp_addr         = bswap_64(self->attr.attr.bp_addr);
+       self->attr.attr.bp_len          = bswap_64(self->attr.attr.bp_len);
+
+       size = self->header.size;
+       size -= (void *)&self->attr.id - (void *)self;
+       mem_bswap_64(self->attr.id, size);
+}
+
+static void event__event_type_swap(event_t *self)
+{
+       self->event_type.event_type.event_id =
+               bswap_64(self->event_type.event_type.event_id);
+}
+
+static void event__tracing_data_swap(event_t *self)
+{
+       self->tracing_data.size = bswap_32(self->tracing_data.size);
+}
+
 typedef void (*event__swap_op)(event_t *self);
 
 static event__swap_op event__swap_ops[] = {
@@ -271,9 +333,212 @@ static event__swap_op event__swap_ops[] = {
        [PERF_RECORD_LOST]   = event__all64_swap,
        [PERF_RECORD_READ]   = event__read_swap,
        [PERF_RECORD_SAMPLE] = event__all64_swap,
-       [PERF_RECORD_MAX]    = NULL,
+       [PERF_RECORD_HEADER_ATTR]         = event__attr_swap,
+       [PERF_RECORD_HEADER_EVENT_TYPE]   = event__event_type_swap,
+       [PERF_RECORD_HEADER_TRACING_DATA] = event__tracing_data_swap,
+       [PERF_RECORD_HEADER_BUILD_ID]     = NULL,
+       [PERF_RECORD_HEADER_MAX]          = NULL,
+};
+
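+/*
+ * Samples are queued on a time-ordered list and only delivered once a
+ * round boundary guarantees no earlier timestamp can still arrive.
+ */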
+struct sample_queue {
+       u64                     timestamp;
+       struct sample_event     *event;
+       struct list_head        list;
 };
 
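+/*
+ * Deliver every queued sample with a timestamp up to next_flush to the
+ * ->sample handler, oldest first, and free it.
+ */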
+static void flush_sample_queue(struct perf_session *s,
+                              struct perf_event_ops *ops)
+{
+       struct list_head *head = &s->ordered_samples.samples_head;
+       u64 limit = s->ordered_samples.next_flush;
+       struct sample_queue *tmp, *iter;
+
+       if (!ops->ordered_samples || !limit)
+               return;
+
+       list_for_each_entry_safe(iter, tmp, head, list) {
+               if (iter->timestamp > limit)
+                       return;
+
+               if (iter == s->ordered_samples.last_inserted)
+                       s->ordered_samples.last_inserted = NULL;
+
+               ops->sample((event_t *)iter->event, s);
+
+               s->ordered_samples.last_flush = iter->timestamp;
+               list_del(&iter->list);
+               free(iter->event);
+               free(iter);
+       }
+}
+
+/*
+ * When perf record finishes a pass over all buffers, it records this pseudo
+ * event.
+ * We record the max timestamp t found in pass n.
+ * Assuming these timestamps are monotonic across cpus, we know that if
+ * a buffer still has events with timestamps below t, they will all be
+ * available and read in pass n + 1.
+ * Hence when we start to read pass n + 2, we can safely flush all
+ * events with timestamps below t.
+ *
+ *    ============ PASS n =================
+ *       CPU 0         |   CPU 1
+ *                     |
+ *    cnt1 timestamps  |   cnt2 timestamps
+ *          1          |         2
+ *          2          |         3
+ *          -          |         4  <--- max recorded
+ *
+ *    ============ PASS n + 1 ==============
+ *       CPU 0         |   CPU 1
+ *                     |
+ *    cnt1 timestamps  |   cnt2 timestamps
+ *          3          |         5
+ *          4          |         6
+ *          5          |         7 <---- max recorded
+ *
+ *      Flush all events below timestamp 4
+ *
+ *    ============ PASS n + 2 ==============
+ *       CPU 0         |   CPU 1
+ *                     |
+ *    cnt1 timestamps  |   cnt2 timestamps
+ *          6          |         8
+ *          7          |         9
+ *          -          |         10
+ *
+ *      Flush all events below timestamp 7
+ *      etc...
+ */
+static int process_finished_round(event_t *event __used,
+                                 struct perf_session *session,
+                                 struct perf_event_ops *ops)
+{
+       flush_sample_queue(session, ops);
+       session->ordered_samples.next_flush = session->ordered_samples.max_timestamp;
+
+       return 0;
+}
+
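+/*
+ * Insertion helpers: walk backwards from the list tail, backwards from a
+ * known neighbour, or forwards from a known neighbour, keeping the list
+ * sorted by timestamp.
+ */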
+static void __queue_sample_end(struct sample_queue *new, struct list_head *head)
+{
+       struct sample_queue *iter;
+
+       list_for_each_entry_reverse(iter, head, list) {
+               if (iter->timestamp < new->timestamp) {
+                       list_add(&new->list, &iter->list);
+                       return;
+               }
+       }
+
+       list_add(&new->list, head);
+}
+
+static void __queue_sample_before(struct sample_queue *new,
+                                 struct sample_queue *iter,
+                                 struct list_head *head)
+{
+       list_for_each_entry_continue_reverse(iter, head, list) {
+               if (iter->timestamp < new->timestamp) {
+                       list_add(&new->list, &iter->list);
+                       return;
+               }
+       }
+
+       list_add(&new->list, head);
+}
+
+static void __queue_sample_after(struct sample_queue *new,
+                                struct sample_queue *iter,
+                                struct list_head *head)
+{
+       list_for_each_entry_continue(iter, head, list) {
+               if (iter->timestamp > new->timestamp) {
+                       list_add_tail(&new->list, &iter->list);
+                       return;
+               }
+       }
+       list_add_tail(&new->list, head);
+}
+
+/* The queue is ordered by time */
+static void __queue_sample_event(struct sample_queue *new,
+                                struct perf_session *s)
+{
+       struct sample_queue *last_inserted = s->ordered_samples.last_inserted;
+       struct list_head *head = &s->ordered_samples.samples_head;
+
+       if (!last_inserted) {
+               __queue_sample_end(new, head);
+               return;
+       }
+
+       /*
+        * Most of the time the current event has a timestamp
+        * very close to the last event inserted, unless we just switched
+        * to another event buffer. Keeping the list sorted by scanning
+        * from the last inserted event is therefore probably cheaper
+        * than an rbtree based sort.
+        */
+       if (last_inserted->timestamp >= new->timestamp)
+               __queue_sample_before(new, last_inserted, head);
+       else
+               __queue_sample_after(new, last_inserted, head);
+}
+
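+/*
+ * Copy the event into a new sample_queue node and insert it in timestamp
+ * order, rejecting timestamps older than the last flush.
+ */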
+static int queue_sample_event(event_t *event, struct sample_data *data,
+                             struct perf_session *s)
+{
+       u64 timestamp = data->time;
+       struct sample_queue *new;
+
+       if (timestamp < s->ordered_samples.last_flush) {
+               printf("Warning: Timestamp below last timeslice flush\n");
+               return -EINVAL;
+       }
+
+       new = malloc(sizeof(*new));
+       if (!new)
+               return -ENOMEM;
+
+       new->timestamp = timestamp;
+
+       new->event = malloc(event->header.size);
+       if (!new->event) {
+               free(new);
+               return -ENOMEM;
+       }
+
+       memcpy(new->event, event, event->header.size);
+
+       __queue_sample_event(new, s);
+       s->ordered_samples.last_inserted = new;
+
+       if (new->timestamp > s->ordered_samples.max_timestamp)
+               s->ordered_samples.max_timestamp = new->timestamp;
+
+       return 0;
+}
+
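+/*
+ * Without ordered_samples the sample is handled immediately; with it, the
+ * sample is parsed for its timestamp and queued until the next round is
+ * finished.
+ */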
+static int perf_session__process_sample(event_t *event, struct perf_session *s,
+                                       struct perf_event_ops *ops)
+{
+       struct sample_data data;
+
+       if (!ops->ordered_samples)
+               return ops->sample(event, s);
+
+       bzero(&data, sizeof(struct sample_data));
+       event__parse_sample(event, s->sample_type, &data);
+
+       return queue_sample_event(event, &data, s);
+}
+
 static int perf_session__process_event(struct perf_session *self,
                                       event_t *event,
                                       struct perf_event_ops *ops,
@@ -281,12 +546,11 @@ static int perf_session__process_event(struct perf_session *self,
 {
        trace_event(event);
 
-       if (event->header.type < PERF_RECORD_MAX) {
+       if (event->header.type < PERF_RECORD_HEADER_MAX) {
                dump_printf("%#Lx [%#x]: PERF_RECORD_%s",
                            offset + head, event->header.size,
                            event__name[event->header.type]);
-               ++event__total[0];
-               ++event__total[event->header.type];
+               hists__inc_nr_events(&self->hists, event->header.type);
        }
 
        if (self->header.needs_swap && event__swap_ops[event->header.type])
@@ -294,7 +558,7 @@ static int perf_session__process_event(struct perf_session *self,
 
        switch (event->header.type) {
        case PERF_RECORD_SAMPLE:
-               return ops->sample(event, self);
+               return perf_session__process_sample(event, self, ops);
        case PERF_RECORD_MMAP:
                return ops->mmap(event, self);
        case PERF_RECORD_COMM:
@@ -311,8 +575,20 @@ static int perf_session__process_event(struct perf_session *self,
                return ops->throttle(event, self);
        case PERF_RECORD_UNTHROTTLE:
                return ops->unthrottle(event, self);
+       case PERF_RECORD_HEADER_ATTR:
+               return ops->attr(event, self);
+       case PERF_RECORD_HEADER_EVENT_TYPE:
+               return ops->event_type(event, self);
+       case PERF_RECORD_HEADER_TRACING_DATA:
+               /* set up the fd for read()ing the tracing data amidst the mmaps */
+               lseek(self->fd, offset + head, SEEK_SET);
+               return ops->tracing_data(event, self);
+       case PERF_RECORD_HEADER_BUILD_ID:
+               return ops->build_id(event, self);
+       case PERF_RECORD_FINISHED_ROUND:
+               return ops->finished_round(event, self, ops);
        default:
-               self->unknown_events++;
+               ++self->hists.stats.nr_unknown_events;
                return -1;
        }
 }
@@ -324,56 +600,114 @@ void perf_event_header__bswap(struct perf_event_header *self)
        self->size = bswap_16(self->size);
 }
 
-int perf_header__read_build_ids(struct perf_header *self,
-                               int input, u64 offset, u64 size)
+static struct thread *perf_session__register_idle_thread(struct perf_session *self)
 {
-       struct build_id_event bev;
-       char filename[PATH_MAX];
-       u64 limit = offset + size;
-       int err = -1;
-
-       while (offset < limit) {
-               struct dso *dso;
-               ssize_t len;
-               struct list_head *head = &dsos__user;
+       struct thread *thread = perf_session__findnew(self, 0);
 
-               if (read(input, &bev, sizeof(bev)) != sizeof(bev))
-                       goto out;
+       if (thread == NULL || thread__set_comm(thread, "swapper")) {
+               pr_err("problem inserting idle task.\n");
+               thread = NULL;
+       }
 
-               if (self->needs_swap)
-                       perf_event_header__bswap(&bev.header);
+       return thread;
+}
 
-               len = bev.header.size - sizeof(bev);
-               if (read(input, filename, len) != len)
-                       goto out;
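+/*
+ * Loop over short reads until exactly 'size' bytes have been read;
+ * returns the byte count on success, <= 0 on error or end of file.
+ */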
+int do_read(int fd, void *buf, size_t size)
+{
+       void *buf_start = buf;
 
-               if (bev.header.misc & PERF_RECORD_MISC_KERNEL)
-                       head = &dsos__kernel;
+       while (size) {
+               int ret = read(fd, buf, size);
 
-               dso = __dsos__findnew(head, filename);
-               if (dso != NULL) {
-                       dso__set_build_id(dso, &bev.build_id);
-                       if (head == &dsos__kernel && filename[0] == '[')
-                               dso->kernel = 1;
-               }
+               if (ret <= 0)
+                       return ret;
 
-               offset += bev.header.size;
+               size -= ret;
+               buf += ret;
        }
-       err = 0;
-out:
-       return err;
+
+       return buf - buf_start;
 }
 
-static struct thread *perf_session__register_idle_thread(struct perf_session *self)
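+/* polled by the event loop below; may be set externally to stop processing */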
+#define session_done() (*(volatile int *)(&session_done))
+volatile int session_done;
+
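+/*
+ * Event loop for the pipe (stdin) case: read a header, then the payload,
+ * process the event, and fall back to u64 alignment if the stream looks
+ * corrupted.
+ */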
+static int __perf_session__process_pipe_events(struct perf_session *self,
+                                              struct perf_event_ops *ops)
 {
-       struct thread *thread = perf_session__findnew(self, 0);
+       event_t event;
+       uint32_t size;
+       int skip = 0;
+       u64 head;
+       int err;
+       void *p;
 
-       if (thread == NULL || thread__set_comm(thread, "swapper")) {
-               pr_err("problem inserting idle task.\n");
-               thread = NULL;
+       perf_event_ops__fill_defaults(ops);
+
+       head = 0;
+more:
+       err = do_read(self->fd, &event, sizeof(struct perf_event_header));
+       if (err <= 0) {
+               if (err == 0)
+                       goto done;
+
+               pr_err("failed to read event header\n");
+               goto out_err;
        }
 
-       return thread;
+       if (self->header.needs_swap)
+               perf_event_header__bswap(&event.header);
+
+       size = event.header.size;
+       if (size == 0)
+               size = 8;
+
+       p = &event;
+       p += sizeof(struct perf_event_header);
+
+       if (size > sizeof(struct perf_event_header)) {
+               err = do_read(self->fd, p,
+                             size - sizeof(struct perf_event_header));
+               if (err <= 0) {
+                       if (err == 0) {
+                               pr_err("unexpected end of event stream\n");
+                               goto done;
+                       }
+
+                       pr_err("failed to read event data\n");
+                       goto out_err;
+               }
+       }
+
+       if (size == 0 ||
+           (skip = perf_session__process_event(self, &event, ops,
+                                               0, head)) < 0) {
+               dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
+                           head, event.header.size, event.header.type);
+               /*
+                * assume we lost track of the stream, check alignment, and
+                * increment a single u64 in the hope of catching up again
+                * 'soon'.
+                */
+               if (unlikely(head & 7))
+                       head &= ~7ULL;
+
+               size = 8;
+       }
+
+       head += size;
+
+       dump_printf("\n%#Lx [%#x]: event: %d\n",
+                   head, event.header.size, event.header.type);
+
+       if (skip > 0)
+               head += skip;
+
+       if (!session_done())
+               goto more;
+done:
+       err = 0;
+out_err:
+       return err;
 }
 
 int __perf_session__process_events(struct perf_session *self,
@@ -469,6 +803,9 @@ more:
                goto more;
 done:
        err = 0;
+       /* do the final flush for ordered samples */
+       self->ordered_samples.next_flush = ULLONG_MAX;
+       flush_sample_queue(self, ops);
 out_err:
        ui_progress__delete(progress);
        return err;
@@ -499,9 +836,13 @@ out_getcwd_err:
                self->cwdlen = strlen(self->cwd);
        }
 
-       err = __perf_session__process_events(self, self->header.data_offset,
-                                            self->header.data_size,
-                                            self->size, ops);
+       if (!self->fd_pipe)
+               err = __perf_session__process_events(self,
+                                                    self->header.data_offset,
+                                                    self->header.data_size,
+                                                    self->size, ops);
+       else
+               err = __perf_session__process_pipe_events(self, ops);
 out_err:
        return err;
 }
@@ -516,27 +857,48 @@ bool perf_session__has_traces(struct perf_session *self, const char *msg)
        return true;
 }
 
-int perf_session__set_kallsyms_ref_reloc_sym(struct perf_session *self,
+int perf_session__set_kallsyms_ref_reloc_sym(struct map **maps,
                                             const char *symbol_name,
                                             u64 addr)
 {
        char *bracket;
        enum map_type i;
+       struct ref_reloc_sym *ref;
 
-       self->ref_reloc_sym.name = strdup(symbol_name);
-       if (self->ref_reloc_sym.name == NULL)
+       ref = zalloc(sizeof(struct ref_reloc_sym));
+       if (ref == NULL)
                return -ENOMEM;
 
-       bracket = strchr(self->ref_reloc_sym.name, ']');
+       ref->name = strdup(symbol_name);
+       if (ref->name == NULL) {
+               free(ref);
+               return -ENOMEM;
+       }
+
+       bracket = strchr(ref->name, ']');
        if (bracket)
                *bracket = '\0';
 
-       self->ref_reloc_sym.addr = addr;
+       ref->addr = addr;
 
        for (i = 0; i < MAP__NR_TYPES; ++i) {
-               struct kmap *kmap = map__kmap(self->vmlinux_maps[i]);
-               kmap->ref_reloc_sym = &self->ref_reloc_sym;
+               struct kmap *kmap = map__kmap(maps[i]);
+               kmap->ref_reloc_sym = ref;
        }
 
        return 0;
 }
+
+size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp)
+{
+       return __dsos__fprintf(&self->host_machine.kernel_dsos, fp) +
+              __dsos__fprintf(&self->host_machine.user_dsos, fp) +
+              machines__fprintf_dsos(&self->machines, fp);
+}
+
+size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, FILE *fp,
+                                         bool with_hits)
+{
+       size_t ret = machine__fprintf_dsos_buildid(&self->host_machine, fp, with_hits);
+       return ret + machines__fprintf_dsos_buildid(&self->machines, fp, with_hits);
+}