#define _FILE_OFFSET_BITS 64

#include <linux/kernel.h>

#include <byteswap.h>
#include <unistd.h>
#include <sys/types.h>

#include "session.h"
#include "sort.h"
#include "util.h"
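
/*
 * Open the input and validate it: a "-" filename switches the session to
 * reading a perf.data stream from stdin, while a regular file must be
 * owned by the current user (or root), be non-empty, and carry a
 * compatible header before we trust its contents.
 */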
static int perf_session__open(struct perf_session *self, bool force)
{
	struct stat input_stat;

	if (!strcmp(self->filename, "-")) {
		self->fd_pipe = true;
		self->fd = STDIN_FILENO;

		if (perf_header__read(self, self->fd) < 0)
			pr_err("incompatible file format");

		return 0;
	}

	self->fd = open(self->filename, O_RDONLY);
	if (self->fd < 0) {
		pr_err("failed to open file: %s", self->filename);
		if (!strcmp(self->filename, "perf.data"))
			pr_err(" (try 'perf record' first)");
		pr_err("\n");
		return -errno;
	}

	if (fstat(self->fd, &input_stat) < 0)
		goto out_close;

	if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) {
		pr_err("file %s not owned by current user or root\n",
		       self->filename);
		goto out_close;
	}

	if (!input_stat.st_size) {
		pr_info("zero-sized file (%s), nothing to do!\n",
			self->filename);
		goto out_close;
	}

	if (perf_header__read(self, self->fd) < 0) {
		pr_err("incompatible file format");
		goto out_close;
	}

	self->size = input_stat.st_size;
	return 0;

out_close:
	close(self->fd);
	self->fd = -1;
	return -1;
}
void perf_session__update_sample_type(struct perf_session *self)
{
	self->sample_type = perf_header__sample_type(&self->header);
}
int perf_session__create_kernel_maps(struct perf_session *self)
{
	struct rb_root *machines = &self->machines;
	int ret = machines__create_kernel_maps(machines, HOST_KERNEL_ID);

	if (ret >= 0)
		ret = machines__create_guest_kernel_maps(machines);
	return ret;
}
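
/*
 * Allocate and set up a session. The filename is stored inline at the
 * end of the struct (hence the "+ len" in the allocation). In O_RDONLY
 * mode the file is opened and its header read; in O_WRONLY mode the
 * kernel maps are created up front, since no file exists yet to learn
 * them from.
 */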
struct perf_session *perf_session__new(const char *filename, int mode, bool force, bool repipe)
{
	size_t len = filename ? strlen(filename) + 1 : 0;
	struct perf_session *self = zalloc(sizeof(*self) + len);

	if (self == NULL)
		goto out;

	if (perf_header__init(&self->header) < 0)
		goto out_free;

	memcpy(self->filename, filename, len);
	self->threads = RB_ROOT;
	self->stats_by_id = RB_ROOT;
	self->last_match = NULL;
	self->mmap_window = 32;
	self->cwd = NULL;
	self->cwdlen = 0;
	self->unknown_events = 0;
	self->machines = RB_ROOT;
	self->repipe = repipe;
	self->ordered_samples.flush_limit = ULLONG_MAX;
	INIT_LIST_HEAD(&self->ordered_samples.samples_head);

	if (mode == O_RDONLY) {
		if (perf_session__open(self, force) < 0)
			goto out_delete;
	} else if (mode == O_WRONLY) {
		/*
		 * In O_RDONLY mode this will be performed when reading the
		 * kernel MMAP event, in event__process_mmap().
		 */
		if (perf_session__create_kernel_maps(self) < 0)
			goto out_delete;
	}

	perf_session__update_sample_type(self);
out:
	return self;
out_free:
	free(self);
	return NULL;
out_delete:
	perf_session__delete(self);
	return NULL;
}
void perf_session__delete(struct perf_session *self)
{
	perf_header__exit(&self->header);
	close(self->fd);
	free(self->cwd);
	free(self);
}
static bool symbol__match_parent_regex(struct symbol *sym)
{
	if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0))
		return true;

	return false;
}
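
/*
 * Resolve each ip in a callchain to a map/symbol pair. Values at or
 * above PERF_CONTEXT_MAX are not addresses but context markers injected
 * by the kernel; they switch the cpumode used to resolve the addresses
 * that follow (hypervisor, kernel or user).
 */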
struct map_symbol *perf_session__resolve_callchain(struct perf_session *self,
						   struct thread *thread,
						   struct ip_callchain *chain,
						   struct symbol **parent)
{
	u8 cpumode = PERF_RECORD_MISC_USER;
	unsigned int i;
	struct map_symbol *syms = calloc(chain->nr, sizeof(*syms));

	if (!syms)
		return NULL;

	for (i = 0; i < chain->nr; i++) {
		u64 ip = chain->ips[i];
		struct addr_location al;

		if (ip >= PERF_CONTEXT_MAX) {
			switch (ip) {
			case PERF_CONTEXT_HV:
				cpumode = PERF_RECORD_MISC_HYPERVISOR;
				break;
			case PERF_CONTEXT_KERNEL:
				cpumode = PERF_RECORD_MISC_KERNEL;
				break;
			case PERF_CONTEXT_USER:
				cpumode = PERF_RECORD_MISC_USER;
				break;
			default:
				break;
			}
			continue;
		}

		al.filtered = false;
		thread__find_addr_location(thread, self, cpumode,
					   MAP__FUNCTION, thread->pid, ip,
					   &al, NULL);
		if (al.sym != NULL) {
			if (sort__has_parent && !*parent &&
			    symbol__match_parent_regex(al.sym))
				*parent = al.sym;
			if (!symbol_conf.use_callchain)
				break;
			syms[i].map = al.map;
			syms[i].sym = al.sym;
		}
	}

	return syms;
}
static int process_event_stub(event_t *event __used,
			      struct perf_session *session __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}
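
/*
 * Point every handler the caller left NULL at the do-nothing stub, so
 * the dispatch in perf_session__process_event() can call handlers
 * unconditionally.
 */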
static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
{
	if (handler->sample == NULL)
		handler->sample = process_event_stub;
	if (handler->mmap == NULL)
		handler->mmap = process_event_stub;
	if (handler->comm == NULL)
		handler->comm = process_event_stub;
	if (handler->fork == NULL)
		handler->fork = process_event_stub;
	if (handler->exit == NULL)
		handler->exit = process_event_stub;
	if (handler->lost == NULL)
		handler->lost = process_event_stub;
	if (handler->read == NULL)
		handler->read = process_event_stub;
	if (handler->throttle == NULL)
		handler->throttle = process_event_stub;
	if (handler->unthrottle == NULL)
		handler->unthrottle = process_event_stub;
	if (handler->attr == NULL)
		handler->attr = process_event_stub;
	if (handler->event_type == NULL)
		handler->event_type = process_event_stub;
	if (handler->tracing_data == NULL)
		handler->tracing_data = process_event_stub;
	if (handler->build_id == NULL)
		handler->build_id = process_event_stub;
}
static const char *event__name[] = {
	[0]				  = "TOTAL",
	[PERF_RECORD_MMAP]		  = "MMAP",
	[PERF_RECORD_LOST]		  = "LOST",
	[PERF_RECORD_COMM]		  = "COMM",
	[PERF_RECORD_EXIT]		  = "EXIT",
	[PERF_RECORD_THROTTLE]		  = "THROTTLE",
	[PERF_RECORD_UNTHROTTLE]	  = "UNTHROTTLE",
	[PERF_RECORD_FORK]		  = "FORK",
	[PERF_RECORD_READ]		  = "READ",
	[PERF_RECORD_SAMPLE]		  = "SAMPLE",
	[PERF_RECORD_HEADER_ATTR]	  = "ATTR",
	[PERF_RECORD_HEADER_EVENT_TYPE]	  = "EVENT_TYPE",
	[PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
	[PERF_RECORD_HEADER_BUILD_ID]	  = "BUILD_ID",
};
unsigned long event__total[PERF_RECORD_HEADER_MAX];

void event__print_totals(void)
{
	int i;

	for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) {
		if (!event__name[i])
			continue;
		pr_info("%10s events: %10ld\n",
			event__name[i], event__total[i]);
	}
}
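
/*
 * Byte-swap a buffer in place, in 64-bit chunks. Sample payloads are
 * sequences of u64s, so this is enough to repair their endianness;
 * byte_size is assumed to be a multiple of sizeof(u64).
 */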
void mem_bswap_64(void *src, int byte_size)
{
	u64 *m = src;

	while (byte_size > 0) {
		*m = bswap_64(*m);
		byte_size -= sizeof(u64);
		++m;
	}
}
static void event__all64_swap(event_t *self)
{
	struct perf_event_header *hdr = &self->header;

	mem_bswap_64(hdr + 1, self->header.size - sizeof(*hdr));
}
static void event__comm_swap(event_t *self)
{
	self->comm.pid = bswap_32(self->comm.pid);
	self->comm.tid = bswap_32(self->comm.tid);
}
static void event__mmap_swap(event_t *self)
{
	self->mmap.pid	 = bswap_32(self->mmap.pid);
	self->mmap.tid	 = bswap_32(self->mmap.tid);
	self->mmap.start = bswap_64(self->mmap.start);
	self->mmap.len	 = bswap_64(self->mmap.len);
	self->mmap.pgoff = bswap_64(self->mmap.pgoff);
}
static void event__task_swap(event_t *self)
{
	self->fork.pid	= bswap_32(self->fork.pid);
	self->fork.tid	= bswap_32(self->fork.tid);
	self->fork.ppid	= bswap_32(self->fork.ppid);
	self->fork.ptid	= bswap_32(self->fork.ptid);
	self->fork.time	= bswap_64(self->fork.time);
}
static void event__read_swap(event_t *self)
{
	self->read.pid		= bswap_32(self->read.pid);
	self->read.tid		= bswap_32(self->read.tid);
	self->read.value	= bswap_64(self->read.value);
	self->read.time_enabled	= bswap_64(self->read.time_enabled);
	self->read.time_running	= bswap_64(self->read.time_running);
	self->read.id		= bswap_64(self->read.id);
}
static void event__attr_swap(event_t *self)
{
	size_t size;

	self->attr.attr.type		= bswap_32(self->attr.attr.type);
	self->attr.attr.size		= bswap_32(self->attr.attr.size);
	self->attr.attr.config		= bswap_64(self->attr.attr.config);
	self->attr.attr.sample_period	= bswap_64(self->attr.attr.sample_period);
	self->attr.attr.sample_type	= bswap_64(self->attr.attr.sample_type);
	self->attr.attr.read_format	= bswap_64(self->attr.attr.read_format);
	self->attr.attr.wakeup_events	= bswap_32(self->attr.attr.wakeup_events);
	self->attr.attr.bp_type		= bswap_32(self->attr.attr.bp_type);
	self->attr.attr.bp_addr		= bswap_64(self->attr.attr.bp_addr);
	self->attr.attr.bp_len		= bswap_64(self->attr.attr.bp_len);

	/* the perf ids trailing the attr are an array of u64s */
	size = self->header.size;
	size -= (void *)&self->attr.id - (void *)self;
	mem_bswap_64(self->attr.id, size);
}
static void event__event_type_swap(event_t *self)
{
	self->event_type.event_type.event_id =
		bswap_64(self->event_type.event_type.event_id);
}
static void event__tracing_data_swap(event_t *self)
{
	self->tracing_data.size = bswap_32(self->tracing_data.size);
}
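
/*
 * Table of swap handlers, indexed by record type. A NULL entry means no
 * in-place swap is performed for that record type here.
 */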
typedef void (*event__swap_op)(event_t *self);

static event__swap_op event__swap_ops[] = {
	[PERF_RECORD_MMAP]		  = event__mmap_swap,
	[PERF_RECORD_COMM]		  = event__comm_swap,
	[PERF_RECORD_FORK]		  = event__task_swap,
	[PERF_RECORD_EXIT]		  = event__task_swap,
	[PERF_RECORD_LOST]		  = event__all64_swap,
	[PERF_RECORD_READ]		  = event__read_swap,
	[PERF_RECORD_SAMPLE]		  = event__all64_swap,
	[PERF_RECORD_HEADER_ATTR]	  = event__attr_swap,
	[PERF_RECORD_HEADER_EVENT_TYPE]	  = event__event_type_swap,
	[PERF_RECORD_HEADER_TRACING_DATA] = event__tracing_data_swap,
	[PERF_RECORD_HEADER_BUILD_ID]	  = NULL,
	[PERF_RECORD_HEADER_MAX]	  = NULL,
};
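
/*
 * Ordered samples: samples read from the file are not guaranteed to be
 * time-ordered across mmap buffers, so they are queued on a time-sorted
 * list and only flushed once a full FLUSH_PERIOD has passed, by which
 * point every earlier sample should have been seen.
 */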
struct sample_queue {
	u64			timestamp;
	struct sample_event	*event;
	struct list_head	list;
};
#define FLUSH_PERIOD	(2 * NSEC_PER_SEC)
static void flush_sample_queue(struct perf_session *s,
			       struct perf_event_ops *ops)
{
	struct list_head *head = &s->ordered_samples.samples_head;
	u64 limit = s->ordered_samples.flush_limit;
	struct sample_queue *tmp, *iter;

	if (!ops->ordered_samples)
		return;

	list_for_each_entry_safe(iter, tmp, head, list) {
		if (iter->timestamp > limit)
			break;

		if (iter == s->ordered_samples.last_inserted)
			s->ordered_samples.last_inserted = NULL;

		ops->sample((event_t *)iter->event, s);

		s->ordered_samples.last_flush = iter->timestamp;
		list_del(&iter->list);
		free(iter->event);
		free(iter);
	}
}
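
/*
 * The three helpers below insert into the time-sorted list: scanning
 * backwards from the tail, backwards from a known entry, or forwards
 * from a known entry. The caller picks whichever walk is likely to be
 * shortest.
 */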
static void __queue_sample_end(struct sample_queue *new, struct list_head *head)
{
	struct sample_queue *iter;

	list_for_each_entry_reverse(iter, head, list) {
		if (iter->timestamp < new->timestamp) {
			list_add(&new->list, &iter->list);
			return;
		}
	}

	list_add(&new->list, head);
}
static void __queue_sample_before(struct sample_queue *new,
				  struct sample_queue *iter,
				  struct list_head *head)
{
	list_for_each_entry_continue_reverse(iter, head, list) {
		if (iter->timestamp < new->timestamp) {
			list_add(&new->list, &iter->list);
			return;
		}
	}

	list_add(&new->list, head);
}
static void __queue_sample_after(struct sample_queue *new,
				 struct sample_queue *iter,
				 struct list_head *head)
{
	list_for_each_entry_continue(iter, head, list) {
		if (iter->timestamp > new->timestamp) {
			list_add_tail(&new->list, &iter->list);
			return;
		}
	}

	list_add_tail(&new->list, head);
}
/* The queue is ordered by time */
static void __queue_sample_event(struct sample_queue *new,
				 struct perf_session *s)
{
	struct sample_queue *last_inserted = s->ordered_samples.last_inserted;
	struct list_head *head = &s->ordered_samples.samples_head;

	if (!last_inserted) {
		__queue_sample_end(new, head);
		return;
	}

	/*
	 * Most of the time the current event has a timestamp very close
	 * to the last event inserted, unless we just switched to another
	 * event buffer. Sorting via a list anchored at the last inserted
	 * event is therefore probably more efficient than an rbtree-based
	 * sort.
	 */
	if (last_inserted->timestamp >= new->timestamp)
		__queue_sample_before(new, last_inserted, head);
	else
		__queue_sample_after(new, last_inserted, head);
}
static int queue_sample_event(event_t *event, struct sample_data *data,
			      struct perf_session *s,
			      struct perf_event_ops *ops)
{
	u64 timestamp = data->time;
	struct sample_queue *new;
	u64 flush_limit;

	if (s->ordered_samples.flush_limit == ULLONG_MAX)
		s->ordered_samples.flush_limit = timestamp + FLUSH_PERIOD;

	if (timestamp < s->ordered_samples.last_flush) {
		printf("Warning: Timestamp below last timeslice flush\n");
		return -EINVAL;
	}

	new = malloc(sizeof(*new));
	if (!new)
		return -ENOMEM;

	new->timestamp = timestamp;

	new->event = malloc(event->header.size);
	if (!new->event) {
		free(new);
		return -ENOMEM;
	}

	memcpy(new->event, event, event->header.size);

	__queue_sample_event(new, s);
	s->ordered_samples.last_inserted = new;

	/*
	 * We want to have a slice of events covering 2 * FLUSH_PERIOD.
	 * If FLUSH_PERIOD is big enough, it ensures that every event that
	 * occurred in the first half of the timeslice has been buffered
	 * and that none remain (we need that because of the weakly ordered
	 * event recording we have). Once we reach the 2 * FLUSH_PERIOD
	 * timeslice, we flush the first half to be gentle with the memory
	 * (the second half can still get new events in the middle, so wait
	 * another period to flush it).
	 */
	flush_limit = s->ordered_samples.flush_limit;

	if (new->timestamp > flush_limit &&
	    new->timestamp - flush_limit > FLUSH_PERIOD) {
		s->ordered_samples.flush_limit += FLUSH_PERIOD;
		flush_sample_queue(s, ops);
	}

	return 0;
}
static int perf_session__process_sample(event_t *event, struct perf_session *s,
					struct perf_event_ops *ops)
{
	struct sample_data data;

	if (!ops->ordered_samples)
		return ops->sample(event, s);

	bzero(&data, sizeof(struct sample_data));
	event__parse_sample(event, s->sample_type, &data);

	queue_sample_event(event, &data, s, ops);

	return 0;
}
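
/*
 * Central dispatch: count the record, byte-swap it if the file came from
 * a machine with the opposite endianness, then hand it to the matching
 * handler. Unknown record types are counted and reported as an error.
 */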
static int perf_session__process_event(struct perf_session *self,
				       event_t *event,
				       struct perf_event_ops *ops,
				       u64 offset, u64 head)
{
	if (event->header.type < PERF_RECORD_HEADER_MAX) {
		dump_printf("%#Lx [%#x]: PERF_RECORD_%s",
			    offset + head, event->header.size,
			    event__name[event->header.type]);
		++event__total[0];
		++event__total[event->header.type];
	}

	if (self->header.needs_swap && event__swap_ops[event->header.type])
		event__swap_ops[event->header.type](event);

	switch (event->header.type) {
	case PERF_RECORD_SAMPLE:
		return perf_session__process_sample(event, self, ops);
	case PERF_RECORD_MMAP:
		return ops->mmap(event, self);
	case PERF_RECORD_COMM:
		return ops->comm(event, self);
	case PERF_RECORD_FORK:
		return ops->fork(event, self);
	case PERF_RECORD_EXIT:
		return ops->exit(event, self);
	case PERF_RECORD_LOST:
		return ops->lost(event, self);
	case PERF_RECORD_READ:
		return ops->read(event, self);
	case PERF_RECORD_THROTTLE:
		return ops->throttle(event, self);
	case PERF_RECORD_UNTHROTTLE:
		return ops->unthrottle(event, self);
	case PERF_RECORD_HEADER_ATTR:
		return ops->attr(event, self);
	case PERF_RECORD_HEADER_EVENT_TYPE:
		return ops->event_type(event, self);
	case PERF_RECORD_HEADER_TRACING_DATA:
		/* setup for reading amidst mmap */
		lseek(self->fd, offset + head, SEEK_SET);
		return ops->tracing_data(event, self);
	case PERF_RECORD_HEADER_BUILD_ID:
		return ops->build_id(event, self);
	default:
		self->unknown_events++;
		return -1;
	}
}
void perf_event_header__bswap(struct perf_event_header *self)
{
	self->type = bswap_32(self->type);
	self->misc = bswap_16(self->misc);
	self->size = bswap_16(self->size);
}
static struct thread *perf_session__register_idle_thread(struct perf_session *self)
{
	struct thread *thread = perf_session__findnew(self, 0);

	if (thread == NULL || thread__set_comm(thread, "swapper")) {
		pr_err("problem inserting idle task.\n");
		thread = NULL;
	}

	return thread;
}
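
/*
 * read() until the requested size is complete, since reads from a pipe
 * may return short. Returns the byte count on success, or read()'s
 * return value (0 on EOF, negative on error) otherwise.
 */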
int do_read(int fd, void *buf, size_t size)
{
	void *buf_start = buf;

	while (size) {
		int ret = read(fd, buf, size);

		if (ret <= 0)
			return ret;

		size -= ret;
		buf += ret;
	}

	return buf - buf_start;
}
#define session_done()	(*(volatile int *)(&session_done))
volatile int session_done;
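
/*
 * Stream loop for stdin input: read a header, size the record from it,
 * read the payload into the same event buffer, then dispatch. Since a
 * pipe cannot be re-read, a bogus record only advances the stream by a
 * minimal 8-byte step in the hope of resynchronizing.
 */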
static int __perf_session__process_pipe_events(struct perf_session *self,
					       struct perf_event_ops *ops)
{
	event_t event;
	uint32_t size;
	int skip = 0;
	u64 head;
	int err;
	void *p;

	perf_event_ops__fill_defaults(ops);

	head = 0;
more:
	err = do_read(self->fd, &event, sizeof(struct perf_event_header));
	if (err <= 0) {
		if (err == 0)
			goto done;

		pr_err("failed to read event header\n");
		goto out_err;
	}

	if (self->header.needs_swap)
		perf_event_header__bswap(&event.header);

	size = event.header.size;
	if (size == 0)
		size = 8;

	p = &event;
	p += sizeof(struct perf_event_header);

	err = do_read(self->fd, p, size - sizeof(struct perf_event_header));
	if (err <= 0) {
		if (err == 0) {
			pr_err("unexpected end of event stream\n");
			goto done;
		}

		pr_err("failed to read event data\n");
		goto out_err;
	}

	if (size == 0 ||
	    (skip = perf_session__process_event(self, &event, ops,
						0, head)) < 0) {
		dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
			    head, event.header.size, event.header.type);
		/*
		 * Assume we lost track of the stream, check alignment, and
		 * increment a single u64 in the hope of catching on again
		 * 'soon'.
		 */
		if (unlikely(head & 7))
			head &= ~7ULL;

		size = 8;
	}

	head += size;

	dump_printf("\n%#Lx [%#x]: event: %d\n",
		    head, event.header.size, event.header.type);

	if (skip > 0)
		head += skip;

	if (!session_done())
		goto more;
done:
	err = 0;
out_err:
	return err;
}
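
/*
 * File-backed loop: map a window of self->mmap_window pages of the file
 * and walk the records inside it, sliding the window (page-aligned)
 * whenever the next record would cross its end. When the header says the
 * data needs swapping, the window is mapped MAP_PRIVATE and writable so
 * records can be byte-swapped in place without touching the file.
 */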
int __perf_session__process_events(struct perf_session *self,
				   u64 data_offset, u64 data_size,
				   u64 file_size, struct perf_event_ops *ops)
{
	int err, mmap_prot, mmap_flags;
	u64 head, shift;
	u64 offset = 0;
	size_t page_size;
	event_t *event;
	uint32_t size;
	char *buf;
	struct ui_progress *progress = ui_progress__new("Processing events...",
							self->size);
	if (progress == NULL)
		return -1;

	perf_event_ops__fill_defaults(ops);

	page_size = sysconf(_SC_PAGESIZE);

	head = data_offset;
	shift = page_size * (head / page_size);
	offset += shift;
	head -= shift;

	mmap_prot  = PROT_READ;
	mmap_flags = MAP_SHARED;

	if (self->header.needs_swap) {
		mmap_prot  |= PROT_WRITE;
		mmap_flags = MAP_PRIVATE;
	}
remap:
	buf = mmap(NULL, page_size * self->mmap_window, mmap_prot,
		   mmap_flags, self->fd, offset);
	if (buf == MAP_FAILED) {
		pr_err("failed to mmap file\n");
		err = -errno;
		goto out_err;
	}
more:
	event = (event_t *)(buf + head);
	ui_progress__update(progress, offset);

	if (self->header.needs_swap)
		perf_event_header__bswap(&event->header);
	size = event->header.size;
	if (size == 0)
		size = 8;

	if (head + event->header.size >= page_size * self->mmap_window) {
		int munmap_ret;

		shift = page_size * (head / page_size);

		munmap_ret = munmap(buf, page_size * self->mmap_window);
		assert(munmap_ret == 0);

		offset += shift;
		head -= shift;
		goto remap;
	}

	size = event->header.size;

	dump_printf("\n%#Lx [%#x]: event: %d\n",
		    offset + head, event->header.size, event->header.type);

	if (size == 0 ||
	    perf_session__process_event(self, event, ops, offset, head) < 0) {
		dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
			    offset + head, event->header.size,
			    event->header.type);
		/*
		 * Assume we lost track of the stream, check alignment, and
		 * increment a single u64 in the hope of catching on again
		 * 'soon'.
		 */
		if (unlikely(head & 7))
			head &= ~7ULL;

		size = 8;
	}

	head += size;

	if (offset + head >= data_offset + data_size)
		goto done;

	if (offset + head < file_size)
		goto more;
done:
	err = 0;
	/* do the final flush for ordered samples */
	self->ordered_samples.flush_limit = ULLONG_MAX;
	flush_sample_queue(self, ops);
out_err:
	ui_progress__delete(progress);
	return err;
}
int perf_session__process_events(struct perf_session *self,
				 struct perf_event_ops *ops)
{
	int err;

	if (perf_session__register_idle_thread(self) == NULL)
		return -ENOMEM;

	if (!symbol_conf.full_paths) {
		char bf[PATH_MAX];

		if (getcwd(bf, sizeof(bf)) == NULL) {
			err = -errno;
out_getcwd_err:
			pr_err("failed to get the current directory\n");
			goto out_err;
		}
		self->cwd = strdup(bf);
		if (self->cwd == NULL) {
			err = -ENOMEM;
			goto out_getcwd_err;
		}
		self->cwdlen = strlen(self->cwd);
	}

	if (!self->fd_pipe)
		err = __perf_session__process_events(self,
						     self->header.data_offset,
						     self->header.data_size,
						     self->size, ops);
	else
		err = __perf_session__process_pipe_events(self, ops);
out_err:
	return err;
}
bool perf_session__has_traces(struct perf_session *self, const char *msg)
{
	if (!(self->sample_type & PERF_SAMPLE_RAW)) {
		pr_err("No trace sample to read. Did you call 'perf %s'?\n", msg);
		return false;
	}

	return true;
}
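
/*
 * Remember the reference relocation symbol (typically "_text") and its
 * address for each map type, so kernel maps can later be adjusted if the
 * running kernel was relocated. The name is duplicated and truncated at
 * the first ']', if one is present.
 */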
int perf_session__set_kallsyms_ref_reloc_sym(struct map **maps,
					     const char *symbol_name,
					     u64 addr)
{
	char *bracket;
	enum map_type i;
	struct ref_reloc_sym *ref;

	ref = zalloc(sizeof(struct ref_reloc_sym));
	if (ref == NULL)
		return -ENOMEM;

	ref->name = strdup(symbol_name);
	if (ref->name == NULL) {
		free(ref);
		return -ENOMEM;
	}

	bracket = strchr(ref->name, ']');
	if (bracket)
		*bracket = '\0';

	ref->addr = addr;

	for (i = 0; i < MAP__NR_TYPES; ++i) {
		struct kmap *kmap = map__kmap(maps[i]);
		kmap->ref_reloc_sym = ref;
	}

	return 0;
}