#define _FILE_OFFSET_BITS 64

#include <linux/kernel.h>

#include <byteswap.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>

#include "session.h"
#include "sort.h"
#include "util.h"

static int perf_session__open(struct perf_session *self, bool force)
{
	struct stat input_stat;

	if (!strcmp(self->filename, "-")) {
		self->fd_pipe = true;
		self->fd = STDIN_FILENO;

		if (perf_header__read(self, self->fd) < 0)
			pr_err("incompatible file format\n");

		return 0;
	}

	self->fd = open(self->filename, O_RDONLY);
	if (self->fd < 0) {
		pr_err("failed to open file: %s", self->filename);
		if (!strcmp(self->filename, "perf.data"))
			pr_err("  (try 'perf record' first)");
		pr_err("\n");
		return -errno;
	}

	if (fstat(self->fd, &input_stat) < 0)
		goto out_close;

	if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) {
		pr_err("file %s not owned by current user or root\n",
		       self->filename);
		goto out_close;
	}

	if (!input_stat.st_size) {
		pr_info("zero-sized file (%s), nothing to do!\n",
			self->filename);
		goto out_close;
	}

	if (perf_header__read(self, self->fd) < 0) {
		pr_err("incompatible file format\n");
		goto out_close;
	}

	self->size = input_stat.st_size;
	return 0;

out_close:
	close(self->fd);
	self->fd = -1;
	return -1;
}

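/*
 * Editor's note, not part of the original file: a filename of "-" switches
 * the session to pipe mode and reads from stdin, which is what lets one perf
 * command feed another, e.g. (hypothetical invocation):
 *
 *	perf record -o - ./workload | perf inject -v > perf.data
 *
 * In that mode the header is read from the (unseekable) pipe, so an
 * incompatible format is reported but the open itself still returns 0.
 */
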
void perf_session__update_sample_type(struct perf_session *self)
{
	self->sample_type = perf_header__sample_type(&self->header);
}

int perf_session__create_kernel_maps(struct perf_session *self)
{
	int ret = machine__create_kernel_maps(&self->host_machine);

	if (ret >= 0)
		ret = machines__create_guest_kernel_maps(&self->machines);
	return ret;
}

struct perf_session *perf_session__new(const char *filename, int mode, bool force, bool repipe)
{
	size_t len = filename ? strlen(filename) + 1 : 0;
	struct perf_session *self = zalloc(sizeof(*self) + len);

	if (self == NULL)
		goto out;

	if (perf_header__init(&self->header) < 0)
		goto out_free;

	memcpy(self->filename, filename, len);
	self->threads = RB_ROOT;
	self->stats_by_id = RB_ROOT;
	self->last_match = NULL;
	self->mmap_window = 32;
	self->cwd = NULL;
	self->cwdlen = 0;
	self->unknown_events = 0;
	self->machines = RB_ROOT;
	self->repipe = repipe;
	INIT_LIST_HEAD(&self->ordered_samples.samples_head);
	machine__init(&self->host_machine, "", HOST_KERNEL_ID);

	if (mode == O_RDONLY) {
		if (perf_session__open(self, force) < 0)
			goto out_delete;
	} else if (mode == O_WRONLY) {
		/*
		 * In O_RDONLY mode this will be performed when reading the
		 * kernel MMAP event, in event__process_mmap().
		 */
		if (perf_session__create_kernel_maps(self) < 0)
			goto out_delete;
	}

	perf_session__update_sample_type(self);
out:
	return self;
out_free:
	free(self);
	return NULL;
out_delete:
	perf_session__delete(self);
	return NULL;
}

void perf_session__delete(struct perf_session *self)
{
	perf_header__exit(&self->header);
	close(self->fd);
	free(self->cwd);
	free(self);
}

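/*
 * Typical lifecycle (editor's sketch, error handling trimmed; "my_event_ops"
 * is a hypothetical perf_event_ops instance, see the example further down):
 *
 *	struct perf_session *session;
 *
 *	session = perf_session__new("perf.data", O_RDONLY, false, false);
 *	if (session == NULL)
 *		return -ENOMEM;
 *	err = perf_session__process_events(session, &my_event_ops);
 *	perf_session__delete(session);
 *
 * perf_session__new() both allocates and opens, so a NULL return covers
 * allocation failures as well as open/header-read failures.
 */
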
static bool symbol__match_parent_regex(struct symbol *sym)
{
	if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0))
		return true;

	return false;
}

struct map_symbol *perf_session__resolve_callchain(struct perf_session *self,
						   struct thread *thread,
						   struct ip_callchain *chain,
						   struct symbol **parent)
{
	u8 cpumode = PERF_RECORD_MISC_USER;
	unsigned int i;
	struct map_symbol *syms = calloc(chain->nr, sizeof(*syms));

	if (!syms)
		return NULL;

	for (i = 0; i < chain->nr; i++) {
		u64 ip = chain->ips[i];
		struct addr_location al;

		if (ip >= PERF_CONTEXT_MAX) {
			switch (ip) {
			case PERF_CONTEXT_HV:
				cpumode = PERF_RECORD_MISC_HYPERVISOR;	break;
			case PERF_CONTEXT_KERNEL:
				cpumode = PERF_RECORD_MISC_KERNEL;	break;
			case PERF_CONTEXT_USER:
				cpumode = PERF_RECORD_MISC_USER;	break;
			default:
				break;
			}
			continue;
		}

		al.filtered = false;
		thread__find_addr_location(thread, self, cpumode,
					   MAP__FUNCTION, thread->pid, ip, &al, NULL);
		if (al.sym != NULL) {
			if (sort__has_parent && !*parent &&
			    symbol__match_parent_regex(al.sym))
				*parent = al.sym;
			if (!symbol_conf.use_callchain)
				break;
			syms[i].map = al.map;
			syms[i].sym = al.sym;
		}
	}

	return syms;
}

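/*
 * Consumer sketch (editor's illustration): the returned array has chain->nr
 * slots, but entries for context markers or unresolved ips keep the NULL
 * map/sym left there by calloc(), so callers must check each slot and are
 * responsible for freeing the array:
 *
 *	struct map_symbol *syms;
 *	u64 i;
 *
 *	syms = perf_session__resolve_callchain(self, thread, chain, &parent);
 *	if (syms == NULL)
 *		return -ENOMEM;
 *	for (i = 0; i < chain->nr; i++)
 *		if (syms[i].sym)
 *			printf("%s\n", syms[i].sym->name);
 *	free(syms);
 */
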
static int process_event_stub(event_t *event __used,
			      struct perf_session *session __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_finished_round_stub(event_t *event __used,
				       struct perf_session *session __used,
				       struct perf_event_ops *ops __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_finished_round(event_t *event,
				  struct perf_session *session,
				  struct perf_event_ops *ops);

static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
{
	if (handler->sample == NULL)
		handler->sample = process_event_stub;
	if (handler->mmap == NULL)
		handler->mmap = process_event_stub;
	if (handler->comm == NULL)
		handler->comm = process_event_stub;
	if (handler->fork == NULL)
		handler->fork = process_event_stub;
	if (handler->exit == NULL)
		handler->exit = process_event_stub;
	if (handler->lost == NULL)
		handler->lost = process_event_stub;
	if (handler->read == NULL)
		handler->read = process_event_stub;
	if (handler->throttle == NULL)
		handler->throttle = process_event_stub;
	if (handler->unthrottle == NULL)
		handler->unthrottle = process_event_stub;
	if (handler->attr == NULL)
		handler->attr = process_event_stub;
	if (handler->event_type == NULL)
		handler->event_type = process_event_stub;
	if (handler->tracing_data == NULL)
		handler->tracing_data = process_event_stub;
	if (handler->build_id == NULL)
		handler->build_id = process_event_stub;
	if (handler->finished_round == NULL) {
		if (handler->ordered_samples)
			handler->finished_round = process_finished_round;
		else
			handler->finished_round = process_finished_round_stub;
	}
}

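/*
 * Example (editor's sketch): a tool fills in only the callbacks it cares
 * about and leaves the rest NULL for the stubs above. Assuming a
 * my_process_sample() handler and the event__process_* helpers from
 * event.c:
 *
 *	static struct perf_event_ops my_event_ops = {
 *		.sample		 = my_process_sample,
 *		.mmap		 = event__process_mmap,
 *		.comm		 = event__process_comm,
 *		.ordered_samples = true,
 *	};
 *
 * Setting .ordered_samples also selects process_finished_round(), so queued
 * samples get flushed in timestamp order.
 */
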
static const char *event__name[] = {
	[0]				  = "TOTAL",
	[PERF_RECORD_MMAP]		  = "MMAP",
	[PERF_RECORD_LOST]		  = "LOST",
	[PERF_RECORD_COMM]		  = "COMM",
	[PERF_RECORD_EXIT]		  = "EXIT",
	[PERF_RECORD_THROTTLE]		  = "THROTTLE",
	[PERF_RECORD_UNTHROTTLE]	  = "UNTHROTTLE",
	[PERF_RECORD_FORK]		  = "FORK",
	[PERF_RECORD_READ]		  = "READ",
	[PERF_RECORD_SAMPLE]		  = "SAMPLE",
	[PERF_RECORD_HEADER_ATTR]	  = "ATTR",
	[PERF_RECORD_HEADER_EVENT_TYPE]	  = "EVENT_TYPE",
	[PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
	[PERF_RECORD_HEADER_BUILD_ID]	  = "BUILD_ID",
};

unsigned long event__total[PERF_RECORD_HEADER_MAX];

void event__print_totals(void)
{
	int i;
	for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) {
		if (!event__name[i])
			continue;
		pr_info("%10s events: %10ld\n",
			event__name[i], event__total[i]);
	}
}

void mem_bswap_64(void *src, int byte_size)
{
	u64 *m = src;

	while (byte_size > 0) {
		*m = bswap_64(*m);
		byte_size -= sizeof(u64);
		++m;
	}
}

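/*
 * Editor's note: byte_size is assumed to be a multiple of sizeof(u64); the
 * loop swaps one u64 per iteration, e.g. byte_size == 16 rewrites m[0] and
 * m[1], then stops once byte_size reaches 0.
 */
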
static void event__all64_swap(event_t *self)
{
	struct perf_event_header *hdr = &self->header;
	mem_bswap_64(hdr + 1, self->header.size - sizeof(*hdr));
}

static void event__comm_swap(event_t *self)
{
	self->comm.pid = bswap_32(self->comm.pid);
	self->comm.tid = bswap_32(self->comm.tid);
}

static void event__mmap_swap(event_t *self)
{
	self->mmap.pid = bswap_32(self->mmap.pid);
	self->mmap.tid = bswap_32(self->mmap.tid);
	self->mmap.start = bswap_64(self->mmap.start);
	self->mmap.len = bswap_64(self->mmap.len);
	self->mmap.pgoff = bswap_64(self->mmap.pgoff);
}

static void event__task_swap(event_t *self)
{
	self->fork.pid = bswap_32(self->fork.pid);
	self->fork.tid = bswap_32(self->fork.tid);
	self->fork.ppid = bswap_32(self->fork.ppid);
	self->fork.ptid = bswap_32(self->fork.ptid);
	self->fork.time = bswap_64(self->fork.time);
}

static void event__read_swap(event_t *self)
{
	self->read.pid = bswap_32(self->read.pid);
	self->read.tid = bswap_32(self->read.tid);
	self->read.value = bswap_64(self->read.value);
	self->read.time_enabled = bswap_64(self->read.time_enabled);
	self->read.time_running = bswap_64(self->read.time_running);
	self->read.id = bswap_64(self->read.id);
}

static void event__attr_swap(event_t *self)
{
	size_t size;

	self->attr.attr.type = bswap_32(self->attr.attr.type);
	self->attr.attr.size = bswap_32(self->attr.attr.size);
	self->attr.attr.config = bswap_64(self->attr.attr.config);
	self->attr.attr.sample_period = bswap_64(self->attr.attr.sample_period);
	self->attr.attr.sample_type = bswap_64(self->attr.attr.sample_type);
	self->attr.attr.read_format = bswap_64(self->attr.attr.read_format);
	self->attr.attr.wakeup_events = bswap_32(self->attr.attr.wakeup_events);
	self->attr.attr.bp_type = bswap_32(self->attr.attr.bp_type);
	self->attr.attr.bp_addr = bswap_64(self->attr.attr.bp_addr);
	self->attr.attr.bp_len = bswap_64(self->attr.attr.bp_len);

	size = self->header.size;
	size -= (void *)&self->attr.id - (void *)self;
	mem_bswap_64(self->attr.id, size);
}

static void event__event_type_swap(event_t *self)
{
	self->event_type.event_type.event_id =
		bswap_64(self->event_type.event_type.event_id);
}

static void event__tracing_data_swap(event_t *self)
{
	self->tracing_data.size = bswap_32(self->tracing_data.size);
}

typedef void (*event__swap_op)(event_t *self);

static event__swap_op event__swap_ops[] = {
	[PERF_RECORD_MMAP]		  = event__mmap_swap,
	[PERF_RECORD_COMM]		  = event__comm_swap,
	[PERF_RECORD_FORK]		  = event__task_swap,
	[PERF_RECORD_EXIT]		  = event__task_swap,
	[PERF_RECORD_LOST]		  = event__all64_swap,
	[PERF_RECORD_READ]		  = event__read_swap,
	[PERF_RECORD_SAMPLE]		  = event__all64_swap,
	[PERF_RECORD_HEADER_ATTR]	  = event__attr_swap,
	[PERF_RECORD_HEADER_EVENT_TYPE]	  = event__event_type_swap,
	[PERF_RECORD_HEADER_TRACING_DATA] = event__tracing_data_swap,
	[PERF_RECORD_HEADER_BUILD_ID]	  = NULL,
	[PERF_RECORD_HEADER_MAX]	  = NULL,
};

struct sample_queue {
	u64			timestamp;
	struct sample_event	*event;
	struct list_head	list;
};

static void flush_sample_queue(struct perf_session *s,
			       struct perf_event_ops *ops)
{
	struct list_head *head = &s->ordered_samples.samples_head;
	u64 limit = s->ordered_samples.next_flush;
	struct sample_queue *tmp, *iter;

	if (!ops->ordered_samples || !limit)
		return;

	list_for_each_entry_safe(iter, tmp, head, list) {
		if (iter->timestamp > limit)
			return;

		if (iter == s->ordered_samples.last_inserted)
			s->ordered_samples.last_inserted = NULL;

		ops->sample((event_t *)iter->event, s);

		s->ordered_samples.last_flush = iter->timestamp;
		list_del(&iter->list);
		free(iter->event);
		free(iter);
	}
}

/*
 * When perf record finishes a pass over every buffer, it records this pseudo
 * event.
 * We record the max timestamp t found in pass n.
 * Assuming these timestamps are monotonic across cpus, we know that if
 * a buffer still has events with timestamps below t, they will all be
 * available and then read in pass n + 1.
 * Hence when we start to read pass n + 2, we can safely flush every
 * event with a timestamp below t.
 *
 *    ============ PASS n =================
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          1          |         2
 *          2          |         3
 *          -          |         4  <--- max recorded
 *
 *    ============ PASS n + 1 ==============
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          3          |         5
 *          4          |         6
 *          5          |         7 <---- max recorded
 *
 *      Flush every event below timestamp 4
 *
 *    ============ PASS n + 2 ==============
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          6          |         8
 *          7          |         9
 *          -          |         10
 *
 *      Flush every event below timestamp 7
 *      etc...
 */
static int process_finished_round(event_t *event __used,
				  struct perf_session *session,
				  struct perf_event_ops *ops)
{
	flush_sample_queue(session, ops);
	session->ordered_samples.next_flush = session->ordered_samples.max_timestamp;

	return 0;
}

static void __queue_sample_end(struct sample_queue *new, struct list_head *head)
{
	struct sample_queue *iter;

	list_for_each_entry_reverse(iter, head, list) {
		if (iter->timestamp < new->timestamp) {
			list_add(&new->list, &iter->list);
			return;
		}
	}

	list_add(&new->list, head);
}

static void __queue_sample_before(struct sample_queue *new,
				  struct sample_queue *iter,
				  struct list_head *head)
{
	list_for_each_entry_continue_reverse(iter, head, list) {
		if (iter->timestamp < new->timestamp) {
			list_add(&new->list, &iter->list);
			return;
		}
	}

	list_add(&new->list, head);
}

static void __queue_sample_after(struct sample_queue *new,
				 struct sample_queue *iter,
				 struct list_head *head)
{
	list_for_each_entry_continue(iter, head, list) {
		if (iter->timestamp > new->timestamp) {
			list_add_tail(&new->list, &iter->list);
			return;
		}
	}

	list_add_tail(&new->list, head);
}

/* The queue is ordered by time */
static void __queue_sample_event(struct sample_queue *new,
				 struct perf_session *s)
{
	struct sample_queue *last_inserted = s->ordered_samples.last_inserted;
	struct list_head *head = &s->ordered_samples.samples_head;

	if (!last_inserted) {
		__queue_sample_end(new, head);
		return;
	}

	/*
	 * Most of the time the current event has a timestamp very close to
	 * the last event inserted, unless we just switched to another event
	 * buffer. Sorting with a list plus the last-inserted hint is
	 * probably more efficient than an rbtree-based sort.
	 */
	if (last_inserted->timestamp >= new->timestamp)
		__queue_sample_before(new, last_inserted, head);
	else
		__queue_sample_after(new, last_inserted, head);
}

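/*
 * Worked example (editor's illustration): with the queue holding timestamps
 * [1 3 5 7] and last_inserted pointing at 5, inserting 6 takes the
 * __queue_sample_after() path and walks forward from 5 only one node before
 * linking 6 ahead of 7; inserting 4 takes __queue_sample_before() and walks
 * backward from 5. Only a buffer switch (a timestamp far from
 * last_inserted) degrades to a long walk.
 */
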
static int queue_sample_event(event_t *event, struct sample_data *data,
			      struct perf_session *s)
{
	u64 timestamp = data->time;
	struct sample_queue *new;

	if (timestamp < s->ordered_samples.last_flush) {
		printf("Warning: Timestamp below last timeslice flush\n");
		return -EINVAL;
	}

	new = malloc(sizeof(*new));
	if (!new)
		return -ENOMEM;

	new->timestamp = timestamp;

	new->event = malloc(event->header.size);
	if (!new->event) {
		free(new);
		return -ENOMEM;
	}

	memcpy(new->event, event, event->header.size);

	__queue_sample_event(new, s);
	s->ordered_samples.last_inserted = new;

	if (new->timestamp > s->ordered_samples.max_timestamp)
		s->ordered_samples.max_timestamp = new->timestamp;

	return 0;
}

static int perf_session__process_sample(event_t *event, struct perf_session *s,
					struct perf_event_ops *ops)
{
	struct sample_data data;

	if (!ops->ordered_samples)
		return ops->sample(event, s);

	bzero(&data, sizeof(struct sample_data));
	event__parse_sample(event, s->sample_type, &data);

	queue_sample_event(event, &data, s);

	return 0;
}

static int perf_session__process_event(struct perf_session *self,
				       event_t *event,
				       struct perf_event_ops *ops,
				       u64 offset, u64 head)
{
	trace_event(event);

	if (event->header.type < PERF_RECORD_HEADER_MAX) {
		dump_printf("%#Lx [%#x]: PERF_RECORD_%s",
			    offset + head, event->header.size,
			    event__name[event->header.type]);
		++event__total[0];
		++event__total[event->header.type];
	}

	if (self->header.needs_swap && event__swap_ops[event->header.type])
		event__swap_ops[event->header.type](event);

	switch (event->header.type) {
	case PERF_RECORD_SAMPLE:
		return perf_session__process_sample(event, self, ops);
	case PERF_RECORD_MMAP:
		return ops->mmap(event, self);
	case PERF_RECORD_COMM:
		return ops->comm(event, self);
	case PERF_RECORD_FORK:
		return ops->fork(event, self);
	case PERF_RECORD_EXIT:
		return ops->exit(event, self);
	case PERF_RECORD_LOST:
		return ops->lost(event, self);
	case PERF_RECORD_READ:
		return ops->read(event, self);
	case PERF_RECORD_THROTTLE:
		return ops->throttle(event, self);
	case PERF_RECORD_UNTHROTTLE:
		return ops->unthrottle(event, self);
	case PERF_RECORD_HEADER_ATTR:
		return ops->attr(event, self);
	case PERF_RECORD_HEADER_EVENT_TYPE:
		return ops->event_type(event, self);
	case PERF_RECORD_HEADER_TRACING_DATA:
		/* setup for reading amidst mmap */
		lseek(self->fd, offset + head, SEEK_SET);
		return ops->tracing_data(event, self);
	case PERF_RECORD_HEADER_BUILD_ID:
		return ops->build_id(event, self);
	case PERF_RECORD_FINISHED_ROUND:
		return ops->finished_round(event, self, ops);
	default:
		self->unknown_events++;
		return -1;
	}
}

void perf_event_header__bswap(struct perf_event_header *self)
{
	self->type = bswap_32(self->type);
	self->misc = bswap_16(self->misc);
	self->size = bswap_16(self->size);
}

static struct thread *perf_session__register_idle_thread(struct perf_session *self)
{
	struct thread *thread = perf_session__findnew(self, 0);

	if (thread == NULL || thread__set_comm(thread, "swapper")) {
		pr_err("problem inserting idle task.\n");
		thread = NULL;
	}

	return thread;
}

int do_read(int fd, void *buf, size_t size)
{
	void *buf_start = buf;

	while (size) {
		int ret = read(fd, buf, size);

		if (ret <= 0)
			return ret;

		size -= ret;
		buf += ret;
	}

	return buf - buf_start;
}

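/*
 * Usage sketch (editor's note): do_read() loops until all of size is
 * consumed, hiding short reads from pipes. It returns <= 0 on EOF or error,
 * otherwise the number of bytes read:
 *
 *	struct perf_event_header hdr;
 *
 *	if (do_read(fd, &hdr, sizeof(hdr)) <= 0)
 *		return -1;
 */
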
#define session_done()	(*(volatile int *)(&session_done))
volatile int session_done;

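/*
 * session_done is meant to be set asynchronously, e.g. from a signal
 * handler in a builtin, to make the pipe loop below stop at the next event
 * boundary; the volatile cast forces a fresh load on every check.
 * Editor's sketch of a hypothetical handler:
 *
 *	static void sig_handler(int sig __used)
 *	{
 *		session_done = 1;
 *	}
 */
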
static int __perf_session__process_pipe_events(struct perf_session *self,
					       struct perf_event_ops *ops)
{
	event_t event;
	uint32_t size;
	int skip = 0;
	u64 head;
	int err;
	void *p;

	perf_event_ops__fill_defaults(ops);

	head = 0;
more:
	err = do_read(self->fd, &event, sizeof(struct perf_event_header));
	if (err <= 0) {
		if (err == 0)
			goto done;

		pr_err("failed to read event header\n");
		goto out_err;
	}

	if (self->header.needs_swap)
		perf_event_header__bswap(&event.header);

	size = event.header.size;
	if (size == 0)
		size = 8;

	p = &event;
	p += sizeof(struct perf_event_header);

	if (size - sizeof(struct perf_event_header)) {
		err = do_read(self->fd, p,
			      size - sizeof(struct perf_event_header));
		if (err <= 0) {
			if (err == 0) {
				pr_err("unexpected end of event stream\n");
				goto done;
			}

			pr_err("failed to read event data\n");
			goto out_err;
		}
	}

	if (size == 0 ||
	    (skip = perf_session__process_event(self, &event, ops,
						0, head)) < 0) {
		dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
			    head, event.header.size, event.header.type);
		/*
		 * Assume we lost track of the stream: check alignment and
		 * skip a single u64 in the hope of catching on again 'soon'.
		 */
		if (unlikely(head & 7))
			head &= ~7ULL;

		size = 8;
	}

	head += size;

	dump_printf("\n%#Lx [%#x]: event: %d\n",
		    head, event.header.size, event.header.type);

	if (skip > 0)
		head += skip;

	if (!session_done())
		goto more;
done:
	err = 0;
out_err:
	return err;
}

int __perf_session__process_events(struct perf_session *self,
				   u64 data_offset, u64 data_size,
				   u64 file_size, struct perf_event_ops *ops)
{
	int err, mmap_prot, mmap_flags;
	u64 head, shift;
	u64 offset = 0;
	size_t page_size;
	event_t *event;
	uint32_t size;
	char *buf;
	struct ui_progress *progress = ui_progress__new("Processing events...",
							self->size);
	if (progress == NULL)
		return -1;

	perf_event_ops__fill_defaults(ops);

	page_size = sysconf(_SC_PAGESIZE);

	head = data_offset;
	shift = page_size * (head / page_size);
	offset += shift;
	head -= shift;

	mmap_prot  = PROT_READ;
	mmap_flags = MAP_SHARED;

	if (self->header.needs_swap) {
		mmap_prot  |= PROT_WRITE;
		mmap_flags = MAP_PRIVATE;
	}
remap:
	buf = mmap(NULL, page_size * self->mmap_window, mmap_prot,
		   mmap_flags, self->fd, offset);
	if (buf == MAP_FAILED) {
		pr_err("failed to mmap file\n");
		err = -errno;
		goto out_err;
	}

more:
	event = (event_t *)(buf + head);
	ui_progress__update(progress, offset);

	if (self->header.needs_swap)
		perf_event_header__bswap(&event->header);
	size = event->header.size;
	if (size == 0)
		size = 8;

	if (head + event->header.size >= page_size * self->mmap_window) {
		int munmap_ret;

		shift = page_size * (head / page_size);

		munmap_ret = munmap(buf, page_size * self->mmap_window);
		assert(munmap_ret == 0);

		offset += shift;
		head -= shift;
		goto remap;
	}

	size = event->header.size;

	dump_printf("\n%#Lx [%#x]: event: %d\n",
		    offset + head, event->header.size, event->header.type);

	if (size == 0 ||
	    perf_session__process_event(self, event, ops, offset, head) < 0) {
		dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
			    offset + head, event->header.size,
			    event->header.type);
		/*
		 * Assume we lost track of the stream: check alignment and
		 * skip a single u64 in the hope of catching on again 'soon'.
		 */
		if (unlikely(head & 7))
			head &= ~7ULL;

		size = 8;
	}

	head += size;

	if (offset + head >= data_offset + data_size)
		goto done;

	if (offset + head < file_size)
		goto more;
done:
	err = 0;
	/* do the final flush for ordered samples */
	self->ordered_samples.next_flush = ULLONG_MAX;
	flush_sample_queue(self, ops);
out_err:
	ui_progress__delete(progress);
	return err;
}

int perf_session__process_events(struct perf_session *self,
				 struct perf_event_ops *ops)
{
	int err;

	if (perf_session__register_idle_thread(self) == NULL)
		return -ENOMEM;

	if (!symbol_conf.full_paths) {
		char bf[PATH_MAX];

		if (getcwd(bf, sizeof(bf)) == NULL) {
			err = -errno;
out_getcwd_err:
			pr_err("failed to get the current directory\n");
			goto out_err;
		}
		self->cwd = strdup(bf);
		if (self->cwd == NULL) {
			err = -ENOMEM;
			goto out_getcwd_err;
		}
		self->cwdlen = strlen(self->cwd);
	}

	if (!self->fd_pipe)
		err = __perf_session__process_events(self,
						     self->header.data_offset,
						     self->header.data_size,
						     self->size, ops);
	else
		err = __perf_session__process_pipe_events(self, ops);
out_err:
	return err;
}

bool perf_session__has_traces(struct perf_session *self, const char *msg)
{
	if (!(self->sample_type & PERF_SAMPLE_RAW)) {
		pr_err("No trace sample to read. Did you call 'perf %s'?\n", msg);
		return false;
	}

	return true;
}

int perf_session__set_kallsyms_ref_reloc_sym(struct map **maps,
					     const char *symbol_name,
					     u64 addr)
{
	char *bracket;
	enum map_type i;
	struct ref_reloc_sym *ref;

	ref = zalloc(sizeof(struct ref_reloc_sym));
	if (ref == NULL)
		return -ENOMEM;

	ref->name = strdup(symbol_name);
	if (ref->name == NULL) {
		free(ref);
		return -ENOMEM;
	}

	bracket = strchr(ref->name, ']');
	if (bracket)
		*bracket = '\0';

	ref->addr = addr;

	for (i = 0; i < MAP__NR_TYPES; ++i) {
		struct kmap *kmap = map__kmap(maps[i]);
		kmap->ref_reloc_sym = ref;
	}

	return 0;
}

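/*
 * Example (editor's sketch): callers typically pass a reference symbol such
 * as "_text" together with its address parsed from kallsyms; any trailing
 * ']' (as in a bracket-suffixed symbol name) is truncated above before the
 * name is attached to every map type. Hypothetical call:
 *
 *	perf_session__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps,
 *						 "_text", addr);
 */
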
size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp)
{
	return __dsos__fprintf(&self->host_machine.kernel_dsos, fp) +
	       __dsos__fprintf(&self->host_machine.user_dsos, fp) +
	       machines__fprintf_dsos(&self->machines, fp);
}