Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux...
[pandora-kernel.git] / tools / perf / builtin-record.c
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #define _FILE_OFFSET_BITS 64
9
10 #include "builtin.h"
11
12 #include "perf.h"
13
14 #include "util/build-id.h"
15 #include "util/util.h"
16 #include "util/parse-options.h"
17 #include "util/parse-events.h"
18
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/session.h"
25 #include "util/tool.h"
26 #include "util/symbol.h"
27 #include "util/cpumap.h"
28 #include "util/thread_map.h"
29
30 #include <unistd.h>
31 #include <sched.h>
32 #include <sys/mman.h>
33
/*
 * How to treat a pre-existing perf.data output file:
 * overwrite it (WRITE_FORCE) or append records to it (WRITE_APPEND).
 */
enum write_mode_t {
        WRITE_FORCE,
        WRITE_APPEND
};
38
/*
 * All state for one 'perf record' run.  'tool' must stay the first
 * member: callbacks receive a struct perf_tool * and recover this
 * struct via container_of() (see process_synthesized_event()).
 */
struct perf_record {
        struct perf_tool        tool;           /* callback vtable passed to session code */
        struct perf_record_opts opts;           /* parsed command-line recording options */
        u64                     bytes_written;  /* event payload bytes written so far */
        const char              *output_name;   /* output path, NULL => stdout/perf.data */
        struct perf_evlist      *evlist;        /* events being recorded */
        struct perf_session     *session;       /* session owning the output header */
        const char              *progname;      /* argv[0], for psignal() reporting */
        int                     output;         /* output file descriptor */
        unsigned int            page_size;      /* runtime page size (mmap data offset) */
        int                     realtime_prio;  /* SCHED_FIFO priority, 0 = off */
        enum write_mode_t       write_mode;     /* overwrite vs append to perf.data */
        bool                    no_buildid;     /* skip build-id post-processing */
        bool                    no_buildid_cache; /* don't add build-ids to the cache */
        bool                    force;          /* overwrite existing output file */
        bool                    file_new;       /* output file created by this run */
        bool                    append_file;    /* appending to an existing file */
        long                    samples;        /* mmap reads that found new data */
        off_t                   post_processing_offset; /* start of event data in file */
};
59
/*
 * Account for 'size' bytes written to the output fd by someone else
 * (e.g. perf_event__synthesize_tracing_data() writing directly), so
 * the final bytes_written tally stays accurate.
 */
static void advance_output(struct perf_record *rec, size_t size)
{
        rec->bytes_written += size;
}
64
65 static int write_output(struct perf_record *rec, void *buf, size_t size)
66 {
67         while (size) {
68                 int ret = write(rec->output, buf, size);
69
70                 if (ret < 0) {
71                         pr_err("failed to write\n");
72                         return -1;
73                 }
74
75                 size -= ret;
76                 buf += ret;
77
78                 rec->bytes_written += ret;
79         }
80
81         return 0;
82 }
83
84 static int process_synthesized_event(struct perf_tool *tool,
85                                      union perf_event *event,
86                                      struct perf_sample *sample __maybe_unused,
87                                      struct machine *machine __maybe_unused)
88 {
89         struct perf_record *rec = container_of(tool, struct perf_record, tool);
90         if (write_output(rec, event, event->header.size) < 0)
91                 return -1;
92
93         return 0;
94 }
95
/*
 * Copy all new events from one mmap'ed ring buffer to the output file.
 *
 * The kernel advances the head; md->prev remembers how far we consumed
 * last time.  If the unread region wraps past the end of the buffer it
 * is written as two chunks.  Finally the tail is published back so the
 * kernel may reuse the space.
 *
 * Returns 0 on success (including "nothing new"), -1 on write failure.
 */
static int perf_record__mmap_read(struct perf_record *rec,
                                   struct perf_mmap *md)
{
        unsigned int head = perf_mmap__read_head(md);
        unsigned int old = md->prev;
        /* event data starts one page past base (first page is the control page) */
        unsigned char *data = md->base + rec->page_size;
        unsigned long size;
        void *buf;
        int rc = 0;

        if (old == head)
                return 0;

        rec->samples++;

        size = head - old;

        /* unread data wraps around the buffer end: flush the tail chunk first */
        if ((old & md->mask) + size != (head & md->mask)) {
                buf = &data[old & md->mask];
                size = md->mask + 1 - (old & md->mask);
                old += size;

                if (write_output(rec, buf, size) < 0) {
                        rc = -1;
                        goto out;
                }
        }

        /* remaining (or entire, when no wrap) contiguous chunk */
        buf = &data[old & md->mask];
        size = head - old;
        old += size;

        if (write_output(rec, buf, size) < 0) {
                rc = -1;
                goto out;
        }

        /* tell the kernel we consumed up to 'old' */
        md->prev = old;
        perf_mmap__write_tail(md, old);

out:
        return rc;
}
139
/* Set from the signal handler to make the record loop terminate. */
static volatile int done;
/* Signal that triggered termination; stays -1 on a normal exit. */
static volatile int signr = -1;
/* Non-zero once SIGCHLD reported that the forked workload exited. */
static volatile int child_finished;

/*
 * Common handler for SIGCHLD/SIGINT/SIGUSR1: remember which signal
 * fired and request shutdown; SIGCHLD additionally marks the child
 * as already reaped-able.
 */
static void sig_handler(int sig)
{
        child_finished |= (sig == SIGCHLD);
        done = 1;
        signr = sig;
}
152
/*
 * on_exit() hook: terminate (if still running) and reap the forked
 * workload, then - when we are dying because of a signal - re-raise
 * it with the default disposition so our exit status reflects it.
 */
static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg)
{
        struct perf_record *rec = arg;
        int status;

        if (rec->evlist->workload.pid > 0) {
                if (!child_finished)
                        kill(rec->evlist->workload.pid, SIGTERM);

                /* reap the child; report if a signal killed it */
                wait(&status);
                if (WIFSIGNALED(status))
                        psignal(WTERMSIG(status), rec->progname);
        }

        /*
         * NOTE(review): SIGUSR1 appears to mean a "quiet" exit (it is
         * also special-cased in __cmd_record()); it is deliberately not
         * re-raised here - confirm against the callers that raise it.
         */
        if (signr == -1 || signr == SIGUSR1)
                return;

        signal(signr, SIG_DFL);
        kill(getpid(), signr);
}
173
174 static bool perf_evlist__equal(struct perf_evlist *evlist,
175                                struct perf_evlist *other)
176 {
177         struct perf_evsel *pos, *pair;
178
179         if (evlist->nr_entries != other->nr_entries)
180                 return false;
181
182         pair = perf_evlist__first(other);
183
184         list_for_each_entry(pos, &evlist->entries, node) {
185                 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
186                         return false;
187                 pair = perf_evsel__next(pair);
188         }
189
190         return true;
191 }
192
/*
 * Create the kernel counters for every event in the evlist, mmap the
 * ring buffers and attach the evlist to the session.
 *
 * Contains the compatibility ladder for older kernels: first drop
 * exclude_guest/exclude_host, then sample_id_all, and finally fall
 * back from the hardware cycles event to the cpu-clock software event.
 *
 * Returns 0 on success, a negative errno value or -1 on failure.
 */
static int perf_record__open(struct perf_record *rec)
{
        struct perf_evsel *pos;
        struct perf_evlist *evlist = rec->evlist;
        struct perf_session *session = rec->session;
        struct perf_record_opts *opts = &rec->opts;
        int rc = 0;

        perf_evlist__config_attrs(evlist, opts);

        if (opts->group)
                perf_evlist__set_leader(evlist);

        list_for_each_entry(pos, &evlist->entries, node) {
                struct perf_event_attr *attr = &pos->attr;
                /*
                 * Check if parse_single_tracepoint_event has already asked for
                 * PERF_SAMPLE_TIME.
                 *
                 * XXX this is kludgy but short term fix for problems introduced by
                 * eac23d1c that broke 'perf script' by having different sample_types
                 * when using multiple tracepoint events when we use a perf binary
                 * that tries to use sample_id_all on an older kernel.
                 *
                 * We need to move counter creation to perf_session, support
                 * different sample_types, etc.
                 */
                bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

fallback_missing_features:
                if (opts->exclude_guest_missing)
                        attr->exclude_guest = attr->exclude_host = 0;
retry_sample_id:
                attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
try_again:
                if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
                        int err = errno;

                        if (err == EPERM || err == EACCES) {
                                ui__error_paranoid();
                                rc = -err;
                                goto out;
                        } else if (err ==  ENODEV && opts->target.cpu_list) {
                                pr_err("No such device - did you specify"
                                       " an out-of-range profile CPU?\n");
                                rc = -err;
                                goto out;
                        } else if (err == EINVAL) {
                                /*
                                 * EINVAL can mean the running kernel predates
                                 * some attr fields: retry with progressively
                                 * fewer features requested.
                                 */
                                if (!opts->exclude_guest_missing &&
                                    (attr->exclude_guest || attr->exclude_host)) {
                                        pr_debug("Old kernel, cannot exclude "
                                                 "guest or host samples.\n");
                                        opts->exclude_guest_missing = true;
                                        goto fallback_missing_features;
                                } else if (!opts->sample_id_all_missing) {
                                        /*
                                         * Old kernel, no attr->sample_id_type_all field
                                         */
                                        opts->sample_id_all_missing = true;
                                        if (!opts->sample_time && !opts->raw_samples && !time_needed)
                                                attr->sample_type &= ~PERF_SAMPLE_TIME;

                                        goto retry_sample_id;
                                }
                        }

                        /*
                         * If it's cycles then fall back to hrtimer
                         * based cpu-clock-tick sw counter, which
                         * is always available even if no PMU support.
                         *
                         * PPC returns ENXIO until 2.6.37 (behavior changed
                         * with commit b0a873e).
                         */
                        if ((err == ENOENT || err == ENXIO)
                                        && attr->type == PERF_TYPE_HARDWARE
                                        && attr->config == PERF_COUNT_HW_CPU_CYCLES) {

                                if (verbose)
                                        ui__warning("The cycles event is not supported, "
                                                    "trying to fall back to cpu-clock-ticks\n");
                                attr->type = PERF_TYPE_SOFTWARE;
                                attr->config = PERF_COUNT_SW_CPU_CLOCK;
                                /* drop the cached name so it matches the new event */
                                if (pos->name) {
                                        free(pos->name);
                                        pos->name = NULL;
                                }
                                goto try_again;
                        }

                        if (err == ENOENT) {
                                ui__error("The %s event is not supported.\n",
                                          perf_evsel__name(pos));
                                rc = -err;
                                goto out;
                        }

                        /* nothing more we can do: report the raw failure */
                        printf("\n");
                        error("sys_perf_event_open() syscall returned with %d "
                              "(%s) for event %s. /bin/dmesg may provide "
                              "additional information.\n",
                              err, strerror(err), perf_evsel__name(pos));

#if defined(__i386__) || defined(__x86_64__)
                        if (attr->type == PERF_TYPE_HARDWARE &&
                            err == EOPNOTSUPP) {
                                pr_err("No hardware sampling interrupt available."
                                       " No APIC? If so then you can boot the kernel"
                                       " with the \"lapic\" boot parameter to"
                                       " force-enable it.\n");
                                rc = -err;
                                goto out;
                        }
#endif

                        pr_err("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
                        rc = -err;
                        goto out;
                }
        }

        if (perf_evlist__apply_filters(evlist)) {
                error("failed to set filter with %d (%s)\n", errno,
                        strerror(errno));
                rc = -1;
                goto out;
        }

        if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
                if (errno == EPERM) {
                        pr_err("Permission error mapping pages.\n"
                               "Consider increasing "
                               "/proc/sys/kernel/perf_event_mlock_kb,\n"
                               "or try again with a smaller value of -m/--mmap_pages.\n"
                               "(current value: %d)\n", opts->mmap_pages);
                        rc = -errno;
                } else if (!is_power_of_2(opts->mmap_pages)) {
                        pr_err("--mmap_pages/-m value must be a power of two.");
                        rc = -EINVAL;
                } else {
                        pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
                        rc = -errno;
                }
                goto out;
        }

        /*
         * When appending, the event set must match what is already in
         * the file; a fresh file simply adopts our evlist.
         */
        if (rec->file_new)
                session->evlist = evlist;
        else {
                if (!perf_evlist__equal(session->evlist, evlist)) {
                        fprintf(stderr, "incompatible append\n");
                        rc = -1;
                        goto out;
                }
        }

        perf_session__set_id_hdr_size(session);
out:
        return rc;
}
353
354 static int process_buildids(struct perf_record *rec)
355 {
356         u64 size = lseek(rec->output, 0, SEEK_CUR);
357
358         if (size == 0)
359                 return 0;
360
361         rec->session->fd = rec->output;
362         return __perf_session__process_events(rec->session, rec->post_processing_offset,
363                                               size - rec->post_processing_offset,
364                                               size, &build_id__mark_dso_hit_ops);
365 }
366
/*
 * on_exit() hook for finalizing the output: account the event data
 * size, inject build-ids, rewrite the file header with final values,
 * then tear down the session and evlist.  Pipe output streamed its
 * header up-front and needs none of this.
 */
static void perf_record__exit(int status, void *arg)
{
        struct perf_record *rec = arg;

        /* only finalize on a clean exit */
        if (status != 0)
                return;

        if (!rec->opts.pipe_output) {
                rec->session->header.data_size += rec->bytes_written;

                /* build-ids must be in place before the header is rewritten */
                if (!rec->no_buildid)
                        process_buildids(rec);
                perf_session__write_header(rec->session, rec->evlist,
                                           rec->output, true);
                perf_session__delete(rec->session);
                perf_evlist__delete(rec->evlist);
                symbol__exit();
        }
}
386
/*
 * Machine-iterator callback: synthesize module and kernel mmap events
 * for each guest machine.  The host machine is skipped - its events
 * are synthesized separately in __cmd_record().
 */
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
        int err;
        struct perf_tool *tool = data;

        if (machine__is_host(machine))
                return;

        /*
         * As for guest kernel when processing subcommand record&report,
         * we arrange module mmap prior to guest kernel mmap and trigger
         * a preload dso because default guest module symbols are loaded
         * from guest kallsyms instead of /lib/modules/XXX/XXX. This
         * method is used to avoid symbol missing when the first addr is
         * in module instead of in guest kernel.
         */
        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                             machine);
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);

        /*
         * We use _stext for guest kernel because guest kernel's /proc/kallsyms
         * have no _text sometimes.
         */
        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine, "_text");
        if (err < 0)
                err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                         machine, "_stext");
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);
}
422
/*
 * Header-only PERF_RECORD_FINISHED_ROUND marker written after each
 * pass over the mmap buffers; it lets the report side flush and order
 * everything queued before this point.
 */
static struct perf_event_header finished_round_event = {
        .size = sizeof(struct perf_event_header),
        .type = PERF_RECORD_FINISHED_ROUND,
};
427
428 static int perf_record__mmap_read_all(struct perf_record *rec)
429 {
430         int i;
431         int rc = 0;
432
433         for (i = 0; i < rec->evlist->nr_mmaps; i++) {
434                 if (rec->evlist->mmap[i].base) {
435                         if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
436                                 rc = -1;
437                                 goto out;
438                         }
439                 }
440         }
441
442         if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
443                 rc = write_output(rec, &finished_round_event,
444                                   sizeof(finished_round_event));
445
446 out:
447         return rc;
448 }
449
/*
 * The guts of 'perf record': set up the output file and session, open
 * the counters, synthesize pre-existing state (kernel map, modules,
 * threads), then loop draining the mmap buffers until the workload
 * finishes or the user interrupts us.
 *
 * Returns 0 on success, negative on error.
 */
static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
{
        struct stat st;
        int flags;
        int err, output, feat;
        unsigned long waking = 0;
        const bool forks = argc > 0;
        struct machine *machine;
        struct perf_tool *tool = &rec->tool;
        struct perf_record_opts *opts = &rec->opts;
        struct perf_evlist *evsel_list = rec->evlist;
        const char *output_name = rec->output_name;
        struct perf_session *session;

        rec->progname = argv[0];

        rec->page_size = sysconf(_SC_PAGE_SIZE);

        /* make sure the workload is reaped and signals re-raised on exit */
        on_exit(perf_record__sig_exit, rec);
        signal(SIGCHLD, sig_handler);
        signal(SIGINT, sig_handler);
        signal(SIGUSR1, sig_handler);

        /* no -o given: stream to stdout if it is a pipe, else "perf.data" */
        if (!output_name) {
                if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
                        opts->pipe_output = true;
                else
                        rec->output_name = output_name = "perf.data";
        }
        if (output_name) {
                if (!strcmp(output_name, "-"))
                        opts->pipe_output = true;
                else if (!stat(output_name, &st) && st.st_size) {
                        if (rec->write_mode == WRITE_FORCE) {
                                /* keep one backup of the file being clobbered */
                                char oldname[PATH_MAX];
                                snprintf(oldname, sizeof(oldname), "%s.old",
                                         output_name);
                                unlink(oldname);
                                rename(output_name, oldname);
                        }
                } else if (rec->write_mode == WRITE_APPEND) {
                        /* nothing to append to: fall back to a fresh file */
                        rec->write_mode = WRITE_FORCE;
                }
        }

        flags = O_CREAT|O_RDWR;
        if (rec->write_mode == WRITE_APPEND)
                rec->file_new = 0;
        else
                flags |= O_TRUNC;

        if (opts->pipe_output)
                output = STDOUT_FILENO;
        else
                output = open(output_name, flags, S_IRUSR | S_IWUSR);
        if (output < 0) {
                perror("failed to create output file");
                return -1;
        }

        rec->output = output;

        session = perf_session__new(output_name, O_WRONLY,
                                    rec->write_mode == WRITE_FORCE, false, NULL);
        if (session == NULL) {
                pr_err("Not enough memory for reading perf file header\n");
                return -1;
        }

        rec->session = session;

        /* start with every header feature on, then clear what doesn't apply */
        for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
                perf_header__set_feat(&session->header, feat);

        if (rec->no_buildid)
                perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

        if (!have_tracepoints(&evsel_list->entries))
                perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

        if (!rec->opts.branch_stack)
                perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

        /* appending: load the existing header so evlists can be compared */
        if (!rec->file_new) {
                err = perf_session__read_header(session, output);
                if (err < 0)
                        goto out_delete_session;
        }

        if (forks) {
                err = perf_evlist__prepare_workload(evsel_list, opts, argv);
                if (err < 0) {
                        pr_err("Couldn't run the workload!\n");
                        goto out_delete_session;
                }
        }

        if (perf_record__open(rec) != 0) {
                err = -1;
                goto out_delete_session;
        }

        /*
         * perf_session__delete(session) will be called at perf_record__exit()
         */
        on_exit(perf_record__exit, rec);

        if (opts->pipe_output) {
                err = perf_header__write_pipe(output);
                if (err < 0)
                        goto out_delete_session;
        } else if (rec->file_new) {
                err = perf_session__write_header(session, evsel_list,
                                                 output, false);
                if (err < 0)
                        goto out_delete_session;
        }

        if (!rec->no_buildid
            && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
                pr_err("Couldn't generate buildids. "
                       "Use --no-buildid to profile anyway.\n");
                err = -1;
                goto out_delete_session;
        }

        /* everything past this offset is event data scanned by process_buildids() */
        rec->post_processing_offset = lseek(output, 0, SEEK_CUR);

        machine = perf_session__find_host_machine(session);
        if (!machine) {
                pr_err("Couldn't find native kernel information.\n");
                err = -1;
                goto out_delete_session;
        }

        if (opts->pipe_output) {
                /*
                 * A pipe has no header to rewrite at exit, so attrs,
                 * event types and tracing data are streamed in-band.
                 */
                err = perf_event__synthesize_attrs(tool, session,
                                                   process_synthesized_event);
                if (err < 0) {
                        pr_err("Couldn't synthesize attrs.\n");
                        goto out_delete_session;
                }

                err = perf_event__synthesize_event_types(tool, process_synthesized_event,
                                                         machine);
                if (err < 0) {
                        pr_err("Couldn't synthesize event_types.\n");
                        goto out_delete_session;
                }

                if (have_tracepoints(&evsel_list->entries)) {
                        /*
                         * FIXME err <= 0 here actually means that
                         * there were no tracepoints so its not really
                         * an error, just that we don't need to
                         * synthesize anything.  We really have to
                         * return this more properly and also
                         * propagate errors that now are calling die()
                         */
                        err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
                                                                  process_synthesized_event);
                        if (err <= 0) {
                                pr_err("Couldn't record tracing data.\n");
                                goto out_delete_session;
                        }
                        advance_output(rec, err);
                }
        }

        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine, "_text");
        if (err < 0)
                err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                         machine, "_stext");
        if (err < 0)
                pr_err("Couldn't record kernel reference relocation symbol\n"
                       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                       "Check /proc/kallsyms permission or run as root.\n");

        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                             machine);
        if (err < 0)
                pr_err("Couldn't record kernel module information.\n"
                       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                       "Check /proc/modules permission or run as root.\n");

        if (perf_guest)
                perf_session__process_machines(session, tool,
                                               perf_event__synthesize_guest_os);

        /* synthesize existing threads so samples can be attributed */
        if (!opts->target.system_wide)
                err = perf_event__synthesize_thread_map(tool, evsel_list->threads,
                                                  process_synthesized_event,
                                                  machine);
        else
                err = perf_event__synthesize_threads(tool, process_synthesized_event,
                                               machine);

        if (err != 0)
                goto out_delete_session;

        if (rec->realtime_prio) {
                struct sched_param param;

                param.sched_priority = rec->realtime_prio;
                if (sched_setscheduler(0, SCHED_FIFO, &param)) {
                        pr_err("Could not set realtime priority.\n");
                        err = -1;
                        goto out_delete_session;
                }
        }

        perf_evlist__enable(evsel_list);

        /*
         * Let the child rip
         */
        if (forks)
                perf_evlist__start_workload(evsel_list);

        for (;;) {
                int hits = rec->samples;

                if (perf_record__mmap_read_all(rec) < 0) {
                        err = -1;
                        goto out_delete_session;
                }

                if (hits == rec->samples) {
                        /* nothing arrived: exit if asked, else sleep in poll */
                        if (done)
                                break;
                        err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
                        waking++;
                }

                /*
                 * 'done' was set by a signal: disable the counters; the
                 * loop still exits only once the buffers are drained.
                 */
                if (done)
                        perf_evlist__disable(evsel_list);
        }

        if (quiet || signr == SIGUSR1)
                return 0;

        fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

        /*
         * Approximate RIP event size: 24 bytes.
         */
        fprintf(stderr,
                "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
                (double)rec->bytes_written / 1024.0 / 1024.0,
                output_name,
                rec->bytes_written / 24);

        return 0;

out_delete_session:
        perf_session__delete(session);
        return err;
}
709
/* Helpers for building the branch-filter name -> mode lookup table. */
#define BRANCH_OPT(n, m) \
        { .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }

/* One entry of the -b/--branch-filter lookup table. */
struct branch_mode {
        const char *name;
        int mode;
};

/* Filter names accepted on the command line, mapped to PERF_SAMPLE_BRANCH_* bits. */
static const struct branch_mode branch_modes[] = {
        BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
        BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
        BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
        BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
        BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
        BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
        BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
        BRANCH_END
};
730
/*
 * Option callback for -b/--branch-filter: parse a comma-separated list
 * of filter names (see branch_modes[]) into the bitmask at opt->value.
 * If only privilege-level bits (u/k/hv) end up set, "any" is implied.
 *
 * Returns 0 on success; -1 on an unknown name, an already-populated
 * mask (e.g. -b combined with --branch-filter) or allocation failure.
 */
static int
parse_branch_stack(const struct option *opt, const char *str, int unset)
{
#define ONLY_PLM \
        (PERF_SAMPLE_BRANCH_USER        |\
         PERF_SAMPLE_BRANCH_KERNEL      |\
         PERF_SAMPLE_BRANCH_HV)

        uint64_t *mode = (uint64_t *)opt->value;
        const struct branch_mode *br;
        char *s, *os = NULL, *p;
        int ret = -1;

        if (unset)
                return 0;

        /*
         * cannot set it twice, -b + --branch-filter for instance
         */
        if (*mode)
                return -1;

        /* str may be NULL in case no arg is passed to -b */
        if (str) {
                /* because str is read-only */
                s = os = strdup(str);
                if (!s)
                        return -1;

                for (;;) {
                        /* split on ',' in place; p == NULL on the last token */
                        p = strchr(s, ',');
                        if (p)
                                *p = '\0';

                        /* case-insensitive lookup in the filter table */
                        for (br = branch_modes; br->name; br++) {
                                if (!strcasecmp(s, br->name))
                                        break;
                        }
                        if (!br->name) {
                                ui__warning("unknown branch filter %s,"
                                            " check man page\n", s);
                                goto error;
                        }

                        *mode |= br->mode;

                        if (!p)
                                break;

                        s = p + 1;
                }
        }
        ret = 0;

        /* default to any branch */
        if ((*mode & ~ONLY_PLM) == 0) {
                *mode = PERF_SAMPLE_BRANCH_ANY;
        }
error:
        free(os);
        return ret;
}
793
#ifdef LIBUNWIND_SUPPORT
/*
 * Parse 'str' as the user-requested stack dump size, round it up to a
 * multiple of sizeof(u64) and store the result in *_size.  The value
 * must be non-zero and at most USHRT_MAX rounded down to a u64 boundary.
 *
 * Returns 0 on success, -1 (after printing an error) on bad input.
 */
static int get_stack_size(char *str, unsigned long *_size)
{
        unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));
        unsigned long size;
        char *endptr;

        size = strtoul(str, &endptr, 0);

        /* reject trailing garbage, zero and out-of-range values */
        if (*endptr == '\0') {
                size = round_up(size, sizeof(u64));
                if (size && size <= max_size) {
                        *_size = size;
                        return 0;
                }
        }

        pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
               max_size, str);
        return -1;
}
#endif /* LIBUNWIND_SUPPORT */
821
822 static int
823 parse_callchain_opt(const struct option *opt __maybe_unused, const char *arg,
824                     int unset)
825 {
826         struct perf_record *rec = (struct perf_record *)opt->value;
827         char *tok, *name, *saveptr = NULL;
828         char *buf;
829         int ret = -1;
830
831         /* --no-call-graph */
832         if (unset)
833                 return 0;
834
835         /* We specified default option if none is provided. */
836         BUG_ON(!arg);
837
838         /* We need buffer that we know we can write to. */
839         buf = malloc(strlen(arg) + 1);
840         if (!buf)
841                 return -ENOMEM;
842
843         strcpy(buf, arg);
844
845         tok = strtok_r((char *)buf, ",", &saveptr);
846         name = tok ? : (char *)buf;
847
848         do {
849                 /* Framepointer style */
850                 if (!strncmp(name, "fp", sizeof("fp"))) {
851                         if (!strtok_r(NULL, ",", &saveptr)) {
852                                 rec->opts.call_graph = CALLCHAIN_FP;
853                                 ret = 0;
854                         } else
855                                 pr_err("callchain: No more arguments "
856                                        "needed for -g fp\n");
857                         break;
858
859 #ifdef LIBUNWIND_SUPPORT
860                 /* Dwarf style */
861                 } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
862                         const unsigned long default_stack_dump_size = 8192;
863
864                         ret = 0;
865                         rec->opts.call_graph = CALLCHAIN_DWARF;
866                         rec->opts.stack_dump_size = default_stack_dump_size;
867
868                         tok = strtok_r(NULL, ",", &saveptr);
869                         if (tok) {
870                                 unsigned long size = 0;
871
872                                 ret = get_stack_size(tok, &size);
873                                 rec->opts.stack_dump_size = size;
874                         }
875
876                         if (!ret)
877                                 pr_debug("callchain: stack dump size %d\n",
878                                          rec->opts.stack_dump_size);
879 #endif /* LIBUNWIND_SUPPORT */
880                 } else {
881                         pr_err("callchain: Unknown -g option "
882                                "value: %s\n", arg);
883                         break;
884                 }
885
886         } while (0);
887
888         free(buf);
889
890         if (!ret)
891                 pr_debug("callchain: type %d\n", rec->opts.call_graph);
892
893         return ret;
894 }
895
/* Usage strings printed by --help and by usage_with_options() on errors. */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
901
902 /*
903  * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
904  * because we need to have access to it in perf_record__exit, that is called
905  * after cmd_record() exits, but since record_options need to be accessible to
906  * builtin-script, leave it here.
907  *
908  * At least we don't ouch it in all the other functions here directly.
909  *
910  * Just say no to tons of global variables, sigh.
911  */
/*
 * Defaults for 'perf record'.  UINT_MAX/ULLONG_MAX are "not set by the
 * user" sentinels: cmd_record() only copies user_freq/user_interval
 * into freq/default_interval when they differ from these values.
 */
static struct perf_record record = {
	.opts = {
		.mmap_pages          = UINT_MAX,	/* unset sentinel */
		.user_freq           = UINT_MAX,	/* unset: see cmd_record() */
		.user_interval       = ULLONG_MAX,	/* unset: see cmd_record() */
		.freq                = 4000,		/* default sample frequency (Hz) */
		.target              = {
			.uses_mmap   = true,
		},
	},
	.write_mode = WRITE_FORCE,	/* overwrite perf.data unless -A given */
	.file_new   = true,
};
925
/* Help text for -g/--call-graph; "dwarf" mode requires libunwind. */
#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "

#ifdef LIBUNWIND_SUPPORT
static const char callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
#else
static const char callchain_help[] = CALLCHAIN_HELP "[fp]";
#endif
933
934 /*
935  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
936  * with it and switch to use the library functions in perf_evlist that came
937  * from builtin-record.c, i.e. use perf_record_opts,
938  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
939  * using pipes, etc.
940  */
/*
 * Command line option table; entry order determines --help output order.
 * Most options write straight into the global 'record' instance above.
 */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('A', "append", &record.append_file,
			    "append to the output file to do incremental profiling"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN('f', "force", &record.force,
			"overwrite existing data file (deprecated)"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.output_name, "file",
		    "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
		     "number of mmap data pages"),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_DEFAULT('g', "call-graph", &record, "mode[,dump_size]",
			     callchain_help, &parse_callchain_opt,
			     "fp"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	/* -b takes no argument: it enables sampling of any taken branch. */
	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_END()
};
1008
/*
 * Entry point for 'perf record': parse options, validate the target
 * (pid/tid/cpu/uid), build the event list and hand off to __cmd_record().
 *
 * Returns 0 on success or a negative error code; option misuse exits
 * via usage_with_options().
 */
int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
	int err = -ENOMEM;
	struct perf_evsel *pos;
	struct perf_evlist *evsel_list;
	struct perf_record *rec = &record;
	char errbuf[BUFSIZ];

	evsel_list = perf_evlist__new(NULL, NULL);
	if (evsel_list == NULL)
		return -ENOMEM;

	rec->evlist = evsel_list;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	/* Need either a workload to launch or an existing target to attach to. */
	if (!argc && perf_target__none(&rec->opts.target))
		usage_with_options(record_usage, record_options);

	/* -f and -A are mutually exclusive; -A switches to append mode. */
	if (rec->force && rec->append_file) {
		ui__error("Can't overwrite and append at the same time."
			  " You need to choose between -f and -A");
		usage_with_options(record_usage, record_options);
	} else if (rec->append_file) {
		rec->write_mode = WRITE_APPEND;
	} else {
		rec->write_mode = WRITE_FORCE;
	}

	/* -G only makes sense together with -a. */
	if (nr_cgroups && !rec->opts.target.system_wide) {
		ui__error("cgroup monitoring only available in"
			  " system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	/* Warn early if kernel symbols will not resolve later on. */
	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid)
		disable_buildid_cache();

	/* No -e given: fall back to the default event (cycles). */
	if (evsel_list->nr_entries == 0 &&
	    perf_evlist__add_default(evsel_list) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	/* Inconsistent target options only warn; they don't abort. */
	err = perf_target__validate(&rec->opts.target);
	if (err) {
		perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	err = perf_target__parse_uid(&rec->opts.target);
	if (err) {
		/* perf_target__strerror() may clobber errno: save it first. */
		int saved_errno = errno;

		perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out_free_fd;
	}

	err = -ENOMEM;
	if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_header__push_event(pos->attr.config, perf_evsel__name(pos)))
			goto out_free_fd;
	}

	/* -c / -F on the command line override the built-in defaults. */
	if (rec->opts.user_interval != ULLONG_MAX)
		rec->opts.default_interval = rec->opts.user_interval;
	if (rec->opts.user_freq != UINT_MAX)
		rec->opts.freq = rec->opts.user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (rec->opts.default_interval)
		rec->opts.freq = 0;
	else if (rec->opts.freq) {
		rec->opts.default_interval = rec->opts.freq;
	} else {
		ui__error("frequency and count are zero, aborting\n");
		err = -EINVAL;
		goto out_free_fd;
	}

	err = __cmd_record(&record, argc, argv);
out_free_fd:
	perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
	symbol__exit();
	return err;
}