pandora-kernel.git: tools/perf/builtin-record.c
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #include "builtin.h"
9
10 #include "perf.h"
11
12 #include "util/build-id.h"
13 #include "util/util.h"
14 #include "util/parse-options.h"
15 #include "util/parse-events.h"
16
17 #include "util/header.h"
18 #include "util/event.h"
19 #include "util/evlist.h"
20 #include "util/evsel.h"
21 #include "util/debug.h"
22 #include "util/session.h"
23 #include "util/tool.h"
24 #include "util/symbol.h"
25 #include "util/cpumap.h"
26 #include "util/thread_map.h"
27
28 #include <unistd.h>
29 #include <sched.h>
30 #include <sys/mman.h>
31
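/*
 * Fallback implementation of on_exit(3) for C libraries that lack it:
 * handlers registered here are run from a single atexit() hook, and the
 * exit() wrapper macro below records the exit status that is passed to
 * each handler along with its registered argument.
 */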
32 #ifndef HAVE_ON_EXIT
33 #ifndef ATEXIT_MAX
34 #define ATEXIT_MAX 32
35 #endif
36 static int __on_exit_count = 0;
37 typedef void (*on_exit_func_t) (int, void *);
38 static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];
39 static void *__on_exit_args[ATEXIT_MAX];
40 static int __exitcode = 0;
41 static void __handle_on_exit_funcs(void);
42 static int on_exit(on_exit_func_t function, void *arg);
43 #define exit(x) (exit)(__exitcode = (x))
44
45 static int on_exit(on_exit_func_t function, void *arg)
46 {
47         if (__on_exit_count == ATEXIT_MAX)
48                 return -ENOMEM;
49         else if (__on_exit_count == 0)
50                 atexit(__handle_on_exit_funcs);
51         __on_exit_funcs[__on_exit_count] = function;
52         __on_exit_args[__on_exit_count++] = arg;
53         return 0;
54 }
55
56 static void __handle_on_exit_funcs(void)
57 {
58         int i;
59         for (i = 0; i < __on_exit_count; i++)
60                 __on_exit_funcs[i] (__exitcode, __on_exit_args[i]);
61 }
62 #endif
63
64 enum write_mode_t {
65         WRITE_FORCE,
66         WRITE_APPEND
67 };
68
69 struct perf_record {
70         struct perf_tool        tool;
71         struct perf_record_opts opts;
72         u64                     bytes_written;
73         const char              *output_name;
74         struct perf_evlist      *evlist;
75         struct perf_session     *session;
76         const char              *progname;
77         int                     output;
78         unsigned int            page_size;
79         int                     realtime_prio;
80         enum write_mode_t       write_mode;
81         bool                    no_buildid;
82         bool                    no_buildid_cache;
83         bool                    force;
84         bool                    file_new;
85         bool                    append_file;
86         long                    samples;
87         off_t                   post_processing_offset;
88 };
89
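/*
 * Account for data that was written straight to the output fd by someone
 * else (e.g. perf_event__synthesize_tracing_data() below), so that the
 * header's data size still ends up correct.
 */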
90 static void advance_output(struct perf_record *rec, size_t size)
91 {
92         rec->bytes_written += size;
93 }
94
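/*
 * Append a buffer to the output file, looping so that short write()s are
 * retried, and add the byte count to rec->bytes_written for the final
 * header update.
 */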
95 static int write_output(struct perf_record *rec, void *buf, size_t size)
96 {
97         while (size) {
98                 int ret = write(rec->output, buf, size);
99
100                 if (ret < 0) {
101                         pr_err("failed to write\n");
102                         return -1;
103                 }
104
105                 size -= ret;
106                 buf += ret;
107
108                 rec->bytes_written += ret;
109         }
110
111         return 0;
112 }
113
114 static int process_synthesized_event(struct perf_tool *tool,
115                                      union perf_event *event,
116                                      struct perf_sample *sample __maybe_unused,
117                                      struct machine *machine __maybe_unused)
118 {
119         struct perf_record *rec = container_of(tool, struct perf_record, tool);
120         if (write_output(rec, event, event->header.size) < 0)
121                 return -1;
122
123         return 0;
124 }
125
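/*
 * Drain one event ring buffer: copy everything between the previously seen
 * tail (md->prev) and the current head to the output file.  The data area
 * starts one page past md->base (the first page is the control page), and
 * a region that wraps around the end of the ring is written in two chunks.
 * The tail is only advanced after the data has been written out, telling
 * the kernel that the space may be reused.
 */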
126 static int perf_record__mmap_read(struct perf_record *rec,
127                                    struct perf_mmap *md)
128 {
129         unsigned int head = perf_mmap__read_head(md);
130         unsigned int old = md->prev;
131         unsigned char *data = md->base + rec->page_size;
132         unsigned long size;
133         void *buf;
134         int rc = 0;
135
136         if (old == head)
137                 return 0;
138
139         rec->samples++;
140
141         size = head - old;
142
143         if ((old & md->mask) + size != (head & md->mask)) {
144                 buf = &data[old & md->mask];
145                 size = md->mask + 1 - (old & md->mask);
146                 old += size;
147
148                 if (write_output(rec, buf, size) < 0) {
149                         rc = -1;
150                         goto out;
151                 }
152         }
153
154         buf = &data[old & md->mask];
155         size = head - old;
156         old += size;
157
158         if (write_output(rec, buf, size) < 0) {
159                 rc = -1;
160                 goto out;
161         }
162
163         md->prev = old;
164         perf_mmap__write_tail(md, old);
165
166 out:
167         return rc;
168 }
169
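/*
 * State shared with the signal handler: 'done' asks the main loop to stop,
 * 'child_finished' notes that the forked workload already exited (SIGCHLD),
 * and 'signr' remembers which signal to re-raise when exiting.
 */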
170 static volatile int done = 0;
171 static volatile int signr = -1;
172 static volatile int child_finished = 0;
173
174 static void sig_handler(int sig)
175 {
176         if (sig == SIGCHLD)
177                 child_finished = 1;
178
179         done = 1;
180         signr = sig;
181 }
182
183 static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg)
184 {
185         struct perf_record *rec = arg;
186         int status;
187
188         if (rec->evlist->workload.pid > 0) {
189                 if (!child_finished)
190                         kill(rec->evlist->workload.pid, SIGTERM);
191
192                 wait(&status);
193                 if (WIFSIGNALED(status))
194                         psignal(WTERMSIG(status), rec->progname);
195         }
196
197         if (signr == -1 || signr == SIGUSR1)
198                 return;
199
200         signal(signr, SIG_DFL);
201         kill(getpid(), signr);
202 }
203
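/*
 * Used when appending to an existing perf.data file: the event list being
 * recorded must match the one already described in the file header, so
 * compare the attrs pairwise, in order.
 */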
204 static bool perf_evlist__equal(struct perf_evlist *evlist,
205                                struct perf_evlist *other)
206 {
207         struct perf_evsel *pos, *pair;
208
209         if (evlist->nr_entries != other->nr_entries)
210                 return false;
211
212         pair = perf_evlist__first(other);
213
214         list_for_each_entry(pos, &evlist->entries, node) {
215                 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr)) != 0)
216                         return false;
217                 pair = perf_evsel__next(pair);
218         }
219
220         return true;
221 }
222
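/*
 * Configure and open the counters on the evlist's cpus/threads, retrying
 * with perf_evsel__fallback() when the running kernel rejects an event,
 * then apply any event filters and mmap the per-cpu ring buffers.  When
 * appending, the event list must match what is already in the file.
 */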
223 static int perf_record__open(struct perf_record *rec)
224 {
225         char msg[512];
226         struct perf_evsel *pos;
227         struct perf_evlist *evlist = rec->evlist;
228         struct perf_session *session = rec->session;
229         struct perf_record_opts *opts = &rec->opts;
230         int rc = 0;
231
232         perf_evlist__config(evlist, opts);
233
234         list_for_each_entry(pos, &evlist->entries, node) {
235 try_again:
236                 if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
237                         if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
238                                 if (verbose)
239                                         ui__warning("%s\n", msg);
240                                 goto try_again;
241                         }
242
243                         rc = -errno;
244                         perf_evsel__open_strerror(pos, &opts->target,
245                                                   errno, msg, sizeof(msg));
246                         ui__error("%s\n", msg);
247                         goto out;
248                 }
249         }
250
251         if (perf_evlist__apply_filters(evlist)) {
252                 error("failed to set filter with %d (%s)\n", errno,
253                         strerror(errno));
254                 rc = -1;
255                 goto out;
256         }
257
258         if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
259                 if (errno == EPERM) {
260                         pr_err("Permission error mapping pages.\n"
261                                "Consider increasing "
262                                "/proc/sys/kernel/perf_event_mlock_kb,\n"
263                                "or try again with a smaller value of -m/--mmap_pages.\n"
264                                "(current value: %d)\n", opts->mmap_pages);
265                         rc = -errno;
266                 } else if (!is_power_of_2(opts->mmap_pages) &&
267                            (opts->mmap_pages != UINT_MAX)) {
268                         pr_err("--mmap_pages/-m value must be a power of two.\n");
269                         rc = -EINVAL;
270                 } else {
271                         pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
272                         rc = -errno;
273                 }
274                 goto out;
275         }
276
277         if (rec->file_new)
278                 session->evlist = evlist;
279         else {
280                 if (!perf_evlist__equal(session->evlist, evlist)) {
281                         fprintf(stderr, "incompatible append\n");
282                         rc = -1;
283                         goto out;
284                 }
285         }
286
287         perf_session__set_id_hdr_size(session);
288 out:
289         return rc;
290 }
291
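/*
 * Re-read everything recorded after post_processing_offset with the
 * build_id__mark_dso_hit_ops tool, so that only DSOs that actually got
 * samples are marked and have their build-ids written into the header.
 */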
292 static int process_buildids(struct perf_record *rec)
293 {
294         u64 size = lseek(rec->output, 0, SEEK_CUR);
295
296         if (size == 0)
297                 return 0;
298
299         rec->session->fd = rec->output;
300         return __perf_session__process_events(rec->session, rec->post_processing_offset,
301                                               size - rec->post_processing_offset,
302                                               size, &build_id__mark_dso_hit_ops);
303 }
304
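/*
 * Registered via on_exit(): on a successful exit, fix up the data size in
 * the header, optionally collect build-ids, rewrite the header in place
 * and tear down the session.  Nothing needs to be (or can be) rewritten
 * for pipe output.
 */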
305 static void perf_record__exit(int status, void *arg)
306 {
307         struct perf_record *rec = arg;
308
309         if (status != 0)
310                 return;
311
312         if (!rec->opts.pipe_output) {
313                 rec->session->header.data_size += rec->bytes_written;
314
315                 if (!rec->no_buildid)
316                         process_buildids(rec);
317                 perf_session__write_header(rec->session, rec->evlist,
318                                            rec->output, true);
319                 perf_session__delete(rec->session);
320                 perf_evlist__delete(rec->evlist);
321                 symbol__exit();
322         }
323 }
324
325 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
326 {
327         int err;
328         struct perf_tool *tool = data;
329         /*
330          * For a guest kernel, when processing the record & report
331          * subcommands, we synthesize the module mmap events before the
332          * guest kernel mmap event and pre-load the DSOs, since guest module
333          * symbols are loaded from guest kallsyms instead of /lib/modules/XXX/XXX.
334          * This avoids missing symbols when the first sampled address is in a
335          * module rather than in the guest kernel itself.
336          */
337         err = perf_event__synthesize_modules(tool, process_synthesized_event,
338                                              machine);
339         if (err < 0)
340                 pr_err("Couldn't record guest kernel [%d]'s reference"
341                        " relocation symbol.\n", machine->pid);
342
343         /*
344          * We use _stext for guest kernel because guest kernel's /proc/kallsyms
345          * have no _text sometimes.
346          */
347         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
348                                                  machine, "_text");
349         if (err < 0)
350                 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
351                                                          machine, "_stext");
352         if (err < 0)
353                 pr_err("Couldn't record guest kernel [%d]'s reference"
354                        " relocation symbol.\n", machine->pid);
355 }
356
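/*
 * PERF_RECORD_FINISHED_ROUND marks a point where every ring buffer has been
 * drained once; the report side uses it as a flush point when re-ordering
 * events by time.  It is emitted after each pass over the mmaps below
 * (when tracing data is being recorded).
 */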
357 static struct perf_event_header finished_round_event = {
358         .size = sizeof(struct perf_event_header),
359         .type = PERF_RECORD_FINISHED_ROUND,
360 };
361
362 static int perf_record__mmap_read_all(struct perf_record *rec)
363 {
364         int i;
365         int rc = 0;
366
367         for (i = 0; i < rec->evlist->nr_mmaps; i++) {
368                 if (rec->evlist->mmap[i].base) {
369                         if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
370                                 rc = -1;
371                                 goto out;
372                         }
373                 }
374         }
375
376         if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
377                 rc = write_output(rec, &finished_round_event,
378                                   sizeof(finished_round_event));
379
380 out:
381         return rc;
382 }
383
384 static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
385 {
386         struct stat st;
387         int flags;
388         int err, output, feat;
389         unsigned long waking = 0;
390         const bool forks = argc > 0;
391         struct machine *machine;
392         struct perf_tool *tool = &rec->tool;
393         struct perf_record_opts *opts = &rec->opts;
394         struct perf_evlist *evsel_list = rec->evlist;
395         const char *output_name = rec->output_name;
396         struct perf_session *session;
397         bool disabled = false;
398
399         rec->progname = argv[0];
400
401         rec->page_size = sysconf(_SC_PAGE_SIZE);
402
403         on_exit(perf_record__sig_exit, rec);
404         signal(SIGCHLD, sig_handler);
405         signal(SIGINT, sig_handler);
406         signal(SIGUSR1, sig_handler);
407
408         if (!output_name) {
409                 if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
410                         opts->pipe_output = true;
411                 else
412                         rec->output_name = output_name = "perf.data";
413         }
414         if (output_name) {
415                 if (!strcmp(output_name, "-"))
416                         opts->pipe_output = true;
417                 else if (!stat(output_name, &st) && st.st_size) {
418                         if (rec->write_mode == WRITE_FORCE) {
419                                 char oldname[PATH_MAX];
420                                 snprintf(oldname, sizeof(oldname), "%s.old",
421                                          output_name);
422                                 unlink(oldname);
423                                 rename(output_name, oldname);
424                         }
425                 } else if (rec->write_mode == WRITE_APPEND) {
426                         rec->write_mode = WRITE_FORCE;
427                 }
428         }
429
430         flags = O_CREAT|O_RDWR;
431         if (rec->write_mode == WRITE_APPEND)
432                 rec->file_new = 0;
433         else
434                 flags |= O_TRUNC;
435
436         if (opts->pipe_output)
437                 output = STDOUT_FILENO;
438         else
439                 output = open(output_name, flags, S_IRUSR | S_IWUSR);
440         if (output < 0) {
441                 perror("failed to create output file");
442                 return -1;
443         }
444
445         rec->output = output;
446
447         session = perf_session__new(output_name, O_WRONLY,
448                                     rec->write_mode == WRITE_FORCE, false, NULL);
449         if (session == NULL) {
450                 pr_err("Not enough memory for reading perf file header\n");
451                 return -1;
452         }
453
454         rec->session = session;
455
456         for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
457                 perf_header__set_feat(&session->header, feat);
458
459         if (rec->no_buildid)
460                 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
461
462         if (!have_tracepoints(&evsel_list->entries))
463                 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
464
465         if (!rec->opts.branch_stack)
466                 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
467
468         if (!rec->file_new) {
469                 err = perf_session__read_header(session, output);
470                 if (err < 0)
471                         goto out_delete_session;
472         }
473
474         if (forks) {
475                 err = perf_evlist__prepare_workload(evsel_list, &opts->target,
476                                                     argv, opts->pipe_output,
477                                                     true);
478                 if (err < 0) {
479                         pr_err("Couldn't run the workload!\n");
480                         goto out_delete_session;
481                 }
482         }
483
484         if (perf_record__open(rec) != 0) {
485                 err = -1;
486                 goto out_delete_session;
487         }
488
489         if (!evsel_list->nr_groups)
490                 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
491
492         /*
493          * perf_session__delete(session) will be called at perf_record__exit()
494          */
495         on_exit(perf_record__exit, rec);
496
497         if (opts->pipe_output) {
498                 err = perf_header__write_pipe(output);
499                 if (err < 0)
500                         goto out_delete_session;
501         } else if (rec->file_new) {
502                 err = perf_session__write_header(session, evsel_list,
503                                                  output, false);
504                 if (err < 0)
505                         goto out_delete_session;
506         }
507
508         if (!rec->no_buildid
509             && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
510                 pr_err("Couldn't generate buildids. "
511                        "Use --no-buildid to profile anyway.\n");
512                 err = -1;
513                 goto out_delete_session;
514         }
515
516         rec->post_processing_offset = lseek(output, 0, SEEK_CUR);
517
518         machine = &session->machines.host;
519
520         if (opts->pipe_output) {
521                 err = perf_event__synthesize_attrs(tool, session,
522                                                    process_synthesized_event);
523                 if (err < 0) {
524                         pr_err("Couldn't synthesize attrs.\n");
525                         goto out_delete_session;
526                 }
527
528                 err = perf_event__synthesize_event_types(tool, process_synthesized_event,
529                                                          machine);
530                 if (err < 0) {
531                         pr_err("Couldn't synthesize event_types.\n");
532                         goto out_delete_session;
533                 }
534
535                 if (have_tracepoints(&evsel_list->entries)) {
536                         /*
537                          * FIXME: err <= 0 here actually means that
538                          * there were no tracepoints, so it's not really
539                          * an error, just that we don't need to
540                          * synthesize anything.  We really should
541                          * report this more properly, and also
542                          * propagate the errors that currently call die().
543                          */
544                         err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
545                                                                   process_synthesized_event);
546                         if (err <= 0) {
547                                 pr_err("Couldn't record tracing data.\n");
548                                 goto out_delete_session;
549                         }
550                         advance_output(rec, err);
551                 }
552         }
553
554         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
555                                                  machine, "_text");
556         if (err < 0)
557                 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
558                                                          machine, "_stext");
559         if (err < 0)
560                 pr_err("Couldn't record kernel reference relocation symbol\n"
561                        "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
562                        "Check /proc/kallsyms permission or run as root.\n");
563
564         err = perf_event__synthesize_modules(tool, process_synthesized_event,
565                                              machine);
566         if (err < 0)
567                 pr_err("Couldn't record kernel module information.\n"
568                        "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
569                        "Check /proc/modules permission or run as root.\n");
570
571         if (perf_guest) {
572                 machines__process_guests(&session->machines,
573                                          perf_event__synthesize_guest_os, tool);
574         }
575
576         if (perf_target__has_task(&opts->target))
577                 err = perf_event__synthesize_thread_map(tool, evsel_list->threads,
578                                                   process_synthesized_event,
579                                                   machine);
580         else if (perf_target__has_cpu(&opts->target))
581                 err = perf_event__synthesize_threads(tool, process_synthesized_event,
582                                                machine);
583         else /* command specified */
584                 err = 0;
585
586         if (err != 0)
587                 goto out_delete_session;
588
589         if (rec->realtime_prio) {
590                 struct sched_param param;
591
592                 param.sched_priority = rec->realtime_prio;
593                 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
594                         pr_err("Could not set realtime priority.\n");
595                         err = -1;
596                         goto out_delete_session;
597                 }
598         }
599
600         /*
601          * When perf is starting the traced process, all the events
602          * (apart from group members) have enable_on_exec=1 set,
603          * so don't spoil it by prematurely enabling them.
604          */
605         if (!perf_target__none(&opts->target))
606                 perf_evlist__enable(evsel_list);
607
608         /*
609          * Let the child rip
610          */
611         if (forks)
612                 perf_evlist__start_workload(evsel_list);
613
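        /*
         * Main capture loop: drain all the mmaps, and if nothing new was
         * found, block in poll() on the counter fds until more data arrives
         * or a signal sets 'done'.
         */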
614         for (;;) {
615                 int hits = rec->samples;
616
617                 if (perf_record__mmap_read_all(rec) < 0) {
618                         err = -1;
619                         goto out_delete_session;
620                 }
621
622                 if (hits == rec->samples) {
623                         if (done)
624                                 break;
625                         err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
626                         waking++;
627                 }
628
629                 /*
630                  * When perf is starting the traced process, at the end events
631                  * die with the process and we wait for that. Thus no need to
632                  * disable events in this case.
633                  */
634                 if (done && !disabled && !perf_target__none(&opts->target)) {
635                         perf_evlist__disable(evsel_list);
636                         disabled = true;
637                 }
638         }
639
640         if (quiet || signr == SIGUSR1)
641                 return 0;
642
643         fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
644
645         /*
646          * Approximate RIP event size: 24 bytes.
647          */
648         fprintf(stderr,
649                 "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
650                 (double)rec->bytes_written / 1024.0 / 1024.0,
651                 output_name,
652                 rec->bytes_written / 24);
653
654         return 0;
655
656 out_delete_session:
657         perf_session__delete(session);
658         return err;
659 }
660
661 #define BRANCH_OPT(n, m) \
662         { .name = n, .mode = (m) }
663
664 #define BRANCH_END { .name = NULL }
665
666 struct branch_mode {
667         const char *name;
668         int mode;
669 };
670
671 static const struct branch_mode branch_modes[] = {
672         BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
673         BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
674         BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
675         BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
676         BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
677         BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
678         BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
679         BRANCH_END
680 };
681
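/*
 * Parse the -b/-j argument: a comma separated list of the branch_modes
 * names above, OR'ed together into a PERF_SAMPLE_BRANCH_* mask.  For
 * example, "-j any_call,u" yields ANY_CALL | USER.  If only privilege
 * levels (u/k/hv) were given, the branch type defaults to "any".
 */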
682 static int
683 parse_branch_stack(const struct option *opt, const char *str, int unset)
684 {
685 #define ONLY_PLM \
686         (PERF_SAMPLE_BRANCH_USER        |\
687          PERF_SAMPLE_BRANCH_KERNEL      |\
688          PERF_SAMPLE_BRANCH_HV)
689
690         uint64_t *mode = (uint64_t *)opt->value;
691         const struct branch_mode *br;
692         char *s, *os = NULL, *p;
693         int ret = -1;
694
695         if (unset)
696                 return 0;
697
698         /*
699          * cannot set it twice, -b + --branch-filter for instance
700          */
701         if (*mode)
702                 return -1;
703
704         /* str may be NULL in case no arg is passed to -b */
705         if (str) {
706                 /* because str is read-only */
707                 s = os = strdup(str);
708                 if (!s)
709                         return -1;
710
711                 for (;;) {
712                         p = strchr(s, ',');
713                         if (p)
714                                 *p = '\0';
715
716                         for (br = branch_modes; br->name; br++) {
717                                 if (!strcasecmp(s, br->name))
718                                         break;
719                         }
720                         if (!br->name) {
721                                 ui__warning("unknown branch filter %s,"
722                                             " check man page\n", s);
723                                 goto error;
724                         }
725
726                         *mode |= br->mode;
727
728                         if (!p)
729                                 break;
730
731                         s = p + 1;
732                 }
733         }
734         ret = 0;
735
736         /* default to any branch */
737         if ((*mode & ~ONLY_PLM) == 0) {
738                 *mode = PERF_SAMPLE_BRANCH_ANY;
739         }
740 error:
741         free(os);
742         return ret;
743 }
744
745 #ifdef LIBUNWIND_SUPPORT
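/*
 * Validate the user supplied stack dump size for DWARF unwinding: round it
 * up to a multiple of sizeof(u64) and reject zero or anything larger than
 * USHRT_MAX rounded down to a u64 multiple.
 */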
746 static int get_stack_size(char *str, unsigned long *_size)
747 {
748         char *endptr;
749         unsigned long size;
750         unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));
751
752         size = strtoul(str, &endptr, 0);
753
754         do {
755                 if (*endptr)
756                         break;
757
758                 size = round_up(size, sizeof(u64));
759                 if (!size || size > max_size)
760                         break;
761
762                 *_size = size;
763                 return 0;
764
765         } while (0);
766
767         pr_err("callchain: Incorrect stack dump size (max %lu): %s\n",
768                max_size, str);
769         return -1;
770 }
771 #endif /* LIBUNWIND_SUPPORT */
772
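/*
 * Parse the -g/--call-graph argument: "fp" selects frame pointer based
 * callchains; with libunwind support, "dwarf[,<size>]" requests a copy of
 * <size> bytes of user stack per sample (8192 by default) for DWARF
 * unwinding at report time.
 */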
773 int record_parse_callchain_opt(const struct option *opt,
774                                const char *arg, int unset)
775 {
776         struct perf_record_opts *opts = opt->value;
777         char *tok, *name, *saveptr = NULL;
778         char *buf;
779         int ret = -1;
780
781         /* --no-call-graph */
782         if (unset)
783                 return 0;
784
785         /* We specify a default option if none is provided. */
786         BUG_ON(!arg);
787
788         /* We need a buffer that we know we can write to. */
789         buf = malloc(strlen(arg) + 1);
790         if (!buf)
791                 return -ENOMEM;
792
793         strcpy(buf, arg);
794
795         tok = strtok_r((char *)buf, ",", &saveptr);
796         name = tok ? : (char *)buf;
797
798         do {
799                 /* Framepointer style */
800                 if (!strncmp(name, "fp", sizeof("fp"))) {
801                         if (!strtok_r(NULL, ",", &saveptr)) {
802                                 opts->call_graph = CALLCHAIN_FP;
803                                 ret = 0;
804                         } else
805                                 pr_err("callchain: No more arguments "
806                                        "needed for -g fp\n");
807                         break;
808
809 #ifdef LIBUNWIND_SUPPORT
810                 /* Dwarf style */
811                 } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
812                         const unsigned long default_stack_dump_size = 8192;
813
814                         ret = 0;
815                         opts->call_graph = CALLCHAIN_DWARF;
816                         opts->stack_dump_size = default_stack_dump_size;
817
818                         tok = strtok_r(NULL, ",", &saveptr);
819                         if (tok) {
820                                 unsigned long size = 0;
821
822                                 ret = get_stack_size(tok, &size);
823                                 opts->stack_dump_size = size;
824                         }
825
826                         if (!ret)
827                                 pr_debug("callchain: stack dump size %d\n",
828                                          opts->stack_dump_size);
829 #endif /* LIBUNWIND_SUPPORT */
830                 } else {
831                         pr_err("callchain: Unknown -g option "
832                                "value: %s\n", arg);
833                         break;
834                 }
835
836         } while (0);
837
838         free(buf);
839
840         if (!ret)
841                 pr_debug("callchain: type %d\n", opts->call_graph);
842
843         return ret;
844 }
845
846 static const char * const record_usage[] = {
847         "perf record [<options>] [<command>]",
848         "perf record [<options>] -- <command> [<options>]",
849         NULL
850 };
851
852 /*
853  * XXX Ideally this would be local to cmd_record() and passed to a
854  * perf_record__new(), because we need access to it in perf_record__exit(),
855  * which is called after cmd_record() exits; but since record_options needs
856  * to be accessible to builtin-script, leave it here.
857  *
858  * At least we don't touch it in all the other functions here directly.
859  *
860  * Just say no to tons of global variables, sigh.
861  */
862 static struct perf_record record = {
863         .opts = {
864                 .mmap_pages          = UINT_MAX,
865                 .user_freq           = UINT_MAX,
866                 .user_interval       = ULLONG_MAX,
867                 .freq                = 4000,
868                 .target              = {
869                         .uses_mmap   = true,
870                 },
871         },
872         .write_mode = WRITE_FORCE,
873         .file_new   = true,
874 };
875
876 #define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "
877
878 #ifdef LIBUNWIND_SUPPORT
879 const char record_callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
880 #else
881 const char record_callchain_help[] = CALLCHAIN_HELP "[fp]";
882 #endif
883
884 /*
885  * XXX Will stay a global variable until we fix builtin-script.c to stop
886  * messing with it and switch to using the library functions in perf_evlist
887  * that came from builtin-record.c, i.e. use perf_record_opts,
888  * perf_evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record'
889  * and talking to it over pipes.
890  */
891 const struct option record_options[] = {
892         OPT_CALLBACK('e', "event", &record.evlist, "event",
893                      "event selector. use 'perf list' to list available events",
894                      parse_events_option),
895         OPT_CALLBACK(0, "filter", &record.evlist, "filter",
896                      "event filter", parse_filter),
897         OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
898                     "record events on existing process id"),
899         OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
900                     "record events on existing thread id"),
901         OPT_INTEGER('r', "realtime", &record.realtime_prio,
902                     "collect data with this RT SCHED_FIFO priority"),
903         OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
904                     "collect data without buffering"),
905         OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
906                     "collect raw sample records from all opened counters"),
907         OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
908                             "system-wide collection from all CPUs"),
909         OPT_BOOLEAN('A', "append", &record.append_file,
910                             "append to the output file to do incremental profiling"),
911         OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
912                     "list of cpus to monitor"),
913         OPT_BOOLEAN('f', "force", &record.force,
914                         "overwrite existing data file (deprecated)"),
915         OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
916         OPT_STRING('o', "output", &record.output_name, "file",
917                     "output file name"),
918         OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
919                     "child tasks do not inherit counters"),
920         OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
921         OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
922                      "number of mmap data pages"),
923         OPT_BOOLEAN(0, "group", &record.opts.group,
924                     "put the counters into a counter group"),
925         OPT_CALLBACK_DEFAULT('g', "call-graph", &record.opts,
926                              "mode[,dump_size]", record_callchain_help,
927                              &record_parse_callchain_opt, "fp"),
928         OPT_INCR('v', "verbose", &verbose,
929                     "be more verbose (show counter open errors, etc)"),
930         OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
931         OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
932                     "per thread counts"),
933         OPT_BOOLEAN('d', "data", &record.opts.sample_address,
934                     "Sample addresses"),
935         OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
936         OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
937         OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
938                     "don't sample"),
939         OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
940                     "do not update the buildid cache"),
941         OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
942                     "do not collect buildids in perf.data"),
943         OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
944                      "monitor event in cgroup name only",
945                      parse_cgroups),
946         OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
947                    "user to profile"),
948
949         OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
950                      "branch any", "sample any taken branches",
951                      parse_branch_stack),
952
953         OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
954                      "branch filter mask", "branch stack filter modes",
955                      parse_branch_stack),
956         OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
957                     "sample by weight (on special events only)"),
958         OPT_END()
959 };
960
961 int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
962 {
963         int err = -ENOMEM;
964         struct perf_evsel *pos;
965         struct perf_evlist *evsel_list;
966         struct perf_record *rec = &record;
967         char errbuf[BUFSIZ];
968
969         evsel_list = perf_evlist__new();
970         if (evsel_list == NULL)
971                 return -ENOMEM;
972
973         rec->evlist = evsel_list;
974
975         argc = parse_options(argc, argv, record_options, record_usage,
976                             PARSE_OPT_STOP_AT_NON_OPTION);
977         if (!argc && perf_target__none(&rec->opts.target))
978                 usage_with_options(record_usage, record_options);
979
980         if (rec->force && rec->append_file) {
981                 ui__error("Can't overwrite and append at the same time."
982                           " You need to choose between -f and -A");
983                 usage_with_options(record_usage, record_options);
984         } else if (rec->append_file) {
985                 rec->write_mode = WRITE_APPEND;
986         } else {
987                 rec->write_mode = WRITE_FORCE;
988         }
989
990         if (nr_cgroups && !rec->opts.target.system_wide) {
991                 ui__error("cgroup monitoring only available in"
992                           " system-wide mode\n");
993                 usage_with_options(record_usage, record_options);
994         }
995
996         symbol__init();
997
998         if (symbol_conf.kptr_restrict)
999                 pr_warning(
1000 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1001 "check /proc/sys/kernel/kptr_restrict.\n\n"
1002 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1003 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1004 "Samples in kernel modules won't be resolved at all.\n\n"
1005 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1006 "even with a suitable vmlinux or kallsyms file.\n\n");
1007
1008         if (rec->no_buildid_cache || rec->no_buildid)
1009                 disable_buildid_cache();
1010
1011         if (evsel_list->nr_entries == 0 &&
1012             perf_evlist__add_default(evsel_list) < 0) {
1013                 pr_err("Not enough memory for event selector list\n");
1014                 goto out_symbol_exit;
1015         }
1016
1017         err = perf_target__validate(&rec->opts.target);
1018         if (err) {
1019                 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1020                 ui__warning("%s", errbuf);
1021         }
1022
1023         err = perf_target__parse_uid(&rec->opts.target);
1024         if (err) {
1025                 int saved_errno = errno;
1026
1027                 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1028                 ui__error("%s", errbuf);
1029
1030                 err = -saved_errno;
1031                 goto out_symbol_exit;
1032         }
1033
1034         err = -ENOMEM;
1035         if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
1036                 usage_with_options(record_usage, record_options);
1037
1038         list_for_each_entry(pos, &evsel_list->entries, node) {
1039                 if (perf_header__push_event(pos->attr.config, perf_evsel__name(pos)))
1040                         goto out_free_fd;
1041         }
1042
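        /*
         * ULLONG_MAX and UINT_MAX are the "not set on the command line"
         * markers for -c and -F respectively (see the initializers in
         * 'record' above); anything else was given explicitly by the user.
         */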
1043         if (rec->opts.user_interval != ULLONG_MAX)
1044                 rec->opts.default_interval = rec->opts.user_interval;
1045         if (rec->opts.user_freq != UINT_MAX)
1046                 rec->opts.freq = rec->opts.user_freq;
1047
1048         /*
1049          * User specified count overrides default frequency.
1050          */
1051         if (rec->opts.default_interval)
1052                 rec->opts.freq = 0;
1053         else if (rec->opts.freq) {
1054                 rec->opts.default_interval = rec->opts.freq;
1055         } else {
1056                 ui__error("frequency and count are zero, aborting\n");
1057                 err = -EINVAL;
1058                 goto out_free_fd;
1059         }
1060
1061         err = __cmd_record(&record, argc, argv);
1062
1063         perf_evlist__munmap(evsel_list);
1064         perf_evlist__close(evsel_list);
1065 out_free_fd:
1066         perf_evlist__delete_maps(evsel_list);
1067 out_symbol_exit:
1068         symbol__exit();
1069         return err;
1070 }