/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#define _FILE_OFFSET_BITS 64

#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"

#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>

enum write_mode_t {
        WRITE_FORCE,
        WRITE_APPEND
};

static u64                      user_interval                   = ULLONG_MAX;
static u64                      default_interval                =      0;

static unsigned int             page_size;
static unsigned int             mmap_pages                      = UINT_MAX;
static unsigned int             user_freq                       = UINT_MAX;
static int                      freq                            =   1000;
static int                      output;
static int                      pipe_output                     =      0;
static const char               *output_name                    = NULL;
static bool                     group                           =  false;
static int                      realtime_prio                   =      0;
static bool                     nodelay                         =  false;
static bool                     raw_samples                     =  false;
static bool                     sample_id_all_avail             =   true;
static bool                     system_wide                     =  false;
static pid_t                    target_pid                      =     -1;
static pid_t                    target_tid                      =     -1;
static pid_t                    child_pid                       =     -1;
static bool                     no_inherit                      =  false;
static enum write_mode_t        write_mode                      = WRITE_FORCE;
static bool                     call_graph                      =  false;
static bool                     inherit_stat                    =  false;
static bool                     no_samples                      =  false;
static bool                     sample_address                  =  false;
static bool                     sample_time                     =  false;
static bool                     no_buildid                      =  false;
static bool                     no_buildid_cache                =  false;
static struct perf_evlist       *evsel_list;

static long                     samples                         =      0;
static u64                      bytes_written                   =      0;

static int                      file_new                        =      1;
static off_t                    post_processing_offset;

static struct perf_session      *session;
static const char               *cpu_list;
static const char               *progname;

static void advance_output(size_t size)
{
        bytes_written += size;
}

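/*
 * Write a block to the output file, looping on short writes until the
 * whole buffer has been flushed.
 */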
static void write_output(void *buf, size_t size)
{
        while (size) {
                int ret = write(output, buf, size);

                if (ret < 0)
                        die("failed to write");

                size -= ret;
                buf += ret;

                bytes_written += ret;
        }
}

static int process_synthesized_event(union perf_event *event,
                                     struct perf_sample *sample __used,
                                     struct perf_session *self __used)
{
        write_output(event, event->header.size);
        return 0;
}

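/*
 * Drain one mmap'ed ring buffer into the output file.  If the new data
 * wraps past the end of the buffer, it is written out in two pieces:
 * first the tail of the buffer, then the remainder from its start.
 */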
static void mmap_read(struct perf_mmap *md)
{
        unsigned int head = perf_mmap__read_head(md);
        unsigned int old = md->prev;
        unsigned char *data = md->base + page_size;
        unsigned long size;
        void *buf;

        if (old == head)
                return;

        samples++;

        size = head - old;

        if ((old & md->mask) + size != (head & md->mask)) {
                buf = &data[old & md->mask];
                size = md->mask + 1 - (old & md->mask);
                old += size;

                write_output(buf, size);
        }

        buf = &data[old & md->mask];
        size = head - old;
        old += size;

        write_output(buf, size);

        md->prev = old;
        perf_mmap__write_tail(md, old);
}

static volatile int done = 0;
static volatile int signr = -1;
static volatile int child_finished = 0;

static void sig_handler(int sig)
{
        if (sig == SIGCHLD)
                child_finished = 1;

        done = 1;
        signr = sig;
}

static void sig_atexit(void)
{
        int status;

        if (child_pid > 0) {
                if (!child_finished)
                        kill(child_pid, SIGTERM);

                wait(&status);
                if (WIFSIGNALED(status))
                        psignal(WTERMSIG(status), progname);
        }

        if (signr == -1 || signr == SIGUSR1)
                return;

        signal(signr, SIG_DFL);
        kill(getpid(), signr);
}

static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
{
        struct perf_event_attr *attr = &evsel->attr;
        int track = !evsel->idx; /* only the first counter needs these */

        attr->disabled          = 1;
        attr->inherit           = !no_inherit;
        attr->read_format       = PERF_FORMAT_TOTAL_TIME_ENABLED |
                                  PERF_FORMAT_TOTAL_TIME_RUNNING |
                                  PERF_FORMAT_ID;

        attr->sample_type       |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;

        if (evlist->nr_entries > 1)
                attr->sample_type |= PERF_SAMPLE_ID;

        /*
         * Some events default to a sample period of 1. Keep that as a
         * weak default that the user's -c/-F options can override.
         */
        if (!attr->sample_period || (user_freq != UINT_MAX &&
                                     user_interval != ULLONG_MAX)) {
                if (freq) {
                        attr->sample_type       |= PERF_SAMPLE_PERIOD;
                        attr->freq              = 1;
                        attr->sample_freq       = freq;
                } else {
                        attr->sample_period = default_interval;
                }
        }

        if (no_samples)
                attr->sample_freq = 0;

        if (inherit_stat)
                attr->inherit_stat = 1;

        if (sample_address) {
                attr->sample_type       |= PERF_SAMPLE_ADDR;
                attr->mmap_data = track;
        }

        if (call_graph)
                attr->sample_type       |= PERF_SAMPLE_CALLCHAIN;

        if (system_wide)
                attr->sample_type       |= PERF_SAMPLE_CPU;

        if (sample_id_all_avail &&
            (sample_time || system_wide || !no_inherit || cpu_list))
                attr->sample_type       |= PERF_SAMPLE_TIME;

        if (raw_samples) {
                attr->sample_type       |= PERF_SAMPLE_TIME;
                attr->sample_type       |= PERF_SAMPLE_RAW;
                attr->sample_type       |= PERF_SAMPLE_CPU;
        }

        if (nodelay) {
                attr->watermark = 0;
                attr->wakeup_events = 1;
        }

        attr->mmap              = track;
        attr->comm              = track;

        if (target_pid == -1 && target_tid == -1 && !system_wide) {
                attr->disabled = 1;
                attr->enable_on_exec = 1;
        }
}

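/*
 * Compare two event lists attribute by attribute; used to refuse an
 * append (-A) against a perf.data file that was recorded with a
 * different set of events.
 */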
static bool perf_evlist__equal(struct perf_evlist *evlist,
                               struct perf_evlist *other)
{
        struct perf_evsel *pos, *pair;

        if (evlist->nr_entries != other->nr_entries)
                return false;

        pair = list_entry(other->entries.next, struct perf_evsel, node);

        list_for_each_entry(pos, &evlist->entries, node) {
                if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr)) != 0)
                        return false;
                pair = list_entry(pair->node.next, struct perf_evsel, node);
        }

        return true;
}

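/*
 * Create the kernel counters for every event in the list, retrying with
 * older-kernel-compatible attribute settings when sys_perf_event_open()
 * rejects the newer ones, then mmap the resulting ring buffers.
 */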
static void open_counters(struct perf_evlist *evlist)
{
        struct perf_evsel *pos;

        if (evlist->cpus->map[0] < 0)
                no_inherit = true;

        list_for_each_entry(pos, &evlist->entries, node) {
                struct perf_event_attr *attr = &pos->attr;
                /*
                 * Check if parse_single_tracepoint_event has already asked for
                 * PERF_SAMPLE_TIME.
                 *
                 * XXX this is kludgy but short term fix for problems introduced by
                 * eac23d1c that broke 'perf script' by having different sample_types
                 * when using multiple tracepoint events when we use a perf binary
                 * that tries to use sample_id_all on an older kernel.
                 *
                 * We need to move counter creation to perf_session, support
                 * different sample_types, etc.
                 */
                bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

                config_attr(pos, evlist);
retry_sample_id:
                attr->sample_id_all = sample_id_all_avail ? 1 : 0;
try_again:
                if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group) < 0) {
                        int err = errno;

                        if (err == EPERM || err == EACCES) {
                                ui__warning_paranoid();
                                exit(EXIT_FAILURE);
                        } else if (err == ENODEV && cpu_list) {
                                die("No such device - did you specify"
                                        " an out-of-range profile CPU?\n");
                        } else if (err == EINVAL && sample_id_all_avail) {
                                /*
                                 * Old kernel, no attr->sample_id_type_all field
                                 */
                                sample_id_all_avail = false;
                                if (!sample_time && !raw_samples && !time_needed)
                                        attr->sample_type &= ~PERF_SAMPLE_TIME;

                                goto retry_sample_id;
                        }

                        /*
                         * If it's cycles then fall back to hrtimer
                         * based cpu-clock-tick sw counter, which
                         * is always available even if no PMU support:
                         */
                        if (attr->type == PERF_TYPE_HARDWARE
                                        && attr->config == PERF_COUNT_HW_CPU_CYCLES) {

                                if (verbose)
                                        ui__warning("The cycles event is not supported, "
                                                    "trying to fall back to cpu-clock-ticks\n");
                                attr->type = PERF_TYPE_SOFTWARE;
                                attr->config = PERF_COUNT_SW_CPU_CLOCK;
                                goto try_again;
                        }

                        if (err == ENOENT) {
                                ui__warning("The %s event is not supported.\n",
                                            event_name(pos));
                                exit(EXIT_FAILURE);
                        }

                        printf("\n");
                        error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
                              err, strerror(err));

#if defined(__i386__) || defined(__x86_64__)
                        if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
                                die("No hardware sampling interrupt available."
                                    " No APIC? If so then you can boot the kernel"
                                    " with the \"lapic\" boot parameter to"
                                    " force-enable it.\n");
#endif

                        die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
                }
        }

        if (perf_evlist__set_filters(evlist)) {
                error("failed to set filter with %d (%s)\n", errno,
                        strerror(errno));
                exit(-1);
        }

        if (perf_evlist__mmap(evlist, mmap_pages, false) < 0)
                die("failed to mmap with %d (%s)\n", errno, strerror(errno));

        if (file_new)
                session->evlist = evlist;
        else {
                if (!perf_evlist__equal(session->evlist, evlist)) {
                        fprintf(stderr, "incompatible append\n");
                        exit(-1);
                }
        }

        perf_session__update_sample_type(session);
}

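/*
 * Walk the events recorded so far and mark the DSOs that were actually
 * hit, so that only their build-ids end up in the file header.
 */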
static int process_buildids(void)
{
        u64 size = lseek(output, 0, SEEK_CUR);

        if (size == 0)
                return 0;

        session->fd = output;
        return __perf_session__process_events(session, post_processing_offset,
                                              size - post_processing_offset,
                                              size, &build_id__mark_dso_hit_ops);
}

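/*
 * atexit handler: account for the raw event data appended after the
 * header, resolve build-ids and rewrite the final header in place.
 */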
static void atexit_header(void)
{
        if (!pipe_output) {
                session->header.data_size += bytes_written;

                if (!no_buildid)
                        process_buildids();
                perf_session__write_header(session, evsel_list, output, true);
                perf_session__delete(session);
                perf_evlist__delete(evsel_list);
                symbol__exit();
        }
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
        int err;
        struct perf_session *psession = data;

        if (machine__is_host(machine))
                return;

        /*
         * For guest kernels, when processing the record and report
         * subcommands we synthesize the module mmaps before the guest
         * kernel mmap and force the DSOs to be preloaded, because guest
         * module symbols are loaded from the guest's kallsyms rather
         * than from /lib/modules/XXX/XXX. This avoids missing symbols
         * when the first sampled address falls in a module rather than
         * in the guest kernel proper.
         */
        err = perf_event__synthesize_modules(process_synthesized_event,
                                             psession, machine);
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);

        /*
         * We use _stext for guest kernel because guest kernel's /proc/kallsyms
         * have no _text sometimes.
         */
        err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
                                                 psession, machine, "_text");
        if (err < 0)
                err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
                                                         psession, machine,
                                                         "_stext");
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);
}

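/*
 * Synthetic event written after each full pass over the mmap buffers,
 * marking a point up to which the recorded data forms a complete round
 * and can be flushed in order on the report side.
 */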
static struct perf_event_header finished_round_event = {
        .size = sizeof(struct perf_event_header),
        .type = PERF_RECORD_FINISHED_ROUND,
};

static void mmap_read_all(void)
{
        int i;

        for (i = 0; i < evsel_list->nr_mmaps; i++) {
                if (evsel_list->mmap[i].base)
                        mmap_read(&evsel_list->mmap[i]);
        }

        if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
                write_output(&finished_round_event, sizeof(finished_round_event));
}

static int __cmd_record(int argc, const char **argv)
{
        struct stat st;
        int flags;
        int err;
        unsigned long waking = 0;
        int child_ready_pipe[2], go_pipe[2];
        const bool forks = argc > 0;
        char buf;
        struct machine *machine;

        progname = argv[0];

        page_size = sysconf(_SC_PAGE_SIZE);

        atexit(sig_atexit);
        signal(SIGCHLD, sig_handler);
        signal(SIGINT, sig_handler);
        signal(SIGUSR1, sig_handler);

        if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
                perror("failed to create pipes");
                exit(-1);
        }

        if (!output_name) {
                if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
                        pipe_output = 1;
                else
                        output_name = "perf.data";
        }
        if (output_name) {
                if (!strcmp(output_name, "-"))
                        pipe_output = 1;
                else if (!stat(output_name, &st) && st.st_size) {
                        if (write_mode == WRITE_FORCE) {
                                char oldname[PATH_MAX];
                                snprintf(oldname, sizeof(oldname), "%s.old",
                                         output_name);
                                unlink(oldname);
                                rename(output_name, oldname);
                        }
                } else if (write_mode == WRITE_APPEND) {
                        write_mode = WRITE_FORCE;
                }
        }

        flags = O_CREAT|O_RDWR;
        if (write_mode == WRITE_APPEND)
                file_new = 0;
        else
                flags |= O_TRUNC;

        if (pipe_output)
                output = STDOUT_FILENO;
        else
                output = open(output_name, flags, S_IRUSR | S_IWUSR);
        if (output < 0) {
                perror("failed to create output file");
                exit(-1);
        }

        session = perf_session__new(output_name, O_WRONLY,
                                    write_mode == WRITE_FORCE, false, NULL);
        if (session == NULL) {
                pr_err("Not enough memory for reading perf file header\n");
                return -1;
        }

        if (!no_buildid)
                perf_header__set_feat(&session->header, HEADER_BUILD_ID);

        if (!file_new) {
                err = perf_session__read_header(session, output);
                if (err < 0)
                        goto out_delete_session;
        }

        if (have_tracepoints(&evsel_list->entries))
                perf_header__set_feat(&session->header, HEADER_TRACE_INFO);

        perf_header__set_feat(&session->header, HEADER_HOSTNAME);
        perf_header__set_feat(&session->header, HEADER_OSRELEASE);
        perf_header__set_feat(&session->header, HEADER_ARCH);
        perf_header__set_feat(&session->header, HEADER_CPUDESC);
        perf_header__set_feat(&session->header, HEADER_NRCPUS);
        perf_header__set_feat(&session->header, HEADER_EVENT_DESC);
        perf_header__set_feat(&session->header, HEADER_CMDLINE);
        perf_header__set_feat(&session->header, HEADER_VERSION);
        perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY);
        perf_header__set_feat(&session->header, HEADER_TOTAL_MEM);
        perf_header__set_feat(&session->header, HEADER_NUMA_TOPOLOGY);
        perf_header__set_feat(&session->header, HEADER_CPUID);

        /* 512 kiB: default amount of unprivileged mlocked memory */
        if (mmap_pages == UINT_MAX)
                mmap_pages = (512 * 1024) / page_size;

        if (forks) {
                child_pid = fork();
                if (child_pid < 0) {
                        perror("failed to fork");
                        exit(-1);
                }

                if (!child_pid) {
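                        /*
                         * When perf.data goes to stdout, point the child's
                         * stdout at stderr so its output cannot corrupt the
                         * recorded stream.
                         */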
                        if (pipe_output)
                                dup2(2, 1);
                        close(child_ready_pipe[0]);
                        close(go_pipe[1]);
                        fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

                        /*
                         * Do a dummy execvp to get the PLT entry resolved,
                         * so we avoid the resolver overhead on the real
                         * execvp call.
                         */
                        execvp("", (char **)argv);

                        /*
                         * Tell the parent we're ready to go
                         */
                        close(child_ready_pipe[1]);

                        /*
                         * Wait until the parent tells us to go.
                         */
                        if (read(go_pipe[0], &buf, 1) == -1)
                                perror("unable to read pipe");

                        execvp(argv[0], (char **)argv);

                        perror(argv[0]);
                        kill(getppid(), SIGUSR1);
                        exit(-1);
                }

                if (!system_wide && target_tid == -1 && target_pid == -1)
                        evsel_list->threads->map[0] = child_pid;

                close(child_ready_pipe[1]);
                close(go_pipe[0]);
                /*
                 * wait for child to settle
                 */
                if (read(child_ready_pipe[0], &buf, 1) == -1) {
                        perror("unable to read pipe");
                        exit(-1);
                }
                close(child_ready_pipe[0]);
        }

        open_counters(evsel_list);

        /*
         * perf_session__delete(session) will be called at atexit_header()
         */
        atexit(atexit_header);

        if (pipe_output) {
                err = perf_header__write_pipe(output);
                if (err < 0)
                        return err;
        } else if (file_new) {
                err = perf_session__write_header(session, evsel_list,
                                                 output, false);
                if (err < 0)
                        return err;
        }

        post_processing_offset = lseek(output, 0, SEEK_CUR);

        if (pipe_output) {
                err = perf_session__synthesize_attrs(session,
                                                     process_synthesized_event);
                if (err < 0) {
                        pr_err("Couldn't synthesize attrs.\n");
                        return err;
                }

                err = perf_event__synthesize_event_types(process_synthesized_event,
                                                         session);
                if (err < 0) {
                        pr_err("Couldn't synthesize event_types.\n");
                        return err;
                }

                if (have_tracepoints(&evsel_list->entries)) {
                        /*
                         * FIXME err <= 0 here actually means that
                         * there were no tracepoints so its not really
                         * an error, just that we don't need to
                         * synthesize anything.  We really have to
                         * return this more properly and also
                         * propagate errors that now are calling die()
                         */
                        err = perf_event__synthesize_tracing_data(output, evsel_list,
                                                                  process_synthesized_event,
                                                                  session);
                        if (err <= 0) {
                                pr_err("Couldn't record tracing data.\n");
                                return err;
                        }
                        advance_output(err);
                }
        }

        machine = perf_session__find_host_machine(session);
        if (!machine) {
                pr_err("Couldn't find native kernel information.\n");
                return -1;
        }

        err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
                                                 session, machine, "_text");
        if (err < 0)
                err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
                                                         session, machine, "_stext");
        if (err < 0)
                pr_err("Couldn't record kernel reference relocation symbol\n"
                       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                       "Check /proc/kallsyms permission or run as root.\n");

        err = perf_event__synthesize_modules(process_synthesized_event,
                                             session, machine);
        if (err < 0)
                pr_err("Couldn't record kernel module information.\n"
                       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                       "Check /proc/modules permission or run as root.\n");

        if (perf_guest)
                perf_session__process_machines(session,
                                               perf_event__synthesize_guest_os);

        if (!system_wide)
                perf_event__synthesize_thread_map(evsel_list->threads,
                                                  process_synthesized_event,
                                                  session);
        else
                perf_event__synthesize_threads(process_synthesized_event,
                                               session);

        if (realtime_prio) {
                struct sched_param param;

                param.sched_priority = realtime_prio;
                if (sched_setscheduler(0, SCHED_FIFO, &param)) {
                        pr_err("Could not set realtime priority.\n");
                        exit(-1);
                }
        }

        perf_evlist__enable(evsel_list);

        /*
         * Let the child rip
         */
        if (forks)
                close(go_pipe[1]);

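        /*
         * Main capture loop: drain every mmap buffer, and when no new
         * samples arrived, block in poll() until the counters wake us up
         * or we are asked to stop.
         */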
        for (;;) {
                int hits = samples;

                mmap_read_all();

                if (hits == samples) {
                        if (done)
                                break;
                        err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
                        waking++;
                }

                if (done)
                        perf_evlist__disable(evsel_list);
        }

        if (quiet || signr == SIGUSR1)
                return 0;

        fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

        /*
         * Approximate RIP event size: 24 bytes.
         */
        fprintf(stderr,
                "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
                (double)bytes_written / 1024.0 / 1024.0,
                output_name,
                bytes_written / 24);

        return 0;

out_delete_session:
        perf_session__delete(session);
        return err;
}

static const char * const record_usage[] = {
        "perf record [<options>] [<command>]",
        "perf record [<options>] -- <command> [<options>]",
        NULL
};

static bool force, append_file;

const struct option record_options[] = {
        OPT_CALLBACK('e', "event", &evsel_list, "event",
                     "event selector. use 'perf list' to list available events",
                     parse_events_option),
        OPT_CALLBACK(0, "filter", &evsel_list, "filter",
                     "event filter", parse_filter),
        OPT_INTEGER('p', "pid", &target_pid,
                    "record events on existing process id"),
        OPT_INTEGER('t', "tid", &target_tid,
                    "record events on existing thread id"),
        OPT_INTEGER('r', "realtime", &realtime_prio,
                    "collect data with this RT SCHED_FIFO priority"),
        OPT_BOOLEAN('D', "no-delay", &nodelay,
                    "collect data without buffering"),
        OPT_BOOLEAN('R', "raw-samples", &raw_samples,
                    "collect raw sample records from all opened counters"),
        OPT_BOOLEAN('a', "all-cpus", &system_wide,
                            "system-wide collection from all CPUs"),
        OPT_BOOLEAN('A', "append", &append_file,
                            "append to the output file to do incremental profiling"),
        OPT_STRING('C', "cpu", &cpu_list, "cpu",
                    "list of cpus to monitor"),
        OPT_BOOLEAN('f', "force", &force,
                        "overwrite existing data file (deprecated)"),
        OPT_U64('c', "count", &user_interval, "event period to sample"),
        OPT_STRING('o', "output", &output_name, "file",
                    "output file name"),
        OPT_BOOLEAN('i', "no-inherit", &no_inherit,
                    "child tasks do not inherit counters"),
        OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"),
        OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
        OPT_BOOLEAN(0, "group", &group,
                    "put the counters into a counter group"),
        OPT_BOOLEAN('g', "call-graph", &call_graph,
                    "do call-graph (stack chain/backtrace) recording"),
        OPT_INCR('v', "verbose", &verbose,
                    "be more verbose (show counter open errors, etc)"),
        OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
        OPT_BOOLEAN('s', "stat", &inherit_stat,
                    "per thread counts"),
        OPT_BOOLEAN('d', "data", &sample_address,
                    "Sample addresses"),
        OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"),
        OPT_BOOLEAN('n', "no-samples", &no_samples,
                    "don't sample"),
        OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
                    "do not update the buildid cache"),
        OPT_BOOLEAN('B', "no-buildid", &no_buildid,
                    "do not collect buildids in perf.data"),
        OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
                     "monitor event in cgroup name only",
                     parse_cgroups),
        OPT_END()
};

int cmd_record(int argc, const char **argv, const char *prefix __used)
{
        int err = -ENOMEM;
        struct perf_evsel *pos;

        perf_header__set_cmdline(argc, argv);

        evsel_list = perf_evlist__new(NULL, NULL);
        if (evsel_list == NULL)
                return -ENOMEM;

        argc = parse_options(argc, argv, record_options, record_usage,
                            PARSE_OPT_STOP_AT_NON_OPTION);
        if (!argc && target_pid == -1 && target_tid == -1 &&
                !system_wide && !cpu_list)
                usage_with_options(record_usage, record_options);

        if (force && append_file) {
                fprintf(stderr, "Can't overwrite and append at the same time."
                                " You need to choose between -f and -A");
                usage_with_options(record_usage, record_options);
        } else if (append_file) {
                write_mode = WRITE_APPEND;
        } else {
                write_mode = WRITE_FORCE;
        }

        if (nr_cgroups && !system_wide) {
                fprintf(stderr, "cgroup monitoring only available in"
                        " system-wide mode\n");
                usage_with_options(record_usage, record_options);
        }

        symbol__init();

        if (symbol_conf.kptr_restrict)
                pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

        if (no_buildid_cache || no_buildid)
                disable_buildid_cache();

        if (evsel_list->nr_entries == 0 &&
            perf_evlist__add_default(evsel_list) < 0) {
                pr_err("Not enough memory for event selector list\n");
                goto out_symbol_exit;
        }

        if (target_pid != -1)
                target_tid = target_pid;

        if (perf_evlist__create_maps(evsel_list, target_pid,
                                     target_tid, cpu_list) < 0)
                usage_with_options(record_usage, record_options);

        list_for_each_entry(pos, &evsel_list->entries, node) {
                if (perf_evsel__alloc_fd(pos, evsel_list->cpus->nr,
                                         evsel_list->threads->nr) < 0)
                        goto out_free_fd;
                if (perf_header__push_event(pos->attr.config, event_name(pos)))
                        goto out_free_fd;
        }

        if (perf_evlist__alloc_pollfd(evsel_list) < 0)
                goto out_free_fd;

        if (user_interval != ULLONG_MAX)
                default_interval = user_interval;
        if (user_freq != UINT_MAX)
                freq = user_freq;

        /*
         * User specified count overrides default frequency.
         */
        if (default_interval)
                freq = 0;
        else if (freq) {
                default_interval = freq;
        } else {
                fprintf(stderr, "frequency and count are zero, aborting\n");
                err = -EINVAL;
                goto out_free_fd;
        }

        err = __cmd_record(argc, argv);
out_free_fd:
        perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
        symbol__exit();
        return err;
}