Merge branch 'timers-fixes-for-linus' of git://tesla.tglx.de/git/linux-2.6-tip
[pandora-kernel.git] / tools / perf / builtin-record.c
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #define _FILE_OFFSET_BITS 64
9
10 #include "builtin.h"
11
12 #include "perf.h"
13
14 #include "util/build-id.h"
15 #include "util/util.h"
16 #include "util/parse-options.h"
17 #include "util/parse-events.h"
18
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/session.h"
25 #include "util/symbol.h"
26 #include "util/cpumap.h"
27 #include "util/thread_map.h"
28
29 #include <unistd.h>
30 #include <sched.h>
31 #include <sys/mman.h>
32
/* How to treat an already-existing perf.data file. */
enum write_mode_t {
	WRITE_FORCE,	/* overwrite (old file is renamed to *.old first) */
	WRITE_APPEND	/* append new events to the existing file */
};

/* Sampling parameters; the user_* variants hold what -c/-F specified. */
static u64			user_interval			= ULLONG_MAX;
static u64			default_interval		=      0;

static unsigned int		page_size;
static unsigned int		mmap_pages			= UINT_MAX;
static unsigned int		user_freq			= UINT_MAX;
static int			freq				=   1000;
static int			output;				/* output file descriptor */
static int			pipe_output			=      0;	/* writing to a pipe, not a file */
static const char		*output_name			= NULL;
static bool			group				=  false;
static int			realtime_prio			=      0;
static bool			nodelay				=  false;
static bool			raw_samples			=  false;
/* Cleared at open time if the running kernel lacks attr.sample_id_all. */
static bool			sample_id_all_avail		=   true;
static bool			system_wide			=  false;
static pid_t			target_pid			=     -1;
static pid_t			target_tid			=     -1;
static pid_t			child_pid			=     -1;	/* forked workload, killed at exit */
static bool			no_inherit			=  false;
static enum write_mode_t	write_mode			= WRITE_FORCE;
static bool			call_graph			=  false;
static bool			inherit_stat			=  false;
static bool			no_samples			=  false;
static bool			sample_address			=  false;
static bool			sample_time			=  false;
static bool			no_buildid			=  false;
static bool			no_buildid_cache		=  false;
static struct perf_evlist	*evsel_list;

/* Progress accounting for the final summary message. */
static long			samples				=      0;
static u64			bytes_written			=      0;

static int			file_new			=      1;	/* 0 when appending */
static off_t			post_processing_offset;		/* where sample data starts in the file */

static struct perf_session	*session;
static const char		*cpu_list;
/*
 * Account for 'size' bytes that were written to the output file by a
 * helper (e.g. perf_event__synthesize_tracing_data) rather than through
 * write_output(), so the final header records the correct data size.
 */
static void advance_output(size_t size)
{
	bytes_written += size;
}
81
82 static void write_output(void *buf, size_t size)
83 {
84         while (size) {
85                 int ret = write(output, buf, size);
86
87                 if (ret < 0)
88                         die("failed to write");
89
90                 size -= ret;
91                 buf += ret;
92
93                 bytes_written += ret;
94         }
95 }
96
/*
 * perf_session callback: forward a synthesized event verbatim to the
 * output file.  The sample and session arguments are unused.
 */
static int process_synthesized_event(union perf_event *event,
				     struct perf_sample *sample __used,
				     struct perf_session *self __used)
{
	write_output(event, event->header.size);
	return 0;
}
104
/*
 * Drain one kernel ring buffer: copy everything between our cached tail
 * (md->prev) and the kernel's head pointer to the output file, then
 * advance the tail so the kernel can reuse the space.
 *
 * head/old are free-running counters; '& md->mask' maps them into the
 * power-of-two buffer.  If the new data wraps around the end of the
 * buffer it is written in two chunks.
 */
static void mmap_read(struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + page_size;	/* data area follows the control page */
	unsigned long size;
	void *buf;

	if (old == head)
		return;

	samples++;

	size = head - old;

	/* Wrapped: first write from the old tail to the end of the buffer. */
	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		write_output(buf, size);
	}

	/* Write the (remaining) contiguous chunk up to head. */
	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	write_output(buf, size);

	md->prev = old;
	perf_mmap__write_tail(md, old);
}
137
/* Set asynchronously from signal context, polled by the record loop. */
static volatile int done = 0;
static volatile int signr = -1;

/* Remember which signal ended the run and ask the main loop to stop. */
static void sig_handler(int sig)
{
	done = 1;
	signr = sig;
}
146
/*
 * atexit handler: terminate the forked workload (if any) and, when we
 * are exiting because of a fatal signal, re-raise it with the default
 * disposition so our exit status reflects the signal.  SIGUSR1 is the
 * child's exec-failure notification, so it is not re-raised.
 */
static void sig_atexit(void)
{
	if (child_pid > 0)
		kill(child_pid, SIGTERM);

	if (signr == -1 || signr == SIGUSR1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
}
158
/*
 * Fill in a perf_event_attr from the global option state before the
 * event is opened.  Side-band tracking events (mmap/comm) are only
 * requested on the first counter in the list.
 */
static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
{
	struct perf_event_attr *attr = &evsel->attr;
	int track = !evsel->idx; /* only the first counter needs these */

	attr->inherit		= !no_inherit;
	attr->read_format	= PERF_FORMAT_TOTAL_TIME_ENABLED |
				  PERF_FORMAT_TOTAL_TIME_RUNNING |
				  PERF_FORMAT_ID;

	attr->sample_type	|= PERF_SAMPLE_IP | PERF_SAMPLE_TID;

	/* With several events we need the ID to tell samples apart. */
	if (evlist->nr_entries > 1)
		attr->sample_type |= PERF_SAMPLE_ID;

	/*
	 * We default some events to a 1 default interval. But keep
	 * it a weak assumption overridable by the user.
	 */
	if (!attr->sample_period || (user_freq != UINT_MAX &&
				     user_interval != ULLONG_MAX)) {
		if (freq) {
			attr->sample_type	|= PERF_SAMPLE_PERIOD;
			attr->freq		= 1;
			attr->sample_freq	= freq;
		} else {
			attr->sample_period = default_interval;
		}
	}

	/* -n: counting only, no samples. */
	if (no_samples)
		attr->sample_freq = 0;

	if (inherit_stat)
		attr->inherit_stat = 1;

	if (sample_address) {
		attr->sample_type	|= PERF_SAMPLE_ADDR;
		attr->mmap_data = track;
	}

	if (call_graph)
		attr->sample_type	|= PERF_SAMPLE_CALLCHAIN;

	if (system_wide)
		attr->sample_type	|= PERF_SAMPLE_CPU;

	/* Timestamps are needed whenever samples must be merged/sorted. */
	if (sample_id_all_avail &&
	    (sample_time || system_wide || !no_inherit || cpu_list))
		attr->sample_type	|= PERF_SAMPLE_TIME;

	if (raw_samples) {
		attr->sample_type	|= PERF_SAMPLE_TIME;
		attr->sample_type	|= PERF_SAMPLE_RAW;
		attr->sample_type	|= PERF_SAMPLE_CPU;
	}

	/* -D: wake up on every event instead of buffering. */
	if (nodelay) {
		attr->watermark = 0;
		attr->wakeup_events = 1;
	}

	attr->mmap		= track;
	attr->comm		= track;

	/*
	 * When profiling a forked workload, start disabled and let the
	 * exec of the child enable the counter, so we don't measure
	 * our own setup.
	 */
	if (target_pid == -1 && target_tid == -1 && !system_wide) {
		attr->disabled = 1;
		attr->enable_on_exec = 1;
	}
}
229
230 static bool perf_evlist__equal(struct perf_evlist *evlist,
231                                struct perf_evlist *other)
232 {
233         struct perf_evsel *pos, *pair;
234
235         if (evlist->nr_entries != other->nr_entries)
236                 return false;
237
238         pair = list_entry(other->entries.next, struct perf_evsel, node);
239
240         list_for_each_entry(pos, &evlist->entries, node) {
241                 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
242                         return false;
243                 pair = list_entry(pair->node.next, struct perf_evsel, node);
244         }
245
246         return true;
247 }
248
/*
 * Configure and open every counter in the evlist, falling back
 * gracefully on older kernels / missing hardware, then mmap the ring
 * buffers and (when appending) verify the event list matches the
 * existing file.  Exits the process on unrecoverable errors.
 */
static void open_counters(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	/* A dummy CPU map (map[0] < 0) means per-thread mode: no inherit. */
	if (evlist->cpus->map[0] < 0)
		no_inherit = true;

	list_for_each_entry(pos, &evlist->entries, node) {
		struct perf_event_attr *attr = &pos->attr;
		/*
		 * Check if parse_single_tracepoint_event has already asked for
		 * PERF_SAMPLE_TIME.
		 *
		 * XXX this is kludgy but short term fix for problems introduced by
		 * eac23d1c that broke 'perf script' by having different sample_types
		 * when using multiple tracepoint events when we use a perf binary
		 * that tries to use sample_id_all on an older kernel.
		 *
		 * We need to move counter creation to perf_session, support
		 * different sample_types, etc.
		 */
		bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

		config_attr(pos, evlist);
retry_sample_id:
		attr->sample_id_all = sample_id_all_avail ? 1 : 0;
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group) < 0) {
			int err = errno;

			if (err == EPERM || err == EACCES) {
				/* Insufficient privilege: explain paranoid setting and bail. */
				ui__warning_paranoid();
				exit(EXIT_FAILURE);
			} else if (err ==  ENODEV && cpu_list) {
				die("No such device - did you specify"
					" an out-of-range profile CPU?\n");
			} else if (err == EINVAL && sample_id_all_avail) {
				/*
				 * Old kernel, no attr->sample_id_type_all field
				 */
				sample_id_all_avail = false;
				if (!sample_time && !raw_samples && !time_needed)
					attr->sample_type &= ~PERF_SAMPLE_TIME;

				goto retry_sample_id;
			}

			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support:
			 */
			if (attr->type == PERF_TYPE_HARDWARE
					&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {

				if (verbose)
					ui__warning("The cycles event is not supported, "
						    "trying to fall back to cpu-clock-ticks\n");
				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				goto try_again;
			}

			if (err == ENOENT) {
				ui__warning("The %s event is not supported.\n",
					    event_name(pos));
				exit(EXIT_FAILURE);
			}

			printf("\n");
			error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
			      err, strerror(err));

#if defined(__i386__) || defined(__x86_64__)
			if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
				die("No hardware sampling interrupt available."
				    " No APIC? If so then you can boot the kernel"
				    " with the \"lapic\" boot parameter to"
				    " force-enable it.\n");
#endif

			die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
		}
	}

	if (perf_evlist__set_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		exit(-1);
	}

	if (perf_evlist__mmap(evlist, mmap_pages, false) < 0)
		die("failed to mmap with %d (%s)\n", errno, strerror(errno));

	if (file_new)
		session->evlist = evlist;
	else {
		/* Appending: the new events must match the file's events. */
		if (!perf_evlist__equal(session->evlist, evlist)) {
			fprintf(stderr, "incompatible append\n");
			exit(-1);
		}
	}

	perf_session__update_sample_type(session);
}
354
/*
 * Re-read the recorded sample data (everything after
 * post_processing_offset) to mark which DSOs were actually hit, so
 * only their build-ids end up in the header.
 */
static int process_buildids(void)
{
	u64 size = lseek(output, 0, SEEK_CUR);	/* current end of file */

	if (size == 0)
		return 0;

	session->fd = output;
	return __perf_session__process_events(session, post_processing_offset,
					      size - post_processing_offset,
					      size, &build_id__mark_dso_hit_ops);
}
367
/*
 * atexit handler: finalize the data size, collect build-ids and rewrite
 * the file header.  Nothing to do when streaming to a pipe, since a
 * pipe header was already emitted and cannot be rewritten.
 */
static void atexit_header(void)
{
	if (!pipe_output) {
		session->header.data_size += bytes_written;

		if (!no_buildid)
			process_buildids();
		perf_session__write_header(session, evsel_list, output, true);
		perf_session__delete(session);
		perf_evlist__delete(evsel_list);
		symbol__exit();
	}
}
381
/*
 * perf_session__process_machines callback: synthesize module and kernel
 * mmap events for one guest machine (the host is handled separately).
 */
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_session *psession = data;

	if (machine__is_host(machine))
		return;

	/*
	 * As for guest kernel when processing subcommand record&report,
	 * we arrange module mmap prior to guest kernel mmap and trigger
	 * a preload dso because default guest module symbols are loaded
	 * from guest kallsyms instead of /lib/modules/XXX/XXX. This
	 * method is used to avoid symbol missing when the first addr is
	 * in module instead of in guest kernel.
	 */
	err = perf_event__synthesize_modules(process_synthesized_event,
					     psession, machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
	 * have no _text sometimes.
	 */
	err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
						 psession, machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
							 psession, machine,
							 "_stext");
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}
418
/*
 * Marker event emitted after each pass over all ring buffers; lets the
 * report side flush and sort the events buffered so far.
 */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
423
424 static void mmap_read_all(void)
425 {
426         int i;
427
428         for (i = 0; i < evsel_list->nr_mmaps; i++) {
429                 if (evsel_list->mmap[i].base)
430                         mmap_read(&evsel_list->mmap[i]);
431         }
432
433         if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
434                 write_output(&finished_round_event, sizeof(finished_round_event));
435 }
436
/*
 * The actual record run: set up the output file and session, optionally
 * fork the workload, open the counters, synthesize the initial metadata
 * events (kernel/module/thread maps), then loop draining the ring
 * buffers until interrupted or the workload exits.
 *
 * Returns 0 on success, negative on error; may also exit() directly on
 * fatal setup failures.
 */
static int __cmd_record(int argc, const char **argv)
{
	struct stat st;
	int flags;
	int err;
	unsigned long waking = 0;
	int child_ready_pipe[2], go_pipe[2];	/* child->parent / parent->child handshakes */
	const bool forks = argc > 0;		/* a command to run was given */
	char buf;
	struct machine *machine;

	page_size = sysconf(_SC_PAGE_SIZE);

	atexit(sig_atexit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGUSR1, sig_handler);

	if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
		perror("failed to create pipes");
		exit(-1);
	}

	/* Decide where the data goes: stdout pipe or a regular file. */
	if (!output_name) {
		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
			pipe_output = 1;
		else
			output_name = "perf.data";
	}
	if (output_name) {
		if (!strcmp(output_name, "-"))
			pipe_output = 1;
		else if (!stat(output_name, &st) && st.st_size) {
			/* Keep the previous data around as <name>.old. */
			if (write_mode == WRITE_FORCE) {
				char oldname[PATH_MAX];
				snprintf(oldname, sizeof(oldname), "%s.old",
					 output_name);
				unlink(oldname);
				rename(output_name, oldname);
			}
		} else if (write_mode == WRITE_APPEND) {
			/* Nothing to append to: fall back to a fresh file. */
			write_mode = WRITE_FORCE;
		}
	}

	flags = O_CREAT|O_RDWR;
	if (write_mode == WRITE_APPEND)
		file_new = 0;
	else
		flags |= O_TRUNC;

	if (pipe_output)
		output = STDOUT_FILENO;
	else
		output = open(output_name, flags, S_IRUSR | S_IWUSR);
	if (output < 0) {
		perror("failed to create output file");
		exit(-1);
	}

	session = perf_session__new(output_name, O_WRONLY,
				    write_mode == WRITE_FORCE, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	if (!no_buildid)
		perf_header__set_feat(&session->header, HEADER_BUILD_ID);

	if (!file_new) {
		err = perf_session__read_header(session, output);
		if (err < 0)
			goto out_delete_session;
	}

	if (have_tracepoints(&evsel_list->entries))
		perf_header__set_feat(&session->header, HEADER_TRACE_INFO);

	/* 512 kiB: default amount of unprivileged mlocked memory */
	if (mmap_pages == UINT_MAX)
		mmap_pages = (512 * 1024) / page_size;

	if (forks) {
		child_pid = fork();
		if (child_pid < 0) {
			perror("failed to fork");
			exit(-1);
		}

		if (!child_pid) {
			/* Child: wait for the go signal, then exec the workload. */
			if (pipe_output)
				dup2(2, 1);
			close(child_ready_pipe[0]);
			close(go_pipe[1]);
			fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

			/*
			 * Do a dummy execvp to get the PLT entry resolved,
			 * so we avoid the resolver overhead on the real
			 * execvp call.
			 */
			execvp("", (char **)argv);

			/*
			 * Tell the parent we're ready to go
			 */
			close(child_ready_pipe[1]);

			/*
			 * Wait until the parent tells us to go.
			 */
			if (read(go_pipe[0], &buf, 1) == -1)
				perror("unable to read pipe");

			execvp(argv[0], (char **)argv);

			/* exec failed: tell the parent via SIGUSR1 and die. */
			perror(argv[0]);
			kill(getppid(), SIGUSR1);
			exit(-1);
		}

		if (!system_wide && target_tid == -1 && target_pid == -1)
			evsel_list->threads->map[0] = child_pid;

		close(child_ready_pipe[1]);
		close(go_pipe[0]);
		/*
		 * wait for child to settle
		 */
		if (read(child_ready_pipe[0], &buf, 1) == -1) {
			perror("unable to read pipe");
			exit(-1);
		}
		close(child_ready_pipe[0]);
	}

	open_counters(evsel_list);

	/*
	 * perf_session__delete(session) will be called at atexit_header()
	 */
	atexit(atexit_header);

	if (pipe_output) {
		err = perf_header__write_pipe(output);
		if (err < 0)
			return err;
	} else if (file_new) {
		err = perf_session__write_header(session, evsel_list,
						 output, false);
		if (err < 0)
			return err;
	}

	post_processing_offset = lseek(output, 0, SEEK_CUR);

	/* Pipe mode: stream the metadata the file header would otherwise hold. */
	if (pipe_output) {
		err = perf_session__synthesize_attrs(session,
						     process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			return err;
		}

		err = perf_event__synthesize_event_types(process_synthesized_event,
							 session);
		if (err < 0) {
			pr_err("Couldn't synthesize event_types.\n");
			return err;
		}

		if (have_tracepoints(&evsel_list->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(output, evsel_list,
								  process_synthesized_event,
								  session);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				return err;
			}
			advance_output(err);
		}
	}

	machine = perf_session__find_host_machine(session);
	if (!machine) {
		pr_err("Couldn't find native kernel information.\n");
		return -1;
	}

	/* _stext fallback: some /proc/kallsyms have no _text symbol. */
	err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
						 session, machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
							 session, machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(process_synthesized_event,
					     session, machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest)
		perf_session__process_machines(session,
					       perf_event__synthesize_guest_os);

	if (!system_wide)
		perf_event__synthesize_thread_map(evsel_list->threads,
						  process_synthesized_event,
						  session);
	else
		perf_event__synthesize_threads(process_synthesized_event,
					       session);

	if (realtime_prio) {
		struct sched_param param;

		param.sched_priority = realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			exit(-1);
		}
	}

	/*
	 * Let the child rip
	 */
	if (forks)
		close(go_pipe[1]);

	/* Main loop: drain buffers, sleep in poll() when nothing arrived. */
	for (;;) {
		int hits = samples;

		mmap_read_all();

		if (hits == samples) {
			if (done)
				break;
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
			waking++;
		}

		/* Stop counting but keep draining until buffers are empty. */
		if (done)
			perf_evlist__disable(evsel_list);
	}

	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)bytes_written / 1024.0 / 1024.0,
		output_name,
		bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
}
716
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};

/* -f / -A flags; translated into write_mode in cmd_record(). */
static bool force, append_file;

/* Command-line option table for 'perf record'. */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &evsel_list, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
		     "event filter", parse_filter),
	OPT_INTEGER('p', "pid", &target_pid,
		    "record events on existing process id"),
	OPT_INTEGER('t', "tid", &target_tid,
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &nodelay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &system_wide,
			    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('A', "append", &append_file,
			    "append to the output file to do incremental profiling"),
	OPT_STRING('C', "cpu", &cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN('f', "force", &force,
			"overwrite existing data file (deprecated)"),
	OPT_U64('c', "count", &user_interval, "event period to sample"),
	OPT_STRING('o', "output", &output_name, "file",
		    "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
	OPT_BOOLEAN(0, "group", &group,
		    "put the counters into a counter group"),
	OPT_BOOLEAN('g', "call-graph", &call_graph,
		    "do call-graph (stack chain/backtrace) recording"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"),
	OPT_BOOLEAN('n', "no-samples", &no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_END()
};
779
/*
 * Entry point for 'perf record': parse options, validate the option
 * combination, build the evsel/cpu/thread maps, resolve the sampling
 * frequency vs. period, then hand off to __cmd_record().
 */
int cmd_record(int argc, const char **argv, const char *prefix __used)
{
	int err = -ENOMEM;
	struct perf_evsel *pos;

	evsel_list = perf_evlist__new(NULL, NULL);
	if (evsel_list == NULL)
		return -ENOMEM;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	/* Need either a command to run or an existing target to attach to. */
	if (!argc && target_pid == -1 && target_tid == -1 &&
		!system_wide && !cpu_list)
		usage_with_options(record_usage, record_options);

	if (force && append_file) {
		fprintf(stderr, "Can't overwrite and append at the same time."
				" You need to choose between -f and -A");
		usage_with_options(record_usage, record_options);
	} else if (append_file) {
		write_mode = WRITE_APPEND;
	} else {
		write_mode = WRITE_FORCE;
	}

	if (nr_cgroups && !system_wide) {
		fprintf(stderr, "cgroup monitoring only available in"
			" system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (no_buildid_cache || no_buildid)
		disable_buildid_cache();

	/* No -e given: fall back to the default event (cycles). */
	if (evsel_list->nr_entries == 0 &&
	    perf_evlist__add_default(evsel_list) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	if (target_pid != -1)
		target_tid = target_pid;

	if (perf_evlist__create_maps(evsel_list, target_pid,
				     target_tid, cpu_list) < 0)
		usage_with_options(record_usage, record_options);

	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_evsel__alloc_fd(pos, evsel_list->cpus->nr,
					 evsel_list->threads->nr) < 0)
			goto out_free_fd;
		if (perf_header__push_event(pos->attr.config, event_name(pos)))
			goto out_free_fd;
	}

	if (perf_evlist__alloc_pollfd(evsel_list) < 0)
		goto out_free_fd;

	if (user_interval != ULLONG_MAX)
		default_interval = user_interval;
	if (user_freq != UINT_MAX)
		freq = user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (default_interval)
		freq = 0;
	else if (freq) {
		default_interval = freq;
	} else {
		fprintf(stderr, "frequency and count are zero, aborting\n");
		err = -EINVAL;
		goto out_free_fd;
	}

	err = __cmd_record(argc, argv);
out_free_fd:
	perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
	symbol__exit();
	return err;
}