Merge branch 'urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/rric/oprofile...
[pandora-kernel.git] / tools / perf / builtin-record.c
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #define _FILE_OFFSET_BITS 64
9
10 #include "builtin.h"
11
12 #include "perf.h"
13
14 #include "util/build-id.h"
15 #include "util/util.h"
16 #include "util/parse-options.h"
17 #include "util/parse-events.h"
18
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/session.h"
25 #include "util/symbol.h"
26 #include "util/cpumap.h"
27 #include "util/thread_map.h"
28
29 #include <unistd.h>
30 #include <sched.h>
31 #include <sys/mman.h>
32
/* Fetch the perf fd for (event, cpu, thread) out of the evsel's fd xyarray. */
#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))

/* How to treat an existing output file: overwrite it, or append to it (-A). */
enum write_mode_t {
        WRITE_FORCE,
        WRITE_APPEND
};
39
/* Sampling period (-c) and frequency (-F) as given by the user; the
 * ULLONG_MAX/UINT_MAX sentinels mean "not specified on the command line". */
static u64                      user_interval                   = ULLONG_MAX;
static u64                      default_interval                =      0;

static unsigned int             page_size;
static unsigned int             mmap_pages                      = UINT_MAX;
static unsigned int             user_freq                       = UINT_MAX;
static int                      freq                            =   1000;

/* Output file state: fd, whether we stream to a pipe, and the file name. */
static int                      output;
static int                      pipe_output                     =      0;
static const char               *output_name                    = NULL;

/* Behaviour flags, set directly by option parsing (see record_options[]). */
static int                      group                           =      0;
static int                      realtime_prio                   =      0;
static bool                     nodelay                         =  false;
static bool                     raw_samples                     =  false;
static bool                     sample_id_all_avail             =   true;
static bool                     system_wide                     =  false;
static pid_t                    target_pid                      =     -1;
static pid_t                    target_tid                      =     -1;
static pid_t                    child_pid                       =     -1;
static bool                     no_inherit                      =  false;
static enum write_mode_t        write_mode                      = WRITE_FORCE;
static bool                     call_graph                      =  false;
static bool                     inherit_stat                    =  false;
static bool                     no_samples                      =  false;
static bool                     sample_address                  =  false;
static bool                     sample_time                     =  false;
static bool                     no_buildid                      =  false;
static bool                     no_buildid_cache                =  false;
static struct perf_evlist       *evsel_list;

/* Bookkeeping for the record loop and the final statistics line. */
static long                     samples                         =      0;
static u64                      bytes_written                   =      0;

/* file_new == 0 when appending (-A) to an existing perf.data file. */
static int                      file_new                        =      1;
static off_t                    post_processing_offset;

static struct perf_session      *session;
static const char               *cpu_list;
78
/*
 * Account for 'size' bytes that were written to the output fd by code that
 * bypasses write_output() (e.g. perf_event__synthesize_tracing_data()).
 */
static void advance_output(size_t size)
{
        bytes_written += size;
}
83
84 static void write_output(void *buf, size_t size)
85 {
86         while (size) {
87                 int ret = write(output, buf, size);
88
89                 if (ret < 0)
90                         die("failed to write");
91
92                 size -= ret;
93                 buf += ret;
94
95                 bytes_written += ret;
96         }
97 }
98
99 static int process_synthesized_event(union perf_event *event,
100                                      struct perf_sample *sample __used,
101                                      struct perf_session *self __used)
102 {
103         write_output(event, event->header.size);
104         return 0;
105 }
106
/*
 * Drain one mmap ring buffer into the output file.  Everything between
 * md->prev (what we consumed last time) and the kernel's current head is
 * written out; a region that wraps past the end of the buffer is handled
 * as two writes.  Finally the tail is published so the kernel can reuse
 * the space.
 */
static void mmap_read(struct perf_mmap *md)
{
        unsigned int head = perf_mmap__read_head(md);
        unsigned int old = md->prev;
        unsigned char *data = md->base + page_size; /* data starts after the control page */
        unsigned long size;
        void *buf;

        if (old == head)
                return;

        samples++;

        size = head - old;

        if ((old & md->mask) + size != (head & md->mask)) {
                /* Region wraps: first write from 'old' to the buffer end. */
                buf = &data[old & md->mask];
                size = md->mask + 1 - (old & md->mask);
                old += size;

                write_output(buf, size);
        }

        /* Write the (remaining) linear region up to 'head'. */
        buf = &data[old & md->mask];
        size = head - old;
        old += size;

        write_output(buf, size);

        md->prev = old;
        /* Tell the kernel how far we consumed (tail update has a barrier). */
        perf_mmap__write_tail(md, old);
}
139
/* Set asynchronously by sig_handler(); polled by the main record loop. */
static volatile int done = 0;
static volatile int signr = -1;
142
143 static void sig_handler(int sig)
144 {
145         done = 1;
146         signr = sig;
147 }
148
149 static void sig_atexit(void)
150 {
151         if (child_pid > 0)
152                 kill(child_pid, SIGTERM);
153
154         if (signr == -1 || signr == SIGUSR1)
155                 return;
156
157         signal(signr, SIG_DFL);
158         kill(getpid(), signr);
159 }
160
/*
 * Fill in the perf_event_attr for one event from the file-scope command
 * line state (frequency/period, sample types, inherit, ...).  Only the
 * first counter in the list carries the mmap/comm tracking bits.
 */
static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
{
        struct perf_event_attr *attr = &evsel->attr;
        int track = !evsel->idx; /* only the first counter needs these */

        attr->inherit           = !no_inherit;
        attr->read_format       = PERF_FORMAT_TOTAL_TIME_ENABLED |
                                  PERF_FORMAT_TOTAL_TIME_RUNNING |
                                  PERF_FORMAT_ID;

        attr->sample_type       |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;

        /* With more than one event, IDs are needed to demux the stream. */
        if (evlist->nr_entries > 1)
                attr->sample_type |= PERF_SAMPLE_ID;

        /*
         * We default some events to a 1 default interval. But keep
         * it a weak assumption overridable by the user.
         */
        if (!attr->sample_period || (user_freq != UINT_MAX &&
                                     user_interval != ULLONG_MAX)) {
                if (freq) {
                        /* Frequency mode: kernel adjusts the period, so
                         * record the per-sample period too. */
                        attr->sample_type       |= PERF_SAMPLE_PERIOD;
                        attr->freq              = 1;
                        attr->sample_freq       = freq;
                } else {
                        attr->sample_period = default_interval;
                }
        }

        if (no_samples)
                attr->sample_freq = 0;

        if (inherit_stat)
                attr->inherit_stat = 1;

        if (sample_address) {
                attr->sample_type       |= PERF_SAMPLE_ADDR;
                attr->mmap_data = track;
        }

        if (call_graph)
                attr->sample_type       |= PERF_SAMPLE_CALLCHAIN;

        if (system_wide)
                attr->sample_type       |= PERF_SAMPLE_CPU;

        /* Timestamps are needed whenever samples from different contexts
         * may have to be merged in order at report time. */
        if (sample_id_all_avail &&
            (sample_time || system_wide || !no_inherit || cpu_list))
                attr->sample_type       |= PERF_SAMPLE_TIME;

        if (raw_samples) {
                attr->sample_type       |= PERF_SAMPLE_TIME;
                attr->sample_type       |= PERF_SAMPLE_RAW;
                attr->sample_type       |= PERF_SAMPLE_CPU;
        }

        if (nodelay) {
                /* -D: wake the reader on every single event. */
                attr->watermark = 0;
                attr->wakeup_events = 1;
        }

        attr->mmap              = track;
        attr->comm              = track;

        /* Forked workload: start disabled and let exec() enable the
         * counters, so perf's own setup is not profiled. */
        if (target_pid == -1 && target_tid == -1 && !system_wide) {
                attr->disabled = 1;
                attr->enable_on_exec = 1;
        }
}
231
232 static bool perf_evlist__equal(struct perf_evlist *evlist,
233                                struct perf_evlist *other)
234 {
235         struct perf_evsel *pos, *pair;
236
237         if (evlist->nr_entries != other->nr_entries)
238                 return false;
239
240         pair = list_entry(other->entries.next, struct perf_evsel, node);
241
242         list_for_each_entry(pos, &evlist->entries, node) {
243                 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
244                         return false;
245                 pair = list_entry(pair->node.next, struct perf_evsel, node);
246         }
247
248         return true;
249 }
250
/*
 * Configure and open all counters in the evlist, falling back gracefully
 * on older kernels (no sample_id_all support, no hardware cycles event),
 * then apply filters, mmap the ring buffers and — when appending — verify
 * the event set matches the existing file's header.
 */
static void open_counters(struct perf_evlist *evlist)
{
        struct perf_evsel *pos;

        /* map[0] < 0 appears to denote per-task (dummy CPU) monitoring;
         * inherit is forced off in that case — TODO confirm vs cpumap.c */
        if (evlist->cpus->map[0] < 0)
                no_inherit = true;

        list_for_each_entry(pos, &evlist->entries, node) {
                struct perf_event_attr *attr = &pos->attr;
                /*
                 * Check if parse_single_tracepoint_event has already asked for
                 * PERF_SAMPLE_TIME.
                 *
                 * XXX this is kludgy but short term fix for problems introduced by
                 * eac23d1c that broke 'perf script' by having different sample_types
                 * when using multiple tracepoint events when we use a perf binary
                 * that tries to use sample_id_all on an older kernel.
                 *
                 * We need to move counter creation to perf_session, support
                 * different sample_types, etc.
                 */
                bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

                config_attr(pos, evlist);
retry_sample_id:
                attr->sample_id_all = sample_id_all_avail ? 1 : 0;
try_again:
                if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group) < 0) {
                        int err = errno;

                        if (err == EPERM || err == EACCES) {
                                /* perf_event_paranoid forbids this; explain and bail. */
                                ui__warning_paranoid();
                                exit(EXIT_FAILURE);
                        } else if (err ==  ENODEV && cpu_list) {
                                die("No such device - did you specify"
                                        " an out-of-range profile CPU?\n");
                        } else if (err == EINVAL && sample_id_all_avail) {
                                /*
                                 * Old kernel, no attr->sample_id_type_all field
                                 */
                                sample_id_all_avail = false;
                                if (!sample_time && !raw_samples && !time_needed)
                                        attr->sample_type &= ~PERF_SAMPLE_TIME;

                                goto retry_sample_id;
                        }

                        /*
                         * If it's cycles then fall back to hrtimer
                         * based cpu-clock-tick sw counter, which
                         * is always available even if no PMU support:
                         */
                        if (attr->type == PERF_TYPE_HARDWARE
                                        && attr->config == PERF_COUNT_HW_CPU_CYCLES) {

                                if (verbose)
                                        ui__warning("The cycles event is not supported, "
                                                    "trying to fall back to cpu-clock-ticks\n");
                                attr->type = PERF_TYPE_SOFTWARE;
                                attr->config = PERF_COUNT_SW_CPU_CLOCK;
                                goto try_again;
                        }

                        if (err == ENOENT) {
                                ui__warning("The %s event is not supported.\n",
                                            event_name(pos));
                                exit(EXIT_FAILURE);
                        }

                        printf("\n");
                        error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
                              err, strerror(err));

#if defined(__i386__) || defined(__x86_64__)
                        if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
                                die("No hardware sampling interrupt available."
                                    " No APIC? If so then you can boot the kernel"
                                    " with the \"lapic\" boot parameter to"
                                    " force-enable it.\n");
#endif

                        die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
                }
        }

        if (perf_evlist__set_filters(evlist)) {
                error("failed to set filter with %d (%s)\n", errno,
                        strerror(errno));
                exit(-1);
        }

        if (perf_evlist__mmap(evlist, mmap_pages, false) < 0)
                die("failed to mmap with %d (%s)\n", errno, strerror(errno));

        if (file_new)
                session->evlist = evlist;
        else {
                /* Appending: the event set must match the existing file. */
                if (!perf_evlist__equal(session->evlist, evlist)) {
                        fprintf(stderr, "incompatible append\n");
                        exit(-1);
                }
        }

        perf_session__update_sample_type(session);
}
356
357 static int process_buildids(void)
358 {
359         u64 size = lseek(output, 0, SEEK_CUR);
360
361         if (size == 0)
362                 return 0;
363
364         session->fd = output;
365         return __perf_session__process_events(session, post_processing_offset,
366                                               size - post_processing_offset,
367                                               size, &build_id__mark_dso_hit_ops);
368 }
369
370 static void atexit_header(void)
371 {
372         if (!pipe_output) {
373                 session->header.data_size += bytes_written;
374
375                 if (!no_buildid)
376                         process_buildids();
377                 perf_session__write_header(session, evsel_list, output, true);
378                 perf_session__delete(session);
379                 perf_evlist__delete(evsel_list);
380                 symbol__exit();
381         }
382 }
383
/*
 * Per-machine callback for perf_session__process_machines(): synthesize
 * module and kernel mmap events for each guest machine.  The host is
 * skipped here — its events are synthesized directly in __cmd_record().
 */
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
        int err;
        struct perf_session *psession = data;

        if (machine__is_host(machine))
                return;

        /*
         * As for guest kernel when processing subcommand record&report,
         * we arrange module mmap prior to guest kernel mmap and trigger
         * a preload dso because default guest module symbols are loaded
         * from guest kallsyms instead of /lib/modules/XXX/XXX. This
         * method is used to avoid symbol missing when the first addr is
         * in module instead of in guest kernel.
         */
        err = perf_event__synthesize_modules(process_synthesized_event,
                                             psession, machine);
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);

        /*
         * We use _stext for guest kernel because guest kernel's /proc/kallsyms
         * have no _text sometimes.
         */
        err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
                                                 psession, machine, "_text");
        if (err < 0)
                err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
                                                         psession, machine,
                                                         "_stext");
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);
}
420
/*
 * Marker written after each full pass over the mmap buffers; lets the
 * report side reorder samples in bounded-size rounds.
 */
static struct perf_event_header finished_round_event = {
        .size = sizeof(struct perf_event_header),
        .type = PERF_RECORD_FINISHED_ROUND,
};
425
426 static void mmap_read_all(void)
427 {
428         int i;
429
430         for (i = 0; i < evsel_list->nr_mmaps; i++) {
431                 if (evsel_list->mmap[i].base)
432                         mmap_read(&evsel_list->mmap[i]);
433         }
434
435         if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
436                 write_output(&finished_round_event, sizeof(finished_round_event));
437 }
438
/*
 * Main record driver: set up the output file, optionally fork the
 * workload (held at a pipe barrier until counters are open), open the
 * counters, synthesize the initial metadata events (attrs, kernel maps,
 * modules, threads), then drain the mmap buffers until the workload
 * exits or we are interrupted.  Returns 0 on success, negative on error;
 * hard failures during setup exit(-1) directly.
 */
static int __cmd_record(int argc, const char **argv)
{
        int i;
        struct stat st;
        int flags;
        int err;
        unsigned long waking = 0;
        int child_ready_pipe[2], go_pipe[2];
        const bool forks = argc > 0;
        char buf;
        struct machine *machine;

        page_size = sysconf(_SC_PAGE_SIZE);

        atexit(sig_atexit);
        signal(SIGCHLD, sig_handler);
        signal(SIGINT, sig_handler);
        signal(SIGUSR1, sig_handler);

        if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
                perror("failed to create pipes");
                exit(-1);
        }

        /* No -o given: stream to stdout if it's a pipe, else perf.data. */
        if (!output_name) {
                if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
                        pipe_output = 1;
                else
                        output_name = "perf.data";
        }
        if (output_name) {
                if (!strcmp(output_name, "-"))
                        pipe_output = 1;
                else if (!stat(output_name, &st) && st.st_size) {
                        /* Overwriting a non-empty file: keep a .old backup. */
                        if (write_mode == WRITE_FORCE) {
                                char oldname[PATH_MAX];
                                snprintf(oldname, sizeof(oldname), "%s.old",
                                         output_name);
                                unlink(oldname);
                                rename(output_name, oldname);
                        }
                } else if (write_mode == WRITE_APPEND) {
                        /* Nothing to append to: fall back to overwrite. */
                        write_mode = WRITE_FORCE;
                }
        }

        flags = O_CREAT|O_RDWR;
        if (write_mode == WRITE_APPEND)
                file_new = 0;
        else
                flags |= O_TRUNC;

        if (pipe_output)
                output = STDOUT_FILENO;
        else
                output = open(output_name, flags, S_IRUSR | S_IWUSR);
        if (output < 0) {
                perror("failed to create output file");
                exit(-1);
        }

        session = perf_session__new(output_name, O_WRONLY,
                                    write_mode == WRITE_FORCE, false, NULL);
        if (session == NULL) {
                pr_err("Not enough memory for reading perf file header\n");
                return -1;
        }

        if (!no_buildid)
                perf_header__set_feat(&session->header, HEADER_BUILD_ID);

        /* Appending: pick up the existing header so we extend it. */
        if (!file_new) {
                err = perf_session__read_header(session, output);
                if (err < 0)
                        goto out_delete_session;
        }

        if (have_tracepoints(&evsel_list->entries))
                perf_header__set_feat(&session->header, HEADER_TRACE_INFO);

        /* 512 kiB: default amount of unprivileged mlocked memory */
        if (mmap_pages == UINT_MAX)
                mmap_pages = (512 * 1024) / page_size;

        if (forks) {
                child_pid = fork();
                if (child_pid < 0) {
                        perror("failed to fork");
                        exit(-1);
                }

                if (!child_pid) {
                        /* Child: keep stdout clean when perf.data goes there. */
                        if (pipe_output)
                                dup2(2, 1);
                        close(child_ready_pipe[0]);
                        close(go_pipe[1]);
                        fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

                        /*
                         * Do a dummy execvp to get the PLT entry resolved,
                         * so we avoid the resolver overhead on the real
                         * execvp call.
                         */
                        execvp("", (char **)argv);

                        /*
                         * Tell the parent we're ready to go
                         */
                        close(child_ready_pipe[1]);

                        /*
                         * Wait until the parent tells us to go.
                         */
                        if (read(go_pipe[0], &buf, 1) == -1)
                                perror("unable to read pipe");

                        execvp(argv[0], (char **)argv);

                        /* exec failed: SIGUSR1 tells the parent to stay quiet. */
                        perror(argv[0]);
                        kill(getppid(), SIGUSR1);
                        exit(-1);
                }

                /* Profile the forked child when no explicit target was given. */
                if (!system_wide && target_tid == -1 && target_pid == -1)
                        evsel_list->threads->map[0] = child_pid;

                close(child_ready_pipe[1]);
                close(go_pipe[0]);
                /*
                 * wait for child to settle
                 */
                if (read(child_ready_pipe[0], &buf, 1) == -1) {
                        perror("unable to read pipe");
                        exit(-1);
                }
                close(child_ready_pipe[0]);
        }

        open_counters(evsel_list);

        /*
         * perf_session__delete(session) will be called at atexit_header()
         */
        atexit(atexit_header);

        if (pipe_output) {
                err = perf_header__write_pipe(output);
                if (err < 0)
                        return err;
        } else if (file_new) {
                err = perf_session__write_header(session, evsel_list,
                                                 output, false);
                if (err < 0)
                        return err;
        }

        post_processing_offset = lseek(output, 0, SEEK_CUR);

        if (pipe_output) {
                /* A pipe consumer can't seek back into a header, so the
                 * metadata has to be synthesized inline as events. */
                err = perf_session__synthesize_attrs(session,
                                                     process_synthesized_event);
                if (err < 0) {
                        pr_err("Couldn't synthesize attrs.\n");
                        return err;
                }

                err = perf_event__synthesize_event_types(process_synthesized_event,
                                                         session);
                if (err < 0) {
                        pr_err("Couldn't synthesize event_types.\n");
                        return err;
                }

                if (have_tracepoints(&evsel_list->entries)) {
                        /*
                         * FIXME err <= 0 here actually means that
                         * there were no tracepoints so its not really
                         * an error, just that we don't need to
                         * synthesize anything.  We really have to
                         * return this more properly and also
                         * propagate errors that now are calling die()
                         */
                        err = perf_event__synthesize_tracing_data(output, evsel_list,
                                                                  process_synthesized_event,
                                                                  session);
                        if (err <= 0) {
                                pr_err("Couldn't record tracing data.\n");
                                return err;
                        }
                        /* Tracing data was written directly; account for it. */
                        advance_output(err);
                }
        }

        machine = perf_session__find_host_machine(session);
        if (!machine) {
                pr_err("Couldn't find native kernel information.\n");
                return -1;
        }

        /* Kernel mmap event; try _text first, fall back to _stext. */
        err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
                                                 session, machine, "_text");
        if (err < 0)
                err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
                                                         session, machine, "_stext");
        if (err < 0)
                pr_err("Couldn't record kernel reference relocation symbol\n"
                       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                       "Check /proc/kallsyms permission or run as root.\n");

        err = perf_event__synthesize_modules(process_synthesized_event,
                                             session, machine);
        if (err < 0)
                pr_err("Couldn't record kernel module information.\n"
                       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                       "Check /proc/modules permission or run as root.\n");

        if (perf_guest)
                perf_session__process_machines(session,
                                               perf_event__synthesize_guest_os);

        if (!system_wide)
                perf_event__synthesize_thread_map(evsel_list->threads,
                                                  process_synthesized_event,
                                                  session);
        else
                perf_event__synthesize_threads(process_synthesized_event,
                                               session);

        if (realtime_prio) {
                struct sched_param param;

                param.sched_priority = realtime_prio;
                if (sched_setscheduler(0, SCHED_FIFO, &param)) {
                        pr_err("Could not set realtime priority.\n");
                        exit(-1);
                }
        }

        /*
         * Let the child rip
         */
        if (forks)
                close(go_pipe[1]);

        for (;;) {
                int hits = samples;
                int thread;

                mmap_read_all();

                /* Nothing new arrived: block in poll() unless we're done. */
                if (hits == samples) {
                        if (done)
                                break;
                        err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
                        waking++;
                }

                /* On shutdown, disable every counter so the final
                 * mmap_read_all() pass drains a quiescent buffer. */
                if (done) {
                        for (i = 0; i < evsel_list->cpus->nr; i++) {
                                struct perf_evsel *pos;

                                list_for_each_entry(pos, &evsel_list->entries, node) {
                                        for (thread = 0;
                                                thread < evsel_list->threads->nr;
                                                thread++)
                                                ioctl(FD(pos, i, thread),
                                                        PERF_EVENT_IOC_DISABLE);
                                }
                        }
                }
        }

        /* SIGUSR1 means the child's exec failed; it already printed why. */
        if (quiet || signr == SIGUSR1)
                return 0;

        fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

        /*
         * Approximate RIP event size: 24 bytes.
         */
        fprintf(stderr,
                "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
                (double)bytes_written / 1024.0 / 1024.0,
                output_name,
                bytes_written / 24);

        return 0;

out_delete_session:
        perf_session__delete(session);
        return err;
}
731
/* Usage strings displayed by parse_options()/usage_with_options(). */
static const char * const record_usage[] = {
        "perf record [<options>] [<command>]",
        "perf record [<options>] -- <command> [<options>]",
        NULL
};

/* -f / -A flags; translated into write_mode inside cmd_record(). */
static bool force, append_file;
739
/*
 * Command line option table for 'perf record'.  Each entry sets the
 * file-scope state consumed by config_attr()/__cmd_record().
 */
const struct option record_options[] = {
        OPT_CALLBACK('e', "event", &evsel_list, "event",
                     "event selector. use 'perf list' to list available events",
                     parse_events),
        OPT_CALLBACK(0, "filter", &evsel_list, "filter",
                     "event filter", parse_filter),
        OPT_INTEGER('p', "pid", &target_pid,
                    "record events on existing process id"),
        OPT_INTEGER('t', "tid", &target_tid,
                    "record events on existing thread id"),
        OPT_INTEGER('r', "realtime", &realtime_prio,
                    "collect data with this RT SCHED_FIFO priority"),
        OPT_BOOLEAN('D', "no-delay", &nodelay,
                    "collect data without buffering"),
        OPT_BOOLEAN('R', "raw-samples", &raw_samples,
                    "collect raw sample records from all opened counters"),
        OPT_BOOLEAN('a', "all-cpus", &system_wide,
                            "system-wide collection from all CPUs"),
        OPT_BOOLEAN('A', "append", &append_file,
                            "append to the output file to do incremental profiling"),
        OPT_STRING('C', "cpu", &cpu_list, "cpu",
                    "list of cpus to monitor"),
        OPT_BOOLEAN('f', "force", &force,
                        "overwrite existing data file (deprecated)"),
        OPT_U64('c', "count", &user_interval, "event period to sample"),
        OPT_STRING('o', "output", &output_name, "file",
                    "output file name"),
        OPT_BOOLEAN('i', "no-inherit", &no_inherit,
                    "child tasks do not inherit counters"),
        OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"),
        OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
        OPT_BOOLEAN('g', "call-graph", &call_graph,
                    "do call-graph (stack chain/backtrace) recording"),
        OPT_INCR('v', "verbose", &verbose,
                    "be more verbose (show counter open errors, etc)"),
        OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
        OPT_BOOLEAN('s', "stat", &inherit_stat,
                    "per thread counts"),
        OPT_BOOLEAN('d', "data", &sample_address,
                    "Sample addresses"),
        OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"),
        OPT_BOOLEAN('n', "no-samples", &no_samples,
                    "don't sample"),
        OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
                    "do not update the buildid cache"),
        OPT_BOOLEAN('B', "no-buildid", &no_buildid,
                    "do not collect buildids in perf.data"),
        OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
                     "monitor event in cgroup name only",
                     parse_cgroups),
        OPT_END()
};
792
793 int cmd_record(int argc, const char **argv, const char *prefix __used)
794 {
795         int err = -ENOMEM;
796         struct perf_evsel *pos;
797
798         evsel_list = perf_evlist__new(NULL, NULL);
799         if (evsel_list == NULL)
800                 return -ENOMEM;
801
802         argc = parse_options(argc, argv, record_options, record_usage,
803                             PARSE_OPT_STOP_AT_NON_OPTION);
804         if (!argc && target_pid == -1 && target_tid == -1 &&
805                 !system_wide && !cpu_list)
806                 usage_with_options(record_usage, record_options);
807
808         if (force && append_file) {
809                 fprintf(stderr, "Can't overwrite and append at the same time."
810                                 " You need to choose between -f and -A");
811                 usage_with_options(record_usage, record_options);
812         } else if (append_file) {
813                 write_mode = WRITE_APPEND;
814         } else {
815                 write_mode = WRITE_FORCE;
816         }
817
818         if (nr_cgroups && !system_wide) {
819                 fprintf(stderr, "cgroup monitoring only available in"
820                         " system-wide mode\n");
821                 usage_with_options(record_usage, record_options);
822         }
823
824         symbol__init();
825
826         if (symbol_conf.kptr_restrict)
827                 pr_warning(
828 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
829 "check /proc/sys/kernel/kptr_restrict.\n\n"
830 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
831 "file is not found in the buildid cache or in the vmlinux path.\n\n"
832 "Samples in kernel modules won't be resolved at all.\n\n"
833 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
834 "even with a suitable vmlinux or kallsyms file.\n\n");
835
836         if (no_buildid_cache || no_buildid)
837                 disable_buildid_cache();
838
839         if (evsel_list->nr_entries == 0 &&
840             perf_evlist__add_default(evsel_list) < 0) {
841                 pr_err("Not enough memory for event selector list\n");
842                 goto out_symbol_exit;
843         }
844
845         if (target_pid != -1)
846                 target_tid = target_pid;
847
848         if (perf_evlist__create_maps(evsel_list, target_pid,
849                                      target_tid, cpu_list) < 0)
850                 usage_with_options(record_usage, record_options);
851
852         list_for_each_entry(pos, &evsel_list->entries, node) {
853                 if (perf_evsel__alloc_fd(pos, evsel_list->cpus->nr,
854                                          evsel_list->threads->nr) < 0)
855                         goto out_free_fd;
856                 if (perf_header__push_event(pos->attr.config, event_name(pos)))
857                         goto out_free_fd;
858         }
859
860         if (perf_evlist__alloc_pollfd(evsel_list) < 0)
861                 goto out_free_fd;
862
863         if (user_interval != ULLONG_MAX)
864                 default_interval = user_interval;
865         if (user_freq != UINT_MAX)
866                 freq = user_freq;
867
868         /*
869          * User specified count overrides default frequency.
870          */
871         if (default_interval)
872                 freq = 0;
873         else if (freq) {
874                 default_interval = freq;
875         } else {
876                 fprintf(stderr, "frequency and count are zero, aborting\n");
877                 err = -EINVAL;
878                 goto out_free_fd;
879         }
880
881         err = __cmd_record(argc, argv);
882 out_free_fd:
883         perf_evlist__delete_maps(evsel_list);
884 out_symbol_exit:
885         symbol__exit();
886         return err;
887 }