Merge branch 'perf/core' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux...
[pandora-kernel.git] / tools / perf / builtin-stat.c
1 /*
2  * builtin-stat.c
3  *
4  * Builtin stat command: Give a precise performance counters summary
5  * overview about any workload, CPU or specific PID.
6  *
7  * Sample output:
8
9    $ perf stat ~/hackbench 10
10    Time: 0.104
11
12     Performance counter stats for '/home/mingo/hackbench':
13
14        1255.538611  task clock ticks     #      10.143 CPU utilization factor
15              54011  context switches     #       0.043 M/sec
16                385  CPU migrations       #       0.000 M/sec
17              17755  pagefaults           #       0.014 M/sec
18         3808323185  CPU cycles           #    3033.219 M/sec
19         1575111190  instructions         #    1254.530 M/sec
20           17367895  cache references     #      13.833 M/sec
21            7674421  cache misses         #       6.112 M/sec
22
23     Wall-clock time elapsed:   123.786620 msecs
24
25  *
26  * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
27  *
28  * Improvements and fixes by:
29  *
30  *   Arjan van de Ven <arjan@linux.intel.com>
31  *   Yanmin Zhang <yanmin.zhang@intel.com>
32  *   Wu Fengguang <fengguang.wu@intel.com>
33  *   Mike Galbraith <efault@gmx.de>
34  *   Paul Mackerras <paulus@samba.org>
35  *   Jaswinder Singh Rajput <jaswinder@kernel.org>
36  *
37  * Released under the GPL v2. (and only v2, not any later version)
38  */
39
40 #include "perf.h"
41 #include "builtin.h"
42 #include "util/util.h"
43 #include "util/parse-options.h"
44 #include "util/parse-events.h"
45 #include "util/event.h"
46 #include "util/debug.h"
47 #include "util/header.h"
48 #include "util/cpumap.h"
49 #include "util/thread.h"
50
51 #include <sys/prctl.h>
52 #include <math.h>
53 #include <locale.h>
54
55 #define DEFAULT_SEPARATOR       " "
56
/*
 * Events counted when no event was selected on the command line and this
 * is not a null run: cmd_stat() copies this table into attrs[] and sets
 * nr_counters to its length.
 */
static struct perf_event_attr default_attrs[] = {

  /* software events */
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK              },
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES        },
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS          },
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS             },

  /* hardware events */
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES              },
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS            },
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS     },
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES           },
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES        },
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES            },

};
72
/* -a/--all-cpus: count on CPUs instead of on a set of threads */
static bool                     system_wide                     =  false;
/* number of CPUs being monitored; forced to 1 when !system_wide */
static int                      nr_cpus                         =  0;
/* index of the run currently executing in the -r repeat loop */
static int                      run_idx                         =  0;

/* -r/--repeat: how many times the workload is run */
static int                      run_count                       =  1;
/* -i/--no-inherit: child tasks do not inherit the counters */
static bool                     no_inherit                      = false;
/* -c/--scale: also read enabled/running times and scale the counts */
static bool                     scale                           =  true;
/* -A/--no-aggr: report per-CPU counts (only accepted with system_wide) */
static bool                     no_aggr                         = false;
/* -p/--pid and -t/--tid: existing task to monitor, -1 when not given */
static pid_t                    target_pid                      = -1;
static pid_t                    target_tid                      = -1;
/* thread ids to open counters on; holds thread_num entries */
static pid_t                    *all_tids                       =  NULL;
static int                      thread_num                      =  0;
/* pid of the forked workload, -1 while there is none */
static pid_t                    child_pid                       = -1;
/* -n/--null: run without falling back to the default event set */
static bool                     null_run                        =  false;
/* whether counts are printed with thousands' separators */
static bool                     big_num                         =  true;
/* -B/--big-num state: -1 not given, 0 --no-big-num, 1 -B */
static int                      big_num_opt                     =  -1;
/* -C/--cpu: list of CPUs handed to read_cpu_map() in system-wide mode */
static const char               *cpu_list;
/* -x/--field-separator; replaced by DEFAULT_SEPARATOR when not given */
static const char               *csv_sep                        = NULL;
/* true when the user supplied a field separator */
static bool                     csv_output                      = false;
92
93
/*
 * fd[cpu][counter] points to a malloc()ed array of thread_num event file
 * descriptors (allocated in cmd_stat()); closed slots are set to -1.
 */
static int                      *fd[MAX_NR_CPUS][MAX_COUNTERS];

/*
 * Per-counter scaling state set by read_counter_aggr():
 * 0 = unscaled, 1 = count was scaled up, -1 = counter never ran.
 */
static int                      event_scaled[MAX_COUNTERS];

/* Per-CPU results filled in by read_counter() for no-aggr output */
static struct {
        u64 val;        /* count, scaled when scaling applies */
        u64 ena;        /* time enabled as read from the event */
        u64 run;        /* time running as read from the event */
} cpu_counts[MAX_NR_CPUS][MAX_COUNTERS];

/* set from the signal handler to end the wait when no workload was forked */
static volatile int done = 0;
105
/*
 * Running statistics accumulator maintained by update_stats():
 * n is the number of samples, mean their running average and M2 the
 * running sum of squared deviations from the mean.
 */
struct stats
{
        double n, mean, M2;
};
110
111 static void update_stats(struct stats *stats, u64 val)
112 {
113         double delta;
114
115         stats->n++;
116         delta = val - stats->mean;
117         stats->mean += delta / stats->n;
118         stats->M2 += delta*(val - stats->mean);
119 }
120
121 static double avg_stats(struct stats *stats)
122 {
123         return stats->mean;
124 }
125
126 /*
127  * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
128  *
129  *       (\Sum n_i^2) - ((\Sum n_i)^2)/n
130  * s^2 = -------------------------------
131  *                  n - 1
132  *
133  * http://en.wikipedia.org/wiki/Stddev
134  *
135  * The std dev of the mean is related to the std dev by:
136  *
137  *             s
138  * s_mean = -------
139  *          sqrt(n)
140  *
141  */
142 static double stddev_stats(struct stats *stats)
143 {
144         double variance = stats->M2 / (stats->n - 1);
145         double variance_mean = variance / stats->n;
146
147         return sqrt(variance_mean);
148 }
149
/*
 * Per-counter statistics across runs, filled by read_counter_aggr():
 * slot 0 is the count, slots 1 and 2 the enabled and running times.
 */
struct stats                    event_res_stats[MAX_COUNTERS][3];
/*
 * Reference totals used to normalize the printout (task clock, cycles,
 * branches). Indexed by cpu in no-aggr mode; aggregated mode uses slot 0.
 */
struct stats                    runtime_nsecs_stats[MAX_NR_CPUS];
struct stats                    runtime_cycles_stats[MAX_NR_CPUS];
struct stats                    runtime_branches_stats[MAX_NR_CPUS];
/* elapsed time of each run as measured around the workload */
struct stats                    walltime_nsecs_stats;

/*
 * True when attrs[counter] has type PERF_TYPE_<t> and config
 * PERF_COUNT_<c>.
 */
#define MATCH_EVENT(t, c, counter)                      \
        (attrs[counter].type == PERF_TYPE_##t &&        \
         attrs[counter].config == PERF_COUNT_##c)

/* error() format: counter index, syscall return value, strerror() text */
#define ERR_PERF_OPEN \
"counter %d, sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information."
162
163 static int create_perf_stat_counter(int counter, bool *perm_err)
164 {
165         struct perf_event_attr *attr = attrs + counter;
166         int thread;
167         int ncreated = 0;
168
169         if (scale)
170                 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
171                                     PERF_FORMAT_TOTAL_TIME_RUNNING;
172
173         if (system_wide) {
174                 int cpu;
175
176                 for (cpu = 0; cpu < nr_cpus; cpu++) {
177                         fd[cpu][counter][0] = sys_perf_event_open(attr,
178                                         -1, cpumap[cpu], -1, 0);
179                         if (fd[cpu][counter][0] < 0) {
180                                 if (errno == EPERM || errno == EACCES)
181                                         *perm_err = true;
182                                 error(ERR_PERF_OPEN, counter,
183                                          fd[cpu][counter][0], strerror(errno));
184                         } else {
185                                 ++ncreated;
186                         }
187                 }
188         } else {
189                 attr->inherit = !no_inherit;
190                 if (target_pid == -1 && target_tid == -1) {
191                         attr->disabled = 1;
192                         attr->enable_on_exec = 1;
193                 }
194                 for (thread = 0; thread < thread_num; thread++) {
195                         fd[0][counter][thread] = sys_perf_event_open(attr,
196                                 all_tids[thread], -1, -1, 0);
197                         if (fd[0][counter][thread] < 0) {
198                                 if (errno == EPERM || errno == EACCES)
199                                         *perm_err = true;
200                                 error(ERR_PERF_OPEN, counter,
201                                          fd[0][counter][thread],
202                                          strerror(errno));
203                         } else {
204                                 ++ncreated;
205                         }
206                 }
207         }
208
209         return ncreated;
210 }
211
212 /*
213  * Does the counter have nsecs as a unit?
214  */
215 static inline int nsec_counter(int counter)
216 {
217         if (MATCH_EVENT(SOFTWARE, SW_CPU_CLOCK, counter) ||
218             MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter))
219                 return 1;
220
221         return 0;
222 }
223
224 /*
225  * Read out the results of a single counter:
226  * aggregate counts across CPUs in system-wide mode
227  */
228 static void read_counter_aggr(int counter)
229 {
230         u64 count[3], single_count[3];
231         int cpu;
232         size_t res, nv;
233         int scaled;
234         int i, thread;
235
236         count[0] = count[1] = count[2] = 0;
237
238         nv = scale ? 3 : 1;
239         for (cpu = 0; cpu < nr_cpus; cpu++) {
240                 for (thread = 0; thread < thread_num; thread++) {
241                         if (fd[cpu][counter][thread] < 0)
242                                 continue;
243
244                         res = read(fd[cpu][counter][thread],
245                                         single_count, nv * sizeof(u64));
246                         assert(res == nv * sizeof(u64));
247
248                         close(fd[cpu][counter][thread]);
249                         fd[cpu][counter][thread] = -1;
250
251                         count[0] += single_count[0];
252                         if (scale) {
253                                 count[1] += single_count[1];
254                                 count[2] += single_count[2];
255                         }
256                 }
257         }
258
259         scaled = 0;
260         if (scale) {
261                 if (count[2] == 0) {
262                         event_scaled[counter] = -1;
263                         count[0] = 0;
264                         return;
265                 }
266
267                 if (count[2] < count[1]) {
268                         event_scaled[counter] = 1;
269                         count[0] = (unsigned long long)
270                                 ((double)count[0] * count[1] / count[2] + 0.5);
271                 }
272         }
273
274         for (i = 0; i < 3; i++)
275                 update_stats(&event_res_stats[counter][i], count[i]);
276
277         if (verbose) {
278                 fprintf(stderr, "%s: %Ld %Ld %Ld\n", event_name(counter),
279                                 count[0], count[1], count[2]);
280         }
281
282         /*
283          * Save the full runtime - to allow normalization during printout:
284          */
285         if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter))
286                 update_stats(&runtime_nsecs_stats[0], count[0]);
287         if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter))
288                 update_stats(&runtime_cycles_stats[0], count[0]);
289         if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter))
290                 update_stats(&runtime_branches_stats[0], count[0]);
291 }
292
293 /*
294  * Read out the results of a single counter:
295  * do not aggregate counts across CPUs in system-wide mode
296  */
297 static void read_counter(int counter)
298 {
299         u64 count[3];
300         int cpu;
301         size_t res, nv;
302
303         count[0] = count[1] = count[2] = 0;
304
305         nv = scale ? 3 : 1;
306
307         for (cpu = 0; cpu < nr_cpus; cpu++) {
308
309                 if (fd[cpu][counter][0] < 0)
310                         continue;
311
312                 res = read(fd[cpu][counter][0], count, nv * sizeof(u64));
313
314                 assert(res == nv * sizeof(u64));
315
316                 close(fd[cpu][counter][0]);
317                 fd[cpu][counter][0] = -1;
318
319                 if (scale) {
320                         if (count[2] == 0) {
321                                 count[0] = 0;
322                         } else if (count[2] < count[1]) {
323                                 count[0] = (unsigned long long)
324                                 ((double)count[0] * count[1] / count[2] + 0.5);
325                         }
326                 }
327                 cpu_counts[cpu][counter].val = count[0]; /* scaled count */
328                 cpu_counts[cpu][counter].ena = count[1];
329                 cpu_counts[cpu][counter].run = count[2];
330
331                 if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter))
332                         update_stats(&runtime_nsecs_stats[cpu], count[0]);
333                 if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter))
334                         update_stats(&runtime_cycles_stats[cpu], count[0]);
335                 if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter))
336                         update_stats(&runtime_branches_stats[cpu], count[0]);
337         }
338 }
339
340 static int run_perf_stat(int argc __used, const char **argv)
341 {
342         unsigned long long t0, t1;
343         int status = 0;
344         int counter, ncreated = 0;
345         int child_ready_pipe[2], go_pipe[2];
346         bool perm_err = false;
347         const bool forks = (argc > 0);
348         char buf;
349
350         if (!system_wide)
351                 nr_cpus = 1;
352
353         if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
354                 perror("failed to create pipes");
355                 exit(1);
356         }
357
358         if (forks) {
359                 if ((child_pid = fork()) < 0)
360                         perror("failed to fork");
361
362                 if (!child_pid) {
363                         close(child_ready_pipe[0]);
364                         close(go_pipe[1]);
365                         fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
366
367                         /*
368                          * Do a dummy execvp to get the PLT entry resolved,
369                          * so we avoid the resolver overhead on the real
370                          * execvp call.
371                          */
372                         execvp("", (char **)argv);
373
374                         /*
375                          * Tell the parent we're ready to go
376                          */
377                         close(child_ready_pipe[1]);
378
379                         /*
380                          * Wait until the parent tells us to go.
381                          */
382                         if (read(go_pipe[0], &buf, 1) == -1)
383                                 perror("unable to read pipe");
384
385                         execvp(argv[0], (char **)argv);
386
387                         perror(argv[0]);
388                         exit(-1);
389                 }
390
391                 if (target_tid == -1 && target_pid == -1 && !system_wide)
392                         all_tids[0] = child_pid;
393
394                 /*
395                  * Wait for the child to be ready to exec.
396                  */
397                 close(child_ready_pipe[1]);
398                 close(go_pipe[0]);
399                 if (read(child_ready_pipe[0], &buf, 1) == -1)
400                         perror("unable to read pipe");
401                 close(child_ready_pipe[0]);
402         }
403
404         for (counter = 0; counter < nr_counters; counter++)
405                 ncreated += create_perf_stat_counter(counter, &perm_err);
406
407         if (ncreated < nr_counters) {
408                 if (perm_err)
409                         error("You may not have permission to collect %sstats.\n"
410                               "\t Consider tweaking"
411                               " /proc/sys/kernel/perf_event_paranoid or running as root.",
412                               system_wide ? "system-wide " : "");
413                 die("Not all events could be opened.\n");
414                 if (child_pid != -1)
415                         kill(child_pid, SIGTERM);
416                 return -1;
417         }
418
419         /*
420          * Enable counters and exec the command:
421          */
422         t0 = rdclock();
423
424         if (forks) {
425                 close(go_pipe[1]);
426                 wait(&status);
427         } else {
428                 while(!done) sleep(1);
429         }
430
431         t1 = rdclock();
432
433         update_stats(&walltime_nsecs_stats, t1 - t0);
434
435         if (no_aggr) {
436                 for (counter = 0; counter < nr_counters; counter++)
437                         read_counter(counter);
438         } else {
439                 for (counter = 0; counter < nr_counters; counter++)
440                         read_counter_aggr(counter);
441         }
442         return WEXITSTATUS(status);
443 }
444
445 static void print_noise(int counter, double avg)
446 {
447         if (run_count == 1)
448                 return;
449
450         fprintf(stderr, "   ( +- %7.3f%% )",
451                         100 * stddev_stats(&event_res_stats[counter][0]) / avg);
452 }
453
454 static void nsec_printout(int cpu, int counter, double avg)
455 {
456         double msecs = avg / 1e6;
457         char cpustr[16] = { '\0', };
458         const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-24s";
459
460         if (no_aggr)
461                 sprintf(cpustr, "CPU%*d%s",
462                         csv_output ? 0 : -4,
463                         cpumap[cpu], csv_sep);
464
465         fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(counter));
466
467         if (csv_output)
468                 return;
469
470         if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) {
471                 fprintf(stderr, " # %10.3f CPUs ",
472                                 avg / avg_stats(&walltime_nsecs_stats));
473         }
474 }
475
476 static void abs_printout(int cpu, int counter, double avg)
477 {
478         double total, ratio = 0.0;
479         char cpustr[16] = { '\0', };
480         const char *fmt;
481
482         if (csv_output)
483                 fmt = "%s%.0f%s%s";
484         else if (big_num)
485                 fmt = "%s%'18.0f%s%-24s";
486         else
487                 fmt = "%s%18.0f%s%-24s";
488
489         if (no_aggr)
490                 sprintf(cpustr, "CPU%*d%s",
491                         csv_output ? 0 : -4,
492                         cpumap[cpu], csv_sep);
493         else
494                 cpu = 0;
495
496         fprintf(stderr, fmt, cpustr, avg, csv_sep, event_name(counter));
497
498         if (csv_output)
499                 return;
500
501         if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) {
502                 total = avg_stats(&runtime_cycles_stats[cpu]);
503
504                 if (total)
505                         ratio = avg / total;
506
507                 fprintf(stderr, " # %10.3f IPC  ", ratio);
508         } else if (MATCH_EVENT(HARDWARE, HW_BRANCH_MISSES, counter) &&
509                         runtime_branches_stats[cpu].n != 0) {
510                 total = avg_stats(&runtime_branches_stats[cpu]);
511
512                 if (total)
513                         ratio = avg * 100 / total;
514
515                 fprintf(stderr, " # %10.3f %%    ", ratio);
516
517         } else if (runtime_nsecs_stats[cpu].n != 0) {
518                 total = avg_stats(&runtime_nsecs_stats[cpu]);
519
520                 if (total)
521                         ratio = 1000.0 * avg / total;
522
523                 fprintf(stderr, " # %10.3f M/sec", ratio);
524         }
525 }
526
527 /*
528  * Print out the results of a single counter:
529  * aggregated counts in system-wide mode
530  */
531 static void print_counter_aggr(int counter)
532 {
533         double avg = avg_stats(&event_res_stats[counter][0]);
534         int scaled = event_scaled[counter];
535
536         if (scaled == -1) {
537                 fprintf(stderr, "%*s%s%-24s\n",
538                         csv_output ? 0 : 18,
539                         "<not counted>", csv_sep, event_name(counter));
540                 return;
541         }
542
543         if (nsec_counter(counter))
544                 nsec_printout(-1, counter, avg);
545         else
546                 abs_printout(-1, counter, avg);
547
548         if (csv_output) {
549                 fputc('\n', stderr);
550                 return;
551         }
552
553         print_noise(counter, avg);
554
555         if (scaled) {
556                 double avg_enabled, avg_running;
557
558                 avg_enabled = avg_stats(&event_res_stats[counter][1]);
559                 avg_running = avg_stats(&event_res_stats[counter][2]);
560
561                 fprintf(stderr, "  (scaled from %.2f%%)",
562                                 100 * avg_running / avg_enabled);
563         }
564
565         fprintf(stderr, "\n");
566 }
567
568 /*
569  * Print out the results of a single counter:
570  * does not use aggregated count in system-wide
571  */
572 static void print_counter(int counter)
573 {
574         u64 ena, run, val;
575         int cpu;
576
577         for (cpu = 0; cpu < nr_cpus; cpu++) {
578                 val = cpu_counts[cpu][counter].val;
579                 ena = cpu_counts[cpu][counter].ena;
580                 run = cpu_counts[cpu][counter].run;
581                 if (run == 0 || ena == 0) {
582                         fprintf(stderr, "CPU%*d%s%*s%s%-24s",
583                                 csv_output ? 0 : -4,
584                                 cpumap[cpu], csv_sep,
585                                 csv_output ? 0 : 18,
586                                 "<not counted>", csv_sep,
587                                 event_name(counter));
588
589                         fprintf(stderr, "\n");
590                         continue;
591                 }
592
593                 if (nsec_counter(counter))
594                         nsec_printout(cpu, counter, val);
595                 else
596                         abs_printout(cpu, counter, val);
597
598                 if (!csv_output) {
599                         print_noise(counter, 1.0);
600
601                         if (run != ena) {
602                                 fprintf(stderr, "  (scaled from %.2f%%)",
603                                         100.0 * run / ena);
604                         }
605                 }
606                 fprintf(stderr, "\n");
607         }
608 }
609
610 static void print_stat(int argc, const char **argv)
611 {
612         int i, counter;
613
614         fflush(stdout);
615
616         if (!csv_output) {
617                 fprintf(stderr, "\n");
618                 fprintf(stderr, " Performance counter stats for ");
619                 if(target_pid == -1 && target_tid == -1) {
620                         fprintf(stderr, "\'%s", argv[0]);
621                         for (i = 1; i < argc; i++)
622                                 fprintf(stderr, " %s", argv[i]);
623                 } else if (target_pid != -1)
624                         fprintf(stderr, "process id \'%d", target_pid);
625                 else
626                         fprintf(stderr, "thread id \'%d", target_tid);
627
628                 fprintf(stderr, "\'");
629                 if (run_count > 1)
630                         fprintf(stderr, " (%d runs)", run_count);
631                 fprintf(stderr, ":\n\n");
632         }
633
634         if (no_aggr) {
635                 for (counter = 0; counter < nr_counters; counter++)
636                         print_counter(counter);
637         } else {
638                 for (counter = 0; counter < nr_counters; counter++)
639                         print_counter_aggr(counter);
640         }
641
642         if (!csv_output) {
643                 fprintf(stderr, "\n");
644                 fprintf(stderr, " %18.9f  seconds time elapsed",
645                                 avg_stats(&walltime_nsecs_stats)/1e9);
646                 if (run_count > 1) {
647                         fprintf(stderr, "   ( +- %7.3f%% )",
648                                 100*stddev_stats(&walltime_nsecs_stats) /
649                                 avg_stats(&walltime_nsecs_stats));
650                 }
651                 fprintf(stderr, "\n\n");
652         }
653 }
654
/*
 * Last signal seen by skip_signal(), -1 if none; sig_atexit() re-raises
 * it with the default disposition when the process exits.
 */
static volatile int signr = -1;
656
657 static void skip_signal(int signo)
658 {
659         if(child_pid == -1)
660                 done = 1;
661
662         signr = signo;
663 }
664
665 static void sig_atexit(void)
666 {
667         if (child_pid != -1)
668                 kill(child_pid, SIGTERM);
669
670         if (signr == -1)
671                 return;
672
673         signal(signr, SIG_DFL);
674         kill(getpid(), signr);
675 }
676
/* NULL-terminated usage lines passed to the option parser and usage_with_options() */
static const char * const stat_usage[] = {
        "perf stat [<options>] [<command>]",
        NULL
};
681
682 static int stat__set_big_num(const struct option *opt __used,
683                              const char *s __used, int unset)
684 {
685         big_num_opt = unset ? 0 : 1;
686         return 0;
687 }
688
/*
 * Command line options of 'perf stat'; each entry stores into one of the
 * file-scope settings above or invokes a callback.
 *
 * NOTE(review): the --repeat help text mentions a maximum of 100, but
 * cmd_stat() only rejects values <= 0 - confirm whether a cap is intended.
 */
static const struct option options[] = {
        OPT_CALLBACK('e', "event", NULL, "event",
                     "event selector. use 'perf list' to list available events",
                     parse_events),
        OPT_BOOLEAN('i', "no-inherit", &no_inherit,
                    "child tasks do not inherit counters"),
        OPT_INTEGER('p', "pid", &target_pid,
                    "stat events on existing process id"),
        OPT_INTEGER('t', "tid", &target_tid,
                    "stat events on existing thread id"),
        OPT_BOOLEAN('a', "all-cpus", &system_wide,
                    "system-wide collection from all CPUs"),
        OPT_BOOLEAN('c', "scale", &scale,
                    "scale/normalize counters"),
        OPT_INCR('v', "verbose", &verbose,
                    "be more verbose (show counter open errors, etc)"),
        OPT_INTEGER('r', "repeat", &run_count,
                    "repeat command and print average + stddev (max: 100)"),
        OPT_BOOLEAN('n', "null", &null_run,
                    "null run - dont start any counters"),
        /* only records the user's choice; cmd_stat() resolves it against -x */
        OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, 
                           "print large numbers with thousands\' separators",
                           stat__set_big_num),
        OPT_STRING('C', "cpu", &cpu_list, "cpu",
                    "list of cpus to monitor in system-wide"),
        OPT_BOOLEAN('A', "no-aggr", &no_aggr,
                    "disable CPU count aggregation"),
        OPT_STRING('x', "field-separator", &csv_sep, "separator",
                   "print counts with custom separator"),
        OPT_END()
};
720
721 int cmd_stat(int argc, const char **argv, const char *prefix __used)
722 {
723         int status;
724         int i,j;
725
726         setlocale(LC_ALL, "");
727
728         argc = parse_options(argc, argv, options, stat_usage,
729                 PARSE_OPT_STOP_AT_NON_OPTION);
730
731         if (csv_sep)
732                 csv_output = true;
733         else
734                 csv_sep = DEFAULT_SEPARATOR;
735
736         /*
737          * let the spreadsheet do the pretty-printing
738          */
739         if (csv_output) {
740                 /* User explicitely passed -B? */
741                 if (big_num_opt == 1) {
742                         fprintf(stderr, "-B option not supported with -x\n");
743                         usage_with_options(stat_usage, options);
744                 } else /* Nope, so disable big number formatting */
745                         big_num = false;
746         } else if (big_num_opt == 0) /* User passed --no-big-num */
747                 big_num = false;
748
749         if (!argc && target_pid == -1 && target_tid == -1)
750                 usage_with_options(stat_usage, options);
751         if (run_count <= 0)
752                 usage_with_options(stat_usage, options);
753
754         /* no_aggr is for system-wide only */
755         if (no_aggr && !system_wide)
756                 usage_with_options(stat_usage, options);
757
758         /* Set attrs and nr_counters if no event is selected and !null_run */
759         if (!null_run && !nr_counters) {
760                 memcpy(attrs, default_attrs, sizeof(default_attrs));
761                 nr_counters = ARRAY_SIZE(default_attrs);
762         }
763
764         if (system_wide)
765                 nr_cpus = read_cpu_map(cpu_list);
766         else
767                 nr_cpus = 1;
768
769         if (nr_cpus < 1)
770                 usage_with_options(stat_usage, options);
771
772         if (target_pid != -1) {
773                 target_tid = target_pid;
774                 thread_num = find_all_tid(target_pid, &all_tids);
775                 if (thread_num <= 0) {
776                         fprintf(stderr, "Can't find all threads of pid %d\n",
777                                         target_pid);
778                         usage_with_options(stat_usage, options);
779                 }
780         } else {
781                 all_tids=malloc(sizeof(pid_t));
782                 if (!all_tids)
783                         return -ENOMEM;
784
785                 all_tids[0] = target_tid;
786                 thread_num = 1;
787         }
788
789         for (i = 0; i < MAX_NR_CPUS; i++) {
790                 for (j = 0; j < MAX_COUNTERS; j++) {
791                         fd[i][j] = malloc(sizeof(int)*thread_num);
792                         if (!fd[i][j])
793                                 return -ENOMEM;
794                 }
795         }
796
797         /*
798          * We dont want to block the signals - that would cause
799          * child tasks to inherit that and Ctrl-C would not work.
800          * What we want is for Ctrl-C to work in the exec()-ed
801          * task, but being ignored by perf stat itself:
802          */
803         atexit(sig_atexit);
804         signal(SIGINT,  skip_signal);
805         signal(SIGALRM, skip_signal);
806         signal(SIGABRT, skip_signal);
807
808         status = 0;
809         for (run_idx = 0; run_idx < run_count; run_idx++) {
810                 if (run_count != 1 && verbose)
811                         fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx + 1);
812                 status = run_perf_stat(argc, argv);
813         }
814
815         if (status != -1)
816                 print_stat(argc, argv);
817
818         return status;
819 }