perf stat: Add -d/--detailed flag to run with a lot of events
[pandora-kernel.git] / tools / perf / builtin-stat.c
1 /*
2  * builtin-stat.c
3  *
4  * Builtin stat command: Give a precise performance counters summary
5  * overview about any workload, CPU or specific PID.
6  *
7  * Sample output:
8
9    $ perf stat ~/hackbench 10
10    Time: 0.104
11
12     Performance counter stats for '/home/mingo/hackbench':
13
14        1255.538611  task clock ticks     #      10.143 CPU utilization factor
15              54011  context switches     #       0.043 M/sec
16                385  CPU migrations       #       0.000 M/sec
17              17755  pagefaults           #       0.014 M/sec
18         3808323185  CPU cycles           #    3033.219 M/sec
19         1575111190  instructions         #    1254.530 M/sec
20           17367895  cache references     #      13.833 M/sec
21            7674421  cache misses         #       6.112 M/sec
22
23     Wall-clock time elapsed:   123.786620 msecs
24
25  *
26  * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
27  *
28  * Improvements and fixes by:
29  *
30  *   Arjan van de Ven <arjan@linux.intel.com>
31  *   Yanmin Zhang <yanmin.zhang@intel.com>
32  *   Wu Fengguang <fengguang.wu@intel.com>
33  *   Mike Galbraith <efault@gmx.de>
34  *   Paul Mackerras <paulus@samba.org>
35  *   Jaswinder Singh Rajput <jaswinder@kernel.org>
36  *
37  * Released under the GPL v2. (and only v2, not any later version)
38  */
39
40 #include "perf.h"
41 #include "builtin.h"
42 #include "util/util.h"
43 #include "util/parse-options.h"
44 #include "util/parse-events.h"
45 #include "util/event.h"
46 #include "util/evlist.h"
47 #include "util/evsel.h"
48 #include "util/debug.h"
49 #include "util/color.h"
50 #include "util/header.h"
51 #include "util/cpumap.h"
52 #include "util/thread.h"
53 #include "util/thread_map.h"
54
55 #include <sys/prctl.h>
56 #include <math.h>
57 #include <locale.h>
58
59 #define DEFAULT_SEPARATOR       " "
60
/*
 * Default event set, used when the user gave no -e events on the
 * command line: the basic software counters plus the most common
 * hardware counters.
 */
static struct perf_event_attr default_attrs[] = {

  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK              },
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES        },
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS          },
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS             },

  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES              },
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES          },
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS            },
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS     },
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES           },

};
75
/*
 * Detailed stats (-d/--detailed): the default set extended with L1
 * data cache and last-level cache read accesses/misses.
 *
 * HW_CACHE configs are encoded as (cache_id | op << 8 | result << 16),
 * matching the perf_event_open() cache event encoding.
 */
static struct perf_event_attr detailed_attrs[] = {

  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK              },
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES        },
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS          },
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS             },

  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES              },
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES          },
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS            },
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS     },
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES           },

  /* L1 data cache read accesses */
  { .type = PERF_TYPE_HW_CACHE,
    .config =
         PERF_COUNT_HW_CACHE_L1D                <<  0  |
        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
        (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },

  /* L1 data cache read misses */
  { .type = PERF_TYPE_HW_CACHE,
    .config =
         PERF_COUNT_HW_CACHE_L1D                <<  0  |
        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },

  /* last-level cache read accesses */
  { .type = PERF_TYPE_HW_CACHE,
    .config =
         PERF_COUNT_HW_CACHE_LL                 <<  0  |
        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
        (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },

  /* last-level cache read misses */
  { .type = PERF_TYPE_HW_CACHE,
    .config =
         PERF_COUNT_HW_CACHE_LL                 <<  0  |
        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
};
116
/* The list of events being counted for this run. */
struct perf_evlist              *evsel_list;

static bool                     system_wide                     =  false;  /* -a: count on all CPUs */
static int                      run_idx                         =  0;      /* current repeat iteration */

static int                      run_count                       =  1;      /* -r: number of repeats */
static bool                     no_inherit                      = false;   /* -i: don't count children */
static bool                     scale                           =  true;   /* -c: scale multiplexed counts */
static bool                     no_aggr                         = false;   /* -A: per-CPU output */
static pid_t                    target_pid                      = -1;      /* -p: attach to existing process */
static pid_t                    target_tid                      = -1;      /* -t: attach to existing thread */
static pid_t                    child_pid                       = -1;      /* forked workload, -1 if none */
static bool                     null_run                        =  false;  /* -n: start no counters */
static bool                     detailed_run                    =  false;  /* -d: use detailed_attrs */
static bool                     big_num                         =  true;   /* print thousands separators */
static int                      big_num_opt                     =  -1;     /* -1 = unset, 0 = --no-big-num, 1 = -B */
static const char               *cpu_list;                                 /* -C: CPUs to monitor */
static const char               *csv_sep                        = NULL;    /* -x: field separator */
static bool                     csv_output                      = false;   /* true when -x was given */

/* Set by skip_signal() to end counting when attached (no workload child). */
static volatile int done = 0;
138
/*
 * Running-statistics accumulator (Welford's online algorithm):
 * n is the sample count, mean the running mean, and M2 the sum of
 * squared deltas from the mean (used to derive the variance).
 */
struct stats
{
        double n, mean, M2;
};

/* Per-event private data: stats for value, time-enabled, time-running. */
struct perf_stat {
        struct stats      res_stats[3];
};
147
148 static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
149 {
150         evsel->priv = zalloc(sizeof(struct perf_stat));
151         return evsel->priv == NULL ? -ENOMEM : 0;
152 }
153
154 static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
155 {
156         free(evsel->priv);
157         evsel->priv = NULL;
158 }
159
/*
 * Fold one sample into a running mean/variance accumulator
 * (Welford's online algorithm - numerically stable, single pass).
 * NOTE: the statement order matters: M2 must use the *updated* mean.
 */
static void update_stats(struct stats *stats, u64 val)
{
        double delta;

        stats->n++;
        delta = val - stats->mean;
        stats->mean += delta / stats->n;
        stats->M2 += delta*(val - stats->mean);
}
169
/* Return the running mean of the accumulated samples. */
static double avg_stats(struct stats *stats)
{
        return stats->mean;
}
174
175 /*
176  * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
177  *
178  *       (\Sum n_i^2) - ((\Sum n_i)^2)/n
179  * s^2 = -------------------------------
180  *                  n - 1
181  *
182  * http://en.wikipedia.org/wiki/Stddev
183  *
184  * The std dev of the mean is related to the std dev by:
185  *
186  *             s
187  * s_mean = -------
188  *          sqrt(n)
189  *
190  */
191 static double stddev_stats(struct stats *stats)
192 {
193         double variance = stats->M2 / (stats->n - 1);
194         double variance_mean = variance / stats->n;
195
196         return sqrt(variance_mean);
197 }
198
/*
 * "Shadow" stats fed by update_shadow_stats(); used at print time to
 * derive ratios (IPC, miss rates, GHz, M/sec). Indexed per CPU in
 * no_aggr mode, otherwise only slot 0 is used.
 */
struct stats                    runtime_nsecs_stats[MAX_NR_CPUS];
struct stats                    runtime_cycles_stats[MAX_NR_CPUS];
struct stats                    runtime_stalled_cycles_stats[MAX_NR_CPUS];
struct stats                    runtime_branches_stats[MAX_NR_CPUS];
struct stats                    runtime_cacherefs_stats[MAX_NR_CPUS];
struct stats                    runtime_l1_dcache_stats[MAX_NR_CPUS];
struct stats                    walltime_nsecs_stats;
206
207 static int create_perf_stat_counter(struct perf_evsel *evsel)
208 {
209         struct perf_event_attr *attr = &evsel->attr;
210
211         if (scale)
212                 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
213                                     PERF_FORMAT_TOTAL_TIME_RUNNING;
214
215         attr->inherit = !no_inherit;
216
217         if (system_wide)
218                 return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, false);
219
220         if (target_pid == -1 && target_tid == -1) {
221                 attr->disabled = 1;
222                 attr->enable_on_exec = 1;
223         }
224
225         return perf_evsel__open_per_thread(evsel, evsel_list->threads, false);
226 }
227
228 /*
229  * Does the counter have nsecs as a unit?
230  */
231 static inline int nsec_counter(struct perf_evsel *evsel)
232 {
233         if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
234             perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
235                 return 1;
236
237         return 0;
238 }
239
/*
 * Update various tracking values we maintain to print
 * more semantic information such as miss/hit ratios,
 * instruction rates, etc:
 */
static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
{
        /* count[0] is the (scaled) raw counter value; everything is
         * aggregated into slot 0 here - no_aggr uses read_counter(). */
        if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
                update_stats(&runtime_nsecs_stats[0], count[0]);
        else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
                update_stats(&runtime_cycles_stats[0], count[0]);
        else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES))
                update_stats(&runtime_stalled_cycles_stats[0], count[0]);
        else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
                update_stats(&runtime_branches_stats[0], count[0]);
        else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
                update_stats(&runtime_cacherefs_stats[0], count[0]);
        /* matches the HW_CACHE config with op/result == 0, i.e. L1D
         * read accesses - NOTE(review): relies on OP_READ/RESULT_ACCESS
         * both encoding to 0; confirm against the perf_event ABI. */
        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
                update_stats(&runtime_l1_dcache_stats[0], count[0]);
}
260
261 /*
262  * Read out the results of a single counter:
263  * aggregate counts across CPUs in system-wide mode
264  */
265 static int read_counter_aggr(struct perf_evsel *counter)
266 {
267         struct perf_stat *ps = counter->priv;
268         u64 *count = counter->counts->aggr.values;
269         int i;
270
271         if (__perf_evsel__read(counter, evsel_list->cpus->nr,
272                                evsel_list->threads->nr, scale) < 0)
273                 return -1;
274
275         for (i = 0; i < 3; i++)
276                 update_stats(&ps->res_stats[i], count[i]);
277
278         if (verbose) {
279                 fprintf(stderr, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
280                         event_name(counter), count[0], count[1], count[2]);
281         }
282
283         /*
284          * Save the full runtime - to allow normalization during printout:
285          */
286         update_shadow_stats(counter, count);
287
288         return 0;
289 }
290
291 /*
292  * Read out the results of a single counter:
293  * do not aggregate counts across CPUs in system-wide mode
294  */
295 static int read_counter(struct perf_evsel *counter)
296 {
297         u64 *count;
298         int cpu;
299
300         for (cpu = 0; cpu < evsel_list->cpus->nr; cpu++) {
301                 if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
302                         return -1;
303
304                 count = counter->counts->cpu[cpu].values;
305
306                 update_shadow_stats(counter, count);
307         }
308
309         return 0;
310 }
311
312 static int run_perf_stat(int argc __used, const char **argv)
313 {
314         unsigned long long t0, t1;
315         struct perf_evsel *counter;
316         int status = 0;
317         int child_ready_pipe[2], go_pipe[2];
318         const bool forks = (argc > 0);
319         char buf;
320
321         if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
322                 perror("failed to create pipes");
323                 exit(1);
324         }
325
326         if (forks) {
327                 if ((child_pid = fork()) < 0)
328                         perror("failed to fork");
329
330                 if (!child_pid) {
331                         close(child_ready_pipe[0]);
332                         close(go_pipe[1]);
333                         fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
334
335                         /*
336                          * Do a dummy execvp to get the PLT entry resolved,
337                          * so we avoid the resolver overhead on the real
338                          * execvp call.
339                          */
340                         execvp("", (char **)argv);
341
342                         /*
343                          * Tell the parent we're ready to go
344                          */
345                         close(child_ready_pipe[1]);
346
347                         /*
348                          * Wait until the parent tells us to go.
349                          */
350                         if (read(go_pipe[0], &buf, 1) == -1)
351                                 perror("unable to read pipe");
352
353                         execvp(argv[0], (char **)argv);
354
355                         perror(argv[0]);
356                         exit(-1);
357                 }
358
359                 if (target_tid == -1 && target_pid == -1 && !system_wide)
360                         evsel_list->threads->map[0] = child_pid;
361
362                 /*
363                  * Wait for the child to be ready to exec.
364                  */
365                 close(child_ready_pipe[1]);
366                 close(go_pipe[0]);
367                 if (read(child_ready_pipe[0], &buf, 1) == -1)
368                         perror("unable to read pipe");
369                 close(child_ready_pipe[0]);
370         }
371
372         list_for_each_entry(counter, &evsel_list->entries, node) {
373                 if (create_perf_stat_counter(counter) < 0) {
374                         if (errno == -EPERM || errno == -EACCES) {
375                                 error("You may not have permission to collect %sstats.\n"
376                                       "\t Consider tweaking"
377                                       " /proc/sys/kernel/perf_event_paranoid or running as root.",
378                                       system_wide ? "system-wide " : "");
379                         } else if (errno == ENOENT) {
380                                 error("%s event is not supported. ", event_name(counter));
381                         } else {
382                                 error("open_counter returned with %d (%s). "
383                                       "/bin/dmesg may provide additional information.\n",
384                                        errno, strerror(errno));
385                         }
386                         if (child_pid != -1)
387                                 kill(child_pid, SIGTERM);
388                         die("Not all events could be opened.\n");
389                         return -1;
390                 }
391         }
392
393         if (perf_evlist__set_filters(evsel_list)) {
394                 error("failed to set filter with %d (%s)\n", errno,
395                         strerror(errno));
396                 return -1;
397         }
398
399         /*
400          * Enable counters and exec the command:
401          */
402         t0 = rdclock();
403
404         if (forks) {
405                 close(go_pipe[1]);
406                 wait(&status);
407         } else {
408                 while(!done) sleep(1);
409         }
410
411         t1 = rdclock();
412
413         update_stats(&walltime_nsecs_stats, t1 - t0);
414
415         if (no_aggr) {
416                 list_for_each_entry(counter, &evsel_list->entries, node) {
417                         read_counter(counter);
418                         perf_evsel__close_fd(counter, evsel_list->cpus->nr, 1);
419                 }
420         } else {
421                 list_for_each_entry(counter, &evsel_list->entries, node) {
422                         read_counter_aggr(counter);
423                         perf_evsel__close_fd(counter, evsel_list->cpus->nr,
424                                              evsel_list->threads->nr);
425                 }
426         }
427
428         return WEXITSTATUS(status);
429 }
430
/* Print "total" as a +- percentage of "avg" (0% when avg is zero). */
static void print_noise_pct(double total, double avg)
{
        double pct = avg ? 100.0 * total / avg : 0.0;

        fprintf(stderr, "  ( +-%6.2f%% )", pct);
}
440
441 static void print_noise(struct perf_evsel *evsel, double avg)
442 {
443         struct perf_stat *ps;
444
445         if (run_count == 1)
446                 return;
447
448         ps = evsel->priv;
449         print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
450 }
451
452 static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
453 {
454         double msecs = avg / 1e6;
455         char cpustr[16] = { '\0', };
456         const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-24s";
457
458         if (no_aggr)
459                 sprintf(cpustr, "CPU%*d%s",
460                         csv_output ? 0 : -4,
461                         evsel_list->cpus->map[cpu], csv_sep);
462
463         fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(evsel));
464
465         if (evsel->cgrp)
466                 fprintf(stderr, "%s%s", csv_sep, evsel->cgrp->name);
467
468         if (csv_output)
469                 return;
470
471         if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
472                 fprintf(stderr, " # %8.3f CPUs utilized          ", avg / avg_stats(&walltime_nsecs_stats));
473 }
474
475 static void print_stalled_cycles(int cpu, struct perf_evsel *evsel __used, double avg)
476 {
477         double total, ratio = 0.0;
478         const char *color;
479
480         total = avg_stats(&runtime_cycles_stats[cpu]);
481
482         if (total)
483                 ratio = avg / total * 100.0;
484
485         color = PERF_COLOR_NORMAL;
486         if (ratio > 75.0)
487                 color = PERF_COLOR_RED;
488         else if (ratio > 50.0)
489                 color = PERF_COLOR_MAGENTA;
490         else if (ratio > 25.0)
491                 color = PERF_COLOR_YELLOW;
492
493         fprintf(stderr, " #   ");
494         color_fprintf(stderr, color, "%5.2f%%", ratio);
495         fprintf(stderr, " of all cycles are idle ");
496 }
497
498 static void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double avg)
499 {
500         double total, ratio = 0.0;
501         const char *color;
502
503         total = avg_stats(&runtime_branches_stats[cpu]);
504
505         if (total)
506                 ratio = avg / total * 100.0;
507
508         color = PERF_COLOR_NORMAL;
509         if (ratio > 20.0)
510                 color = PERF_COLOR_RED;
511         else if (ratio > 10.0)
512                 color = PERF_COLOR_MAGENTA;
513         else if (ratio > 5.0)
514                 color = PERF_COLOR_YELLOW;
515
516         fprintf(stderr, " #   ");
517         color_fprintf(stderr, color, "%5.2f%%", ratio);
518         fprintf(stderr, " of all branches        ");
519 }
520
521 static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
522 {
523         double total, ratio = 0.0;
524         const char *color;
525
526         total = avg_stats(&runtime_l1_dcache_stats[cpu]);
527
528         if (total)
529                 ratio = avg / total * 100.0;
530
531         color = PERF_COLOR_NORMAL;
532         if (ratio > 20.0)
533                 color = PERF_COLOR_RED;
534         else if (ratio > 10.0)
535                 color = PERF_COLOR_MAGENTA;
536         else if (ratio > 5.0)
537                 color = PERF_COLOR_YELLOW;
538
539         fprintf(stderr, " #   ");
540         color_fprintf(stderr, color, "%5.2f%%", ratio);
541         fprintf(stderr, " of all L1-dcache hits  ");
542 }
543
/*
 * Print an absolute event count plus, where possible, a derived
 * "shadow" metric (insns/cycle, miss ratios, GHz, M/sec) computed from
 * the runtime stats that update_shadow_stats() collected.
 */
static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
{
        double total, ratio = 0.0;
        char cpustr[16] = { '\0', };
        const char *fmt;

        if (csv_output)
                fmt = "%s%.0f%s%s";
        else if (big_num)
                fmt = "%s%'18.0f%s%-24s";       /* %' : locale thousands separators */
        else
                fmt = "%s%18.0f%s%-24s";

        if (no_aggr)
                sprintf(cpustr, "CPU%*d%s",
                        csv_output ? 0 : -4,
                        evsel_list->cpus->map[cpu], csv_sep);
        else
                cpu = 0;        /* aggregated mode only fills slot 0 */

        fprintf(stderr, fmt, cpustr, avg, csv_sep, event_name(evsel));

        if (evsel->cgrp)
                fprintf(stderr, "%s%s", csv_sep, evsel->cgrp->name);

        /* no derived metrics in CSV mode - spreadsheet does the math */
        if (csv_output)
                return;

        if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
                total = avg_stats(&runtime_cycles_stats[cpu]);

                if (total)
                        ratio = avg / total;

                fprintf(stderr, " #    %4.2f  insns per cycle", ratio);

                total = avg_stats(&runtime_stalled_cycles_stats[cpu]);

                if (total && avg) {
                        ratio = total / avg;
                        fprintf(stderr, "\n                                            #    %4.2f  stalled cycles per insn", ratio);
                }

        } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
                        runtime_branches_stats[cpu].n != 0) {
                print_branch_misses(cpu, evsel, avg);
        } else if (
                /* the HW_CACHE config for L1D read misses */
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
                        runtime_l1_dcache_stats[cpu].n != 0) {
                print_l1_dcache_misses(cpu, evsel, avg);
        } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
                        runtime_cacherefs_stats[cpu].n != 0) {
                total = avg_stats(&runtime_cacherefs_stats[cpu]);

                if (total)
                        ratio = avg * 100 / total;

                fprintf(stderr, " # %8.3f %% of all cache refs    ", ratio);

        } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES)) {
                print_stalled_cycles(cpu, evsel, avg);
        } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
                /* cycles / nsecs == GHz */
                total = avg_stats(&runtime_nsecs_stats[cpu]);

                if (total)
                        ratio = 1.0 * avg / total;

                fprintf(stderr, " # %8.3f GHz                    ", ratio);
        } else if (runtime_nsecs_stats[cpu].n != 0) {
                /* generic fallback: rate in millions per second */
                total = avg_stats(&runtime_nsecs_stats[cpu]);

                if (total)
                        ratio = 1000.0 * avg / total;

                fprintf(stderr, " # %8.3f M/sec                  ", ratio);
        } else {
                /* keep columns aligned when no metric applies */
                fprintf(stderr, "                                   ");
        }
}
626
/*
 * Print out the results of a single counter:
 * aggregated counts in system-wide mode
 */
static void print_counter_aggr(struct perf_evsel *counter)
{
        struct perf_stat *ps = counter->priv;
        double avg = avg_stats(&ps->res_stats[0]);
        int scaled = counter->counts->scaled;

        /* scaled == -1: the counter never got to run at all */
        if (scaled == -1) {
                fprintf(stderr, "%*s%s%*s",
                        csv_output ? 0 : 18,
                        "<not counted>",
                        csv_sep,
                        csv_output ? 0 : -24,
                        event_name(counter));

                if (counter->cgrp)
                        fprintf(stderr, "%s%s", csv_sep, counter->cgrp->name);

                fputc('\n', stderr);
                return;
        }

        if (nsec_counter(counter))
                nsec_printout(-1, counter, avg);
        else
                abs_printout(-1, counter, avg);

        if (csv_output) {
                fputc('\n', stderr);
                return;
        }

        print_noise(counter, avg);

        /* scaled != 0: counter was time-shared - show the effective
         * running/enabled percentage. NOTE(review): presumably set by
         * the read-side scaling logic; confirm in util/evsel.c. */
        if (scaled) {
                double avg_enabled, avg_running;

                avg_enabled = avg_stats(&ps->res_stats[1]);
                avg_running = avg_stats(&ps->res_stats[2]);

                fprintf(stderr, "  (%.2f%%)", 100 * avg_running / avg_enabled);
        }
        fprintf(stderr, "\n");
}
674
/*
 * Print out the results of a single counter:
 * does not use aggregated count in system-wide
 */
static void print_counter(struct perf_evsel *counter)
{
        u64 ena, run, val;
        int cpu;

        /* one output line per monitored CPU */
        for (cpu = 0; cpu < evsel_list->cpus->nr; cpu++) {
                val = counter->counts->cpu[cpu].val;
                ena = counter->counts->cpu[cpu].ena;
                run = counter->counts->cpu[cpu].run;
                /* never enabled or never ran on this CPU */
                if (run == 0 || ena == 0) {
                        fprintf(stderr, "CPU%*d%s%*s%s%*s",
                                csv_output ? 0 : -4,
                                evsel_list->cpus->map[cpu], csv_sep,
                                csv_output ? 0 : 18,
                                "<not counted>", csv_sep,
                                csv_output ? 0 : -24,
                                event_name(counter));

                        if (counter->cgrp)
                                fprintf(stderr, "%s%s", csv_sep, counter->cgrp->name);

                        fputc('\n', stderr);
                        continue;
                }

                if (nsec_counter(counter))
                        nsec_printout(cpu, counter, val);
                else
                        abs_printout(cpu, counter, val);

                if (!csv_output) {
                        print_noise(counter, 1.0);

                        /* run < ena: counter was multiplexed on this CPU */
                        if (run != ena)
                                fprintf(stderr, "  (%.2f%%)", 100.0 * run / ena);
                }
                fputc('\n', stderr);
        }
}
718
719 static void print_stat(int argc, const char **argv)
720 {
721         struct perf_evsel *counter;
722         int i;
723
724         fflush(stdout);
725
726         if (!csv_output) {
727                 fprintf(stderr, "\n");
728                 fprintf(stderr, " Performance counter stats for ");
729                 if(target_pid == -1 && target_tid == -1) {
730                         fprintf(stderr, "\'%s", argv[0]);
731                         for (i = 1; i < argc; i++)
732                                 fprintf(stderr, " %s", argv[i]);
733                 } else if (target_pid != -1)
734                         fprintf(stderr, "process id \'%d", target_pid);
735                 else
736                         fprintf(stderr, "thread id \'%d", target_tid);
737
738                 fprintf(stderr, "\'");
739                 if (run_count > 1)
740                         fprintf(stderr, " (%d runs)", run_count);
741                 fprintf(stderr, ":\n\n");
742         }
743
744         if (no_aggr) {
745                 list_for_each_entry(counter, &evsel_list->entries, node)
746                         print_counter(counter);
747         } else {
748                 list_for_each_entry(counter, &evsel_list->entries, node)
749                         print_counter_aggr(counter);
750         }
751
752         if (!csv_output) {
753                 fprintf(stderr, "\n");
754                 fprintf(stderr, " %18.9f  seconds time elapsed",
755                                 avg_stats(&walltime_nsecs_stats)/1e9);
756                 if (run_count > 1) {
757                         print_noise_pct(stddev_stats(&walltime_nsecs_stats),
758                                         avg_stats(&walltime_nsecs_stats));
759                 }
760                 fprintf(stderr, "\n\n");
761         }
762 }
763
764 static volatile int signr = -1;
765
766 static void skip_signal(int signo)
767 {
768         if(child_pid == -1)
769                 done = 1;
770
771         signr = signo;
772 }
773
774 static void sig_atexit(void)
775 {
776         if (child_pid != -1)
777                 kill(child_pid, SIGTERM);
778
779         if (signr == -1)
780                 return;
781
782         signal(signr, SIG_DFL);
783         kill(getpid(), signr);
784 }
785
/* Command-line synopsis shown by usage_with_options(). */
static const char * const stat_usage[] = {
        "perf stat [<options>] [<command>]",
        NULL
};
790
791 static int stat__set_big_num(const struct option *opt __used,
792                              const char *s __used, int unset)
793 {
794         big_num_opt = unset ? 0 : 1;
795         return 0;
796 }
797
/* perf stat command-line option table (parsed by parse_options()). */
static const struct option options[] = {
        OPT_CALLBACK('e', "event", &evsel_list, "event",
                     "event selector. use 'perf list' to list available events",
                     parse_events),
        OPT_CALLBACK(0, "filter", &evsel_list, "filter",
                     "event filter", parse_filter),
        OPT_BOOLEAN('i', "no-inherit", &no_inherit,
                    "child tasks do not inherit counters"),
        OPT_INTEGER('p', "pid", &target_pid,
                    "stat events on existing process id"),
        OPT_INTEGER('t', "tid", &target_tid,
                    "stat events on existing thread id"),
        OPT_BOOLEAN('a', "all-cpus", &system_wide,
                    "system-wide collection from all CPUs"),
        OPT_BOOLEAN('c', "scale", &scale,
                    "scale/normalize counters"),
        OPT_INCR('v', "verbose", &verbose,
                    "be more verbose (show counter open errors, etc)"),
        OPT_INTEGER('r', "repeat", &run_count,
                    "repeat command and print average + stddev (max: 100)"),
        OPT_BOOLEAN('n', "null", &null_run,
                    "null run - dont start any counters"),
        OPT_BOOLEAN('d', "detailed", &detailed_run,
                    "detailed run - start a lot of events"),
        OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
                           "print large numbers with thousands\' separators",
                           stat__set_big_num),
        OPT_STRING('C', "cpu", &cpu_list, "cpu",
                    "list of cpus to monitor in system-wide"),
        OPT_BOOLEAN('A', "no-aggr", &no_aggr,
                    "disable CPU count aggregation"),
        OPT_STRING('x', "field-separator", &csv_sep, "separator",
                   "print counts with custom separator"),
        OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
                     "monitor event in cgroup name only",
                     parse_cgroups),
        OPT_END()
};
836
/*
 * Entry point for 'perf stat'.  Parses command-line options, builds the
 * event list (user-specified events, the --detailed set, or the default
 * set), sets up the CPU and thread maps, runs the workload --repeat
 * times via run_perf_stat() and prints the aggregated counts.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, or the status of
 * the last run_perf_stat() invocation.  Several invalid-usage paths go
 * through usage_with_options() instead, which prints usage and exits.
 */
int cmd_stat(int argc, const char **argv, const char *prefix __used)
{
	struct perf_evsel *pos;
	int status = -ENOMEM;

	setlocale(LC_ALL, "");

	evsel_list = perf_evlist__new(NULL, NULL);
	if (evsel_list == NULL)
		return -ENOMEM;

	/*
	 * Stop at the first non-option argument: everything after it is
	 * the workload command line to fork+exec.
	 */
	argc = parse_options(argc, argv, options, stat_usage,
		PARSE_OPT_STOP_AT_NON_OPTION);

	/* A user-supplied -x separator implies CSV output mode. */
	if (csv_sep)
		csv_output = true;
	else
		csv_sep = DEFAULT_SEPARATOR;

	/*
	 * let the spreadsheet do the pretty-printing
	 */
	if (csv_output) {
		/* User explicitely passed -B? */
		if (big_num_opt == 1) {
			fprintf(stderr, "-B option not supported with -x\n");
			usage_with_options(stat_usage, options);
		} else /* Nope, so disable big number formatting */
			big_num = false;
	} else if (big_num_opt == 0) /* User passed --no-big-num */
		big_num = false;

	/* Need either a workload to fork or an existing pid/tid to attach to. */
	if (!argc && target_pid == -1 && target_tid == -1)
		usage_with_options(stat_usage, options);
	if (run_count <= 0)
		usage_with_options(stat_usage, options);

	/* no_aggr, cgroup are for system-wide only */
	if ((no_aggr || nr_cgroups) && !system_wide) {
		fprintf(stderr, "both cgroup and no-aggregation "
			"modes only available in system-wide mode\n");

		usage_with_options(stat_usage, options);
	}

	/*
	 * -d/--detailed: append the predefined detailed_attrs event set
	 * (defined elsewhere in this file) on top of any explicit -e events.
	 */
	if (detailed_run) {
		size_t c;

		for (c = 0; c < ARRAY_SIZE(detailed_attrs); ++c) {
			pos = perf_evsel__new(&detailed_attrs[c], c);
			if (pos == NULL)
				goto out;
			perf_evlist__add(evsel_list, pos);
		}
	}
	/* Set attrs and nr_counters if no event is selected and !null_run */
	if (!detailed_run && !null_run && !evsel_list->nr_entries) {
		size_t c;

		for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) {
			pos = perf_evsel__new(&default_attrs[c], c);
			if (pos == NULL)
				goto out;
			perf_evlist__add(evsel_list, pos);
		}
	}

	/* -p without -t: treat the given pid as the thread to monitor. */
	if (target_pid != -1)
		target_tid = target_pid;

	evsel_list->threads = thread_map__new(target_pid, target_tid);
	if (evsel_list->threads == NULL) {
		pr_err("Problems finding threads of monitor\n");
		usage_with_options(stat_usage, options);
	}

	/* System-wide mode monitors real CPUs (-C list); otherwise a dummy map. */
	if (system_wide)
		evsel_list->cpus = cpu_map__new(cpu_list);
	else
		evsel_list->cpus = cpu_map__dummy_new();

	if (evsel_list->cpus == NULL) {
		perror("failed to parse CPUs map");
		usage_with_options(stat_usage, options);
		return -1;
	}

	/* Allocate per-event state: stat priv, counts and fds (cpus x threads). */
	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_evsel__alloc_stat_priv(pos) < 0 ||
		    perf_evsel__alloc_counts(pos, evsel_list->cpus->nr) < 0 ||
		    perf_evsel__alloc_fd(pos, evsel_list->cpus->nr, evsel_list->threads->nr) < 0)
			goto out_free_fd;
	}

	/*
	 * We dont want to block the signals - that would cause
	 * child tasks to inherit that and Ctrl-C would not work.
	 * What we want is for Ctrl-C to work in the exec()-ed
	 * task, but being ignored by perf stat itself:
	 */
	atexit(sig_atexit);
	signal(SIGINT,  skip_signal);
	signal(SIGALRM, skip_signal);
	signal(SIGABRT, skip_signal);

	status = 0;
	for (run_idx = 0; run_idx < run_count; run_idx++) {
		if (run_count != 1 && verbose)
			fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx + 1);
		status = run_perf_stat(argc, argv);
	}

	/* -1 signals an interrupted/failed run; skip printing in that case. */
	if (status != -1)
		print_stat(argc, argv);
out_free_fd:
	list_for_each_entry(pos, &evsel_list->entries, node)
		perf_evsel__free_stat_priv(pos);
	perf_evlist__delete_maps(evsel_list);
out:
	perf_evlist__delete(evsel_list);
	return status;
}