perf stat: Fix compatibility behavior
[pandora-kernel.git] / tools / perf / builtin-stat.c
1 /*
2  * builtin-stat.c
3  *
4  * Builtin stat command: Give a precise performance counters summary
5  * overview about any workload, CPU or specific PID.
6  *
7  * Sample output:
8
9    $ perf stat ~/hackbench 10
10    Time: 0.104
11
12     Performance counter stats for '/home/mingo/hackbench':
13
14        1255.538611  task clock ticks     #      10.143 CPU utilization factor
15              54011  context switches     #       0.043 M/sec
16                385  CPU migrations       #       0.000 M/sec
17              17755  pagefaults           #       0.014 M/sec
18         3808323185  CPU cycles           #    3033.219 M/sec
19         1575111190  instructions         #    1254.530 M/sec
20           17367895  cache references     #      13.833 M/sec
21            7674421  cache misses         #       6.112 M/sec
22
23     Wall-clock time elapsed:   123.786620 msecs
24
25  *
26  * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
27  *
28  * Improvements and fixes by:
29  *
30  *   Arjan van de Ven <arjan@linux.intel.com>
31  *   Yanmin Zhang <yanmin.zhang@intel.com>
32  *   Wu Fengguang <fengguang.wu@intel.com>
33  *   Mike Galbraith <efault@gmx.de>
34  *   Paul Mackerras <paulus@samba.org>
35  *   Jaswinder Singh Rajput <jaswinder@kernel.org>
36  *
37  * Released under the GPL v2. (and only v2, not any later version)
38  */
39
40 #include "perf.h"
41 #include "builtin.h"
42 #include "util/util.h"
43 #include "util/parse-options.h"
44 #include "util/parse-events.h"
45 #include "util/event.h"
46 #include "util/evlist.h"
47 #include "util/evsel.h"
48 #include "util/debug.h"
49 #include "util/color.h"
50 #include "util/header.h"
51 #include "util/cpumap.h"
52 #include "util/thread.h"
53 #include "util/thread_map.h"
54
55 #include <sys/prctl.h>
56 #include <math.h>
57 #include <locale.h>
58
#define DEFAULT_SEPARATOR       " "

/*
 * Events counted when the user did not select any with -e and did not
 * pass -n: basic software counters plus the core hardware counters.
 */
static struct perf_event_attr default_attrs[] = {

  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK              },
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES        },
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS          },
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS             },

  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES              },
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES          },
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS            },
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS     },
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES           },

};
75
/*
 * Detailed stats (-d): the default event set plus L1-dcache and
 * last-level-cache read accesses and misses.  HW_CACHE configs are
 * encoded as: cache-id | (operation << 8) | (result << 16).
 */
static struct perf_event_attr detailed_attrs[] = {

  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK              },
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES        },
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS          },
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS             },

  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES              },
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES          },
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS            },
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS     },
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES           },

  /* L1 data cache read accesses */
  { .type = PERF_TYPE_HW_CACHE,
    .config =
         PERF_COUNT_HW_CACHE_L1D                <<  0  |
        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
        (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },

  /* L1 data cache read misses */
  { .type = PERF_TYPE_HW_CACHE,
    .config =
         PERF_COUNT_HW_CACHE_L1D                <<  0  |
        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },

  /* last-level cache read accesses */
  { .type = PERF_TYPE_HW_CACHE,
    .config =
         PERF_COUNT_HW_CACHE_LL                 <<  0  |
        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
        (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },

  /* last-level cache read misses */
  { .type = PERF_TYPE_HW_CACHE,
    .config =
         PERF_COUNT_HW_CACHE_LL                 <<  0  |
        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
};
116
struct perf_evlist              *evsel_list;    /* all events selected for this run */

static bool                     system_wide                     =  false;  /* -a: count on all CPUs */
static int                      run_idx                         =  0;      /* index of the current repeat */

static int                      run_count                       =  1;      /* -r: number of workload repeats */
static bool                     no_inherit                      = false;   /* -i: don't count in child tasks */
static bool                     scale                           =  true;   /* -c: scale for multiplexed counters */
static bool                     no_aggr                         = false;   /* -A: print per-CPU counts */
static pid_t                    target_pid                      = -1;      /* -p: existing process to monitor */
static pid_t                    target_tid                      = -1;      /* -t: existing thread to monitor */
static pid_t                    child_pid                       = -1;      /* forked workload, -1 if none */
static bool                     null_run                        =  false;  /* -n: don't start any counters */
static bool                     detailed_run                    =  false;  /* -d: use detailed_attrs event set */
static bool                     sync_run                        =  false;  /* -S: sync() before starting a run */
static bool                     big_num                         =  true;   /* print thousands' separators */
static int                      big_num_opt                     =  -1;     /* -1 unset, 0 --no-big-num, 1 -B */
static const char               *cpu_list;                                 /* -C: cpus to monitor (system-wide) */
static const char               *csv_sep                        = NULL;    /* -x: field separator */
static bool                     csv_output                      = false;   /* true once -x was given */

/* Set by the SIGINT handler to end counting when no child is forked. */
static volatile int done = 0;
139
/*
 * Running mean/variance accumulator (Welford's online algorithm):
 * @n is the sample count, @mean the running mean, @M2 the sum of
 * squared distances from the mean.
 */
struct stats
{
        double n, mean, M2;
};

/*
 * Per-event private state.  res_stats[0] tracks the raw count;
 * [1] and [2] track the enabled/running times used for scaling
 * (see read_counter_aggr() and print_counter_aggr()).
 */
struct perf_stat {
        struct stats      res_stats[3];
};
148
149 static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
150 {
151         evsel->priv = zalloc(sizeof(struct perf_stat));
152         return evsel->priv == NULL ? -ENOMEM : 0;
153 }
154
/* Release the per-event stats area and clear the dangling pointer. */
static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
{
        free(evsel->priv);
        evsel->priv = NULL;
}
160
/*
 * Fold one new sample into @stats using Welford's numerically stable
 * online algorithm for mean and variance.
 */
static void update_stats(struct stats *stats, u64 val)
{
        double delta;

        stats->n++;
        delta = val - stats->mean;
        stats->mean += delta / stats->n;
        /* Uses both the pre- and post-update mean: statement order matters. */
        stats->M2 += delta*(val - stats->mean);
}
170
/* Arithmetic mean of all samples folded into @stats so far. */
static double avg_stats(struct stats *stats)
{
        return stats->mean;
}
175
176 /*
177  * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
178  *
179  *       (\Sum n_i^2) - ((\Sum n_i)^2)/n
180  * s^2 = -------------------------------
181  *                  n - 1
182  *
183  * http://en.wikipedia.org/wiki/Stddev
184  *
185  * The std dev of the mean is related to the std dev by:
186  *
187  *             s
188  * s_mean = -------
189  *          sqrt(n)
190  *
191  */
192 static double stddev_stats(struct stats *stats)
193 {
194         double variance = stats->M2 / (stats->n - 1);
195         double variance_mean = variance / stats->n;
196
197         return sqrt(variance_mean);
198 }
199
/*
 * Shadow stats fed by update_shadow_stats(); used at print time to
 * derive ratios (GHz, insns/cycle, miss rates, M/sec).  Indexed per
 * CPU in -A mode, otherwise only slot 0 is used.
 */
struct stats                    runtime_nsecs_stats[MAX_NR_CPUS];
struct stats                    runtime_cycles_stats[MAX_NR_CPUS];
struct stats                    runtime_stalled_cycles_stats[MAX_NR_CPUS];
struct stats                    runtime_branches_stats[MAX_NR_CPUS];
struct stats                    runtime_cacherefs_stats[MAX_NR_CPUS];
struct stats                    runtime_l1_dcache_stats[MAX_NR_CPUS];
struct stats                    walltime_nsecs_stats;
207
208 static int create_perf_stat_counter(struct perf_evsel *evsel)
209 {
210         struct perf_event_attr *attr = &evsel->attr;
211
212         if (scale)
213                 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
214                                     PERF_FORMAT_TOTAL_TIME_RUNNING;
215
216         attr->inherit = !no_inherit;
217
218         if (system_wide)
219                 return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, false);
220
221         if (target_pid == -1 && target_tid == -1) {
222                 attr->disabled = 1;
223                 attr->enable_on_exec = 1;
224         }
225
226         return perf_evsel__open_per_thread(evsel, evsel_list->threads, false);
227 }
228
229 /*
230  * Does the counter have nsecs as a unit?
231  */
232 static inline int nsec_counter(struct perf_evsel *evsel)
233 {
234         if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
235             perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
236                 return 1;
237
238         return 0;
239 }
240
241 /*
242  * Update various tracking values we maintain to print
243  * more semantic information such as miss/hit ratios,
244  * instruction rates, etc:
245  */
static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
{
        /*
         * Only slot 0 is filled here: aggregated reads keep their
         * shadow stats in the first entry of each per-CPU array.
         */
        if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
                update_stats(&runtime_nsecs_stats[0], count[0]);
        else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
                update_stats(&runtime_cycles_stats[0], count[0]);
        else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES))
                update_stats(&runtime_stalled_cycles_stats[0], count[0]);
        else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
                update_stats(&runtime_branches_stats[0], count[0]);
        else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
                update_stats(&runtime_cacherefs_stats[0], count[0]);
        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
                update_stats(&runtime_l1_dcache_stats[0], count[0]);
}
261
262 /*
263  * Read out the results of a single counter:
264  * aggregate counts across CPUs in system-wide mode
265  */
static int read_counter_aggr(struct perf_evsel *counter)
{
        struct perf_stat *ps = counter->priv;
        u64 *count = counter->counts->aggr.values;
        int i;

        if (__perf_evsel__read(counter, evsel_list->cpus->nr,
                               evsel_list->threads->nr, scale) < 0)
                return -1;

        /* count[0..2]: value, time enabled, time running */
        for (i = 0; i < 3; i++)
                update_stats(&ps->res_stats[i], count[i]);

        if (verbose) {
                fprintf(stderr, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
                        event_name(counter), count[0], count[1], count[2]);
        }

        /*
         * Save the full runtime - to allow normalization during printout:
         */
        update_shadow_stats(counter, count);

        return 0;
}
291
292 /*
293  * Read out the results of a single counter:
294  * do not aggregate counts across CPUs in system-wide mode
295  */
296 static int read_counter(struct perf_evsel *counter)
297 {
298         u64 *count;
299         int cpu;
300
301         for (cpu = 0; cpu < evsel_list->cpus->nr; cpu++) {
302                 if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
303                         return -1;
304
305                 count = counter->counts->cpu[cpu].values;
306
307                 update_shadow_stats(counter, count);
308         }
309
310         return 0;
311 }
312
/*
 * Open all counters, run (or attach to) the workload once, and read
 * the results back.  Returns the workload's exit status.
 *
 * The fork/exec handshake uses two pipes: the child signals readiness
 * by closing child_ready_pipe (via exec's FD_CLOEXEC), then blocks on
 * go_pipe until the parent has opened the counters.
 */
static int run_perf_stat(int argc __used, const char **argv)
{
        unsigned long long t0, t1;
        struct perf_evsel *counter;
        int status = 0;
        int child_ready_pipe[2], go_pipe[2];
        const bool forks = (argc > 0);
        char buf;

        if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
                perror("failed to create pipes");
                exit(1);
        }

        if (forks) {
                /*
                 * NOTE(review): a failed fork() is only reported, not
                 * treated as fatal - child_pid stays -1 and we fall
                 * through to the parent path.  Verify this is intended.
                 */
                if ((child_pid = fork()) < 0)
                        perror("failed to fork");

                if (!child_pid) {
                        close(child_ready_pipe[0]);
                        close(go_pipe[1]);
                        /* exec closes this fd, signalling readiness below. */
                        fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

                        /*
                         * Do a dummy execvp to get the PLT entry resolved,
                         * so we avoid the resolver overhead on the real
                         * execvp call.
                         */
                        execvp("", (char **)argv);

                        /*
                         * Tell the parent we're ready to go
                         */
                        close(child_ready_pipe[1]);

                        /*
                         * Wait until the parent tells us to go.
                         */
                        if (read(go_pipe[0], &buf, 1) == -1)
                                perror("unable to read pipe");

                        execvp(argv[0], (char **)argv);

                        perror(argv[0]);
                        exit(-1);
                }

                /* Point the thread map at the freshly forked workload. */
                if (target_tid == -1 && target_pid == -1 && !system_wide)
                        evsel_list->threads->map[0] = child_pid;

                /*
                 * Wait for the child to be ready to exec.
                 */
                close(child_ready_pipe[1]);
                close(go_pipe[0]);
                if (read(child_ready_pipe[0], &buf, 1) == -1)
                        perror("unable to read pipe");
                close(child_ready_pipe[0]);
        }

        list_for_each_entry(counter, &evsel_list->entries, node) {
                if (create_perf_stat_counter(counter) < 0) {
                        /* Soft errors: silently skip unsupported counters. */
                        if (errno == EINVAL || errno == ENOSYS)
                                continue;

                        if (errno == EPERM || errno == EACCES) {
                                error("You may not have permission to collect %sstats.\n"
                                      "\t Consider tweaking"
                                      " /proc/sys/kernel/perf_event_paranoid or running as root.",
                                      system_wide ? "system-wide " : "");
                        } else if (errno == ENOENT) {
                                error("%s event is not supported. ", event_name(counter));
                        } else {
                                error("open_counter returned with %d (%s). "
                                      "/bin/dmesg may provide additional information.\n",
                                       errno, strerror(errno));
                        }
                        if (child_pid != -1)
                                kill(child_pid, SIGTERM);
                        /*
                         * NOTE(review): die() does not return, so the
                         * return below is unreachable.
                         */
                        die("Not all events could be opened.\n");
                        return -1;
                }
        }

        if (perf_evlist__set_filters(evsel_list)) {
                error("failed to set filter with %d (%s)\n", errno,
                        strerror(errno));
                return -1;
        }

        /*
         * Enable counters and exec the command:
         */
        t0 = rdclock();

        if (forks) {
                /* Closing go_pipe releases the child from its read(). */
                close(go_pipe[1]);
                wait(&status);
        } else {
                /* Attached mode: count until interrupted (see skip_signal). */
                while(!done) sleep(1);
        }

        t1 = rdclock();

        update_stats(&walltime_nsecs_stats, t1 - t0);

        if (no_aggr) {
                list_for_each_entry(counter, &evsel_list->entries, node) {
                        read_counter(counter);
                        perf_evsel__close_fd(counter, evsel_list->cpus->nr, 1);
                }
        } else {
                list_for_each_entry(counter, &evsel_list->entries, node) {
                        read_counter_aggr(counter);
                        perf_evsel__close_fd(counter, evsel_list->cpus->nr,
                                             evsel_list->threads->nr);
                }
        }

        return WEXITSTATUS(status);
}
434
/*
 * Print @total as a percentage of @avg (relative standard deviation);
 * prints 0.00% when the mean is zero.
 */
static void print_noise_pct(double total, double avg)
{
        double pct = avg ? 100.0*total/avg : 0.0;

        fprintf(stderr, "  ( +-%6.2f%% )", pct);
}
444
445 static void print_noise(struct perf_evsel *evsel, double avg)
446 {
447         struct perf_stat *ps;
448
449         if (run_count == 1)
450                 return;
451
452         ps = evsel->priv;
453         print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
454 }
455
/*
 * Print a nanosecond-unit counter as milliseconds, with an optional
 * per-CPU prefix (-A) and a CPUs-utilized annotation for task-clock.
 */
static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
{
        double msecs = avg / 1e6;
        char cpustr[16] = { '\0', };
        const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-24s";

        if (no_aggr)
                sprintf(cpustr, "CPU%*d%s",
                        csv_output ? 0 : -4,
                        evsel_list->cpus->map[cpu], csv_sep);

        fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(evsel));

        if (evsel->cgrp)
                fprintf(stderr, "%s%s", csv_sep, evsel->cgrp->name);

        /* CSV consumers compute derived values themselves. */
        if (csv_output)
                return;

        if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
                fprintf(stderr, " # %8.3f CPUs utilized          ", avg / avg_stats(&walltime_nsecs_stats));
}
478
479 static void print_stalled_cycles(int cpu, struct perf_evsel *evsel __used, double avg)
480 {
481         double total, ratio = 0.0;
482         const char *color;
483
484         total = avg_stats(&runtime_cycles_stats[cpu]);
485
486         if (total)
487                 ratio = avg / total * 100.0;
488
489         color = PERF_COLOR_NORMAL;
490         if (ratio > 75.0)
491                 color = PERF_COLOR_RED;
492         else if (ratio > 50.0)
493                 color = PERF_COLOR_MAGENTA;
494         else if (ratio > 25.0)
495                 color = PERF_COLOR_YELLOW;
496
497         fprintf(stderr, " #   ");
498         color_fprintf(stderr, color, "%5.2f%%", ratio);
499         fprintf(stderr, " of all cycles are idle ");
500 }
501
502 static void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double avg)
503 {
504         double total, ratio = 0.0;
505         const char *color;
506
507         total = avg_stats(&runtime_branches_stats[cpu]);
508
509         if (total)
510                 ratio = avg / total * 100.0;
511
512         color = PERF_COLOR_NORMAL;
513         if (ratio > 20.0)
514                 color = PERF_COLOR_RED;
515         else if (ratio > 10.0)
516                 color = PERF_COLOR_MAGENTA;
517         else if (ratio > 5.0)
518                 color = PERF_COLOR_YELLOW;
519
520         fprintf(stderr, " #   ");
521         color_fprintf(stderr, color, "%5.2f%%", ratio);
522         fprintf(stderr, " of all branches        ");
523 }
524
525 static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
526 {
527         double total, ratio = 0.0;
528         const char *color;
529
530         total = avg_stats(&runtime_l1_dcache_stats[cpu]);
531
532         if (total)
533                 ratio = avg / total * 100.0;
534
535         color = PERF_COLOR_NORMAL;
536         if (ratio > 20.0)
537                 color = PERF_COLOR_RED;
538         else if (ratio > 10.0)
539                 color = PERF_COLOR_MAGENTA;
540         else if (ratio > 5.0)
541                 color = PERF_COLOR_YELLOW;
542
543         fprintf(stderr, " #   ");
544         color_fprintf(stderr, color, "%5.2f%%", ratio);
545         fprintf(stderr, " of all L1-dcache hits  ");
546 }
547
/*
 * Print a plain event count, optionally per-CPU (-A), followed by a
 * derived annotation (insns/cycle, miss rates, GHz, M/sec) computed
 * from the shadow stats filled in by update_shadow_stats().
 */
static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
{
        double total, ratio = 0.0;
        char cpustr[16] = { '\0', };
        const char *fmt;

        if (csv_output)
                fmt = "%s%.0f%s%s";
        else if (big_num)
                fmt = "%s%'18.0f%s%-24s";       /* %' needs the setlocale() done in cmd_stat */
        else
                fmt = "%s%18.0f%s%-24s";

        if (no_aggr)
                sprintf(cpustr, "CPU%*d%s",
                        csv_output ? 0 : -4,
                        evsel_list->cpus->map[cpu], csv_sep);
        else
                cpu = 0;        /* aggregated shadow stats live in slot 0 */

        fprintf(stderr, fmt, cpustr, avg, csv_sep, event_name(evsel));

        if (evsel->cgrp)
                fprintf(stderr, "%s%s", csv_sep, evsel->cgrp->name);

        /* CSV output carries raw numbers only, no derived ratios. */
        if (csv_output)
                return;

        if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
                /* instructions per cycle */
                total = avg_stats(&runtime_cycles_stats[cpu]);

                if (total)
                        ratio = avg / total;

                fprintf(stderr, " #    %4.2f  insns per cycle        ", ratio);

                /* plus stalled cycles per instruction, if available */
                total = avg_stats(&runtime_stalled_cycles_stats[cpu]);

                if (total && avg) {
                        ratio = total / avg;
                        fprintf(stderr, "\n                                            #    %4.2f  stalled cycles per insn", ratio);
                }

        } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
                        runtime_branches_stats[cpu].n != 0) {
                print_branch_misses(cpu, evsel, avg);
        } else if (
                /* L1-dcache read misses (raw HW_CACHE config match) */
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
                        runtime_l1_dcache_stats[cpu].n != 0) {
                print_l1_dcache_misses(cpu, evsel, avg);
        } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
                        runtime_cacherefs_stats[cpu].n != 0) {
                /* cache misses as a fraction of all cache references */
                total = avg_stats(&runtime_cacherefs_stats[cpu]);

                if (total)
                        ratio = avg * 100 / total;

                fprintf(stderr, " # %8.3f %% of all cache refs    ", ratio);

        } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES)) {
                print_stalled_cycles(cpu, evsel, avg);
        } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
                /* cycles / nsecs == GHz */
                total = avg_stats(&runtime_nsecs_stats[cpu]);

                if (total)
                        ratio = 1.0 * avg / total;

                fprintf(stderr, " # %8.3f GHz                    ", ratio);
        } else if (runtime_nsecs_stats[cpu].n != 0) {
                /* generic fallback: events per millisecond (M/sec) */
                total = avg_stats(&runtime_nsecs_stats[cpu]);

                if (total)
                        ratio = 1000.0 * avg / total;

                fprintf(stderr, " # %8.3f M/sec                  ", ratio);
        } else {
                /* nothing sensible to normalize against: pad the column */
                fprintf(stderr, "                                   ");
        }
}
630
631 /*
632  * Print out the results of a single counter:
633  * aggregated counts in system-wide mode
634  */
static void print_counter_aggr(struct perf_evsel *counter)
{
        struct perf_stat *ps = counter->priv;
        double avg = avg_stats(&ps->res_stats[0]);
        int scaled = counter->counts->scaled;

        /* scaled == -1 means the counter never actually ran */
        if (scaled == -1) {
                fprintf(stderr, "%*s%s%*s",
                        csv_output ? 0 : 18,
                        "<not counted>",
                        csv_sep,
                        csv_output ? 0 : -24,
                        event_name(counter));

                if (counter->cgrp)
                        fprintf(stderr, "%s%s", csv_sep, counter->cgrp->name);

                fputc('\n', stderr);
                return;
        }

        if (nsec_counter(counter))
                nsec_printout(-1, counter, avg);
        else
                abs_printout(-1, counter, avg);

        if (csv_output) {
                fputc('\n', stderr);
                return;
        }

        print_noise(counter, avg);

        /* scaled != 0: counter was multiplexed; show the active fraction */
        if (scaled) {
                double avg_enabled, avg_running;

                avg_enabled = avg_stats(&ps->res_stats[1]);
                avg_running = avg_stats(&ps->res_stats[2]);

                fprintf(stderr, "  (%.2f%%)", 100 * avg_running / avg_enabled);
        }
        fprintf(stderr, "\n");
}
678
679 /*
680  * Print out the results of a single counter:
681  * does not use aggregated count in system-wide
682  */
static void print_counter(struct perf_evsel *counter)
{
        u64 ena, run, val;
        int cpu;

        for (cpu = 0; cpu < evsel_list->cpus->nr; cpu++) {
                val = counter->counts->cpu[cpu].val;
                ena = counter->counts->cpu[cpu].ena;
                run = counter->counts->cpu[cpu].run;
                /* never enabled or never ran on this CPU */
                if (run == 0 || ena == 0) {
                        fprintf(stderr, "CPU%*d%s%*s%s%*s",
                                csv_output ? 0 : -4,
                                evsel_list->cpus->map[cpu], csv_sep,
                                csv_output ? 0 : 18,
                                "<not counted>", csv_sep,
                                csv_output ? 0 : -24,
                                event_name(counter));

                        if (counter->cgrp)
                                fprintf(stderr, "%s%s", csv_sep, counter->cgrp->name);

                        fputc('\n', stderr);
                        continue;
                }

                if (nsec_counter(counter))
                        nsec_printout(cpu, counter, val);
                else
                        abs_printout(cpu, counter, val);

                if (!csv_output) {
                        print_noise(counter, 1.0);

                        /* run < ena: counter was multiplexed on this CPU */
                        if (run != ena)
                                fprintf(stderr, "  (%.2f%%)", 100.0 * run / ena);
                }
                fputc('\n', stderr);
        }
}
722
/*
 * Print the final report: header line, one line per counter, and the
 * wall-clock summary.  CSV mode (-x) suppresses all decoration.
 */
static void print_stat(int argc, const char **argv)
{
        struct perf_evsel *counter;
        int i;

        /* keep workload stdout from interleaving with our stderr report */
        fflush(stdout);

        if (!csv_output) {
                fprintf(stderr, "\n");
                fprintf(stderr, " Performance counter stats for ");
                if(target_pid == -1 && target_tid == -1) {
                        fprintf(stderr, "\'%s", argv[0]);
                        for (i = 1; i < argc; i++)
                                fprintf(stderr, " %s", argv[i]);
                } else if (target_pid != -1)
                        fprintf(stderr, "process id \'%d", target_pid);
                else
                        fprintf(stderr, "thread id \'%d", target_tid);

                fprintf(stderr, "\'");
                if (run_count > 1)
                        fprintf(stderr, " (%d runs)", run_count);
                fprintf(stderr, ":\n\n");
        }

        if (no_aggr) {
                list_for_each_entry(counter, &evsel_list->entries, node)
                        print_counter(counter);
        } else {
                list_for_each_entry(counter, &evsel_list->entries, node)
                        print_counter_aggr(counter);
        }

        if (!csv_output) {
                fprintf(stderr, "\n");
                fprintf(stderr, " %18.9f  seconds time elapsed",
                                avg_stats(&walltime_nsecs_stats)/1e9);
                /* wall-clock noise only makes sense across repeats */
                if (run_count > 1) {
                        print_noise_pct(stddev_stats(&walltime_nsecs_stats),
                                        avg_stats(&walltime_nsecs_stats));
                }
                fprintf(stderr, "\n\n");
        }
}
767
/* Last terminating signal received, re-raised at exit (see sig_atexit). */
static volatile int signr = -1;

/*
 * SIGINT/SIGALRM/SIGABRT handler: when no child workload is being
 * monitored, just end the counting loop; a forked child receives the
 * signal itself and we only remember it for re-raising later.
 */
static void skip_signal(int signo)
{
        if(child_pid == -1)
                done = 1;

        signr = signo;
}
777
778 static void sig_atexit(void)
779 {
780         if (child_pid != -1)
781                 kill(child_pid, SIGTERM);
782
783         if (signr == -1)
784                 return;
785
786         signal(signr, SIG_DFL);
787         kill(getpid(), signr);
788 }
789
/* Usage lines shown by -h and usage_with_options(). */
static const char * const stat_usage[] = {
        "perf stat [<options>] [<command>]",
        NULL
};
794
795 static int stat__set_big_num(const struct option *opt __used,
796                              const char *s __used, int unset)
797 {
798         big_num_opt = unset ? 0 : 1;
799         return 0;
800 }
801
/* Command line option table for 'perf stat'. */
static const struct option options[] = {
        OPT_CALLBACK('e', "event", &evsel_list, "event",
                     "event selector. use 'perf list' to list available events",
                     parse_events),
        OPT_CALLBACK(0, "filter", &evsel_list, "filter",
                     "event filter", parse_filter),
        OPT_BOOLEAN('i', "no-inherit", &no_inherit,
                    "child tasks do not inherit counters"),
        OPT_INTEGER('p', "pid", &target_pid,
                    "stat events on existing process id"),
        OPT_INTEGER('t', "tid", &target_tid,
                    "stat events on existing thread id"),
        OPT_BOOLEAN('a', "all-cpus", &system_wide,
                    "system-wide collection from all CPUs"),
        OPT_BOOLEAN('c', "scale", &scale,
                    "scale/normalize counters"),
        OPT_INCR('v', "verbose", &verbose,
                    "be more verbose (show counter open errors, etc)"),
        OPT_INTEGER('r', "repeat", &run_count,
                    "repeat command and print average + stddev (max: 100)"),
        OPT_BOOLEAN('n', "null", &null_run,
                    "null run - dont start any counters"),
        OPT_BOOLEAN('d', "detailed", &detailed_run,
                    "detailed run - start a lot of events"),
        OPT_BOOLEAN('S', "sync", &sync_run,
                    "call sync() before starting a run"),
        /* callback (not plain boolean) so we can tell -B from --no-big-num */
        OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
                           "print large numbers with thousands\' separators",
                           stat__set_big_num),
        OPT_STRING('C', "cpu", &cpu_list, "cpu",
                    "list of cpus to monitor in system-wide"),
        OPT_BOOLEAN('A', "no-aggr", &no_aggr,
                    "disable CPU count aggregation"),
        OPT_STRING('x', "field-separator", &csv_sep, "separator",
                   "print counts with custom separator"),
        OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
                     "monitor event in cgroup name only",
                     parse_cgroups),
        OPT_END()
};
842
/*
 * Entry point for 'perf stat': parse options, build the event list,
 * set up thread/CPU maps, run the workload run_count times and print
 * the aggregated counter statistics.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, or the status
 * of the last run_perf_stat() invocation.
 */
int cmd_stat(int argc, const char **argv, const char *prefix __used)
{
	struct perf_evsel *pos;
	int status = -ENOMEM;

	setlocale(LC_ALL, "");

	evsel_list = perf_evlist__new(NULL, NULL);
	if (evsel_list == NULL)
		return -ENOMEM;

	argc = parse_options(argc, argv, options, stat_usage,
		PARSE_OPT_STOP_AT_NON_OPTION);

	/* -x <sep> implies CSV output; otherwise use the default separator */
	if (csv_sep)
		csv_output = true;
	else
		csv_sep = DEFAULT_SEPARATOR;

	/*
	 * let the spreadsheet do the pretty-printing
	 */
	if (csv_output) {
		/* User explicitly passed -B? */
		if (big_num_opt == 1) {
			fprintf(stderr, "-B option not supported with -x\n");
			usage_with_options(stat_usage, options);
		} else /* Nope, so disable big number formatting */
			big_num = false;
	} else if (big_num_opt == 0) /* User passed --no-big-num */
		big_num = false;

	/* Need either a workload to run or an existing pid/tid to attach to */
	if (!argc && target_pid == -1 && target_tid == -1)
		usage_with_options(stat_usage, options);
	if (run_count <= 0)
		usage_with_options(stat_usage, options);

	/* no_aggr, cgroup are for system-wide only */
	if ((no_aggr || nr_cgroups) && !system_wide) {
		fprintf(stderr, "both cgroup and no-aggregation "
			"modes only available in system-wide mode\n");

		usage_with_options(stat_usage, options);
	}

	/* -d/--detailed: add the detailed event set (even if -e events were given) */
	if (detailed_run) {
		size_t c;

		for (c = 0; c < ARRAY_SIZE(detailed_attrs); ++c) {
			pos = perf_evsel__new(&detailed_attrs[c], c);
			if (pos == NULL)
				goto out;
			perf_evlist__add(evsel_list, pos);
		}
	}
	/* Set attrs if no event is selected and !null_run and !detailed_run */
	if (!detailed_run && !null_run && !evsel_list->nr_entries) {
		size_t c;

		for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) {
			pos = perf_evsel__new(&default_attrs[c], c);
			if (pos == NULL)
				goto out;
			perf_evlist__add(evsel_list, pos);
		}
	}

	/* -p <pid> without -t: monitor that pid's main thread */
	if (target_pid != -1)
		target_tid = target_pid;

	evsel_list->threads = thread_map__new(target_pid, target_tid);
	if (evsel_list->threads == NULL) {
		pr_err("Problems finding threads of monitor\n");
		usage_with_options(stat_usage, options);
	}

	/* per-task mode uses a dummy single-entry CPU map */
	if (system_wide)
		evsel_list->cpus = cpu_map__new(cpu_list);
	else
		evsel_list->cpus = cpu_map__dummy_new();

	if (evsel_list->cpus == NULL) {
		perror("failed to parse CPUs map");
		usage_with_options(stat_usage, options);
		return -1; /* NOTE(review): unreachable if usage_with_options() exits — confirm */
	}

	/* Allocate per-event stats, per-cpu counts and fd arrays up front */
	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_evsel__alloc_stat_priv(pos) < 0 ||
		    perf_evsel__alloc_counts(pos, evsel_list->cpus->nr) < 0 ||
		    perf_evsel__alloc_fd(pos, evsel_list->cpus->nr, evsel_list->threads->nr) < 0)
			goto out_free_fd;
	}

	/*
	 * We don't want to block the signals - that would cause
	 * child tasks to inherit that and Ctrl-C would not work.
	 * What we want is for Ctrl-C to work in the exec()-ed
	 * task, but being ignored by perf stat itself:
	 */
	atexit(sig_atexit);
	signal(SIGINT,  skip_signal);
	signal(SIGALRM, skip_signal);
	signal(SIGABRT, skip_signal);

	/* -r <N>: repeat the measurement; print_stat() averages across runs */
	status = 0;
	for (run_idx = 0; run_idx < run_count; run_idx++) {
		if (run_count != 1 && verbose)
			fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx + 1);

		if (sync_run)
			sync();

		status = run_perf_stat(argc, argv);
	}

	if (status != -1)
		print_stat(argc, argv);
out_free_fd:
	list_for_each_entry(pos, &evsel_list->entries, node)
		perf_evsel__free_stat_priv(pos);
	perf_evlist__delete_maps(evsel_list);
out:
	perf_evlist__delete(evsel_list);
	return status;
}