perf trace: Add fallback definition of EFD_SEMAPHORE
[pandora-kernel.git] / tools / perf / builtin-trace.c
#include <traceevent/event-parse.h>
#include "builtin.h"
#include "util/color.h"
#include "util/debug.h"
#include "util/evlist.h"
#include "util/machine.h"
#include "util/session.h"
#include "util/thread.h"
#include "util/parse-options.h"
#include "util/strlist.h"
#include "util/intlist.h"
#include "util/thread_map.h"
#include "util/stat.h"
#include "trace-event.h"
#include "util/parse-events.h"

#include <libaudit.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <linux/futex.h>
22
/*
 * For older distros: fallback values for constants that the system headers
 * included above may not provide. Each one is only defined here when the
 * headers did not already define it.
 */
#ifndef MAP_STACK
# define MAP_STACK              0x20000
#endif

#ifndef MADV_HWPOISON
# define MADV_HWPOISON          100
#endif

#ifndef MADV_MERGEABLE
# define MADV_MERGEABLE         12
#endif

#ifndef MADV_UNMERGEABLE
# define MADV_UNMERGEABLE       13
#endif

#ifndef EFD_SEMAPHORE
# define EFD_SEMAPHORE          1
#endif
43
/*
 * Accessor for one tracepoint field: where the field sits relative to a
 * sample's raw_data and which function is used to read it.
 */
struct tp_field {
        int offset;     /* added to sample->raw_data by the accessors */
        union {
                /* installed by tp_field__init_uint() */
                u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
                /* installed by tp_field__init_ptr() */
                void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
        };
};
51
52 #define TP_UINT_FIELD(bits) \
53 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
54 { \
55         return *(u##bits *)(sample->raw_data + field->offset); \
56 }
57
58 TP_UINT_FIELD(8);
59 TP_UINT_FIELD(16);
60 TP_UINT_FIELD(32);
61 TP_UINT_FIELD(64);
62
63 #define TP_UINT_FIELD__SWAPPED(bits) \
64 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
65 { \
66         u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
67         return bswap_##bits(value);\
68 }
69
70 TP_UINT_FIELD__SWAPPED(16);
71 TP_UINT_FIELD__SWAPPED(32);
72 TP_UINT_FIELD__SWAPPED(64);
73
74 static int tp_field__init_uint(struct tp_field *field,
75                                struct format_field *format_field,
76                                bool needs_swap)
77 {
78         field->offset = format_field->offset;
79
80         switch (format_field->size) {
81         case 1:
82                 field->integer = tp_field__u8;
83                 break;
84         case 2:
85                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
86                 break;
87         case 4:
88                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
89                 break;
90         case 8:
91                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
92                 break;
93         default:
94                 return -1;
95         }
96
97         return 0;
98 }
99
100 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
101 {
102         return sample->raw_data + field->offset;
103 }
104
105 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
106 {
107         field->offset = format_field->offset;
108         field->pointer = tp_field__ptr;
109         return 0;
110 }
111
/*
 * Field accessors kept in evsel->priv for the syscall tracepoints.
 *
 * 'id' is initialised for every such evsel by perf_evsel__init_syscall_tp().
 * 'args' (set up for sys_enter) and 'ret' (set up for sys_exit) share storage,
 * as each evsel only gets one of the two initialised.
 */
struct syscall_tp {
        struct tp_field id;
        union {
                struct tp_field args, ret;
        };
};
118
119 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
120                                           struct tp_field *field,
121                                           const char *name)
122 {
123         struct format_field *format_field = perf_evsel__field(evsel, name);
124
125         if (format_field == NULL)
126                 return -1;
127
128         return tp_field__init_uint(field, format_field, evsel->needs_swap);
129 }
130
/*
 * Initialise member 'name' of the struct syscall_tp stored in evsel->priv
 * from the tracepoint field carrying that same name. Evaluates to the return
 * value of perf_evsel__init_tp_uint_field().
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
        ({ struct syscall_tp *sc = evsel->priv;\
           perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
134
/*
 * Look up the tracepoint field called @name in @evsel and set up @field as a
 * pointer accessor for it.
 *
 * Returns -1 when the field does not exist, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
                                         struct tp_field *field,
                                         const char *name)
{
        struct format_field *fmt = perf_evsel__field(evsel, name);

        return fmt ? tp_field__init_ptr(field, fmt) : -1;
}
146
/*
 * Pointer flavour of the syscall_tp member initialiser: set up member 'name'
 * of the struct syscall_tp stored in evsel->priv from the tracepoint field
 * carrying that same name. Evaluates to the return value of
 * perf_evsel__init_tp_ptr_field().
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
        ({ struct syscall_tp *sc = evsel->priv;\
           perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
150
151 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
152 {
153         zfree(&evsel->priv);
154         perf_evsel__delete(evsel);
155 }
156
157 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
158 {
159         evsel->priv = malloc(sizeof(struct syscall_tp));
160         if (evsel->priv != NULL) {
161                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
162                         goto out_delete;
163
164                 evsel->handler = handler;
165                 return 0;
166         }
167
168         return -ENOMEM;
169
170 out_delete:
171         zfree(&evsel->priv);
172         return -ENOENT;
173 }
174
/*
 * Create the evsel for the syscall tracepoint named @direction and prepare it
 * with perf_evsel__init_syscall_tp().
 *
 * "raw_syscalls" is tried first, then "syscalls". Returns the evsel, or NULL
 * when neither tracepoint could be created or the initialisation failed (the
 * evsel is deleted in the latter case).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
        struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

        /* older kernels (e.g., RHEL6) use syscalls:{enter,exit} */
        if (evsel == NULL)
                evsel = perf_evsel__newtp("syscalls", direction);

        if (evsel == NULL)
                return NULL;

        if (perf_evsel__init_syscall_tp(evsel, handler)) {
                perf_evsel__delete_priv(evsel);
                return NULL;
        }

        return evsel;
}
194
/*
 * Read member 'name' of the struct syscall_tp stored in evsel->priv from
 * 'sample' by calling that member's integer accessor.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
        ({ struct syscall_tp *fields = evsel->priv; \
           fields->name.integer(&fields->name, sample); })

/*
 * Same as above, but calls the member's pointer accessor.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
        ({ struct syscall_tp *fields = evsel->priv; \
           fields->name.pointer(&fields->name, sample); })
202
203 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
204                                           void *sys_enter_handler,
205                                           void *sys_exit_handler)
206 {
207         int ret = -1;
208         struct perf_evsel *sys_enter, *sys_exit;
209
210         sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
211         if (sys_enter == NULL)
212                 goto out;
213
214         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
215                 goto out_delete_sys_enter;
216
217         sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
218         if (sys_exit == NULL)
219                 goto out_delete_sys_enter;
220
221         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
222                 goto out_delete_sys_exit;
223
224         perf_evlist__add(evlist, sys_enter);
225         perf_evlist__add(evlist, sys_exit);
226
227         ret = 0;
228 out:
229         return ret;
230
231 out_delete_sys_exit:
232         perf_evsel__delete_priv(sys_exit);
233 out_delete_sys_enter:
234         perf_evsel__delete_priv(sys_enter);
235         goto out;
236 }
237
238
/*
 * Context handed to the syscall_arg__scnprintf_*() formatters for a single
 * syscall argument.
 */
struct syscall_arg {
        unsigned long val;      /* the argument value to format */
        struct thread *thread;
        struct trace  *trace;
        void          *parm;    /* formatter specific data, e.g. a struct strarray */
        u8            idx;      /* the open flags formatter uses idx + 1 to refer to the next argument */
        u8            mask;     /* bits set by formatters to mask other arguments */
};
247
/*
 * A table of names for consecutive integer values: entries[i] is the name for
 * the value (offset + i), see __syscall_arg__scnprintf_strarray().
 */
struct strarray {
        int         offset;
        int         nr_entries;
        const char **entries;
};

/* Define strarray__<array> covering 'array', with an offset of zero. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
        .nr_entries = ARRAY_SIZE(array), \
        .entries = array, \
}

/* Define strarray__<array> covering 'array', whose first entry names the value 'off'. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
        .offset     = off, \
        .nr_entries = ARRAY_SIZE(array), \
        .entries = array, \
}
264
265 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
266                                                 const char *intfmt,
267                                                 struct syscall_arg *arg)
268 {
269         struct strarray *sa = arg->parm;
270         int idx = arg->val - sa->offset;
271
272         if (idx < 0 || idx >= sa->nr_entries)
273                 return scnprintf(bf, size, intfmt, arg->val);
274
275         return scnprintf(bf, size, "%s", sa->entries[idx]);
276 }
277
/*
 * strarray formatter that prints values without a name in the table as a
 * decimal number.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
                                              struct syscall_arg *arg)
{
        const char *unnamed_fmt = "%d";

        return __syscall_arg__scnprintf_strarray(bf, size, unnamed_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
285
/*
 * strarray formatter that prints values without a name in the table as a
 * hexadecimal number.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
                                                 struct syscall_arg *arg)
{
        const char *unnamed_fmt = "%#x";

        return __syscall_arg__scnprintf_strarray(bf, size, unnamed_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
293
/*
 * File descriptor formatter. Only declared at this point so that the
 * formatters and the table below can refer to it; the definition is not part
 * of this section of the file.
 */
static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
                                        struct syscall_arg *arg);

#define SCA_FD syscall_arg__scnprintf_fd
298
299 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
300                                            struct syscall_arg *arg)
301 {
302         int fd = arg->val;
303
304         if (fd == AT_FDCWD)
305                 return scnprintf(bf, size, "CWD");
306
307         return syscall_arg__scnprintf_fd(bf, size, arg);
308 }
309
310 #define SCA_FDAT syscall_arg__scnprintf_fd_at
311
/*
 * Formatter used for the fd argument of close(). Only declared at this point;
 * the definition is not part of this section of the file.
 */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
                                              struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
316
317 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
318                                          struct syscall_arg *arg)
319 {
320         return scnprintf(bf, size, "%#lx", arg->val);
321 }
322
323 #define SCA_HEX syscall_arg__scnprintf_hex
324
325 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
326                                                struct syscall_arg *arg)
327 {
328         int printed = 0, prot = arg->val;
329
330         if (prot == PROT_NONE)
331                 return scnprintf(bf, size, "NONE");
332 #define P_MMAP_PROT(n) \
333         if (prot & PROT_##n) { \
334                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
335                 prot &= ~PROT_##n; \
336         }
337
338         P_MMAP_PROT(EXEC);
339         P_MMAP_PROT(READ);
340         P_MMAP_PROT(WRITE);
341 #ifdef PROT_SEM
342         P_MMAP_PROT(SEM);
343 #endif
344         P_MMAP_PROT(GROWSDOWN);
345         P_MMAP_PROT(GROWSUP);
346 #undef P_MMAP_PROT
347
348         if (prot)
349                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
350
351         return printed;
352 }
353
354 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
355
356 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
357                                                 struct syscall_arg *arg)
358 {
359         int printed = 0, flags = arg->val;
360
361 #define P_MMAP_FLAG(n) \
362         if (flags & MAP_##n) { \
363                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
364                 flags &= ~MAP_##n; \
365         }
366
367         P_MMAP_FLAG(SHARED);
368         P_MMAP_FLAG(PRIVATE);
369 #ifdef MAP_32BIT
370         P_MMAP_FLAG(32BIT);
371 #endif
372         P_MMAP_FLAG(ANONYMOUS);
373         P_MMAP_FLAG(DENYWRITE);
374         P_MMAP_FLAG(EXECUTABLE);
375         P_MMAP_FLAG(FILE);
376         P_MMAP_FLAG(FIXED);
377         P_MMAP_FLAG(GROWSDOWN);
378 #ifdef MAP_HUGETLB
379         P_MMAP_FLAG(HUGETLB);
380 #endif
381         P_MMAP_FLAG(LOCKED);
382         P_MMAP_FLAG(NONBLOCK);
383         P_MMAP_FLAG(NORESERVE);
384         P_MMAP_FLAG(POPULATE);
385         P_MMAP_FLAG(STACK);
386 #ifdef MAP_UNINITIALIZED
387         P_MMAP_FLAG(UNINITIALIZED);
388 #endif
389 #undef P_MMAP_FLAG
390
391         if (flags)
392                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
393
394         return printed;
395 }
396
397 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
398
399 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
400                                                       struct syscall_arg *arg)
401 {
402         int behavior = arg->val;
403
404         switch (behavior) {
405 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
406         P_MADV_BHV(NORMAL);
407         P_MADV_BHV(RANDOM);
408         P_MADV_BHV(SEQUENTIAL);
409         P_MADV_BHV(WILLNEED);
410         P_MADV_BHV(DONTNEED);
411         P_MADV_BHV(REMOVE);
412         P_MADV_BHV(DONTFORK);
413         P_MADV_BHV(DOFORK);
414         P_MADV_BHV(HWPOISON);
415 #ifdef MADV_SOFT_OFFLINE
416         P_MADV_BHV(SOFT_OFFLINE);
417 #endif
418         P_MADV_BHV(MERGEABLE);
419         P_MADV_BHV(UNMERGEABLE);
420 #ifdef MADV_HUGEPAGE
421         P_MADV_BHV(HUGEPAGE);
422 #endif
423 #ifdef MADV_NOHUGEPAGE
424         P_MADV_BHV(NOHUGEPAGE);
425 #endif
426 #ifdef MADV_DONTDUMP
427         P_MADV_BHV(DONTDUMP);
428 #endif
429 #ifdef MADV_DODUMP
430         P_MADV_BHV(DODUMP);
431 #endif
432 #undef P_MADV_PHV
433         default: break;
434         }
435
436         return scnprintf(bf, size, "%#x", behavior);
437 }
438
439 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
440
441 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
442                                            struct syscall_arg *arg)
443 {
444         int printed = 0, op = arg->val;
445
446         if (op == 0)
447                 return scnprintf(bf, size, "NONE");
448 #define P_CMD(cmd) \
449         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
450                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
451                 op &= ~LOCK_##cmd; \
452         }
453
454         P_CMD(SH);
455         P_CMD(EX);
456         P_CMD(NB);
457         P_CMD(UN);
458         P_CMD(MAND);
459         P_CMD(RW);
460         P_CMD(READ);
461         P_CMD(WRITE);
462 #undef P_OP
463
464         if (op)
465                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
466
467         return printed;
468 }
469
470 #define SCA_FLOCK syscall_arg__scnprintf_flock
471
472 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
473 {
474         enum syscall_futex_args {
475                 SCF_UADDR   = (1 << 0),
476                 SCF_OP      = (1 << 1),
477                 SCF_VAL     = (1 << 2),
478                 SCF_TIMEOUT = (1 << 3),
479                 SCF_UADDR2  = (1 << 4),
480                 SCF_VAL3    = (1 << 5),
481         };
482         int op = arg->val;
483         int cmd = op & FUTEX_CMD_MASK;
484         size_t printed = 0;
485
486         switch (cmd) {
487 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
488         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
489         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
490         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
491         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
492         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
493         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
494         P_FUTEX_OP(WAKE_OP);                                                      break;
495         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
496         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
497         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
498         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
499         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
500         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
501         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
502         }
503
504         if (op & FUTEX_PRIVATE_FLAG)
505                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
506
507         if (op & FUTEX_CLOCK_REALTIME)
508                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
509
510         return printed;
511 }
512
513 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
514
/*
 * Name tables for arguments that take one value out of a consecutive range.
 * Each table is wrapped in a struct strarray (strarray__<table>) so that it
 * can be attached to a syscall argument together with SCA_STRARRAY.
 */

/* the first entry names the value 1 */
static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);

static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
static DEFINE_STRARRAY(itimers);

/*
 * NOTE(review): "DATA" and "HOLE" are guarded independently, so "HOLE" would
 * move one slot down if only SEEK_HOLE were defined.
 */
static const char *whences[] = { "SET", "CUR", "END",
#ifdef SEEK_DATA
"DATA",
#endif
#ifdef SEEK_HOLE
"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);

/* NOTE(review): only the entries from "F_GETLK64" on carry the "F_" prefix. */
static const char *fcntl_cmds[] = {
        "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
        "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
        "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
        "F_GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);

static const char *rlimit_resources[] = {
        "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
        "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
        "RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
static DEFINE_STRARRAY(sighow);

static const char *clockid[] = {
        "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
        "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
};
static DEFINE_STRARRAY(clockid);

static const char *socket_families[] = {
        "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
        "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
        "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
        "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
        "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
        "ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
564
565 #ifndef SOCK_TYPE_MASK
566 #define SOCK_TYPE_MASK 0xf
567 #endif
568
569 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
570                                                       struct syscall_arg *arg)
571 {
572         size_t printed;
573         int type = arg->val,
574             flags = type & ~SOCK_TYPE_MASK;
575
576         type &= SOCK_TYPE_MASK;
577         /*
578          * Can't use a strarray, MIPS may override for ABI reasons.
579          */
580         switch (type) {
581 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
582         P_SK_TYPE(STREAM);
583         P_SK_TYPE(DGRAM);
584         P_SK_TYPE(RAW);
585         P_SK_TYPE(RDM);
586         P_SK_TYPE(SEQPACKET);
587         P_SK_TYPE(DCCP);
588         P_SK_TYPE(PACKET);
589 #undef P_SK_TYPE
590         default:
591                 printed = scnprintf(bf, size, "%#x", type);
592         }
593
594 #define P_SK_FLAG(n) \
595         if (flags & SOCK_##n) { \
596                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
597                 flags &= ~SOCK_##n; \
598         }
599
600         P_SK_FLAG(CLOEXEC);
601         P_SK_FLAG(NONBLOCK);
602 #undef P_SK_FLAG
603
604         if (flags)
605                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
606
607         return printed;
608 }
609
610 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
611
/*
 * Fallback values for MSG_* flags named by the msg flags formatter, used only
 * when the headers included so far did not define them.
 */
#ifndef MSG_PROBE
#define MSG_PROBE            0x10
#endif
#ifndef MSG_WAITFORONE
#define MSG_WAITFORONE  0x10000
#endif
#ifndef MSG_SENDPAGE_NOTLAST
#define MSG_SENDPAGE_NOTLAST 0x20000
#endif
#ifndef MSG_FASTOPEN
#define MSG_FASTOPEN         0x20000000
#endif
624
625 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
626                                                struct syscall_arg *arg)
627 {
628         int printed = 0, flags = arg->val;
629
630         if (flags == 0)
631                 return scnprintf(bf, size, "NONE");
632 #define P_MSG_FLAG(n) \
633         if (flags & MSG_##n) { \
634                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
635                 flags &= ~MSG_##n; \
636         }
637
638         P_MSG_FLAG(OOB);
639         P_MSG_FLAG(PEEK);
640         P_MSG_FLAG(DONTROUTE);
641         P_MSG_FLAG(TRYHARD);
642         P_MSG_FLAG(CTRUNC);
643         P_MSG_FLAG(PROBE);
644         P_MSG_FLAG(TRUNC);
645         P_MSG_FLAG(DONTWAIT);
646         P_MSG_FLAG(EOR);
647         P_MSG_FLAG(WAITALL);
648         P_MSG_FLAG(FIN);
649         P_MSG_FLAG(SYN);
650         P_MSG_FLAG(CONFIRM);
651         P_MSG_FLAG(RST);
652         P_MSG_FLAG(ERRQUEUE);
653         P_MSG_FLAG(NOSIGNAL);
654         P_MSG_FLAG(MORE);
655         P_MSG_FLAG(WAITFORONE);
656         P_MSG_FLAG(SENDPAGE_NOTLAST);
657         P_MSG_FLAG(FASTOPEN);
658         P_MSG_FLAG(CMSG_CLOEXEC);
659 #undef P_MSG_FLAG
660
661         if (flags)
662                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
663
664         return printed;
665 }
666
667 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
668
669 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
670                                                  struct syscall_arg *arg)
671 {
672         size_t printed = 0;
673         int mode = arg->val;
674
675         if (mode == F_OK) /* 0 */
676                 return scnprintf(bf, size, "F");
677 #define P_MODE(n) \
678         if (mode & n##_OK) { \
679                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
680                 mode &= ~n##_OK; \
681         }
682
683         P_MODE(R);
684         P_MODE(W);
685         P_MODE(X);
686 #undef P_MODE
687
688         if (mode)
689                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
690
691         return printed;
692 }
693
694 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
695
696 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
697                                                struct syscall_arg *arg)
698 {
699         int printed = 0, flags = arg->val;
700
701         if (!(flags & O_CREAT))
702                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
703
704         if (flags == 0)
705                 return scnprintf(bf, size, "RDONLY");
706 #define P_FLAG(n) \
707         if (flags & O_##n) { \
708                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
709                 flags &= ~O_##n; \
710         }
711
712         P_FLAG(APPEND);
713         P_FLAG(ASYNC);
714         P_FLAG(CLOEXEC);
715         P_FLAG(CREAT);
716         P_FLAG(DIRECT);
717         P_FLAG(DIRECTORY);
718         P_FLAG(EXCL);
719         P_FLAG(LARGEFILE);
720         P_FLAG(NOATIME);
721         P_FLAG(NOCTTY);
722 #ifdef O_NONBLOCK
723         P_FLAG(NONBLOCK);
724 #elif O_NDELAY
725         P_FLAG(NDELAY);
726 #endif
727 #ifdef O_PATH
728         P_FLAG(PATH);
729 #endif
730         P_FLAG(RDWR);
731 #ifdef O_DSYNC
732         if ((flags & O_SYNC) == O_SYNC)
733                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
734         else {
735                 P_FLAG(DSYNC);
736         }
737 #else
738         P_FLAG(SYNC);
739 #endif
740         P_FLAG(TRUNC);
741         P_FLAG(WRONLY);
742 #undef P_FLAG
743
744         if (flags)
745                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
746
747         return printed;
748 }
749
750 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
751
752 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
753                                                    struct syscall_arg *arg)
754 {
755         int printed = 0, flags = arg->val;
756
757         if (flags == 0)
758                 return scnprintf(bf, size, "NONE");
759 #define P_FLAG(n) \
760         if (flags & EFD_##n) { \
761                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
762                 flags &= ~EFD_##n; \
763         }
764
765         P_FLAG(SEMAPHORE);
766         P_FLAG(CLOEXEC);
767         P_FLAG(NONBLOCK);
768 #undef P_FLAG
769
770         if (flags)
771                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
772
773         return printed;
774 }
775
776 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
777
778 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
779                                                 struct syscall_arg *arg)
780 {
781         int printed = 0, flags = arg->val;
782
783 #define P_FLAG(n) \
784         if (flags & O_##n) { \
785                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
786                 flags &= ~O_##n; \
787         }
788
789         P_FLAG(CLOEXEC);
790         P_FLAG(NONBLOCK);
791 #undef P_FLAG
792
793         if (flags)
794                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
795
796         return printed;
797 }
798
799 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
800
801 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
802 {
803         int sig = arg->val;
804
805         switch (sig) {
806 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
807         P_SIGNUM(HUP);
808         P_SIGNUM(INT);
809         P_SIGNUM(QUIT);
810         P_SIGNUM(ILL);
811         P_SIGNUM(TRAP);
812         P_SIGNUM(ABRT);
813         P_SIGNUM(BUS);
814         P_SIGNUM(FPE);
815         P_SIGNUM(KILL);
816         P_SIGNUM(USR1);
817         P_SIGNUM(SEGV);
818         P_SIGNUM(USR2);
819         P_SIGNUM(PIPE);
820         P_SIGNUM(ALRM);
821         P_SIGNUM(TERM);
822         P_SIGNUM(STKFLT);
823         P_SIGNUM(CHLD);
824         P_SIGNUM(CONT);
825         P_SIGNUM(STOP);
826         P_SIGNUM(TSTP);
827         P_SIGNUM(TTIN);
828         P_SIGNUM(TTOU);
829         P_SIGNUM(URG);
830         P_SIGNUM(XCPU);
831         P_SIGNUM(XFSZ);
832         P_SIGNUM(VTALRM);
833         P_SIGNUM(PROF);
834         P_SIGNUM(WINCH);
835         P_SIGNUM(IO);
836         P_SIGNUM(PWR);
837         P_SIGNUM(SYS);
838         default: break;
839         }
840
841         return scnprintf(bf, size, "%#x", sig);
842 }
843
844 #define SCA_SIGNUM syscall_arg__scnprintf_signum
845
#define TCGETS          0x5401

/*
 * Names of the tty ioctls, indexed by (cmd - TCGETS).
 *
 * The numbering has gaps, so every run of consecutive commands starts with a
 * designator written as "first command of the run - TCGETS". The designator is
 * an index into this table and not the ioctl number itself: writing the bare
 * number (e.g. [0x27] for TIOCSBRK, which is 0x5427) puts the run one slot too
 * far, since the table starts at 0x5401 rather than at 0x5400.
 *
 * Slots in the gaps are left NULL. Entries are placed by their command number
 * only, also for the commands whose full value carries direction/size bits.
 */
static const char *tioctls[] = {
        "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
        "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
        "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
        "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
        "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
        "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
        "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP",
        [0x5427 - TCGETS] = "TIOCSBRK",
        "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
        "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
        "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
        "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK",
        [0x5440 - TCGETS] = "TIOCGEXCL",
        [0x5450 - TCGETS] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
        "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
        "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
        "TIOCMIWAIT", "TIOCGICOUNT",
        [0x5460 - TCGETS] = "FIOQSIZE",
};
865
/* strarray__tioctls: the first entry of tioctls[] names the value 0x5401 */
static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);

/*
 * Initialiser fragment for a struct syscall_fmt entry: format argument number
 * 'arg' with SCA_STRARRAY, using strarray__<array> as its parameter. 'name' is
 * not used in the expansion.
 */
#define STRARRAY(arg, name, array) \
          .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
          .arg_parm      = { [arg] = &strarray__##array, }
871
872 static struct syscall_fmt {
873         const char *name;
874         const char *alias;
875         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
876         void       *arg_parm[6];
877         bool       errmsg;
878         bool       timeout;
879         bool       hexret;
880 } syscall_fmts[] = {
881         { .name     = "access",     .errmsg = true,
882           .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
883         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
884         { .name     = "brk",        .hexret = true,
885           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
886         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
887         { .name     = "close",      .errmsg = true,
888           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, 
889         { .name     = "connect",    .errmsg = true, },
890         { .name     = "dup",        .errmsg = true,
891           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
892         { .name     = "dup2",       .errmsg = true,
893           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
894         { .name     = "dup3",       .errmsg = true,
895           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
896         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
897         { .name     = "eventfd2",   .errmsg = true,
898           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
899         { .name     = "faccessat",  .errmsg = true,
900           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
901         { .name     = "fadvise64",  .errmsg = true,
902           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
903         { .name     = "fallocate",  .errmsg = true,
904           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
905         { .name     = "fchdir",     .errmsg = true,
906           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
907         { .name     = "fchmod",     .errmsg = true,
908           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
909         { .name     = "fchmodat",   .errmsg = true,
910           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
911         { .name     = "fchown",     .errmsg = true,
912           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
913         { .name     = "fchownat",   .errmsg = true,
914           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
915         { .name     = "fcntl",      .errmsg = true,
916           .arg_scnprintf = { [0] = SCA_FD, /* fd */
917                              [1] = SCA_STRARRAY, /* cmd */ },
918           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
919         { .name     = "fdatasync",  .errmsg = true,
920           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
921         { .name     = "flock",      .errmsg = true,
922           .arg_scnprintf = { [0] = SCA_FD, /* fd */
923                              [1] = SCA_FLOCK, /* cmd */ }, },
924         { .name     = "fsetxattr",  .errmsg = true,
925           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
926         { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
927           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
928         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
929           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
930         { .name     = "fstatfs",    .errmsg = true,
931           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
932         { .name     = "fsync",    .errmsg = true,
933           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
934         { .name     = "ftruncate", .errmsg = true,
935           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
936         { .name     = "futex",      .errmsg = true,
937           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
938         { .name     = "futimesat", .errmsg = true,
939           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
940         { .name     = "getdents",   .errmsg = true,
941           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
942         { .name     = "getdents64", .errmsg = true,
943           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
944         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
945         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
946         { .name     = "ioctl",      .errmsg = true,
947           .arg_scnprintf = { [0] = SCA_FD, /* fd */ 
948                              [1] = SCA_STRHEXARRAY, /* cmd */
949                              [2] = SCA_HEX, /* arg */ },
950           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
951         { .name     = "kill",       .errmsg = true,
952           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
953         { .name     = "linkat",     .errmsg = true,
954           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
955         { .name     = "lseek",      .errmsg = true,
956           .arg_scnprintf = { [0] = SCA_FD, /* fd */
957                              [2] = SCA_STRARRAY, /* whence */ },
958           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
959         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
960         { .name     = "madvise",    .errmsg = true,
961           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
962                              [2] = SCA_MADV_BHV, /* behavior */ }, },
963         { .name     = "mkdirat",    .errmsg = true,
964           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
965         { .name     = "mknodat",    .errmsg = true,
966           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
967         { .name     = "mlock",      .errmsg = true,
968           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
969         { .name     = "mlockall",   .errmsg = true,
970           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
971         { .name     = "mmap",       .hexret = true,
972           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
973                              [2] = SCA_MMAP_PROT, /* prot */
974                              [3] = SCA_MMAP_FLAGS, /* flags */
975                              [4] = SCA_FD,        /* fd */ }, },
976         { .name     = "mprotect",   .errmsg = true,
977           .arg_scnprintf = { [0] = SCA_HEX, /* start */
978                              [2] = SCA_MMAP_PROT, /* prot */ }, },
979         { .name     = "mremap",     .hexret = true,
980           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
981                              [4] = SCA_HEX, /* new_addr */ }, },
982         { .name     = "munlock",    .errmsg = true,
983           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
984         { .name     = "munmap",     .errmsg = true,
985           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
986         { .name     = "name_to_handle_at", .errmsg = true,
987           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
988         { .name     = "newfstatat", .errmsg = true,
989           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
990         { .name     = "open",       .errmsg = true,
991           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
992         { .name     = "open_by_handle_at", .errmsg = true,
993           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
994                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
995         { .name     = "openat",     .errmsg = true,
996           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
997                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
998         { .name     = "pipe2",      .errmsg = true,
999           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1000         { .name     = "poll",       .errmsg = true, .timeout = true, },
1001         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
1002         { .name     = "pread",      .errmsg = true, .alias = "pread64",
1003           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1004         { .name     = "preadv",     .errmsg = true, .alias = "pread",
1005           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1006         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1007         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
1008           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1009         { .name     = "pwritev",    .errmsg = true,
1010           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1011         { .name     = "read",       .errmsg = true,
1012           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1013         { .name     = "readlinkat", .errmsg = true,
1014           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1015         { .name     = "readv",      .errmsg = true,
1016           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1017         { .name     = "recvfrom",   .errmsg = true,
1018           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1019         { .name     = "recvmmsg",   .errmsg = true,
1020           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1021         { .name     = "recvmsg",    .errmsg = true,
1022           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1023         { .name     = "renameat",   .errmsg = true,
1024           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1025         { .name     = "rt_sigaction", .errmsg = true,
1026           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1027         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1028         { .name     = "rt_sigqueueinfo", .errmsg = true,
1029           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1030         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
1031           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1032         { .name     = "select",     .errmsg = true, .timeout = true, },
1033         { .name     = "sendmmsg",    .errmsg = true,
1034           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1035         { .name     = "sendmsg",    .errmsg = true,
1036           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1037         { .name     = "sendto",     .errmsg = true,
1038           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1039         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1040         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1041         { .name     = "shutdown",   .errmsg = true,
1042           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1043         { .name     = "socket",     .errmsg = true,
1044           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1045                              [1] = SCA_SK_TYPE, /* type */ },
1046           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1047         { .name     = "socketpair", .errmsg = true,
1048           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1049                              [1] = SCA_SK_TYPE, /* type */ },
1050           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1051         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
1052         { .name     = "symlinkat",  .errmsg = true,
1053           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1054         { .name     = "tgkill",     .errmsg = true,
1055           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1056         { .name     = "tkill",      .errmsg = true,
1057           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1058         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
1059         { .name     = "unlinkat",   .errmsg = true,
1060           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1061         { .name     = "utimensat",  .errmsg = true,
1062           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1063         { .name     = "write",      .errmsg = true,
1064           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1065         { .name     = "writev",     .errmsg = true,
1066           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1067 };
1068
1069 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1070 {
1071         const struct syscall_fmt *fmt = fmtp;
1072         return strcmp(name, fmt->name);
1073 }
1074
1075 static struct syscall_fmt *syscall_fmt__find(const char *name)
1076 {
1077         const int nmemb = ARRAY_SIZE(syscall_fmts);
1078         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1079 }
1080
/*
 * Per syscall-id state, kept in trace->syscalls.table and filled in lazily
 * by trace__read_syscall_info().
 */
struct syscall {
        /* "syscalls:sys_enter_<name>" tracepoint format, NULL if not found */
        struct event_format *tp_format;
        /* name from audit_syscall_to_name(); NULL means "entry not read yet" */
        const char *name;
        /* true when the event qualifier excludes this syscall */
        bool filtered;
        /* matching syscall_fmts entry, NULL when there is none */
        struct syscall_fmt *fmt;
        /* per argument formatter, allocated by syscall__set_arg_fmts() */
        size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        /* per argument formatter parameter, borrowed from fmt->arg_parm */
        void **arg_parm;
};
1089
1090 static size_t fprintf_duration(unsigned long t, FILE *fp)
1091 {
1092         double duration = (double)t / NSEC_PER_MSEC;
1093         size_t printed = fprintf(fp, "(");
1094
1095         if (duration >= 1.0)
1096                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1097         else if (duration >= 0.01)
1098                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1099         else
1100                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1101         return printed + fprintf(fp, "): ");
1102 }
1103
1104 struct thread_trace {
1105         u64               entry_time;
1106         u64               exit_time;
1107         bool              entry_pending;
1108         unsigned long     nr_events;
1109         char              *entry_str;
1110         double            runtime_ms;
1111         struct {
1112                 int       max;
1113                 char      **table;
1114         } paths;
1115
1116         struct intlist *syscall_stats;
1117 };
1118
1119 static struct thread_trace *thread_trace__new(void)
1120 {
1121         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1122
1123         if (ttrace)
1124                 ttrace->paths.max = -1;
1125
1126         ttrace->syscall_stats = intlist__new(NULL);
1127
1128         return ttrace;
1129 }
1130
1131 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1132 {
1133         struct thread_trace *ttrace;
1134
1135         if (thread == NULL)
1136                 goto fail;
1137
1138         if (thread->priv == NULL)
1139                 thread->priv = thread_trace__new();
1140                 
1141         if (thread->priv == NULL)
1142                 goto fail;
1143
1144         ttrace = thread->priv;
1145         ++ttrace->nr_events;
1146
1147         return ttrace;
1148 fail:
1149         color_fprintf(fp, PERF_COLOR_RED,
1150                       "WARNING: not enough memory, dropping samples!\n");
1151         return NULL;
1152 }
1153
1154 struct trace {
1155         struct perf_tool        tool;
1156         struct {
1157                 int             machine;
1158                 int             open_id;
1159         }                       audit;
1160         struct {
1161                 int             max;
1162                 struct syscall  *table;
1163         } syscalls;
1164         struct record_opts      opts;
1165         struct machine          *host;
1166         u64                     base_time;
1167         FILE                    *output;
1168         unsigned long           nr_events;
1169         struct strlist          *ev_qualifier;
1170         const char              *last_vfs_getname;
1171         struct intlist          *tid_list;
1172         struct intlist          *pid_list;
1173         double                  duration_filter;
1174         double                  runtime_ms;
1175         struct {
1176                 u64             vfs_getname,
1177                                 proc_getname;
1178         } stats;
1179         bool                    not_ev_qualifier;
1180         bool                    live;
1181         bool                    full_time;
1182         bool                    sched;
1183         bool                    multiple_threads;
1184         bool                    summary;
1185         bool                    summary_only;
1186         bool                    show_comm;
1187         bool                    show_tool_stats;
1188 };
1189
1190 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1191 {
1192         struct thread_trace *ttrace = thread->priv;
1193
1194         if (fd > ttrace->paths.max) {
1195                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1196
1197                 if (npath == NULL)
1198                         return -1;
1199
1200                 if (ttrace->paths.max != -1) {
1201                         memset(npath + ttrace->paths.max + 1, 0,
1202                                (fd - ttrace->paths.max) * sizeof(char *));
1203                 } else {
1204                         memset(npath, 0, (fd + 1) * sizeof(char *));
1205                 }
1206
1207                 ttrace->paths.table = npath;
1208                 ttrace->paths.max   = fd;
1209         }
1210
1211         ttrace->paths.table[fd] = strdup(pathname);
1212
1213         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1214 }
1215
1216 static int thread__read_fd_path(struct thread *thread, int fd)
1217 {
1218         char linkname[PATH_MAX], pathname[PATH_MAX];
1219         struct stat st;
1220         int ret;
1221
1222         if (thread->pid_ == thread->tid) {
1223                 scnprintf(linkname, sizeof(linkname),
1224                           "/proc/%d/fd/%d", thread->pid_, fd);
1225         } else {
1226                 scnprintf(linkname, sizeof(linkname),
1227                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1228         }
1229
1230         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1231                 return -1;
1232
1233         ret = readlink(linkname, pathname, sizeof(pathname));
1234
1235         if (ret < 0 || ret > st.st_size)
1236                 return -1;
1237
1238         pathname[ret] = '\0';
1239         return trace__set_fd_pathname(thread, fd, pathname);
1240 }
1241
1242 static const char *thread__fd_path(struct thread *thread, int fd,
1243                                    struct trace *trace)
1244 {
1245         struct thread_trace *ttrace = thread->priv;
1246
1247         if (ttrace == NULL)
1248                 return NULL;
1249
1250         if (fd < 0)
1251                 return NULL;
1252
1253         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1254                 if (!trace->live)
1255                         return NULL;
1256                 ++trace->stats.proc_getname;
1257                 if (thread__read_fd_path(thread, fd)) {
1258                         return NULL;
1259         }
1260
1261         return ttrace->paths.table[fd];
1262 }
1263
1264 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1265                                         struct syscall_arg *arg)
1266 {
1267         int fd = arg->val;
1268         size_t printed = scnprintf(bf, size, "%d", fd);
1269         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1270
1271         if (path)
1272                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1273
1274         return printed;
1275 }
1276
1277 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1278                                               struct syscall_arg *arg)
1279 {
1280         int fd = arg->val;
1281         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1282         struct thread_trace *ttrace = arg->thread->priv;
1283
1284         if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1285                 zfree(&ttrace->paths.table[fd]);
1286
1287         return printed;
1288 }
1289
1290 static bool trace__filter_duration(struct trace *trace, double t)
1291 {
1292         return t < (trace->duration_filter * NSEC_PER_MSEC);
1293 }
1294
1295 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1296 {
1297         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1298
1299         return fprintf(fp, "%10.3f ", ts);
1300 }
1301
/*
 * Flags set asynchronously by sig_handler().  They are volatile
 * sig_atomic_t because that is the only object type a signal handler may
 * portably write to, and so that loops polling them are not optimized
 * into reading a stale value.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: ask the main loop to stop, remembering whether the
 * request came from SIGINT.
 */
static void sig_handler(int sig)
{
        done = true;
        interrupted = sig == SIGINT;
}
1310
1311 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1312                                         u64 duration, u64 tstamp, FILE *fp)
1313 {
1314         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1315         printed += fprintf_duration(duration, fp);
1316
1317         if (trace->multiple_threads) {
1318                 if (trace->show_comm)
1319                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1320                 printed += fprintf(fp, "%d ", thread->tid);
1321         }
1322
1323         return printed;
1324 }
1325
1326 static int trace__process_event(struct trace *trace, struct machine *machine,
1327                                 union perf_event *event, struct perf_sample *sample)
1328 {
1329         int ret = 0;
1330
1331         switch (event->header.type) {
1332         case PERF_RECORD_LOST:
1333                 color_fprintf(trace->output, PERF_COLOR_RED,
1334                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1335                 ret = machine__process_lost_event(machine, event, sample);
1336         default:
1337                 ret = machine__process_event(machine, event, sample);
1338                 break;
1339         }
1340
1341         return ret;
1342 }
1343
1344 static int trace__tool_process(struct perf_tool *tool,
1345                                union perf_event *event,
1346                                struct perf_sample *sample,
1347                                struct machine *machine)
1348 {
1349         struct trace *trace = container_of(tool, struct trace, tool);
1350         return trace__process_event(trace, machine, event, sample);
1351 }
1352
1353 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1354 {
1355         int err = symbol__init();
1356
1357         if (err)
1358                 return err;
1359
1360         trace->host = machine__new_host();
1361         if (trace->host == NULL)
1362                 return -ENOMEM;
1363
1364         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1365                                             evlist->threads, trace__tool_process, false);
1366         if (err)
1367                 symbol__exit();
1368
1369         return err;
1370 }
1371
1372 static int syscall__set_arg_fmts(struct syscall *sc)
1373 {
1374         struct format_field *field;
1375         int idx = 0;
1376
1377         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1378         if (sc->arg_scnprintf == NULL)
1379                 return -1;
1380
1381         if (sc->fmt)
1382                 sc->arg_parm = sc->fmt->arg_parm;
1383
1384         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1385                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1386                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1387                 else if (field->flags & FIELD_IS_POINTER)
1388                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1389                 ++idx;
1390         }
1391
1392         return 0;
1393 }
1394
1395 static int trace__read_syscall_info(struct trace *trace, int id)
1396 {
1397         char tp_name[128];
1398         struct syscall *sc;
1399         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1400
1401         if (name == NULL)
1402                 return -1;
1403
1404         if (id > trace->syscalls.max) {
1405                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1406
1407                 if (nsyscalls == NULL)
1408                         return -1;
1409
1410                 if (trace->syscalls.max != -1) {
1411                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1412                                (id - trace->syscalls.max) * sizeof(*sc));
1413                 } else {
1414                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1415                 }
1416
1417                 trace->syscalls.table = nsyscalls;
1418                 trace->syscalls.max   = id;
1419         }
1420
1421         sc = trace->syscalls.table + id;
1422         sc->name = name;
1423
1424         if (trace->ev_qualifier) {
1425                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1426
1427                 if (!(in ^ trace->not_ev_qualifier)) {
1428                         sc->filtered = true;
1429                         /*
1430                          * No need to do read tracepoint information since this will be
1431                          * filtered out.
1432                          */
1433                         return 0;
1434                 }
1435         }
1436
1437         sc->fmt  = syscall_fmt__find(sc->name);
1438
1439         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1440         sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1441
1442         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1443                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1444                 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1445         }
1446
1447         if (sc->tp_format == NULL)
1448                 return -1;
1449
1450         return syscall__set_arg_fmts(sc);
1451 }
1452
1453 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1454                                       unsigned long *args, struct trace *trace,
1455                                       struct thread *thread)
1456 {
1457         size_t printed = 0;
1458
1459         if (sc->tp_format != NULL) {
1460                 struct format_field *field;
1461                 u8 bit = 1;
1462                 struct syscall_arg arg = {
1463                         .idx    = 0,
1464                         .mask   = 0,
1465                         .trace  = trace,
1466                         .thread = thread,
1467                 };
1468
1469                 for (field = sc->tp_format->format.fields->next; field;
1470                      field = field->next, ++arg.idx, bit <<= 1) {
1471                         if (arg.mask & bit)
1472                                 continue;
1473                         /*
1474                          * Suppress this argument if its value is zero and
1475                          * and we don't have a string associated in an
1476                          * strarray for it.
1477                          */
1478                         if (args[arg.idx] == 0 &&
1479                             !(sc->arg_scnprintf &&
1480                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1481                               sc->arg_parm[arg.idx]))
1482                                 continue;
1483
1484                         printed += scnprintf(bf + printed, size - printed,
1485                                              "%s%s: ", printed ? ", " : "", field->name);
1486                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1487                                 arg.val = args[arg.idx];
1488                                 if (sc->arg_parm)
1489                                         arg.parm = sc->arg_parm[arg.idx];
1490                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1491                                                                       size - printed, &arg);
1492                         } else {
1493                                 printed += scnprintf(bf + printed, size - printed,
1494                                                      "%ld", args[arg.idx]);
1495                         }
1496                 }
1497         } else {
1498                 int i = 0;
1499
1500                 while (i < 6) {
1501                         printed += scnprintf(bf + printed, size - printed,
1502                                              "%sarg%d: %ld",
1503                                              printed ? ", " : "", i, args[i]);
1504                         ++i;
1505                 }
1506         }
1507
1508         return printed;
1509 }
1510
/*
 * Signature shared by the per tracepoint sample handlers, such as
 * trace__sys_enter(): they receive the session, the evsel the sample came
 * from and the sample itself, and return an int status.
 */
typedef int (*tracepoint_handler)(struct trace *trace,
                                  struct perf_evsel *evsel,
                                  struct perf_sample *sample);
1513
1514 static struct syscall *trace__syscall_info(struct trace *trace,
1515                                            struct perf_evsel *evsel, int id)
1516 {
1517
1518         if (id < 0) {
1519
1520                 /*
1521                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1522                  * before that, leaving at a higher verbosity level till that is
1523                  * explained. Reproduced with plain ftrace with:
1524                  *
1525                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1526                  * grep "NR -1 " /t/trace_pipe
1527                  *
1528                  * After generating some load on the machine.
1529                  */
1530                 if (verbose > 1) {
1531                         static u64 n;
1532                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1533                                 id, perf_evsel__name(evsel), ++n);
1534                 }
1535                 return NULL;
1536         }
1537
1538         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1539             trace__read_syscall_info(trace, id))
1540                 goto out_cant_read;
1541
1542         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1543                 goto out_cant_read;
1544
1545         return &trace->syscalls.table[id];
1546
1547 out_cant_read:
1548         if (verbose) {
1549                 fprintf(trace->output, "Problems reading syscall %d", id);
1550                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1551                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1552                 fputs(" information\n", trace->output);
1553         }
1554         return NULL;
1555 }
1556
1557 static void thread__update_stats(struct thread_trace *ttrace,
1558                                  int id, struct perf_sample *sample)
1559 {
1560         struct int_node *inode;
1561         struct stats *stats;
1562         u64 duration = 0;
1563
1564         inode = intlist__findnew(ttrace->syscall_stats, id);
1565         if (inode == NULL)
1566                 return;
1567
1568         stats = inode->priv;
1569         if (stats == NULL) {
1570                 stats = malloc(sizeof(struct stats));
1571                 if (stats == NULL)
1572                         return;
1573                 init_stats(stats);
1574                 inode->priv = stats;
1575         }
1576
1577         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1578                 duration = sample->time - ttrace->entry_time;
1579
1580         update_stats(stats, duration);
1581 }
1582
1583 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1584                             struct perf_sample *sample)
1585 {
1586         char *msg;
1587         void *args;
1588         size_t printed = 0;
1589         struct thread *thread;
1590         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1591         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1592         struct thread_trace *ttrace;
1593
1594         if (sc == NULL)
1595                 return -1;
1596
1597         if (sc->filtered)
1598                 return 0;
1599
1600         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1601         ttrace = thread__trace(thread, trace->output);
1602         if (ttrace == NULL)
1603                 return -1;
1604
1605         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1606         ttrace = thread->priv;
1607
1608         if (ttrace->entry_str == NULL) {
1609                 ttrace->entry_str = malloc(1024);
1610                 if (!ttrace->entry_str)
1611                         return -1;
1612         }
1613
1614         ttrace->entry_time = sample->time;
1615         msg = ttrace->entry_str;
1616         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1617
1618         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1619                                            args, trace, thread);
1620
1621         if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1622                 if (!trace->duration_filter && !trace->summary_only) {
1623                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1624                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1625                 }
1626         } else
1627                 ttrace->entry_pending = true;
1628
1629         return 0;
1630 }
1631
1632 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1633                            struct perf_sample *sample)
1634 {
1635         int ret;
1636         u64 duration = 0;
1637         struct thread *thread;
1638         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1639         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1640         struct thread_trace *ttrace;
1641
1642         if (sc == NULL)
1643                 return -1;
1644
1645         if (sc->filtered)
1646                 return 0;
1647
1648         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1649         ttrace = thread__trace(thread, trace->output);
1650         if (ttrace == NULL)
1651                 return -1;
1652
1653         if (trace->summary)
1654                 thread__update_stats(ttrace, id, sample);
1655
1656         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1657
1658         if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1659                 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1660                 trace->last_vfs_getname = NULL;
1661                 ++trace->stats.vfs_getname;
1662         }
1663
1664         ttrace = thread->priv;
1665
1666         ttrace->exit_time = sample->time;
1667
1668         if (ttrace->entry_time) {
1669                 duration = sample->time - ttrace->entry_time;
1670                 if (trace__filter_duration(trace, duration))
1671                         goto out;
1672         } else if (trace->duration_filter)
1673                 goto out;
1674
1675         if (trace->summary_only)
1676                 goto out;
1677
1678         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1679
1680         if (ttrace->entry_pending) {
1681                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1682         } else {
1683                 fprintf(trace->output, " ... [");
1684                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1685                 fprintf(trace->output, "]: %s()", sc->name);
1686         }
1687
1688         if (sc->fmt == NULL) {
1689 signed_print:
1690                 fprintf(trace->output, ") = %d", ret);
1691         } else if (ret < 0 && sc->fmt->errmsg) {
1692                 char bf[256];
1693                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1694                            *e = audit_errno_to_name(-ret);
1695
1696                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1697         } else if (ret == 0 && sc->fmt->timeout)
1698                 fprintf(trace->output, ") = 0 Timeout");
1699         else if (sc->fmt->hexret)
1700                 fprintf(trace->output, ") = %#x", ret);
1701         else
1702                 goto signed_print;
1703
1704         fputc('\n', trace->output);
1705 out:
1706         ttrace->entry_pending = false;
1707
1708         return 0;
1709 }
1710
1711 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1712                               struct perf_sample *sample)
1713 {
1714         trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1715         return 0;
1716 }
1717
1718 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1719                                      struct perf_sample *sample)
1720 {
1721         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1722         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1723         struct thread *thread = machine__findnew_thread(trace->host,
1724                                                         sample->pid,
1725                                                         sample->tid);
1726         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1727
1728         if (ttrace == NULL)
1729                 goto out_dump;
1730
1731         ttrace->runtime_ms += runtime_ms;
1732         trace->runtime_ms += runtime_ms;
1733         return 0;
1734
1735 out_dump:
1736         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1737                evsel->name,
1738                perf_evsel__strval(evsel, sample, "comm"),
1739                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1740                runtime,
1741                perf_evsel__intval(evsel, sample, "vruntime"));
1742         return 0;
1743 }
1744
1745 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1746 {
1747         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1748             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1749                 return false;
1750
1751         if (trace->pid_list || trace->tid_list)
1752                 return true;
1753
1754         return false;
1755 }
1756
1757 static int trace__process_sample(struct perf_tool *tool,
1758                                  union perf_event *event __maybe_unused,
1759                                  struct perf_sample *sample,
1760                                  struct perf_evsel *evsel,
1761                                  struct machine *machine __maybe_unused)
1762 {
1763         struct trace *trace = container_of(tool, struct trace, tool);
1764         int err = 0;
1765
1766         tracepoint_handler handler = evsel->handler;
1767
1768         if (skip_sample(trace, sample))
1769                 return 0;
1770
1771         if (!trace->full_time && trace->base_time == 0)
1772                 trace->base_time = sample->time;
1773
1774         if (handler) {
1775                 ++trace->nr_events;
1776                 handler(trace, evsel, sample);
1777         }
1778
1779         return err;
1780 }
1781
1782 static int parse_target_str(struct trace *trace)
1783 {
1784         if (trace->opts.target.pid) {
1785                 trace->pid_list = intlist__new(trace->opts.target.pid);
1786                 if (trace->pid_list == NULL) {
1787                         pr_err("Error parsing process id string\n");
1788                         return -EINVAL;
1789                 }
1790         }
1791
1792         if (trace->opts.target.tid) {
1793                 trace->tid_list = intlist__new(trace->opts.target.tid);
1794                 if (trace->tid_list == NULL) {
1795                         pr_err("Error parsing thread id string\n");
1796                         return -EINVAL;
1797                 }
1798         }
1799
1800         return 0;
1801 }
1802
/*
 * Implement "perf trace record": build an argv for cmd_record() made of
 * the fixed record_args prefix, the syscall tracepoint event string and
 * the caller's remaining arguments, then run it.
 *
 * Returns -ENOMEM when the argv cannot be allocated, -1 when neither
 * syscall tracepoint flavour is valid, otherwise cmd_record()'s result.
 */
static int trace__record(int argc, const char **argv)
{
        unsigned int rec_argc, i, j;
        const char **rec_argv;
        const char * const record_args[] = {
                "record",
                "-R",
                "-m", "1024",
                "-c", "1",
                "-e",
        };

        /* +1 is for the event string below */
        rec_argc = ARRAY_SIZE(record_args) + 1 + argc;
        rec_argv = calloc(rec_argc + 1, sizeof(char *));

        if (rec_argv == NULL)
                return -ENOMEM;

        for (i = 0; i < ARRAY_SIZE(record_args); i++)
                rec_argv[i] = record_args[i];

        /* event string may be different for older kernels - e.g., RHEL6 */
        if (is_valid_tracepoint("raw_syscalls:sys_enter"))
                rec_argv[i] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
        else if (is_valid_tracepoint("syscalls:sys_enter"))
                rec_argv[i] = "syscalls:sys_enter,syscalls:sys_exit";
        else {
                pr_err("Neither raw_syscalls nor syscalls events exist.\n");
                /* Nothing will consume the vector, so release it here. */
                free(rec_argv);
                return -1;
        }
        i++;

        for (j = 0; j < (unsigned int)argc; j++, i++)
                rec_argv[i] = argv[j];

        /*
         * NOTE(review): rec_argv is handed to cmd_record() and never freed
         * afterwards; confirm whether cmd_record() keeps references before
         * releasing it here.
         */
        return cmd_record(i, rec_argv, NULL);
}
1841
1842 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
1843
1844 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1845 {
1846         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
1847         if (evsel == NULL)
1848                 return;
1849
1850         if (perf_evsel__field(evsel, "pathname") == NULL) {
1851                 perf_evsel__delete(evsel);
1852                 return;
1853         }
1854
1855         evsel->handler = trace__vfs_getname;
1856         perf_evlist__add(evlist, evsel);
1857 }
1858
1859 static int trace__run(struct trace *trace, int argc, const char **argv)
1860 {
1861         struct perf_evlist *evlist = perf_evlist__new();
1862         struct perf_evsel *evsel;
1863         int err = -1, i;
1864         unsigned long before;
1865         const bool forks = argc > 0;
1866
1867         trace->live = true;
1868
1869         if (evlist == NULL) {
1870                 fprintf(trace->output, "Not enough memory to run!\n");
1871                 goto out;
1872         }
1873
1874         if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
1875                 goto out_error_tp;
1876
1877         perf_evlist__add_vfs_getname(evlist);
1878
1879         if (trace->sched &&
1880                 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1881                                 trace__sched_stat_runtime))
1882                 goto out_error_tp;
1883
1884         err = perf_evlist__create_maps(evlist, &trace->opts.target);
1885         if (err < 0) {
1886                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1887                 goto out_delete_evlist;
1888         }
1889
1890         err = trace__symbols_init(trace, evlist);
1891         if (err < 0) {
1892                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1893                 goto out_delete_evlist;
1894         }
1895
1896         perf_evlist__config(evlist, &trace->opts);
1897
1898         signal(SIGCHLD, sig_handler);
1899         signal(SIGINT, sig_handler);
1900
1901         if (forks) {
1902                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1903                                                     argv, false, NULL);
1904                 if (err < 0) {
1905                         fprintf(trace->output, "Couldn't run the workload!\n");
1906                         goto out_delete_evlist;
1907                 }
1908         }
1909
1910         err = perf_evlist__open(evlist);
1911         if (err < 0)
1912                 goto out_error_open;
1913
1914         err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
1915         if (err < 0) {
1916                 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1917                 goto out_delete_evlist;
1918         }
1919
1920         perf_evlist__enable(evlist);
1921
1922         if (forks)
1923                 perf_evlist__start_workload(evlist);
1924
1925         trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1926 again:
1927         before = trace->nr_events;
1928
1929         for (i = 0; i < evlist->nr_mmaps; i++) {
1930                 union perf_event *event;
1931
1932                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1933                         const u32 type = event->header.type;
1934                         tracepoint_handler handler;
1935                         struct perf_sample sample;
1936
1937                         ++trace->nr_events;
1938
1939                         err = perf_evlist__parse_sample(evlist, event, &sample);
1940                         if (err) {
1941                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1942                                 goto next_event;
1943                         }
1944
1945                         if (!trace->full_time && trace->base_time == 0)
1946                                 trace->base_time = sample.time;
1947
1948                         if (type != PERF_RECORD_SAMPLE) {
1949                                 trace__process_event(trace, trace->host, event, &sample);
1950                                 continue;
1951                         }
1952
1953                         evsel = perf_evlist__id2evsel(evlist, sample.id);
1954                         if (evsel == NULL) {
1955                                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1956                                 goto next_event;
1957                         }
1958
1959                         if (sample.raw_data == NULL) {
1960                                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1961                                        perf_evsel__name(evsel), sample.tid,
1962                                        sample.cpu, sample.raw_size);
1963                                 goto next_event;
1964                         }
1965
1966                         handler = evsel->handler;
1967                         handler(trace, evsel, &sample);
1968 next_event:
1969                         perf_evlist__mmap_consume(evlist, i);
1970
1971                         if (interrupted)
1972                                 goto out_disable;
1973                 }
1974         }
1975
1976         if (trace->nr_events == before) {
1977                 int timeout = done ? 100 : -1;
1978
1979                 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
1980                         goto again;
1981         } else {
1982                 goto again;
1983         }
1984
1985 out_disable:
1986         perf_evlist__disable(evlist);
1987
1988         if (!err) {
1989                 if (trace->summary)
1990                         trace__fprintf_thread_summary(trace, trace->output);
1991
1992                 if (trace->show_tool_stats) {
1993                         fprintf(trace->output, "Stats:\n "
1994                                                " vfs_getname : %" PRIu64 "\n"
1995                                                " proc_getname: %" PRIu64 "\n",
1996                                 trace->stats.vfs_getname,
1997                                 trace->stats.proc_getname);
1998                 }
1999         }
2000
2001 out_delete_evlist:
2002         perf_evlist__delete(evlist);
2003 out:
2004         trace->live = false;
2005         return err;
2006 {
2007         char errbuf[BUFSIZ];
2008
2009 out_error_tp:
2010         perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
2011         goto out_error;
2012
2013 out_error_open:
2014         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2015
2016 out_error:
2017         fprintf(trace->output, "%s\n", errbuf);
2018         goto out_delete_evlist;
2019 }
2020 }
2021
2022 static int trace__replay(struct trace *trace)
2023 {
2024         const struct perf_evsel_str_handler handlers[] = {
2025                 { "probe:vfs_getname",       trace__vfs_getname, },
2026         };
2027         struct perf_data_file file = {
2028                 .path  = input_name,
2029                 .mode  = PERF_DATA_MODE_READ,
2030         };
2031         struct perf_session *session;
2032         struct perf_evsel *evsel;
2033         int err = -1;
2034
2035         trace->tool.sample        = trace__process_sample;
2036         trace->tool.mmap          = perf_event__process_mmap;
2037         trace->tool.mmap2         = perf_event__process_mmap2;
2038         trace->tool.comm          = perf_event__process_comm;
2039         trace->tool.exit          = perf_event__process_exit;
2040         trace->tool.fork          = perf_event__process_fork;
2041         trace->tool.attr          = perf_event__process_attr;
2042         trace->tool.tracing_data = perf_event__process_tracing_data;
2043         trace->tool.build_id      = perf_event__process_build_id;
2044
2045         trace->tool.ordered_samples = true;
2046         trace->tool.ordering_requires_timestamps = true;
2047
2048         /* add tid to output */
2049         trace->multiple_threads = true;
2050
2051         if (symbol__init() < 0)
2052                 return -1;
2053
2054         session = perf_session__new(&file, false, &trace->tool);
2055         if (session == NULL)
2056                 return -ENOMEM;
2057
2058         trace->host = &session->machines.host;
2059
2060         err = perf_session__set_tracepoints_handlers(session, handlers);
2061         if (err)
2062                 goto out;
2063
2064         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2065                                                      "raw_syscalls:sys_enter");
2066         /* older kernels have syscalls tp versus raw_syscalls */
2067         if (evsel == NULL)
2068                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2069                                                              "syscalls:sys_enter");
2070         if (evsel == NULL) {
2071                 pr_err("Data file does not have raw_syscalls:sys_enter event\n");
2072                 goto out;
2073         }
2074
2075         if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2076             perf_evsel__init_sc_tp_ptr_field(evsel, args)) {
2077                 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2078                 goto out;
2079         }
2080
2081         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2082                                                      "raw_syscalls:sys_exit");
2083         if (evsel == NULL)
2084                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2085                                                              "syscalls:sys_exit");
2086         if (evsel == NULL) {
2087                 pr_err("Data file does not have raw_syscalls:sys_exit event\n");
2088                 goto out;
2089         }
2090
2091         if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2092             perf_evsel__init_sc_tp_uint_field(evsel, ret)) {
2093                 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2094                 goto out;
2095         }
2096
2097         err = parse_target_str(trace);
2098         if (err != 0)
2099                 goto out;
2100
2101         setup_pager();
2102
2103         err = perf_session__process_events(session, &trace->tool);
2104         if (err)
2105                 pr_err("Failed to process events, error %d", err);
2106
2107         else if (trace->summary)
2108                 trace__fprintf_thread_summary(trace, trace->output);
2109
2110 out:
2111         perf_session__delete(session);
2112
2113         return err;
2114 }
2115
/*
 * Print the heading of the per-thread summary to fp and return
 * fprintf()'s result converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
        return fprintf(fp, "\n Summary of events:\n\n");
}
2124
2125 static size_t thread__dump_stats(struct thread_trace *ttrace,
2126                                  struct trace *trace, FILE *fp)
2127 {
2128         struct stats *stats;
2129         size_t printed = 0;
2130         struct syscall *sc;
2131         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2132
2133         if (inode == NULL)
2134                 return 0;
2135
2136         printed += fprintf(fp, "\n");
2137
2138         printed += fprintf(fp, "   syscall            calls      min       avg       max      stddev\n");
2139         printed += fprintf(fp, "                               (msec)    (msec)    (msec)        (%%)\n");
2140         printed += fprintf(fp, "   --------------- -------- --------- --------- ---------     ------\n");
2141
2142         /* each int_node is a syscall */
2143         while (inode) {
2144                 stats = inode->priv;
2145                 if (stats) {
2146                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2147                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2148                         double avg = avg_stats(stats);
2149                         double pct;
2150                         u64 n = (u64) stats->n;
2151
2152                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2153                         avg /= NSEC_PER_MSEC;
2154
2155                         sc = &trace->syscalls.table[inode->i];
2156                         printed += fprintf(fp, "   %-15s", sc->name);
2157                         printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2158                                            n, min, avg);
2159                         printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2160                 }
2161
2162                 inode = intlist__next(inode);
2163         }
2164
2165         printed += fprintf(fp, "\n\n");
2166
2167         return printed;
2168 }
2169
/*
 * Context handed to the per-thread summary callback
 * (trace__fprintf_one_thread) through its void *priv argument.
 */
struct summary_data {
        FILE *fp;               /* stream the summary is written to */
        struct trace *trace;    /* tracer whose threads are summarised */
        size_t printed;         /* running total of fprintf() results */
};
2176
2177 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2178 {
2179         struct summary_data *data = priv;
2180         FILE *fp = data->fp;
2181         size_t printed = data->printed;
2182         struct trace *trace = data->trace;
2183         struct thread_trace *ttrace = thread->priv;
2184         double ratio;
2185
2186         if (ttrace == NULL)
2187                 return 0;
2188
2189         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2190
2191         printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2192         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2193         printed += fprintf(fp, "%.1f%%", ratio);
2194         printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2195         printed += thread__dump_stats(ttrace, trace, fp);
2196
2197         data->printed += printed;
2198
2199         return 0;
2200 }
2201
2202 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2203 {
2204         struct summary_data data = {
2205                 .fp = fp,
2206                 .trace = trace
2207         };
2208         data.printed = trace__fprintf_threads_header(fp);
2209
2210         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2211
2212         return data.printed;
2213 }
2214
2215 static int trace__set_duration(const struct option *opt, const char *str,
2216                                int unset __maybe_unused)
2217 {
2218         struct trace *trace = opt->value;
2219
2220         trace->duration_filter = atof(str);
2221         return 0;
2222 }
2223
2224 static int trace__open_output(struct trace *trace, const char *filename)
2225 {
2226         struct stat st;
2227
2228         if (!stat(filename, &st) && st.st_size) {
2229                 char oldname[PATH_MAX];
2230
2231                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2232                 unlink(oldname);
2233                 rename(filename, oldname);
2234         }
2235
2236         trace->output = fopen(filename, "w");
2237
2238         return trace->output == NULL ? -errno : 0;
2239 }
2240
2241 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2242 {
2243         const char * const trace_usage[] = {
2244                 "perf trace [<options>] [<command>]",
2245                 "perf trace [<options>] -- <command> [<options>]",
2246                 "perf trace record [<options>] [<command>]",
2247                 "perf trace record [<options>] -- <command> [<options>]",
2248                 NULL
2249         };
2250         struct trace trace = {
2251                 .audit = {
2252                         .machine = audit_detect_machine(),
2253                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
2254                 },
2255                 .syscalls = {
2256                         . max = -1,
2257                 },
2258                 .opts = {
2259                         .target = {
2260                                 .uid       = UINT_MAX,
2261                                 .uses_mmap = true,
2262                         },
2263                         .user_freq     = UINT_MAX,
2264                         .user_interval = ULLONG_MAX,
2265                         .no_buffering  = true,
2266                         .mmap_pages    = 1024,
2267                 },
2268                 .output = stdout,
2269                 .show_comm = true,
2270         };
2271         const char *output_name = NULL;
2272         const char *ev_qualifier_str = NULL;
2273         const struct option trace_options[] = {
2274         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2275                     "show the thread COMM next to its id"),
2276         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2277         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2278                     "list of events to trace"),
2279         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2280         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2281         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2282                     "trace events on existing process id"),
2283         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2284                     "trace events on existing thread id"),
2285         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2286                     "system-wide collection from all CPUs"),
2287         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2288                     "list of cpus to monitor"),
2289         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2290                     "child tasks do not inherit counters"),
2291         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2292                      "number of mmap data pages",
2293                      perf_evlist__parse_mmap_pages),
2294         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2295                    "user to profile"),
2296         OPT_CALLBACK(0, "duration", &trace, "float",
2297                      "show only events with duration > N.M ms",
2298                      trace__set_duration),
2299         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2300         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2301         OPT_BOOLEAN('T', "time", &trace.full_time,
2302                     "Show full timestamp, not time relative to first start"),
2303         OPT_BOOLEAN('s', "summary", &trace.summary_only,
2304                     "Show only syscall summary with statistics"),
2305         OPT_BOOLEAN('S', "with-summary", &trace.summary,
2306                     "Show all syscalls and summary with statistics"),
2307         OPT_END()
2308         };
2309         int err;
2310         char bf[BUFSIZ];
2311
2312         if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2313                 return trace__record(argc-2, &argv[2]);
2314
2315         argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2316
2317         /* summary_only implies summary option, but don't overwrite summary if set */
2318         if (trace.summary_only)
2319                 trace.summary = trace.summary_only;
2320
2321         if (output_name != NULL) {
2322                 err = trace__open_output(&trace, output_name);
2323                 if (err < 0) {
2324                         perror("failed to create output file");
2325                         goto out;
2326                 }
2327         }
2328
2329         if (ev_qualifier_str != NULL) {
2330                 const char *s = ev_qualifier_str;
2331
2332                 trace.not_ev_qualifier = *s == '!';
2333                 if (trace.not_ev_qualifier)
2334                         ++s;
2335                 trace.ev_qualifier = strlist__new(true, s);
2336                 if (trace.ev_qualifier == NULL) {
2337                         fputs("Not enough memory to parse event qualifier",
2338                               trace.output);
2339                         err = -ENOMEM;
2340                         goto out_close;
2341                 }
2342         }
2343
2344         err = target__validate(&trace.opts.target);
2345         if (err) {
2346                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2347                 fprintf(trace.output, "%s", bf);
2348                 goto out_close;
2349         }
2350
2351         err = target__parse_uid(&trace.opts.target);
2352         if (err) {
2353                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2354                 fprintf(trace.output, "%s", bf);
2355                 goto out_close;
2356         }
2357
2358         if (!argc && target__none(&trace.opts.target))
2359                 trace.opts.target.system_wide = true;
2360
2361         if (input_name)
2362                 err = trace__replay(&trace);
2363         else
2364                 err = trace__run(&trace, argc, argv);
2365
2366 out_close:
2367         if (output_name != NULL)
2368                 fclose(trace.output);
2369 out:
2370         return err;
2371 }