#include <trace/syscall.h>
#include <trace/events/syscalls.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/ftrace.h>
#include <linux/perf_event.h>
#include <asm/syscall.h>

#include "trace_output.h"
#include "trace.h"

static DEFINE_MUTEX(syscall_trace_lock);
static int sys_refcount_enter;
static int sys_refcount_exit;
static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);

extern unsigned long __start_syscalls_metadata[];
extern unsigned long __stop_syscalls_metadata[];

static struct syscall_metadata **syscalls_metadata;
static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
{
        struct syscall_metadata *start;
        struct syscall_metadata *stop;
        char str[KSYM_SYMBOL_LEN];

        start = (struct syscall_metadata *)__start_syscalls_metadata;
        stop = (struct syscall_metadata *)__stop_syscalls_metadata;
        kallsyms_lookup(syscall, NULL, NULL, NULL, str);

        for ( ; start < stop; start++) {
                /*
                 * Only compare after the "sys" prefix. Archs that use
                 * syscall wrappers may have syscall symbol aliases prefixed
                 * with "SyS" instead of "sys", leading to an unwanted
                 * mismatch.
                 */
                if (start->name && !strcmp(start->name + 3, str + 3))
                        return start;
        }
        return NULL;
}
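
/*
 * Hypothetical helper (a sketch added for illustration, not used by the
 * code above): the "+ 3" comparison in find_syscall_meta() skips the
 * three-character "sys"/"SyS" prefix, so "SyS_read" matches "sys_read".
 */
static inline bool syscall_name_match(const char *sym, const char *name)
{
        /* compare only past the "sys"/"SyS" prefix on both sides */
        return !strcmp(sym + 3, name + 3);
}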

static struct syscall_metadata *syscall_nr_to_meta(int nr)
{
        if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
                return NULL;

        return syscalls_metadata[nr];
}

enum print_line_t
print_syscall_enter(struct trace_iterator *iter, int flags)
{
        struct trace_seq *s = &iter->seq;
        struct trace_entry *ent = iter->ent;
        struct syscall_trace_enter *trace;
        struct syscall_metadata *entry;
        int i, ret, syscall;

        trace = (typeof(trace))ent;
        syscall = trace->nr;
        entry = syscall_nr_to_meta(syscall);

        if (!entry)
                goto end;

        if (entry->enter_event->id != ent->type) {
                WARN_ON_ONCE(1);
                goto end;
        }

        ret = trace_seq_printf(s, "%s(", entry->name);
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;

        for (i = 0; i < entry->nb_args; i++) {
                /* parameter types */
                if (trace_flags & TRACE_ITER_VERBOSE) {
                        ret = trace_seq_printf(s, "%s ", entry->types[i]);
                        if (!ret)
                                return TRACE_TYPE_PARTIAL_LINE;
                }
                /* parameter values */
                ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
                                       trace->args[i],
                                       i == entry->nb_args - 1 ? "" : ", ");
                if (!ret)
                        return TRACE_TYPE_PARTIAL_LINE;
        }

        ret = trace_seq_putc(s, ')');
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;

end:
        ret = trace_seq_putc(s, '\n');
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;

        return TRACE_TYPE_HANDLED;
}

enum print_line_t
print_syscall_exit(struct trace_iterator *iter, int flags)
{
        struct trace_seq *s = &iter->seq;
        struct trace_entry *ent = iter->ent;
        struct syscall_trace_exit *trace;
        int syscall;
        struct syscall_metadata *entry;
        int ret;

        trace = (typeof(trace))ent;
        syscall = trace->nr;
        entry = syscall_nr_to_meta(syscall);

        if (!entry) {
                trace_seq_printf(s, "\n");
                return TRACE_TYPE_HANDLED;
        }

        if (entry->exit_event->id != ent->type) {
                WARN_ON_ONCE(1);
                return TRACE_TYPE_UNHANDLED;
        }

        ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
                                trace->ret);
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;

        return TRACE_TYPE_HANDLED;
}

extern char *__bad_type_size(void);

#define SYSCALL_FIELD(type, name)                                       \
        sizeof(type) != sizeof(trace.name) ?                            \
                __bad_type_size() :                                     \
                #type, #name, offsetof(typeof(trace), name),            \
                sizeof(trace.name), is_signed_type(type)
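
/*
 * For reference (explanatory note, not in the original): when the sizes
 * match, SYSCALL_FIELD(int, nr) expands to the trace_define_field()
 * argument list
 *
 *      "int", "nr", offsetof(typeof(trace), nr), sizeof(trace.nr),
 *      is_signed_type(int)
 *
 * and when they do not, the reference to the undefined __bad_type_size()
 * survives constant folding and breaks the build at link time.
 */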

static int
__set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
{
        int i;
        int pos = 0;

        /* When len=0, we just calculate the needed length */
#define LEN_OR_ZERO (len ? len - pos : 0)

        pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
        for (i = 0; i < entry->nb_args; i++) {
                pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
                                entry->args[i], sizeof(unsigned long),
                                i == entry->nb_args - 1 ? "" : ", ");
        }
        pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");

        for (i = 0; i < entry->nb_args; i++) {
                pos += snprintf(buf + pos, LEN_OR_ZERO,
                                ", ((unsigned long)(REC->%s))", entry->args[i]);
        }

#undef LEN_OR_ZERO

        /* return the length of print_fmt */
        return pos;
}
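
/*
 * Example output (hypothetical event, for illustration only): for an enter
 * event with two args "fd" and "count" on a 64-bit kernel, the code above
 * generates the print_fmt
 *
 *      "fd: 0x%08lx, count: 0x%08lx", ((unsigned long)(REC->fd)),
 *      ((unsigned long)(REC->count))
 */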

static int set_syscall_print_fmt(struct ftrace_event_call *call)
{
        char *print_fmt;
        int len;
        struct syscall_metadata *entry = call->data;

        if (entry->enter_event != call) {
                call->print_fmt = "\"0x%lx\", REC->ret";
                return 0;
        }

        /* First: called with 0 length to calculate the needed length */
        len = __set_enter_print_fmt(entry, NULL, 0);

        print_fmt = kmalloc(len + 1, GFP_KERNEL);
        if (!print_fmt)
                return -ENOMEM;

        /* Second: actually write the @print_fmt */
        __set_enter_print_fmt(entry, print_fmt, len + 1);
        call->print_fmt = print_fmt;

        return 0;
}

static void free_syscall_print_fmt(struct ftrace_event_call *call)
{
        struct syscall_metadata *entry = call->data;

        if (entry->enter_event == call)
                kfree(call->print_fmt);
}

int syscall_enter_define_fields(struct ftrace_event_call *call)
{
        struct syscall_trace_enter trace;
        struct syscall_metadata *meta = call->data;
        int ret;
        int i;
        int offset = offsetof(typeof(trace), args);

        ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
        if (ret)
                return ret;

        for (i = 0; i < meta->nb_args; i++) {
                ret = trace_define_field(call, meta->types[i],
                                         meta->args[i], offset,
                                         sizeof(unsigned long), 0,
                                         FILTER_OTHER);
                offset += sizeof(unsigned long);
        }

        return ret;
}

int syscall_exit_define_fields(struct ftrace_event_call *call)
{
        struct syscall_trace_exit trace;
        int ret;

        ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
        if (ret)
                return ret;

        ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
                                 FILTER_OTHER);

        return ret;
}

void ftrace_syscall_enter(struct pt_regs *regs, long id)
{
        struct syscall_trace_enter *entry;
        struct syscall_metadata *sys_data;
        struct ring_buffer_event *event;
        struct ring_buffer *buffer;
        int size;
        int syscall_nr;

        syscall_nr = syscall_get_nr(current, regs);
        if (syscall_nr < 0)
                return;
        if (!test_bit(syscall_nr, enabled_enter_syscalls))
                return;

        sys_data = syscall_nr_to_meta(syscall_nr);
        if (!sys_data)
                return;

        size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;

        event = trace_current_buffer_lock_reserve(&buffer,
                        sys_data->enter_event->id, size, 0, 0);
        if (!event)
                return;

        entry = ring_buffer_event_data(event);
        entry->nr = syscall_nr;
        syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);

        if (!filter_current_check_discard(buffer, sys_data->enter_event,
                                          entry, event))
                trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}

void ftrace_syscall_exit(struct pt_regs *regs, long ret)
{
        struct syscall_trace_exit *entry;
        struct syscall_metadata *sys_data;
        struct ring_buffer_event *event;
        struct ring_buffer *buffer;
        int syscall_nr;

        syscall_nr = syscall_get_nr(current, regs);
        if (syscall_nr < 0)
                return;
        if (!test_bit(syscall_nr, enabled_exit_syscalls))
                return;

        sys_data = syscall_nr_to_meta(syscall_nr);
        if (!sys_data)
                return;

        event = trace_current_buffer_lock_reserve(&buffer,
                        sys_data->exit_event->id, sizeof(*entry), 0, 0);
        if (!event)
                return;

        entry = ring_buffer_event_data(event);
        entry->nr = syscall_nr;
        entry->ret = syscall_get_return_value(current, regs);

        if (!filter_current_check_discard(buffer, sys_data->exit_event,
                                          entry, event))
                trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}

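/*
 * Registration below is refcounted: the first enabled event attaches the
 * single sys_enter/sys_exit tracepoint probe, the last one detaches it,
 * and each individual syscall is gated by its bit in the
 * enabled_*_syscalls bitmaps declared at the top of this file.
 */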
int reg_event_syscall_enter(struct ftrace_event_call *call)
{
        int ret = 0;
        int num;

        num = ((struct syscall_metadata *)call->data)->syscall_nr;
        if (num < 0 || num >= NR_syscalls)
                return -ENOSYS;
        mutex_lock(&syscall_trace_lock);
        if (!sys_refcount_enter)
                ret = register_trace_sys_enter(ftrace_syscall_enter);
        if (!ret) {
                set_bit(num, enabled_enter_syscalls);
                sys_refcount_enter++;
        }
        mutex_unlock(&syscall_trace_lock);
        return ret;
}

void unreg_event_syscall_enter(struct ftrace_event_call *call)
{
        int num;

        num = ((struct syscall_metadata *)call->data)->syscall_nr;
        if (num < 0 || num >= NR_syscalls)
                return;
        mutex_lock(&syscall_trace_lock);
        sys_refcount_enter--;
        clear_bit(num, enabled_enter_syscalls);
        if (!sys_refcount_enter)
                unregister_trace_sys_enter(ftrace_syscall_enter);
        mutex_unlock(&syscall_trace_lock);
}

int reg_event_syscall_exit(struct ftrace_event_call *call)
{
        int ret = 0;
        int num;

        num = ((struct syscall_metadata *)call->data)->syscall_nr;
        if (num < 0 || num >= NR_syscalls)
                return -ENOSYS;
        mutex_lock(&syscall_trace_lock);
        if (!sys_refcount_exit)
                ret = register_trace_sys_exit(ftrace_syscall_exit);
        if (!ret) {
                set_bit(num, enabled_exit_syscalls);
                sys_refcount_exit++;
        }
        mutex_unlock(&syscall_trace_lock);
        return ret;
}

void unreg_event_syscall_exit(struct ftrace_event_call *call)
{
        int num;

        num = ((struct syscall_metadata *)call->data)->syscall_nr;
        if (num < 0 || num >= NR_syscalls)
                return;
        mutex_lock(&syscall_trace_lock);
        sys_refcount_exit--;
        clear_bit(num, enabled_exit_syscalls);
        if (!sys_refcount_exit)
                unregister_trace_sys_exit(ftrace_syscall_exit);
        mutex_unlock(&syscall_trace_lock);
}

int init_syscall_trace(struct ftrace_event_call *call)
{
        int id;

        if (set_syscall_print_fmt(call) < 0)
                return -ENOMEM;

        id = trace_event_raw_init(call);

        if (id < 0) {
                free_syscall_print_fmt(call);
                return id;
        }

        return id;
}

unsigned long __init arch_syscall_addr(int nr)
{
        return (unsigned long)sys_call_table[nr];
}

int __init init_ftrace_syscalls(void)
{
        struct syscall_metadata *meta;
        unsigned long addr;
        int i;

        syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
                                        NR_syscalls, GFP_KERNEL);
        if (!syscalls_metadata) {
                WARN_ON(1);
                return -ENOMEM;
        }

        for (i = 0; i < NR_syscalls; i++) {
                addr = arch_syscall_addr(i);
                meta = find_syscall_meta(addr);
                if (!meta)
                        continue;

                meta->syscall_nr = i;
                syscalls_metadata[i] = meta;
        }

        return 0;
}
core_initcall(init_ftrace_syscalls);

#ifdef CONFIG_PERF_EVENTS

static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
static int sys_perf_refcount_enter;
static int sys_perf_refcount_exit;

static void perf_syscall_enter(struct pt_regs *regs, long id)
{
        struct syscall_metadata *sys_data;
        struct syscall_trace_enter *rec;
        struct hlist_head *head;
        int syscall_nr;
        int rctx;
        int size;

        syscall_nr = syscall_get_nr(current, regs);
        if (syscall_nr < 0)
                return;
        if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
                return;

        sys_data = syscall_nr_to_meta(syscall_nr);
        if (!sys_data)
                return;

        /* get the size after alignment with the u32 buffer size field */
        size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
        size = ALIGN(size + sizeof(u32), sizeof(u64));
        size -= sizeof(u32);
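        /*
         * Worked example (illustrative numbers only): a 44-byte payload
         * gives 44 + 4 = 48, already a multiple of 8, so size stays 44;
         * a 46-byte payload gives 46 + 4 = 50, which rounds up to 56, so
         * size becomes 52 and the u32 size field plus the record end on
         * a u64 boundary.
         */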

        if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
                      "perf buffer not large enough"))
                return;

        rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
                                sys_data->enter_event->id, regs, &rctx);
        if (!rec)
                return;

        rec->nr = syscall_nr;
        syscall_get_arguments(current, regs, 0, sys_data->nb_args,
                               (unsigned long *)&rec->args);

        head = per_cpu_ptr(sys_data->enter_event->perf_events, smp_processor_id());
        perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
}

int perf_sysenter_enable(struct ftrace_event_call *call)
{
        int ret = 0;
        int num;

        num = ((struct syscall_metadata *)call->data)->syscall_nr;

        mutex_lock(&syscall_trace_lock);
        if (!sys_perf_refcount_enter)
                ret = register_trace_sys_enter(perf_syscall_enter);
        if (ret) {
                pr_info("event trace: Could not activate syscall entry trace point\n");
        } else {
                set_bit(num, enabled_perf_enter_syscalls);
                sys_perf_refcount_enter++;
        }
        mutex_unlock(&syscall_trace_lock);
        return ret;
}

void perf_sysenter_disable(struct ftrace_event_call *call)
{
        int num;

        num = ((struct syscall_metadata *)call->data)->syscall_nr;

        mutex_lock(&syscall_trace_lock);
        sys_perf_refcount_enter--;
        clear_bit(num, enabled_perf_enter_syscalls);
        if (!sys_perf_refcount_enter)
                unregister_trace_sys_enter(perf_syscall_enter);
        mutex_unlock(&syscall_trace_lock);
}

static void perf_syscall_exit(struct pt_regs *regs, long ret)
{
        struct syscall_metadata *sys_data;
        struct syscall_trace_exit *rec;
        struct hlist_head *head;
        int syscall_nr;
        int rctx;
        int size;

        syscall_nr = syscall_get_nr(current, regs);
        if (syscall_nr < 0)
                return;
        if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
                return;

        sys_data = syscall_nr_to_meta(syscall_nr);
        if (!sys_data)
                return;

        /* We can probably compute this at build time */
        size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
        size -= sizeof(u32);
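        /*
         * Sketch of a compile-time variant (not in the original): since
         * sizeof(*rec) is a constant here, the check below could in
         * principle be done at build time, e.g.:
         *
         *      BUILD_BUG_ON(ALIGN(sizeof(struct syscall_trace_exit) +
         *                         sizeof(u32), sizeof(u64)) - sizeof(u32)
         *                   > PERF_MAX_TRACE_SIZE);
         */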

        /*
         * Impossible today, but stay paranoid about future growth of the
         * record; ideally this check would move out of the runtime path
         * (see the build-time sketch above).
         */
        if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
                "exit event has grown above perf buffer size"))
                return;

        rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
                                sys_data->exit_event->id, regs, &rctx);
        if (!rec)
                return;

        rec->nr = syscall_nr;
        rec->ret = syscall_get_return_value(current, regs);

        head = per_cpu_ptr(sys_data->exit_event->perf_events, smp_processor_id());
        perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
}

int perf_sysexit_enable(struct ftrace_event_call *call)
{
        int ret = 0;
        int num;

        num = ((struct syscall_metadata *)call->data)->syscall_nr;

        mutex_lock(&syscall_trace_lock);
        if (!sys_perf_refcount_exit)
                ret = register_trace_sys_exit(perf_syscall_exit);
        if (ret) {
                pr_info("event trace: Could not activate syscall exit trace point\n");
        } else {
                set_bit(num, enabled_perf_exit_syscalls);
                sys_perf_refcount_exit++;
        }
        mutex_unlock(&syscall_trace_lock);
        return ret;
}

void perf_sysexit_disable(struct ftrace_event_call *call)
{
        int num;

        num = ((struct syscall_metadata *)call->data)->syscall_nr;

        mutex_lock(&syscall_trace_lock);
        sys_perf_refcount_exit--;
        clear_bit(num, enabled_perf_exit_syscalls);
        if (!sys_perf_refcount_exit)
                unregister_trace_sys_exit(perf_syscall_exit);
        mutex_unlock(&syscall_trace_lock);
}

#endif /* CONFIG_PERF_EVENTS */