8aeb24cc295f0b875916b819bad996a2e7df5be9
[pandora-kernel.git] / kernel / trace / trace_kprobe.c
1 /*
2  * kprobe based kernel tracer
3  *
4  * Created by Masami Hiramatsu <mhiramat@redhat.com>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  */
19
20 #include <linux/module.h>
21 #include <linux/uaccess.h>
22 #include <linux/kprobes.h>
23 #include <linux/seq_file.h>
24 #include <linux/slab.h>
25 #include <linux/smp.h>
26 #include <linux/debugfs.h>
27 #include <linux/types.h>
28 #include <linux/string.h>
29 #include <linux/ctype.h>
30 #include <linux/ptrace.h>
31
32 #include "trace.h"
33 #include "trace_output.h"
34
/* Upper bound on the number of fetch arguments parsed for one probe. */
#define MAX_TRACE_ARGS		128
/* Longest accepted fetch-argument string, not counting the trailing NUL. */
#define MAX_ARGSTR_LEN		63
/* Size of the buffer used when an event name is generated automatically. */
#define MAX_EVENT_NAME_LEN	64
38
39 /* currently, trace_kprobe only supports X86. */
40
/*
 * One fetch operation: @func is called with the probed register set and
 * the opaque @data cookie, and yields a single unsigned long value.
 */
struct fetch_func {
	unsigned long (*func)(struct pt_regs *regs, void *data);
	void *data;
};
45
46 static __kprobes unsigned long call_fetch(struct fetch_func *f,
47                                           struct pt_regs *regs)
48 {
49         return f->func(regs, f->data);
50 }
51
52 /* fetch handlers */
53 static __kprobes unsigned long fetch_register(struct pt_regs *regs,
54                                               void *offset)
55 {
56         return regs_get_register(regs, (unsigned int)((unsigned long)offset));
57 }
58
59 static __kprobes unsigned long fetch_stack(struct pt_regs *regs,
60                                            void *num)
61 {
62         return regs_get_kernel_stack_nth(regs,
63                                          (unsigned int)((unsigned long)num));
64 }
65
66 static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr)
67 {
68         unsigned long retval;
69
70         if (probe_kernel_address(addr, retval))
71                 return 0;
72         return retval;
73 }
74
75 static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num)
76 {
77         return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num));
78 }
79
80 static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs,
81                                               void *dummy)
82 {
83         return regs_return_value(regs);
84 }
85
86 static __kprobes unsigned long fetch_ip(struct pt_regs *regs, void *dummy)
87 {
88         return instruction_pointer(regs);
89 }
90
91 static __kprobes unsigned long fetch_stack_address(struct pt_regs *regs,
92                                                    void *dummy)
93 {
94         return kernel_stack_pointer(regs);
95 }
96
97 /* Memory fetching by symbol */
/*
 * Cached lookup of a symbol plus offset.
 * @symbol: heap-allocated copy of the symbol name (freed with the cache)
 * @offset: signed byte offset added to the resolved symbol address
 * @addr:   resolved address including @offset, or 0 if the lookup failed
 */
struct symbol_cache {
	char *symbol;
	long offset;
	unsigned long addr;
};
103
104 static unsigned long update_symbol_cache(struct symbol_cache *sc)
105 {
106         sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);
107         if (sc->addr)
108                 sc->addr += sc->offset;
109         return sc->addr;
110 }
111
112 static void free_symbol_cache(struct symbol_cache *sc)
113 {
114         kfree(sc->symbol);
115         kfree(sc);
116 }
117
118 static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
119 {
120         struct symbol_cache *sc;
121
122         if (!sym || strlen(sym) == 0)
123                 return NULL;
124         sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
125         if (!sc)
126                 return NULL;
127
128         sc->symbol = kstrdup(sym, GFP_KERNEL);
129         if (!sc->symbol) {
130                 kfree(sc);
131                 return NULL;
132         }
133         sc->offset = offset;
134
135         update_symbol_cache(sc);
136         return sc;
137 }
138
139 static __kprobes unsigned long fetch_symbol(struct pt_regs *regs, void *data)
140 {
141         struct symbol_cache *sc = data;
142
143         if (sc->addr)
144                 return fetch_memory(regs, (void *)sc->addr);
145         else
146                 return 0;
147 }
148
149 /* Special indirect memory access interface */
150 struct indirect_fetch_data {
151         struct fetch_func orig;
152         long offset;
153 };
154
155 static __kprobes unsigned long fetch_indirect(struct pt_regs *regs, void *data)
156 {
157         struct indirect_fetch_data *ind = data;
158         unsigned long addr;
159
160         addr = call_fetch(&ind->orig, regs);
161         if (addr) {
162                 addr += ind->offset;
163                 return fetch_memory(regs, (void *)addr);
164         } else
165                 return 0;
166 }
167
168 static __kprobes void free_indirect_fetch_data(struct indirect_fetch_data *data)
169 {
170         if (data->orig.func == fetch_indirect)
171                 free_indirect_fetch_data(data->orig.data);
172         else if (data->orig.func == fetch_symbol)
173                 free_symbol_cache(data->orig.data);
174         kfree(data);
175 }
176
177 /**
178  * kprobe_trace_core
179  */
180
181 struct trace_probe {
182         struct list_head        list;
183         union {
184                 struct kprobe           kp;
185                 struct kretprobe        rp;
186         };
187         const char              *symbol;        /* symbol name */
188         struct ftrace_event_call        call;
189         unsigned int            nr_args;
190         struct fetch_func       args[];
191 };
192
/* Bytes needed for a struct trace_probe carrying @n fetch arguments. */
#define SIZEOF_TRACE_PROBE(n)					\
	(offsetof(struct trace_probe, args) +			\
	 ((n) * sizeof(struct fetch_func)))
196
197 static int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs);
198 static int kretprobe_trace_func(struct kretprobe_instance *ri,
199                                 struct pt_regs *regs);
200
201 static __kprobes int probe_is_return(struct trace_probe *tp)
202 {
203         return (tp->rp.handler == kretprobe_trace_func);
204 }
205
206 static __kprobes const char *probe_symbol(struct trace_probe *tp)
207 {
208         return tp->symbol ? tp->symbol : "unknown";
209 }
210
211 static __kprobes long probe_offset(struct trace_probe *tp)
212 {
213         return (probe_is_return(tp)) ? tp->rp.kp.offset : tp->kp.offset;
214 }
215
216 static __kprobes void *probe_address(struct trace_probe *tp)
217 {
218         return (probe_is_return(tp)) ? tp->rp.kp.addr : tp->kp.addr;
219 }
220
221 static int trace_arg_string(char *buf, size_t n, struct fetch_func *ff)
222 {
223         int ret = -EINVAL;
224
225         if (ff->func == fetch_argument)
226                 ret = snprintf(buf, n, "a%lu", (unsigned long)ff->data);
227         else if (ff->func == fetch_register) {
228                 const char *name;
229                 name = regs_query_register_name((unsigned int)((long)ff->data));
230                 ret = snprintf(buf, n, "%%%s", name);
231         } else if (ff->func == fetch_stack)
232                 ret = snprintf(buf, n, "s%lu", (unsigned long)ff->data);
233         else if (ff->func == fetch_memory)
234                 ret = snprintf(buf, n, "@0x%p", ff->data);
235         else if (ff->func == fetch_symbol) {
236                 struct symbol_cache *sc = ff->data;
237                 ret = snprintf(buf, n, "@%s%+ld", sc->symbol, sc->offset);
238         } else if (ff->func == fetch_retvalue)
239                 ret = snprintf(buf, n, "rv");
240         else if (ff->func == fetch_ip)
241                 ret = snprintf(buf, n, "ra");
242         else if (ff->func == fetch_stack_address)
243                 ret = snprintf(buf, n, "sa");
244         else if (ff->func == fetch_indirect) {
245                 struct indirect_fetch_data *id = ff->data;
246                 size_t l = 0;
247                 ret = snprintf(buf, n, "%+ld(", id->offset);
248                 if (ret >= n)
249                         goto end;
250                 l += ret;
251                 ret = trace_arg_string(buf + l, n - l, &id->orig);
252                 if (ret < 0)
253                         goto end;
254                 l += ret;
255                 ret = snprintf(buf + l, n - l, ")");
256                 ret += l;
257         }
258 end:
259         if (ret >= n)
260                 return -ENOSPC;
261         return ret;
262 }
263
264 static int register_probe_event(struct trace_probe *tp);
265 static void unregister_probe_event(struct trace_probe *tp);
266
/* All probes created through this tracer, linked by trace_probe.list. */
static LIST_HEAD(probe_list);
/* Mutex taken around walks and modifications of probe_list. */
static DEFINE_MUTEX(probe_lock);
269
270 static struct trace_probe *alloc_trace_probe(const char *symbol,
271                                              const char *event, int nargs)
272 {
273         struct trace_probe *tp;
274
275         tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL);
276         if (!tp)
277                 return ERR_PTR(-ENOMEM);
278
279         if (symbol) {
280                 tp->symbol = kstrdup(symbol, GFP_KERNEL);
281                 if (!tp->symbol)
282                         goto error;
283         }
284         if (!event)
285                 goto error;
286         tp->call.name = kstrdup(event, GFP_KERNEL);
287         if (!tp->call.name)
288                 goto error;
289
290         INIT_LIST_HEAD(&tp->list);
291         return tp;
292 error:
293         kfree(tp->symbol);
294         kfree(tp);
295         return ERR_PTR(-ENOMEM);
296 }
297
298 static void free_trace_probe(struct trace_probe *tp)
299 {
300         int i;
301
302         for (i = 0; i < tp->nr_args; i++)
303                 if (tp->args[i].func == fetch_symbol)
304                         free_symbol_cache(tp->args[i].data);
305                 else if (tp->args[i].func == fetch_indirect)
306                         free_indirect_fetch_data(tp->args[i].data);
307
308         kfree(tp->call.name);
309         kfree(tp->symbol);
310         kfree(tp);
311 }
312
313 static struct trace_probe *find_probe_event(const char *event)
314 {
315         struct trace_probe *tp;
316
317         list_for_each_entry(tp, &probe_list, list)
318                 if (!strcmp(tp->call.name, event))
319                         return tp;
320         return NULL;
321 }
322
323 static void __unregister_trace_probe(struct trace_probe *tp)
324 {
325         if (probe_is_return(tp))
326                 unregister_kretprobe(&tp->rp);
327         else
328                 unregister_kprobe(&tp->kp);
329 }
330
331 /* Unregister a trace_probe and probe_event: call with locking probe_lock */
332 static void unregister_trace_probe(struct trace_probe *tp)
333 {
334         unregister_probe_event(tp);
335         __unregister_trace_probe(tp);
336         list_del(&tp->list);
337 }
338
339 /* Register a trace_probe and probe_event */
340 static int register_trace_probe(struct trace_probe *tp)
341 {
342         struct trace_probe *old_tp;
343         int ret;
344
345         mutex_lock(&probe_lock);
346
347         if (probe_is_return(tp))
348                 ret = register_kretprobe(&tp->rp);
349         else
350                 ret = register_kprobe(&tp->kp);
351
352         if (ret) {
353                 pr_warning("Could not insert probe(%d)\n", ret);
354                 if (ret == -EILSEQ) {
355                         pr_warning("Probing address(0x%p) is not an "
356                                    "instruction boundary.\n",
357                                    probe_address(tp));
358                         ret = -EINVAL;
359                 }
360                 goto end;
361         }
362         /* register as an event */
363         old_tp = find_probe_event(tp->call.name);
364         if (old_tp) {
365                 /* delete old event */
366                 unregister_trace_probe(old_tp);
367                 free_trace_probe(old_tp);
368         }
369         ret = register_probe_event(tp);
370         if (ret) {
371                 pr_warning("Faild to register probe event(%d)\n", ret);
372                 __unregister_trace_probe(tp);
373         }
374         list_add_tail(&tp->list, &probe_list);
375 end:
376         mutex_unlock(&probe_lock);
377         return ret;
378 }
379
/*
 * Split "SYMBOL[+offs|-offs]" in place.
 *
 * On success @symbol is truncated at the sign character (if any) and
 * *@offset receives the signed offset, or 0 when no sign is present.
 * Returns 0 on success, -EINVAL when @offset is NULL, or the error from
 * strict_strtol() when the offset text is malformed.
 */
static int split_symbol_offset(char *symbol, long *offset)
{
	char *sign;
	int err;

	if (!offset)
		return -EINVAL;

	sign = strchr(symbol, '+');
	if (!sign)
		sign = strchr(symbol, '-');

	if (!sign) {
		*offset = 0;
		return 0;
	}

	/* skip sign because strict_strtol doesn't accept '+' */
	err = strict_strtol(sign + 1, 0, offset);
	if (err)
		return err;

	if (*sign == '-')
		*offset = -(*offset);
	*sign = '\0';
	return 0;
}
405
406 #define PARAM_MAX_ARGS 16
407 #define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
408
409 static int parse_trace_arg(char *arg, struct fetch_func *ff, int is_return)
410 {
411         int ret = 0;
412         unsigned long param;
413         long offset;
414         char *tmp;
415
416         switch (arg[0]) {
417         case 'a':       /* argument */
418                 ret = strict_strtoul(arg + 1, 10, &param);
419                 if (ret || param > PARAM_MAX_ARGS)
420                         ret = -EINVAL;
421                 else {
422                         ff->func = fetch_argument;
423                         ff->data = (void *)param;
424                 }
425                 break;
426         case 'r':       /* retval or retaddr */
427                 if (is_return && arg[1] == 'v') {
428                         ff->func = fetch_retvalue;
429                         ff->data = NULL;
430                 } else if (is_return && arg[1] == 'a') {
431                         ff->func = fetch_ip;
432                         ff->data = NULL;
433                 } else
434                         ret = -EINVAL;
435                 break;
436         case '%':       /* named register */
437                 ret = regs_query_register_offset(arg + 1);
438                 if (ret >= 0) {
439                         ff->func = fetch_register;
440                         ff->data = (void *)(unsigned long)ret;
441                         ret = 0;
442                 }
443                 break;
444         case 's':       /* stack */
445                 if (arg[1] == 'a') {
446                         ff->func = fetch_stack_address;
447                         ff->data = NULL;
448                 } else {
449                         ret = strict_strtoul(arg + 1, 10, &param);
450                         if (ret || param > PARAM_MAX_STACK)
451                                 ret = -EINVAL;
452                         else {
453                                 ff->func = fetch_stack;
454                                 ff->data = (void *)param;
455                         }
456                 }
457                 break;
458         case '@':       /* memory or symbol */
459                 if (isdigit(arg[1])) {
460                         ret = strict_strtoul(arg + 1, 0, &param);
461                         if (ret)
462                                 break;
463                         ff->func = fetch_memory;
464                         ff->data = (void *)param;
465                 } else {
466                         ret = split_symbol_offset(arg + 1, &offset);
467                         if (ret)
468                                 break;
469                         ff->data = alloc_symbol_cache(arg + 1,
470                                                               offset);
471                         if (ff->data)
472                                 ff->func = fetch_symbol;
473                         else
474                                 ret = -EINVAL;
475                 }
476                 break;
477         case '+':       /* indirect memory */
478         case '-':
479                 tmp = strchr(arg, '(');
480                 if (!tmp) {
481                         ret = -EINVAL;
482                         break;
483                 }
484                 *tmp = '\0';
485                 ret = strict_strtol(arg + 1, 0, &offset);
486                 if (ret)
487                         break;
488                 if (arg[0] == '-')
489                         offset = -offset;
490                 arg = tmp + 1;
491                 tmp = strrchr(arg, ')');
492                 if (tmp) {
493                         struct indirect_fetch_data *id;
494                         *tmp = '\0';
495                         id = kzalloc(sizeof(struct indirect_fetch_data),
496                                      GFP_KERNEL);
497                         if (!id)
498                                 return -ENOMEM;
499                         id->offset = offset;
500                         ret = parse_trace_arg(arg, &id->orig, is_return);
501                         if (ret)
502                                 kfree(id);
503                         else {
504                                 ff->func = fetch_indirect;
505                                 ff->data = (void *)id;
506                         }
507                 } else
508                         ret = -EINVAL;
509                 break;
510         default:
511                 /* TODO: support custom handler */
512                 ret = -EINVAL;
513         }
514         return ret;
515 }
516
517 static int create_trace_probe(int argc, char **argv)
518 {
519         /*
520          * Argument syntax:
521          *  - Add kprobe: p[:EVENT] SYMBOL[+OFFS|-OFFS]|ADDRESS [FETCHARGS]
522          *  - Add kretprobe: r[:EVENT] SYMBOL[+0] [FETCHARGS]
523          * Fetch args:
524          *  aN  : fetch Nth of function argument. (N:0-)
525          *  rv  : fetch return value
526          *  ra  : fetch return address
527          *  sa  : fetch stack address
528          *  sN  : fetch Nth of stack (N:0-)
529          *  @ADDR       : fetch memory at ADDR (ADDR should be in kernel)
530          *  @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
531          *  %REG        : fetch register REG
532          * Indirect memory fetch:
533          *  +|-offs(ARG) : fetch memory at ARG +|- offs address.
534          */
535         struct trace_probe *tp;
536         struct kprobe *kp;
537         int i, ret = 0;
538         int is_return = 0;
539         char *symbol = NULL, *event = NULL;
540         long offset = 0;
541         void *addr = NULL;
542
543         if (argc < 2)
544                 return -EINVAL;
545
546         if (argv[0][0] == 'p')
547                 is_return = 0;
548         else if (argv[0][0] == 'r')
549                 is_return = 1;
550         else
551                 return -EINVAL;
552
553         if (argv[0][1] == ':') {
554                 event = &argv[0][2];
555                 if (strlen(event) == 0) {
556                         pr_info("Event name is not specifiled\n");
557                         return -EINVAL;
558                 }
559         }
560
561         if (isdigit(argv[1][0])) {
562                 if (is_return)
563                         return -EINVAL;
564                 /* an address specified */
565                 ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr);
566                 if (ret)
567                         return ret;
568         } else {
569                 /* a symbol specified */
570                 symbol = argv[1];
571                 /* TODO: support .init module functions */
572                 ret = split_symbol_offset(symbol, &offset);
573                 if (ret)
574                         return ret;
575                 if (offset && is_return)
576                         return -EINVAL;
577         }
578         argc -= 2; argv += 2;
579
580         /* setup a probe */
581         if (!event) {
582                 /* Make a new event name */
583                 char buf[MAX_EVENT_NAME_LEN];
584                 if (symbol)
585                         snprintf(buf, MAX_EVENT_NAME_LEN, "%c@%s%+ld",
586                                  is_return ? 'r' : 'p', symbol, offset);
587                 else
588                         snprintf(buf, MAX_EVENT_NAME_LEN, "%c@0x%p",
589                                  is_return ? 'r' : 'p', addr);
590                 tp = alloc_trace_probe(symbol, buf, argc);
591         } else
592                 tp = alloc_trace_probe(symbol, event, argc);
593         if (IS_ERR(tp))
594                 return PTR_ERR(tp);
595
596         if (is_return) {
597                 kp = &tp->rp.kp;
598                 tp->rp.handler = kretprobe_trace_func;
599         } else {
600                 kp = &tp->kp;
601                 tp->kp.pre_handler = kprobe_trace_func;
602         }
603
604         if (tp->symbol) {
605                 kp->symbol_name = tp->symbol;
606                 kp->offset = offset;
607         } else
608                 kp->addr = addr;
609
610         /* parse arguments */
611         ret = 0;
612         for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
613                 if (strlen(argv[i]) > MAX_ARGSTR_LEN) {
614                         pr_info("Argument%d(%s) is too long.\n", i, argv[i]);
615                         ret = -ENOSPC;
616                         goto error;
617                 }
618                 ret = parse_trace_arg(argv[i], &tp->args[i], is_return);
619                 if (ret)
620                         goto error;
621         }
622         tp->nr_args = i;
623
624         ret = register_trace_probe(tp);
625         if (ret)
626                 goto error;
627         return 0;
628
629 error:
630         free_trace_probe(tp);
631         return ret;
632 }
633
634 static void cleanup_all_probes(void)
635 {
636         struct trace_probe *tp;
637
638         mutex_lock(&probe_lock);
639         /* TODO: Use batch unregistration */
640         while (!list_empty(&probe_list)) {
641                 tp = list_entry(probe_list.next, struct trace_probe, list);
642                 unregister_trace_probe(tp);
643                 free_trace_probe(tp);
644         }
645         mutex_unlock(&probe_lock);
646 }
647
648
649 /* Probes listing interfaces */
650 static void *probes_seq_start(struct seq_file *m, loff_t *pos)
651 {
652         mutex_lock(&probe_lock);
653         return seq_list_start(&probe_list, *pos);
654 }
655
656 static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
657 {
658         return seq_list_next(v, &probe_list, pos);
659 }
660
661 static void probes_seq_stop(struct seq_file *m, void *v)
662 {
663         mutex_unlock(&probe_lock);
664 }
665
666 static int probes_seq_show(struct seq_file *m, void *v)
667 {
668         struct trace_probe *tp = v;
669         int i, ret;
670         char buf[MAX_ARGSTR_LEN + 1];
671
672         seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p');
673         seq_printf(m, ":%s", tp->call.name);
674
675         if (tp->symbol)
676                 seq_printf(m, " %s%+ld", probe_symbol(tp), probe_offset(tp));
677         else
678                 seq_printf(m, " 0x%p", probe_address(tp));
679
680         for (i = 0; i < tp->nr_args; i++) {
681                 ret = trace_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]);
682                 if (ret < 0) {
683                         pr_warning("Argument%d decoding error(%d).\n", i, ret);
684                         return ret;
685                 }
686                 seq_printf(m, " %s", buf);
687         }
688         seq_printf(m, "\n");
689         return 0;
690 }
691
692 static const struct seq_operations probes_seq_op = {
693         .start  = probes_seq_start,
694         .next   = probes_seq_next,
695         .stop   = probes_seq_stop,
696         .show   = probes_seq_show
697 };
698
699 static int probes_open(struct inode *inode, struct file *file)
700 {
701         if ((file->f_mode & FMODE_WRITE) &&
702             (file->f_flags & O_TRUNC))
703                 cleanup_all_probes();
704
705         return seq_open(file, &probes_seq_op);
706 }
707
708 static int command_trace_probe(const char *buf)
709 {
710         char **argv;
711         int argc = 0, ret = 0;
712
713         argv = argv_split(GFP_KERNEL, buf, &argc);
714         if (!argv)
715                 return -ENOMEM;
716
717         if (argc)
718                 ret = create_trace_probe(argc, argv);
719
720         argv_free(argv);
721         return ret;
722 }
723
724 #define WRITE_BUFSIZE 128
725
726 static ssize_t probes_write(struct file *file, const char __user *buffer,
727                             size_t count, loff_t *ppos)
728 {
729         char *kbuf, *tmp;
730         int ret;
731         size_t done;
732         size_t size;
733
734         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
735         if (!kbuf)
736                 return -ENOMEM;
737
738         ret = done = 0;
739         while (done < count) {
740                 size = count - done;
741                 if (size >= WRITE_BUFSIZE)
742                         size = WRITE_BUFSIZE - 1;
743                 if (copy_from_user(kbuf, buffer + done, size)) {
744                         ret = -EFAULT;
745                         goto out;
746                 }
747                 kbuf[size] = '\0';
748                 tmp = strchr(kbuf, '\n');
749                 if (tmp) {
750                         *tmp = '\0';
751                         size = tmp - kbuf + 1;
752                 } else if (done + size < count) {
753                         pr_warning("Line length is too long: "
754                                    "Should be less than %d.", WRITE_BUFSIZE);
755                         ret = -EINVAL;
756                         goto out;
757                 }
758                 done += size;
759                 /* Remove comments */
760                 tmp = strchr(kbuf, '#');
761                 if (tmp)
762                         *tmp = '\0';
763
764                 ret = command_trace_probe(kbuf);
765                 if (ret)
766                         goto out;
767         }
768         ret = done;
769 out:
770         kfree(kbuf);
771         return ret;
772 }
773
774 static const struct file_operations kprobe_events_ops = {
775         .owner          = THIS_MODULE,
776         .open           = probes_open,
777         .read           = seq_read,
778         .llseek         = seq_lseek,
779         .release        = seq_release,
780         .write          = probes_write,
781 };
782
783 /* Kprobe handler */
784 static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
785 {
786         struct trace_probe *tp = container_of(kp, struct trace_probe, kp);
787         struct kprobe_trace_entry *entry;
788         struct ring_buffer_event *event;
789         int size, i, pc;
790         unsigned long irq_flags;
791         struct ftrace_event_call *call = &tp->call;
792
793         local_save_flags(irq_flags);
794         pc = preempt_count();
795
796         size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
797
798         event = trace_current_buffer_lock_reserve(TRACE_KPROBE, size,
799                                                   irq_flags, pc);
800         if (!event)
801                 return 0;
802
803         entry = ring_buffer_event_data(event);
804         entry->nargs = tp->nr_args;
805         entry->ip = (unsigned long)kp->addr;
806         for (i = 0; i < tp->nr_args; i++)
807                 entry->args[i] = call_fetch(&tp->args[i], regs);
808
809         if (!filter_current_check_discard(call, entry, event))
810                 trace_nowake_buffer_unlock_commit(event, irq_flags, pc);
811         return 0;
812 }
813
814 /* Kretprobe handler */
815 static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
816                                           struct pt_regs *regs)
817 {
818         struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
819         struct kretprobe_trace_entry *entry;
820         struct ring_buffer_event *event;
821         int size, i, pc;
822         unsigned long irq_flags;
823         struct ftrace_event_call *call = &tp->call;
824
825         local_save_flags(irq_flags);
826         pc = preempt_count();
827
828         size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
829
830         event = trace_current_buffer_lock_reserve(TRACE_KRETPROBE, size,
831                                                   irq_flags, pc);
832         if (!event)
833                 return 0;
834
835         entry = ring_buffer_event_data(event);
836         entry->nargs = tp->nr_args;
837         entry->func = (unsigned long)probe_address(tp);
838         entry->ret_ip = (unsigned long)ri->ret_addr;
839         for (i = 0; i < tp->nr_args; i++)
840                 entry->args[i] = call_fetch(&tp->args[i], regs);
841
842         if (!filter_current_check_discard(call, entry, event))
843                 trace_nowake_buffer_unlock_commit(event, irq_flags, pc);
844
845         return 0;
846 }
847
848 /* Event entry printers */
849 enum print_line_t
850 print_kprobe_event(struct trace_iterator *iter, int flags)
851 {
852         struct kprobe_trace_entry *field;
853         struct trace_seq *s = &iter->seq;
854         int i;
855
856         trace_assign_type(field, iter->ent);
857
858         if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
859                 goto partial;
860
861         if (!trace_seq_puts(s, ":"))
862                 goto partial;
863
864         for (i = 0; i < field->nargs; i++)
865                 if (!trace_seq_printf(s, " 0x%lx", field->args[i]))
866                         goto partial;
867
868         if (!trace_seq_puts(s, "\n"))
869                 goto partial;
870
871         return TRACE_TYPE_HANDLED;
872 partial:
873         return TRACE_TYPE_PARTIAL_LINE;
874 }
875
876 enum print_line_t
877 print_kretprobe_event(struct trace_iterator *iter, int flags)
878 {
879         struct kretprobe_trace_entry *field;
880         struct trace_seq *s = &iter->seq;
881         int i;
882
883         trace_assign_type(field, iter->ent);
884
885         if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
886                 goto partial;
887
888         if (!trace_seq_puts(s, " <- "))
889                 goto partial;
890
891         if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
892                 goto partial;
893
894         if (!trace_seq_puts(s, ":"))
895                 goto partial;
896
897         for (i = 0; i < field->nargs; i++)
898                 if (!trace_seq_printf(s, " 0x%lx", field->args[i]))
899                         goto partial;
900
901         if (!trace_seq_puts(s, "\n"))
902                 goto partial;
903
904         return TRACE_TYPE_HANDLED;
905 partial:
906         return TRACE_TYPE_PARTIAL_LINE;
907 }
908
909 static struct trace_event kprobe_trace_event = {
910         .type           = TRACE_KPROBE,
911         .trace          = print_kprobe_event,
912 };
913
914 static struct trace_event kretprobe_trace_event = {
915         .type           = TRACE_KRETPROBE,
916         .trace          = print_kretprobe_event,
917 };
918
919 static int probe_event_enable(struct ftrace_event_call *call)
920 {
921         struct trace_probe *tp = (struct trace_probe *)call->data;
922
923         if (probe_is_return(tp))
924                 return enable_kretprobe(&tp->rp);
925         else
926                 return enable_kprobe(&tp->kp);
927 }
928
929 static void probe_event_disable(struct ftrace_event_call *call)
930 {
931         struct trace_probe *tp = (struct trace_probe *)call->data;
932
933         if (probe_is_return(tp))
934                 disable_kretprobe(&tp->rp);
935         else
936                 disable_kprobe(&tp->kp);
937 }
938
939 static int probe_event_raw_init(struct ftrace_event_call *event_call)
940 {
941         INIT_LIST_HEAD(&event_call->fields);
942         init_preds(event_call);
943         return 0;
944 }
945
/*
 * DEFINE_FIELD - register one member of the local entry struct as an
 * event field via trace_define_field().
 *
 * Relies on names from the expanding function: 'event_call' (the event
 * being described), 'field' (a variable of the entry struct type, used only
 * for offsetof/sizeof) and 'ret' (an int receiving the result).  A non-zero
 * result makes the *enclosing function* return it immediately.
 */
#undef DEFINE_FIELD
#define DEFINE_FIELD(type, item, name, is_signed)                       \
        do {                                                            \
                ret = trace_define_field(event_call, #type, name,       \
                                         offsetof(typeof(field), item), \
                                         sizeof(field.item), is_signed, \
                                         FILTER_OTHER);                 \
                if (ret)                                                \
                        return ret;                                     \
        } while (0)
956
957 static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
958 {
959         int ret, i;
960         struct kprobe_trace_entry field;
961         char buf[MAX_ARGSTR_LEN + 1];
962         struct trace_probe *tp = (struct trace_probe *)event_call->data;
963
964         ret = trace_define_common_fields(event_call);
965         if (!ret)
966                 return ret;
967
968         DEFINE_FIELD(unsigned long, ip, "ip", 0);
969         DEFINE_FIELD(int, nargs, "nargs", 1);
970         for (i = 0; i < tp->nr_args; i++) {
971                 /* Set argN as a field */
972                 sprintf(buf, "arg%d", i);
973                 DEFINE_FIELD(unsigned long, args[i], buf, 0);
974                 /* Set argument string as an alias field */
975                 ret = trace_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]);
976                 if (ret < 0)
977                         return ret;
978                 DEFINE_FIELD(unsigned long, args[i], buf, 0);
979         }
980         return 0;
981 }
982
983 static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
984 {
985         int ret, i;
986         struct kretprobe_trace_entry field;
987         char buf[MAX_ARGSTR_LEN + 1];
988         struct trace_probe *tp = (struct trace_probe *)event_call->data;
989
990         ret = trace_define_common_fields(event_call);
991         if (!ret)
992                 return ret;
993
994         DEFINE_FIELD(unsigned long, func, "func", 0);
995         DEFINE_FIELD(unsigned long, ret_ip, "ret_ip", 0);
996         DEFINE_FIELD(int, nargs, "nargs", 1);
997         for (i = 0; i < tp->nr_args; i++) {
998                 /* Set argN as a field */
999                 sprintf(buf, "arg%d", i);
1000                 DEFINE_FIELD(unsigned long, args[i], buf, 0);
1001                 /* Set argument string as an alias field */
1002                 ret = trace_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]);
1003                 if (ret < 0)
1004                         return ret;
1005                 DEFINE_FIELD(unsigned long, args[i], buf, 0);
1006         }
1007         return 0;
1008 }
1009
1010 static int __probe_event_show_format(struct trace_seq *s,
1011                                      struct trace_probe *tp, const char *fmt,
1012                                      const char *arg)
1013 {
1014         int i, ret;
1015         char buf[MAX_ARGSTR_LEN + 1];
1016
1017         /* Show aliases */
1018         for (i = 0; i < tp->nr_args; i++) {
1019                 ret = trace_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]);
1020                 if (ret < 0)
1021                         return ret;
1022                 if (!trace_seq_printf(s, "\talias: %s;\toriginal: arg%d;\n",
1023                                       buf, i))
1024                         return 0;
1025         }
1026         /* Show format */
1027         if (!trace_seq_printf(s, "\nprint fmt: \"%s", fmt))
1028                 return 0;
1029
1030         for (i = 0; i < tp->nr_args; i++)
1031                 if (!trace_seq_puts(s, " 0x%lx"))
1032                         return 0;
1033
1034         if (!trace_seq_printf(s, "\", %s", arg))
1035                 return 0;
1036
1037         for (i = 0; i < tp->nr_args; i++)
1038                 if (!trace_seq_printf(s, ", arg%d", i))
1039                         return 0;
1040
1041         return trace_seq_puts(s, "\n");
1042 }
1043
/*
 * SHOW_FIELD - print one "field:" line of an event format description:
 *
 *      \tfield: <type> <name>;\toffset:<N>;\tsize:<N>;\n
 *
 * Relies on names from the expanding function: 's' (the trace_seq written
 * to), 'field' (a variable of the entry struct type, used only for
 * offsetof) and 'ret' (an int receiving the result).  If the write fails,
 * the *enclosing function* returns 0 immediately.
 *
 * The separator before "size" is a tab; it used to be spelled "tsize"
 * because the backslash of "\t" was missing.
 */
#undef SHOW_FIELD
#define SHOW_FIELD(type, item, name)                                    \
        do {                                                            \
                ret = trace_seq_printf(s, "\tfield: " #type " %s;\t"    \
                                "offset:%u;\tsize:%u;\n", name,         \
                                (unsigned int)offsetof(typeof(field), item),\
                                (unsigned int)sizeof(type));            \
                if (!ret)                                               \
                        return 0;                                       \
        } while (0)
1054
1055 static int kprobe_event_show_format(struct ftrace_event_call *call,
1056                                     struct trace_seq *s)
1057 {
1058         struct kprobe_trace_entry field __attribute__((unused));
1059         int ret, i;
1060         char buf[8];
1061         struct trace_probe *tp = (struct trace_probe *)call->data;
1062
1063         SHOW_FIELD(unsigned long, ip, "ip");
1064         SHOW_FIELD(int, nargs, "nargs");
1065
1066         /* Show fields */
1067         for (i = 0; i < tp->nr_args; i++) {
1068                 sprintf(buf, "arg%d", i);
1069                 SHOW_FIELD(unsigned long, args[i], buf);
1070         }
1071         trace_seq_puts(s, "\n");
1072
1073         return __probe_event_show_format(s, tp, "%lx:", "ip");
1074 }
1075
1076 static int kretprobe_event_show_format(struct ftrace_event_call *call,
1077                                        struct trace_seq *s)
1078 {
1079         struct kretprobe_trace_entry field __attribute__((unused));
1080         int ret, i;
1081         char buf[8];
1082         struct trace_probe *tp = (struct trace_probe *)call->data;
1083
1084         SHOW_FIELD(unsigned long, func, "func");
1085         SHOW_FIELD(unsigned long, ret_ip, "ret_ip");
1086         SHOW_FIELD(int, nargs, "nargs");
1087
1088         /* Show fields */
1089         for (i = 0; i < tp->nr_args; i++) {
1090                 sprintf(buf, "arg%d", i);
1091                 SHOW_FIELD(unsigned long, args[i], buf);
1092         }
1093         trace_seq_puts(s, "\n");
1094
1095         return __probe_event_show_format(s, tp, "%lx <- %lx:",
1096                                           "func, ret_ip");
1097 }
1098
1099 static int register_probe_event(struct trace_probe *tp)
1100 {
1101         struct ftrace_event_call *call = &tp->call;
1102         int ret;
1103
1104         /* Initialize ftrace_event_call */
1105         call->system = "kprobes";
1106         if (probe_is_return(tp)) {
1107                 call->event = &kretprobe_trace_event;
1108                 call->id = TRACE_KRETPROBE;
1109                 call->raw_init = probe_event_raw_init;
1110                 call->show_format = kretprobe_event_show_format;
1111                 call->define_fields = kretprobe_event_define_fields;
1112         } else {
1113                 call->event = &kprobe_trace_event;
1114                 call->id = TRACE_KPROBE;
1115                 call->raw_init = probe_event_raw_init;
1116                 call->show_format = kprobe_event_show_format;
1117                 call->define_fields = kprobe_event_define_fields;
1118         }
1119         call->enabled = 1;
1120         call->regfunc = probe_event_enable;
1121         call->unregfunc = probe_event_disable;
1122         call->data = tp;
1123         ret = trace_add_event_call(call);
1124         if (ret)
1125                 pr_info("Failed to register kprobe event: %s\n", call->name);
1126         return ret;
1127 }
1128
1129 static void unregister_probe_event(struct trace_probe *tp)
1130 {
1131         /*
1132          * Prevent to unregister event itself because the event is shared
1133          * among other probes.
1134          */
1135         tp->call.event = NULL;
1136         trace_remove_event_call(&tp->call);
1137 }
1138
1139 /* Make a debugfs interface for controling probe points */
1140 static __init int init_kprobe_trace(void)
1141 {
1142         struct dentry *d_tracer;
1143         struct dentry *entry;
1144         int ret;
1145
1146         ret = register_ftrace_event(&kprobe_trace_event);
1147         if (!ret) {
1148                 pr_warning("Could not register kprobe_trace_event type.\n");
1149                 return 0;
1150         }
1151         ret = register_ftrace_event(&kretprobe_trace_event);
1152         if (!ret) {
1153                 pr_warning("Could not register kretprobe_trace_event type.\n");
1154                 return 0;
1155         }
1156
1157         d_tracer = tracing_init_dentry();
1158         if (!d_tracer)
1159                 return 0;
1160
1161         entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
1162                                     NULL, &kprobe_events_ops);
1163
1164         if (!entry)
1165                 pr_warning("Could not create debugfs "
1166                            "'kprobe_events' entry\n");
1167         return 0;
1168 }
1169 fs_initcall(init_kprobe_trace);
1170
1171
1172 #ifdef CONFIG_FTRACE_STARTUP_TEST
1173
/*
 * Probe target for the startup self test: takes six integers and returns
 * their sum, accumulated left to right.
 */
static int kprobe_trace_selftest_target(int a1, int a2, int a3,
                                        int a4, int a5, int a6)
{
        int sum = a1;

        sum += a2;
        sum += a3;
        sum += a4;
        sum += a5;
        sum += a6;

        return sum;
}
1179
1180 static __init int kprobe_trace_self_tests_init(void)
1181 {
1182         int ret;
1183         int (*target)(int, int, int, int, int, int);
1184
1185         target = kprobe_trace_selftest_target;
1186
1187         pr_info("Testing kprobe tracing: ");
1188
1189         ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
1190                                   "a1 a2 a3 a4 a5 a6");
1191         if (WARN_ON_ONCE(ret))
1192                 pr_warning("error enabling function entry\n");
1193
1194         ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
1195                                   "ra rv");
1196         if (WARN_ON_ONCE(ret))
1197                 pr_warning("error enabling function return\n");
1198
1199         ret = target(1, 2, 3, 4, 5, 6);
1200
1201         cleanup_all_probes();
1202
1203         pr_cont("OK\n");
1204         return 0;
1205 }
1206
1207 late_initcall(kprobe_trace_self_tests_init);
1208
1209 #endif