Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ieee1394...
[pandora-kernel.git] / kernel / trace / trace_kprobe.c
1 /*
2  * Kprobes-based tracing events
3  *
4  * Created by Masami Hiramatsu <mhiramat@redhat.com>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  */
19
20 #include <linux/module.h>
21 #include <linux/uaccess.h>
22 #include <linux/kprobes.h>
23 #include <linux/seq_file.h>
24 #include <linux/slab.h>
25 #include <linux/smp.h>
26 #include <linux/debugfs.h>
27 #include <linux/types.h>
28 #include <linux/string.h>
29 #include <linux/ctype.h>
30 #include <linux/ptrace.h>
31 #include <linux/perf_event.h>
32 #include <linux/stringify.h>
33 #include <asm/bitsperlong.h>
34
35 #include "trace.h"
36 #include "trace_output.h"
37
/* Limits applied to probe definitions, and the default event group */
#define MAX_TRACE_ARGS		128		/* most fetch arguments parsed per probe */
#define MAX_ARGSTR_LEN		63		/* longest accepted fetch-argument string */
#define MAX_EVENT_NAME_LEN	64		/* buffer size for generated event names */
#define KPROBE_EVENT_SYSTEM	"kprobes"	/* group used when none is given */
42
/* Field names used by the probe events themselves */
#define FIELD_STRING_IP		"__probe_ip"
#define FIELD_STRING_RETIP	"__probe_ret_ip"
#define FIELD_STRING_FUNC	"__probe_func"

/*
 * Names that a probe argument must not use; conflict_field_name() compares
 * every new argument name against this list.
 */
const char *reserved_field_names[] = {
	/* common_* names */
	"common_type",
	"common_flags",
	"common_preempt_count",
	"common_pid",
	"common_tgid",
	"common_lock_depth",
	/* probe-specific names */
	FIELD_STRING_IP,
	FIELD_STRING_RETIP,
	FIELD_STRING_FUNC,
};
59
60 /* Printing function type */
61 typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *);
62 #define PRINT_TYPE_FUNC_NAME(type)      print_type_##type
63 #define PRINT_TYPE_FMT_NAME(type)       print_type_format_##type
64
65 /* Printing  in basic type function template */
66 #define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt, cast)                   \
67 static __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s,    \
68                                                 const char *name, void *data)\
69 {                                                                       \
70         return trace_seq_printf(s, " %s=" fmt, name, (cast)*(type *)data);\
71 }                                                                       \
72 static const char PRINT_TYPE_FMT_NAME(type)[] = fmt;
73
74 DEFINE_BASIC_PRINT_TYPE_FUNC(u8, "%x", unsigned int)
75 DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "%x", unsigned int)
76 DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "%lx", unsigned long)
77 DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "%llx", unsigned long long)
78 DEFINE_BASIC_PRINT_TYPE_FUNC(s8, "%d", int)
79 DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d", int)
80 DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%ld", long)
81 DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%lld", long long)
82
/*
 * A fetch function produces one value at probe time.  It is given the
 * probed registers, a private cookie and the destination to store into.
 */
typedef void (*fetch_func_t)(struct pt_regs *, void *, void *);

/* A fetch function together with the cookie it is called with */
struct fetch_param {
	fetch_func_t	fn;	/* function doing the fetch */
	void		*data;	/* passed to fn as its second argument */
};
90
91 static __kprobes void call_fetch(struct fetch_param *fprm,
92                                  struct pt_regs *regs, void *dest)
93 {
94         return fprm->fn(regs, fprm->data, dest);
95 }
96
/* Name of the fetch function for a given fetch kind and basic type */
#define FETCH_FUNC_NAME(kind, type)	fetch_##kind##_##type

/*
 * Define macro for basic types - we don't need to define s* types, because
 * we have to care only about bitwidth at recording time.
 *
 * Expands DEFINE_FETCH_<kind>() once for each of u8, u16, u32 and u64.
 */
#define DEFINE_BASIC_FETCH_FUNCS(kind)	\
DEFINE_FETCH_##kind(u8)			\
DEFINE_FETCH_##kind(u16)		\
DEFINE_FETCH_##kind(u32)		\
DEFINE_FETCH_##kind(u64)

/*
 * True if @fn is one of the four basic fetch functions of @kind.
 *
 * @fn is parenthesized so that an argument containing an operator of lower
 * precedence than '==' (for example a conditional expression) is compared
 * as a whole.  Note that @fn is evaluated up to four times.
 */
#define CHECK_BASIC_FETCH_FUNCS(kind, fn)		\
	((FETCH_FUNC_NAME(kind, u8) == (fn)) ||		\
	 (FETCH_FUNC_NAME(kind, u16) == (fn)) ||	\
	 (FETCH_FUNC_NAME(kind, u32) == (fn)) ||	\
	 (FETCH_FUNC_NAME(kind, u64) == (fn)))
113
114 /* Data fetch function templates */
115 #define DEFINE_FETCH_reg(type)                                          \
116 static __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs,  \
117                                           void *offset, void *dest)     \
118 {                                                                       \
119         *(type *)dest = (type)regs_get_register(regs,                   \
120                                 (unsigned int)((unsigned long)offset)); \
121 }
122 DEFINE_BASIC_FETCH_FUNCS(reg)
123
124 #define DEFINE_FETCH_stack(type)                                        \
125 static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\
126                                           void *offset, void *dest)     \
127 {                                                                       \
128         *(type *)dest = (type)regs_get_kernel_stack_nth(regs,           \
129                                 (unsigned int)((unsigned long)offset)); \
130 }
131 DEFINE_BASIC_FETCH_FUNCS(stack)
132
133 #define DEFINE_FETCH_retval(type)                                       \
134 static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\
135                                           void *dummy, void *dest)      \
136 {                                                                       \
137         *(type *)dest = (type)regs_return_value(regs);                  \
138 }
139 DEFINE_BASIC_FETCH_FUNCS(retval)
140
141 #define DEFINE_FETCH_memory(type)                                       \
142 static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
143                                           void *addr, void *dest)       \
144 {                                                                       \
145         type retval;                                                    \
146         if (probe_kernel_address(addr, retval))                         \
147                 *(type *)dest = 0;                                      \
148         else                                                            \
149                 *(type *)dest = retval;                                 \
150 }
151 DEFINE_BASIC_FETCH_FUNCS(memory)
152
/* Memory fetching by symbol: the symbol's address is looked up and cached */
struct symbol_cache {
	char		*symbol;	/* symbol name (kstrdup'ed copy) */
	long		offset;		/* added to the symbol's address */
	unsigned long	addr;		/* cached address, 0 if lookup failed */
};
159
160 static unsigned long update_symbol_cache(struct symbol_cache *sc)
161 {
162         sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);
163         if (sc->addr)
164                 sc->addr += sc->offset;
165         return sc->addr;
166 }
167
168 static void free_symbol_cache(struct symbol_cache *sc)
169 {
170         kfree(sc->symbol);
171         kfree(sc);
172 }
173
174 static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
175 {
176         struct symbol_cache *sc;
177
178         if (!sym || strlen(sym) == 0)
179                 return NULL;
180         sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
181         if (!sc)
182                 return NULL;
183
184         sc->symbol = kstrdup(sym, GFP_KERNEL);
185         if (!sc->symbol) {
186                 kfree(sc);
187                 return NULL;
188         }
189         sc->offset = offset;
190
191         update_symbol_cache(sc);
192         return sc;
193 }
194
195 #define DEFINE_FETCH_symbol(type)                                       \
196 static __kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,\
197                                           void *data, void *dest)       \
198 {                                                                       \
199         struct symbol_cache *sc = data;                                 \
200         if (sc->addr)                                                   \
201                 fetch_memory_##type(regs, (void *)sc->addr, dest);      \
202         else                                                            \
203                 *(type *)dest = 0;                                      \
204 }
205 DEFINE_BASIC_FETCH_FUNCS(symbol)
206
207 /* Dereference memory access function */
208 struct deref_fetch_param {
209         struct fetch_param orig;
210         long offset;
211 };
212
213 #define DEFINE_FETCH_deref(type)                                        \
214 static __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,\
215                                             void *data, void *dest)     \
216 {                                                                       \
217         struct deref_fetch_param *dprm = data;                          \
218         unsigned long addr;                                             \
219         call_fetch(&dprm->orig, regs, &addr);                           \
220         if (addr) {                                                     \
221                 addr += dprm->offset;                                   \
222                 fetch_memory_##type(regs, (void *)addr, dest);          \
223         } else                                                          \
224                 *(type *)dest = 0;                                      \
225 }
226 DEFINE_BASIC_FETCH_FUNCS(deref)
227
228 static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
229 {
230         if (CHECK_BASIC_FETCH_FUNCS(deref, data->orig.fn))
231                 free_deref_fetch_param(data->orig.data);
232         else if (CHECK_BASIC_FETCH_FUNCS(symbol, data->orig.fn))
233                 free_symbol_cache(data->orig.data);
234         kfree(data);
235 }
236
/* Default fetch type: u<BITS_PER_LONG>, i.e. as wide as unsigned long */
#define __DEFAULT_FETCH_TYPE(t)	u##t
#define _DEFAULT_FETCH_TYPE(t)	__DEFAULT_FETCH_TYPE(t)
#define DEFAULT_FETCH_TYPE	_DEFAULT_FETCH_TYPE(BITS_PER_LONG)
#define DEFAULT_FETCH_TYPE_STR	__stringify(DEFAULT_FETCH_TYPE)

/* Initializer for one fetch-function slot of struct fetch_type */
#define ASSIGN_FETCH_FUNC(kind, type)	.kind = FETCH_FUNC_NAME(kind, type)

/*
 * Initializer for one fetch_type_table entry.  @ptype selects the name and
 * the print function/format, @ftype selects the size and fetch functions,
 * @sign is stored as the signedness flag.
 */
#define ASSIGN_FETCH_TYPE(ptype, ftype, sign)			\
	{							\
		.name		= #ptype,			\
		.size		= sizeof(ftype),		\
		.is_signed	= sign,				\
		.print		= PRINT_TYPE_FUNC_NAME(ptype),	\
		.fmt		= PRINT_TYPE_FMT_NAME(ptype),	\
		ASSIGN_FETCH_FUNC(reg, ftype),			\
		ASSIGN_FETCH_FUNC(stack, ftype),		\
		ASSIGN_FETCH_FUNC(retval, ftype),		\
		ASSIGN_FETCH_FUNC(memory, ftype),		\
		ASSIGN_FETCH_FUNC(symbol, ftype),		\
		ASSIGN_FETCH_FUNC(deref, ftype),		\
	}
259
260 /* Fetch type information table */
261 static const struct fetch_type {
262         const char      *name;          /* Name of type */
263         size_t          size;           /* Byte size of type */
264         int             is_signed;      /* Signed flag */
265         print_type_func_t       print;  /* Print functions */
266         const char      *fmt;           /* Fromat string */
267         /* Fetch functions */
268         fetch_func_t    reg;
269         fetch_func_t    stack;
270         fetch_func_t    retval;
271         fetch_func_t    memory;
272         fetch_func_t    symbol;
273         fetch_func_t    deref;
274 } fetch_type_table[] = {
275         ASSIGN_FETCH_TYPE(u8,  u8,  0),
276         ASSIGN_FETCH_TYPE(u16, u16, 0),
277         ASSIGN_FETCH_TYPE(u32, u32, 0),
278         ASSIGN_FETCH_TYPE(u64, u64, 0),
279         ASSIGN_FETCH_TYPE(s8,  u8,  1),
280         ASSIGN_FETCH_TYPE(s16, u16, 1),
281         ASSIGN_FETCH_TYPE(s32, u32, 1),
282         ASSIGN_FETCH_TYPE(s64, u64, 1),
283 };
284
285 static const struct fetch_type *find_fetch_type(const char *type)
286 {
287         int i;
288
289         if (!type)
290                 type = DEFAULT_FETCH_TYPE_STR;
291
292         for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++)
293                 if (strcmp(type, fetch_type_table[i].name) == 0)
294                         return &fetch_type_table[i];
295         return NULL;
296 }
297
298 /* Special function : only accept unsigned long */
299 static __kprobes void fetch_stack_address(struct pt_regs *regs,
300                                           void *dummy, void *dest)
301 {
302         *(unsigned long *)dest = kernel_stack_pointer(regs);
303 }
304
305 /**
306  * Kprobe event core functions
307  */
308
309 struct probe_arg {
310         struct fetch_param      fetch;
311         unsigned int            offset; /* Offset from argument entry */
312         const char              *name;  /* Name of this argument */
313         const char              *comm;  /* Command of this argument */
314         const struct fetch_type *type;  /* Type of this argument */
315 };
316
317 /* Flags for trace_probe */
318 #define TP_FLAG_TRACE   1
319 #define TP_FLAG_PROFILE 2
320
321 struct trace_probe {
322         struct list_head        list;
323         struct kretprobe        rp;     /* Use rp.kp for kprobe use */
324         unsigned long           nhit;
325         unsigned int            flags;  /* For TP_FLAG_* */
326         const char              *symbol;        /* symbol name */
327         struct ftrace_event_class       class;
328         struct ftrace_event_call        call;
329         ssize_t                 size;           /* trace entry size */
330         unsigned int            nr_args;
331         struct probe_arg        args[];
332 };
333
334 #define SIZEOF_TRACE_PROBE(n)                   \
335         (offsetof(struct trace_probe, args) +   \
336         (sizeof(struct probe_arg) * (n)))
337
338
339 static __kprobes int probe_is_return(struct trace_probe *tp)
340 {
341         return tp->rp.handler != NULL;
342 }
343
344 static __kprobes const char *probe_symbol(struct trace_probe *tp)
345 {
346         return tp->symbol ? tp->symbol : "unknown";
347 }
348
/* Forward declarations: event (un)registration */
static int register_probe_event(struct trace_probe *tp);
static void unregister_probe_event(struct trace_probe *tp);

/* Forward declarations: probe handlers installed by alloc_trace_probe() */
static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
static int kretprobe_dispatcher(struct kretprobe_instance *ri,
				struct pt_regs *regs);

/* List of registered probes and the mutex taken around changes to it */
static DEFINE_MUTEX(probe_lock);
static LIST_HEAD(probe_list);
358
/*
 * Check the name is good for event/group.
 *
 * A good name starts with a letter or '_' and continues with letters,
 * digits or '_'.  Returns 1 for a good name and 0 otherwise; the empty
 * string is rejected by the first-character test.
 */
static int check_event_name(const char *name)
{
	const char *p = name;

	if (*p != '_' && !isalpha(*p))
		return 0;

	for (p++; *p != '\0'; p++) {
		if (*p == '_' || isalpha(*p) || isdigit(*p))
			continue;
		return 0;
	}
	return 1;
}
370
371 /*
372  * Allocate new trace_probe and initialize it (including kprobes).
373  */
374 static struct trace_probe *alloc_trace_probe(const char *group,
375                                              const char *event,
376                                              void *addr,
377                                              const char *symbol,
378                                              unsigned long offs,
379                                              int nargs, int is_return)
380 {
381         struct trace_probe *tp;
382         int ret = -ENOMEM;
383
384         tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL);
385         if (!tp)
386                 return ERR_PTR(ret);
387
388         if (symbol) {
389                 tp->symbol = kstrdup(symbol, GFP_KERNEL);
390                 if (!tp->symbol)
391                         goto error;
392                 tp->rp.kp.symbol_name = tp->symbol;
393                 tp->rp.kp.offset = offs;
394         } else
395                 tp->rp.kp.addr = addr;
396
397         if (is_return)
398                 tp->rp.handler = kretprobe_dispatcher;
399         else
400                 tp->rp.kp.pre_handler = kprobe_dispatcher;
401
402         if (!event || !check_event_name(event)) {
403                 ret = -EINVAL;
404                 goto error;
405         }
406
407         tp->call.class = &tp->class;
408         tp->call.name = kstrdup(event, GFP_KERNEL);
409         if (!tp->call.name)
410                 goto error;
411
412         if (!group || !check_event_name(group)) {
413                 ret = -EINVAL;
414                 goto error;
415         }
416
417         tp->class.system = kstrdup(group, GFP_KERNEL);
418         if (!tp->class.system)
419                 goto error;
420
421         INIT_LIST_HEAD(&tp->list);
422         return tp;
423 error:
424         kfree(tp->call.name);
425         kfree(tp->symbol);
426         kfree(tp);
427         return ERR_PTR(ret);
428 }
429
430 static void free_probe_arg(struct probe_arg *arg)
431 {
432         if (CHECK_BASIC_FETCH_FUNCS(deref, arg->fetch.fn))
433                 free_deref_fetch_param(arg->fetch.data);
434         else if (CHECK_BASIC_FETCH_FUNCS(symbol, arg->fetch.fn))
435                 free_symbol_cache(arg->fetch.data);
436         kfree(arg->name);
437         kfree(arg->comm);
438 }
439
440 static void free_trace_probe(struct trace_probe *tp)
441 {
442         int i;
443
444         for (i = 0; i < tp->nr_args; i++)
445                 free_probe_arg(&tp->args[i]);
446
447         kfree(tp->call.class->system);
448         kfree(tp->call.name);
449         kfree(tp->symbol);
450         kfree(tp);
451 }
452
453 static struct trace_probe *find_probe_event(const char *event,
454                                             const char *group)
455 {
456         struct trace_probe *tp;
457
458         list_for_each_entry(tp, &probe_list, list)
459                 if (strcmp(tp->call.name, event) == 0 &&
460                     strcmp(tp->call.class->system, group) == 0)
461                         return tp;
462         return NULL;
463 }
464
465 /* Unregister a trace_probe and probe_event: call with locking probe_lock */
466 static void unregister_trace_probe(struct trace_probe *tp)
467 {
468         if (probe_is_return(tp))
469                 unregister_kretprobe(&tp->rp);
470         else
471                 unregister_kprobe(&tp->rp.kp);
472         list_del(&tp->list);
473         unregister_probe_event(tp);
474 }
475
476 /* Register a trace_probe and probe_event */
477 static int register_trace_probe(struct trace_probe *tp)
478 {
479         struct trace_probe *old_tp;
480         int ret;
481
482         mutex_lock(&probe_lock);
483
484         /* register as an event */
485         old_tp = find_probe_event(tp->call.name, tp->call.class->system);
486         if (old_tp) {
487                 /* delete old event */
488                 unregister_trace_probe(old_tp);
489                 free_trace_probe(old_tp);
490         }
491         ret = register_probe_event(tp);
492         if (ret) {
493                 pr_warning("Faild to register probe event(%d)\n", ret);
494                 goto end;
495         }
496
497         tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;
498         if (probe_is_return(tp))
499                 ret = register_kretprobe(&tp->rp);
500         else
501                 ret = register_kprobe(&tp->rp.kp);
502
503         if (ret) {
504                 pr_warning("Could not insert probe(%d)\n", ret);
505                 if (ret == -EILSEQ) {
506                         pr_warning("Probing address(0x%p) is not an "
507                                    "instruction boundary.\n",
508                                    tp->rp.kp.addr);
509                         ret = -EINVAL;
510                 }
511                 unregister_probe_event(tp);
512         } else
513                 list_add_tail(&tp->list, &probe_list);
514 end:
515         mutex_unlock(&probe_lock);
516         return ret;
517 }
518
519 /* Split symbol and offset. */
520 static int split_symbol_offset(char *symbol, unsigned long *offset)
521 {
522         char *tmp;
523         int ret;
524
525         if (!offset)
526                 return -EINVAL;
527
528         tmp = strchr(symbol, '+');
529         if (tmp) {
530                 /* skip sign because strict_strtol doesn't accept '+' */
531                 ret = strict_strtoul(tmp + 1, 0, offset);
532                 if (ret)
533                         return ret;
534                 *tmp = '\0';
535         } else
536                 *offset = 0;
537         return 0;
538 }
539
540 #define PARAM_MAX_ARGS 16
541 #define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
542
543 static int parse_probe_vars(char *arg, const struct fetch_type *t,
544                             struct fetch_param *f, int is_return)
545 {
546         int ret = 0;
547         unsigned long param;
548
549         if (strcmp(arg, "retval") == 0) {
550                 if (is_return)
551                         f->fn = t->retval;
552                 else
553                         ret = -EINVAL;
554         } else if (strncmp(arg, "stack", 5) == 0) {
555                 if (arg[5] == '\0') {
556                         if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR) == 0)
557                                 f->fn = fetch_stack_address;
558                         else
559                                 ret = -EINVAL;
560                 } else if (isdigit(arg[5])) {
561                         ret = strict_strtoul(arg + 5, 10, &param);
562                         if (ret || param > PARAM_MAX_STACK)
563                                 ret = -EINVAL;
564                         else {
565                                 f->fn = t->stack;
566                                 f->data = (void *)param;
567                         }
568                 } else
569                         ret = -EINVAL;
570         } else
571                 ret = -EINVAL;
572         return ret;
573 }
574
575 /* Recursive argument parser */
576 static int __parse_probe_arg(char *arg, const struct fetch_type *t,
577                              struct fetch_param *f, int is_return)
578 {
579         int ret = 0;
580         unsigned long param;
581         long offset;
582         char *tmp;
583
584         switch (arg[0]) {
585         case '$':
586                 ret = parse_probe_vars(arg + 1, t, f, is_return);
587                 break;
588         case '%':       /* named register */
589                 ret = regs_query_register_offset(arg + 1);
590                 if (ret >= 0) {
591                         f->fn = t->reg;
592                         f->data = (void *)(unsigned long)ret;
593                         ret = 0;
594                 }
595                 break;
596         case '@':       /* memory or symbol */
597                 if (isdigit(arg[1])) {
598                         ret = strict_strtoul(arg + 1, 0, &param);
599                         if (ret)
600                                 break;
601                         f->fn = t->memory;
602                         f->data = (void *)param;
603                 } else {
604                         ret = split_symbol_offset(arg + 1, &offset);
605                         if (ret)
606                                 break;
607                         f->data = alloc_symbol_cache(arg + 1, offset);
608                         if (f->data)
609                                 f->fn = t->symbol;
610                 }
611                 break;
612         case '+':       /* deref memory */
613         case '-':
614                 tmp = strchr(arg, '(');
615                 if (!tmp)
616                         break;
617                 *tmp = '\0';
618                 ret = strict_strtol(arg + 1, 0, &offset);
619                 if (ret)
620                         break;
621                 if (arg[0] == '-')
622                         offset = -offset;
623                 arg = tmp + 1;
624                 tmp = strrchr(arg, ')');
625                 if (tmp) {
626                         struct deref_fetch_param *dprm;
627                         const struct fetch_type *t2 = find_fetch_type(NULL);
628                         *tmp = '\0';
629                         dprm = kzalloc(sizeof(struct deref_fetch_param),
630                                        GFP_KERNEL);
631                         if (!dprm)
632                                 return -ENOMEM;
633                         dprm->offset = offset;
634                         ret = __parse_probe_arg(arg, t2, &dprm->orig,
635                                                 is_return);
636                         if (ret)
637                                 kfree(dprm);
638                         else {
639                                 f->fn = t->deref;
640                                 f->data = (void *)dprm;
641                         }
642                 }
643                 break;
644         }
645         if (!ret && !f->fn)
646                 ret = -EINVAL;
647         return ret;
648 }
649
650 /* String length checking wrapper */
651 static int parse_probe_arg(char *arg, struct trace_probe *tp,
652                            struct probe_arg *parg, int is_return)
653 {
654         const char *t;
655
656         if (strlen(arg) > MAX_ARGSTR_LEN) {
657                 pr_info("Argument is too long.: %s\n",  arg);
658                 return -ENOSPC;
659         }
660         parg->comm = kstrdup(arg, GFP_KERNEL);
661         if (!parg->comm) {
662                 pr_info("Failed to allocate memory for command '%s'.\n", arg);
663                 return -ENOMEM;
664         }
665         t = strchr(parg->comm, ':');
666         if (t) {
667                 arg[t - parg->comm] = '\0';
668                 t++;
669         }
670         parg->type = find_fetch_type(t);
671         if (!parg->type) {
672                 pr_info("Unsupported type: %s\n", t);
673                 return -EINVAL;
674         }
675         parg->offset = tp->size;
676         tp->size += parg->type->size;
677         return __parse_probe_arg(arg, parg->type, &parg->fetch, is_return);
678 }
679
680 /* Return 1 if name is reserved or already used by another argument */
681 static int conflict_field_name(const char *name,
682                                struct probe_arg *args, int narg)
683 {
684         int i;
685         for (i = 0; i < ARRAY_SIZE(reserved_field_names); i++)
686                 if (strcmp(reserved_field_names[i], name) == 0)
687                         return 1;
688         for (i = 0; i < narg; i++)
689                 if (strcmp(args[i].name, name) == 0)
690                         return 1;
691         return 0;
692 }
693
694 static int create_trace_probe(int argc, char **argv)
695 {
696         /*
697          * Argument syntax:
698          *  - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
699          *  - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
700          * Fetch args:
701          *  $retval     : fetch return value
702          *  $stack      : fetch stack address
703          *  $stackN     : fetch Nth of stack (N:0-)
704          *  @ADDR       : fetch memory at ADDR (ADDR should be in kernel)
705          *  @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
706          *  %REG        : fetch register REG
707          * Dereferencing memory fetch:
708          *  +|-offs(ARG) : fetch memory at ARG +|- offs address.
709          * Alias name of args:
710          *  NAME=FETCHARG : set NAME as alias of FETCHARG.
711          * Type of args:
712          *  FETCHARG:TYPE : use TYPE instead of unsigned long.
713          */
714         struct trace_probe *tp;
715         int i, ret = 0;
716         int is_return = 0, is_delete = 0;
717         char *symbol = NULL, *event = NULL, *group = NULL;
718         char *arg, *tmp;
719         unsigned long offset = 0;
720         void *addr = NULL;
721         char buf[MAX_EVENT_NAME_LEN];
722
723         /* argc must be >= 1 */
724         if (argv[0][0] == 'p')
725                 is_return = 0;
726         else if (argv[0][0] == 'r')
727                 is_return = 1;
728         else if (argv[0][0] == '-')
729                 is_delete = 1;
730         else {
731                 pr_info("Probe definition must be started with 'p', 'r' or"
732                         " '-'.\n");
733                 return -EINVAL;
734         }
735
736         if (argv[0][1] == ':') {
737                 event = &argv[0][2];
738                 if (strchr(event, '/')) {
739                         group = event;
740                         event = strchr(group, '/') + 1;
741                         event[-1] = '\0';
742                         if (strlen(group) == 0) {
743                                 pr_info("Group name is not specified\n");
744                                 return -EINVAL;
745                         }
746                 }
747                 if (strlen(event) == 0) {
748                         pr_info("Event name is not specified\n");
749                         return -EINVAL;
750                 }
751         }
752         if (!group)
753                 group = KPROBE_EVENT_SYSTEM;
754
755         if (is_delete) {
756                 if (!event) {
757                         pr_info("Delete command needs an event name.\n");
758                         return -EINVAL;
759                 }
760                 tp = find_probe_event(event, group);
761                 if (!tp) {
762                         pr_info("Event %s/%s doesn't exist.\n", group, event);
763                         return -ENOENT;
764                 }
765                 /* delete an event */
766                 unregister_trace_probe(tp);
767                 free_trace_probe(tp);
768                 return 0;
769         }
770
771         if (argc < 2) {
772                 pr_info("Probe point is not specified.\n");
773                 return -EINVAL;
774         }
775         if (isdigit(argv[1][0])) {
776                 if (is_return) {
777                         pr_info("Return probe point must be a symbol.\n");
778                         return -EINVAL;
779                 }
780                 /* an address specified */
781                 ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr);
782                 if (ret) {
783                         pr_info("Failed to parse address.\n");
784                         return ret;
785                 }
786         } else {
787                 /* a symbol specified */
788                 symbol = argv[1];
789                 /* TODO: support .init module functions */
790                 ret = split_symbol_offset(symbol, &offset);
791                 if (ret) {
792                         pr_info("Failed to parse symbol.\n");
793                         return ret;
794                 }
795                 if (offset && is_return) {
796                         pr_info("Return probe must be used without offset.\n");
797                         return -EINVAL;
798                 }
799         }
800         argc -= 2; argv += 2;
801
802         /* setup a probe */
803         if (!event) {
804                 /* Make a new event name */
805                 if (symbol)
806                         snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
807                                  is_return ? 'r' : 'p', symbol, offset);
808                 else
809                         snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
810                                  is_return ? 'r' : 'p', addr);
811                 event = buf;
812         }
813         tp = alloc_trace_probe(group, event, addr, symbol, offset, argc,
814                                is_return);
815         if (IS_ERR(tp)) {
816                 pr_info("Failed to allocate trace_probe.(%d)\n",
817                         (int)PTR_ERR(tp));
818                 return PTR_ERR(tp);
819         }
820
821         /* parse arguments */
822         ret = 0;
823         for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
824                 /* Parse argument name */
825                 arg = strchr(argv[i], '=');
826                 if (arg)
827                         *arg++ = '\0';
828                 else
829                         arg = argv[i];
830
831                 tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
832                 if (!tp->args[i].name) {
833                         pr_info("Failed to allocate argument%d name '%s'.\n",
834                                 i, argv[i]);
835                         ret = -ENOMEM;
836                         goto error;
837                 }
838                 tmp = strchr(tp->args[i].name, ':');
839                 if (tmp)
840                         *tmp = '_';     /* convert : to _ */
841
842                 if (conflict_field_name(tp->args[i].name, tp->args, i)) {
843                         pr_info("Argument%d name '%s' conflicts with "
844                                 "another field.\n", i, argv[i]);
845                         ret = -EINVAL;
846                         goto error;
847                 }
848
849                 /* Parse fetch argument */
850                 ret = parse_probe_arg(arg, tp, &tp->args[i], is_return);
851                 if (ret) {
852                         pr_info("Parse error at argument%d. (%d)\n", i, ret);
853                         kfree(tp->args[i].name);
854                         goto error;
855                 }
856
857                 tp->nr_args++;
858         }
859
860         ret = register_trace_probe(tp);
861         if (ret)
862                 goto error;
863         return 0;
864
865 error:
866         free_trace_probe(tp);
867         return ret;
868 }
869
870 static void cleanup_all_probes(void)
871 {
872         struct trace_probe *tp;
873
874         mutex_lock(&probe_lock);
875         /* TODO: Use batch unregistration */
876         while (!list_empty(&probe_list)) {
877                 tp = list_entry(probe_list.next, struct trace_probe, list);
878                 unregister_trace_probe(tp);
879                 free_trace_probe(tp);
880         }
881         mutex_unlock(&probe_lock);
882 }
883
884
885 /* Probes listing interfaces */
886 static void *probes_seq_start(struct seq_file *m, loff_t *pos)
887 {
888         mutex_lock(&probe_lock);
889         return seq_list_start(&probe_list, *pos);
890 }
891
892 static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
893 {
894         return seq_list_next(v, &probe_list, pos);
895 }
896
897 static void probes_seq_stop(struct seq_file *m, void *v)
898 {
899         mutex_unlock(&probe_lock);
900 }
901
902 static int probes_seq_show(struct seq_file *m, void *v)
903 {
904         struct trace_probe *tp = v;
905         int i;
906
907         seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p');
908         seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name);
909
910         if (!tp->symbol)
911                 seq_printf(m, " 0x%p", tp->rp.kp.addr);
912         else if (tp->rp.kp.offset)
913                 seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset);
914         else
915                 seq_printf(m, " %s", probe_symbol(tp));
916
917         for (i = 0; i < tp->nr_args; i++)
918                 seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm);
919         seq_printf(m, "\n");
920
921         return 0;
922 }
923
924 static const struct seq_operations probes_seq_op = {
925         .start  = probes_seq_start,
926         .next   = probes_seq_next,
927         .stop   = probes_seq_stop,
928         .show   = probes_seq_show
929 };
930
931 static int probes_open(struct inode *inode, struct file *file)
932 {
933         if ((file->f_mode & FMODE_WRITE) &&
934             (file->f_flags & O_TRUNC))
935                 cleanup_all_probes();
936
937         return seq_open(file, &probes_seq_op);
938 }
939
940 static int command_trace_probe(const char *buf)
941 {
942         char **argv;
943         int argc = 0, ret = 0;
944
945         argv = argv_split(GFP_KERNEL, buf, &argc);
946         if (!argv)
947                 return -ENOMEM;
948
949         if (argc)
950                 ret = create_trace_probe(argc, argv);
951
952         argv_free(argv);
953         return ret;
954 }
955
956 #define WRITE_BUFSIZE 128
957
958 static ssize_t probes_write(struct file *file, const char __user *buffer,
959                             size_t count, loff_t *ppos)
960 {
961         char *kbuf, *tmp;
962         int ret;
963         size_t done;
964         size_t size;
965
966         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
967         if (!kbuf)
968                 return -ENOMEM;
969
970         ret = done = 0;
971         while (done < count) {
972                 size = count - done;
973                 if (size >= WRITE_BUFSIZE)
974                         size = WRITE_BUFSIZE - 1;
975                 if (copy_from_user(kbuf, buffer + done, size)) {
976                         ret = -EFAULT;
977                         goto out;
978                 }
979                 kbuf[size] = '\0';
980                 tmp = strchr(kbuf, '\n');
981                 if (tmp) {
982                         *tmp = '\0';
983                         size = tmp - kbuf + 1;
984                 } else if (done + size < count) {
985                         pr_warning("Line length is too long: "
986                                    "Should be less than %d.", WRITE_BUFSIZE);
987                         ret = -EINVAL;
988                         goto out;
989                 }
990                 done += size;
991                 /* Remove comments */
992                 tmp = strchr(kbuf, '#');
993                 if (tmp)
994                         *tmp = '\0';
995
996                 ret = command_trace_probe(kbuf);
997                 if (ret)
998                         goto out;
999         }
1000         ret = done;
1001 out:
1002         kfree(kbuf);
1003         return ret;
1004 }
1005
1006 static const struct file_operations kprobe_events_ops = {
1007         .owner          = THIS_MODULE,
1008         .open           = probes_open,
1009         .read           = seq_read,
1010         .llseek         = seq_lseek,
1011         .release        = seq_release,
1012         .write          = probes_write,
1013 };
1014
1015 /* Probes profiling interfaces */
1016 static int probes_profile_seq_show(struct seq_file *m, void *v)
1017 {
1018         struct trace_probe *tp = v;
1019
1020         seq_printf(m, "  %-44s %15lu %15lu\n", tp->call.name, tp->nhit,
1021                    tp->rp.kp.nmissed);
1022
1023         return 0;
1024 }
1025
1026 static const struct seq_operations profile_seq_op = {
1027         .start  = probes_seq_start,
1028         .next   = probes_seq_next,
1029         .stop   = probes_seq_stop,
1030         .show   = probes_profile_seq_show
1031 };
1032
1033 static int profile_open(struct inode *inode, struct file *file)
1034 {
1035         return seq_open(file, &profile_seq_op);
1036 }
1037
1038 static const struct file_operations kprobe_profile_ops = {
1039         .owner          = THIS_MODULE,
1040         .open           = profile_open,
1041         .read           = seq_read,
1042         .llseek         = seq_lseek,
1043         .release        = seq_release,
1044 };
1045
1046 /* Kprobe handler */
1047 static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
1048 {
1049         struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1050         struct kprobe_trace_entry_head *entry;
1051         struct ring_buffer_event *event;
1052         struct ring_buffer *buffer;
1053         u8 *data;
1054         int size, i, pc;
1055         unsigned long irq_flags;
1056         struct ftrace_event_call *call = &tp->call;
1057
1058         tp->nhit++;
1059
1060         local_save_flags(irq_flags);
1061         pc = preempt_count();
1062
1063         size = sizeof(*entry) + tp->size;
1064
1065         event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
1066                                                   size, irq_flags, pc);
1067         if (!event)
1068                 return;
1069
1070         entry = ring_buffer_event_data(event);
1071         entry->ip = (unsigned long)kp->addr;
1072         data = (u8 *)&entry[1];
1073         for (i = 0; i < tp->nr_args; i++)
1074                 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
1075
1076         if (!filter_current_check_discard(buffer, call, entry, event))
1077                 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
1078 }
1079
1080 /* Kretprobe handler */
1081 static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
1082                                           struct pt_regs *regs)
1083 {
1084         struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1085         struct kretprobe_trace_entry_head *entry;
1086         struct ring_buffer_event *event;
1087         struct ring_buffer *buffer;
1088         u8 *data;
1089         int size, i, pc;
1090         unsigned long irq_flags;
1091         struct ftrace_event_call *call = &tp->call;
1092
1093         local_save_flags(irq_flags);
1094         pc = preempt_count();
1095
1096         size = sizeof(*entry) + tp->size;
1097
1098         event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
1099                                                   size, irq_flags, pc);
1100         if (!event)
1101                 return;
1102
1103         entry = ring_buffer_event_data(event);
1104         entry->func = (unsigned long)tp->rp.kp.addr;
1105         entry->ret_ip = (unsigned long)ri->ret_addr;
1106         data = (u8 *)&entry[1];
1107         for (i = 0; i < tp->nr_args; i++)
1108                 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
1109
1110         if (!filter_current_check_discard(buffer, call, entry, event))
1111                 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
1112 }
1113
1114 /* Event entry printers */
1115 enum print_line_t
1116 print_kprobe_event(struct trace_iterator *iter, int flags,
1117                    struct trace_event *event)
1118 {
1119         struct kprobe_trace_entry_head *field;
1120         struct trace_seq *s = &iter->seq;
1121         struct trace_probe *tp;
1122         u8 *data;
1123         int i;
1124
1125         field = (struct kprobe_trace_entry_head *)iter->ent;
1126         tp = container_of(event, struct trace_probe, call.event);
1127
1128         if (!trace_seq_printf(s, "%s: (", tp->call.name))
1129                 goto partial;
1130
1131         if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
1132                 goto partial;
1133
1134         if (!trace_seq_puts(s, ")"))
1135                 goto partial;
1136
1137         data = (u8 *)&field[1];
1138         for (i = 0; i < tp->nr_args; i++)
1139                 if (!tp->args[i].type->print(s, tp->args[i].name,
1140                                              data + tp->args[i].offset))
1141                         goto partial;
1142
1143         if (!trace_seq_puts(s, "\n"))
1144                 goto partial;
1145
1146         return TRACE_TYPE_HANDLED;
1147 partial:
1148         return TRACE_TYPE_PARTIAL_LINE;
1149 }
1150
1151 enum print_line_t
1152 print_kretprobe_event(struct trace_iterator *iter, int flags,
1153                       struct trace_event *event)
1154 {
1155         struct kretprobe_trace_entry_head *field;
1156         struct trace_seq *s = &iter->seq;
1157         struct trace_probe *tp;
1158         u8 *data;
1159         int i;
1160
1161         field = (struct kretprobe_trace_entry_head *)iter->ent;
1162         tp = container_of(event, struct trace_probe, call.event);
1163
1164         if (!trace_seq_printf(s, "%s: (", tp->call.name))
1165                 goto partial;
1166
1167         if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
1168                 goto partial;
1169
1170         if (!trace_seq_puts(s, " <- "))
1171                 goto partial;
1172
1173         if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
1174                 goto partial;
1175
1176         if (!trace_seq_puts(s, ")"))
1177                 goto partial;
1178
1179         data = (u8 *)&field[1];
1180         for (i = 0; i < tp->nr_args; i++)
1181                 if (!tp->args[i].type->print(s, tp->args[i].name,
1182                                              data + tp->args[i].offset))
1183                         goto partial;
1184
1185         if (!trace_seq_puts(s, "\n"))
1186                 goto partial;
1187
1188         return TRACE_TYPE_HANDLED;
1189 partial:
1190         return TRACE_TYPE_PARTIAL_LINE;
1191 }
1192
1193 static int probe_event_enable(struct ftrace_event_call *call)
1194 {
1195         struct trace_probe *tp = (struct trace_probe *)call->data;
1196
1197         tp->flags |= TP_FLAG_TRACE;
1198         if (probe_is_return(tp))
1199                 return enable_kretprobe(&tp->rp);
1200         else
1201                 return enable_kprobe(&tp->rp.kp);
1202 }
1203
1204 static void probe_event_disable(struct ftrace_event_call *call)
1205 {
1206         struct trace_probe *tp = (struct trace_probe *)call->data;
1207
1208         tp->flags &= ~TP_FLAG_TRACE;
1209         if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) {
1210                 if (probe_is_return(tp))
1211                         disable_kretprobe(&tp->rp);
1212                 else
1213                         disable_kprobe(&tp->rp.kp);
1214         }
1215 }
1216
/* raw_init callback of the event class: nothing to set up, always 0. */
static int probe_event_raw_init(struct ftrace_event_call *event_call)
{
	(void)event_call;

	return 0;
}
1221
/*
 * DEFINE_FIELD - register member @item of the local `field` variable
 * as an event field called @name.
 *
 * Must be expanded inside a function that provides `event_call`, a
 * local `field` of the entry-head struct type and an int `ret`.  If
 * trace_define_field() fails, the enclosing function returns that
 * error immediately.
 */
#undef DEFINE_FIELD
#define DEFINE_FIELD(type, item, name, is_signed)			\
	do {								\
		ret = trace_define_field(event_call, #type, name,	\
					 offsetof(typeof(field), item),	\
					 sizeof(field.item), is_signed,	\
					 FILTER_OTHER);			\
		if (ret)						\
			return ret;					\
	} while (0)
1232
1233 static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
1234 {
1235         int ret, i;
1236         struct kprobe_trace_entry_head field;
1237         struct trace_probe *tp = (struct trace_probe *)event_call->data;
1238
1239         DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
1240         /* Set argument names as fields */
1241         for (i = 0; i < tp->nr_args; i++) {
1242                 ret = trace_define_field(event_call, tp->args[i].type->name,
1243                                          tp->args[i].name,
1244                                          sizeof(field) + tp->args[i].offset,
1245                                          tp->args[i].type->size,
1246                                          tp->args[i].type->is_signed,
1247                                          FILTER_OTHER);
1248                 if (ret)
1249                         return ret;
1250         }
1251         return 0;
1252 }
1253
1254 static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
1255 {
1256         int ret, i;
1257         struct kretprobe_trace_entry_head field;
1258         struct trace_probe *tp = (struct trace_probe *)event_call->data;
1259
1260         DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
1261         DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
1262         /* Set argument names as fields */
1263         for (i = 0; i < tp->nr_args; i++) {
1264                 ret = trace_define_field(event_call, tp->args[i].type->name,
1265                                          tp->args[i].name,
1266                                          sizeof(field) + tp->args[i].offset,
1267                                          tp->args[i].type->size,
1268                                          tp->args[i].type->is_signed,
1269                                          FILTER_OTHER);
1270                 if (ret)
1271                         return ret;
1272         }
1273         return 0;
1274 }
1275
1276 static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
1277 {
1278         int i;
1279         int pos = 0;
1280
1281         const char *fmt, *arg;
1282
1283         if (!probe_is_return(tp)) {
1284                 fmt = "(%lx)";
1285                 arg = "REC->" FIELD_STRING_IP;
1286         } else {
1287                 fmt = "(%lx <- %lx)";
1288                 arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
1289         }
1290
1291         /* When len=0, we just calculate the needed length */
1292 #define LEN_OR_ZERO (len ? len - pos : 0)
1293
1294         pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);
1295
1296         for (i = 0; i < tp->nr_args; i++) {
1297                 pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s",
1298                                 tp->args[i].name, tp->args[i].type->fmt);
1299         }
1300
1301         pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
1302
1303         for (i = 0; i < tp->nr_args; i++) {
1304                 pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
1305                                 tp->args[i].name);
1306         }
1307
1308 #undef LEN_OR_ZERO
1309
1310         /* return the length of print_fmt */
1311         return pos;
1312 }
1313
1314 static int set_print_fmt(struct trace_probe *tp)
1315 {
1316         int len;
1317         char *print_fmt;
1318
1319         /* First: called with 0 length to calculate the needed length */
1320         len = __set_print_fmt(tp, NULL, 0);
1321         print_fmt = kmalloc(len + 1, GFP_KERNEL);
1322         if (!print_fmt)
1323                 return -ENOMEM;
1324
1325         /* Second: actually write the @print_fmt */
1326         __set_print_fmt(tp, print_fmt, len + 1);
1327         tp->call.print_fmt = print_fmt;
1328
1329         return 0;
1330 }
1331
1332 #ifdef CONFIG_PERF_EVENTS
1333
1334 /* Kprobe profile handler */
1335 static __kprobes void kprobe_perf_func(struct kprobe *kp,
1336                                          struct pt_regs *regs)
1337 {
1338         struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1339         struct ftrace_event_call *call = &tp->call;
1340         struct kprobe_trace_entry_head *entry;
1341         struct hlist_head *head;
1342         u8 *data;
1343         int size, __size, i;
1344         int rctx;
1345
1346         __size = sizeof(*entry) + tp->size;
1347         size = ALIGN(__size + sizeof(u32), sizeof(u64));
1348         size -= sizeof(u32);
1349         if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
1350                      "profile buffer not large enough"))
1351                 return;
1352
1353         entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
1354         if (!entry)
1355                 return;
1356
1357         entry->ip = (unsigned long)kp->addr;
1358         data = (u8 *)&entry[1];
1359         for (i = 0; i < tp->nr_args; i++)
1360                 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
1361
1362         head = this_cpu_ptr(call->perf_events);
1363         perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
1364 }
1365
1366 /* Kretprobe profile handler */
1367 static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
1368                                             struct pt_regs *regs)
1369 {
1370         struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1371         struct ftrace_event_call *call = &tp->call;
1372         struct kretprobe_trace_entry_head *entry;
1373         struct hlist_head *head;
1374         u8 *data;
1375         int size, __size, i;
1376         int rctx;
1377
1378         __size = sizeof(*entry) + tp->size;
1379         size = ALIGN(__size + sizeof(u32), sizeof(u64));
1380         size -= sizeof(u32);
1381         if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
1382                      "profile buffer not large enough"))
1383                 return;
1384
1385         entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
1386         if (!entry)
1387                 return;
1388
1389         entry->func = (unsigned long)tp->rp.kp.addr;
1390         entry->ret_ip = (unsigned long)ri->ret_addr;
1391         data = (u8 *)&entry[1];
1392         for (i = 0; i < tp->nr_args; i++)
1393                 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
1394
1395         head = this_cpu_ptr(call->perf_events);
1396         perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head);
1397 }
1398
1399 static int probe_perf_enable(struct ftrace_event_call *call)
1400 {
1401         struct trace_probe *tp = (struct trace_probe *)call->data;
1402
1403         tp->flags |= TP_FLAG_PROFILE;
1404
1405         if (probe_is_return(tp))
1406                 return enable_kretprobe(&tp->rp);
1407         else
1408                 return enable_kprobe(&tp->rp.kp);
1409 }
1410
1411 static void probe_perf_disable(struct ftrace_event_call *call)
1412 {
1413         struct trace_probe *tp = (struct trace_probe *)call->data;
1414
1415         tp->flags &= ~TP_FLAG_PROFILE;
1416
1417         if (!(tp->flags & TP_FLAG_TRACE)) {
1418                 if (probe_is_return(tp))
1419                         disable_kretprobe(&tp->rp);
1420                 else
1421                         disable_kprobe(&tp->rp.kp);
1422         }
1423 }
1424 #endif  /* CONFIG_PERF_EVENTS */
1425
1426 static __kprobes
1427 int kprobe_register(struct ftrace_event_call *event, enum trace_reg type)
1428 {
1429         switch (type) {
1430         case TRACE_REG_REGISTER:
1431                 return probe_event_enable(event);
1432         case TRACE_REG_UNREGISTER:
1433                 probe_event_disable(event);
1434                 return 0;
1435
1436 #ifdef CONFIG_PERF_EVENTS
1437         case TRACE_REG_PERF_REGISTER:
1438                 return probe_perf_enable(event);
1439         case TRACE_REG_PERF_UNREGISTER:
1440                 probe_perf_disable(event);
1441                 return 0;
1442 #endif
1443         }
1444         return 0;
1445 }
1446
1447 static __kprobes
1448 int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
1449 {
1450         struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1451
1452         if (tp->flags & TP_FLAG_TRACE)
1453                 kprobe_trace_func(kp, regs);
1454 #ifdef CONFIG_PERF_EVENTS
1455         if (tp->flags & TP_FLAG_PROFILE)
1456                 kprobe_perf_func(kp, regs);
1457 #endif
1458         return 0;       /* We don't tweek kernel, so just return 0 */
1459 }
1460
1461 static __kprobes
1462 int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
1463 {
1464         struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1465
1466         if (tp->flags & TP_FLAG_TRACE)
1467                 kretprobe_trace_func(ri, regs);
1468 #ifdef CONFIG_PERF_EVENTS
1469         if (tp->flags & TP_FLAG_PROFILE)
1470                 kretprobe_perf_func(ri, regs);
1471 #endif
1472         return 0;       /* We don't tweek kernel, so just return 0 */
1473 }
1474
1475 static struct trace_event_functions kretprobe_funcs = {
1476         .trace          = print_kretprobe_event
1477 };
1478
1479 static struct trace_event_functions kprobe_funcs = {
1480         .trace          = print_kprobe_event
1481 };
1482
1483 static int register_probe_event(struct trace_probe *tp)
1484 {
1485         struct ftrace_event_call *call = &tp->call;
1486         int ret;
1487
1488         /* Initialize ftrace_event_call */
1489         if (probe_is_return(tp)) {
1490                 INIT_LIST_HEAD(&call->class->fields);
1491                 call->event.funcs = &kretprobe_funcs;
1492                 call->class->raw_init = probe_event_raw_init;
1493                 call->class->define_fields = kretprobe_event_define_fields;
1494         } else {
1495                 INIT_LIST_HEAD(&call->class->fields);
1496                 call->event.funcs = &kprobe_funcs;
1497                 call->class->raw_init = probe_event_raw_init;
1498                 call->class->define_fields = kprobe_event_define_fields;
1499         }
1500         if (set_print_fmt(tp) < 0)
1501                 return -ENOMEM;
1502         ret = register_ftrace_event(&call->event);
1503         if (!ret) {
1504                 kfree(call->print_fmt);
1505                 return -ENODEV;
1506         }
1507         call->flags = 0;
1508         call->class->reg = kprobe_register;
1509         call->data = tp;
1510         ret = trace_add_event_call(call);
1511         if (ret) {
1512                 pr_info("Failed to register kprobe event: %s\n", call->name);
1513                 kfree(call->print_fmt);
1514                 unregister_ftrace_event(&call->event);
1515         }
1516         return ret;
1517 }
1518
1519 static void unregister_probe_event(struct trace_probe *tp)
1520 {
1521         /* tp->event is unregistered in trace_remove_event_call() */
1522         trace_remove_event_call(&tp->call);
1523         kfree(tp->call.print_fmt);
1524 }
1525
1526 /* Make a debugfs interface for controling probe points */
1527 static __init int init_kprobe_trace(void)
1528 {
1529         struct dentry *d_tracer;
1530         struct dentry *entry;
1531
1532         d_tracer = tracing_init_dentry();
1533         if (!d_tracer)
1534                 return 0;
1535
1536         entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
1537                                     NULL, &kprobe_events_ops);
1538
1539         /* Event list interface */
1540         if (!entry)
1541                 pr_warning("Could not create debugfs "
1542                            "'kprobe_events' entry\n");
1543
1544         /* Profile interface */
1545         entry = debugfs_create_file("kprobe_profile", 0444, d_tracer,
1546                                     NULL, &kprobe_profile_ops);
1547
1548         if (!entry)
1549                 pr_warning("Could not create debugfs "
1550                            "'kprobe_profile' entry\n");
1551         return 0;
1552 }
1553 fs_initcall(init_kprobe_trace);
1554
1555
1556 #ifdef CONFIG_FTRACE_STARTUP_TEST
1557
/* Probe target for the startup self test: returns the sum of its six arguments. */
static int kprobe_trace_selftest_target(int a1, int a2, int a3,
					int a4, int a5, int a6)
{
	int sum = a1;

	sum += a2;
	sum += a3;
	sum += a4;
	sum += a5;
	sum += a6;

	return sum;
}
1563
1564 static __init int kprobe_trace_self_tests_init(void)
1565 {
1566         int ret, warn = 0;
1567         int (*target)(int, int, int, int, int, int);
1568         struct trace_probe *tp;
1569
1570         target = kprobe_trace_selftest_target;
1571
1572         pr_info("Testing kprobe tracing: ");
1573
1574         ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
1575                                   "$stack $stack0 +0($stack)");
1576         if (WARN_ON_ONCE(ret)) {
1577                 pr_warning("error on probing function entry.\n");
1578                 warn++;
1579         } else {
1580                 /* Enable trace point */
1581                 tp = find_probe_event("testprobe", KPROBE_EVENT_SYSTEM);
1582                 if (WARN_ON_ONCE(tp == NULL)) {
1583                         pr_warning("error on getting new probe.\n");
1584                         warn++;
1585                 } else
1586                         probe_event_enable(&tp->call);
1587         }
1588
1589         ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
1590                                   "$retval");
1591         if (WARN_ON_ONCE(ret)) {
1592                 pr_warning("error on probing function return.\n");
1593                 warn++;
1594         } else {
1595                 /* Enable trace point */
1596                 tp = find_probe_event("testprobe2", KPROBE_EVENT_SYSTEM);
1597                 if (WARN_ON_ONCE(tp == NULL)) {
1598                         pr_warning("error on getting new probe.\n");
1599                         warn++;
1600                 } else
1601                         probe_event_enable(&tp->call);
1602         }
1603
1604         if (warn)
1605                 goto end;
1606
1607         ret = target(1, 2, 3, 4, 5, 6);
1608
1609         ret = command_trace_probe("-:testprobe");
1610         if (WARN_ON_ONCE(ret)) {
1611                 pr_warning("error on deleting a probe.\n");
1612                 warn++;
1613         }
1614
1615         ret = command_trace_probe("-:testprobe2");
1616         if (WARN_ON_ONCE(ret)) {
1617                 pr_warning("error on deleting a probe.\n");
1618                 warn++;
1619         }
1620
1621 end:
1622         cleanup_all_probes();
1623         if (warn)
1624                 pr_cont("NG: Some tests are failed. Please check them.\n");
1625         else
1626                 pr_cont("OK\n");
1627         return 0;
1628 }
1629
1630 late_initcall(kprobe_trace_self_tests_init);
1631
1632 #endif