Merge commit 'tracing/core' into tracing/kprobes
authorFrederic Weisbecker <fweisbec@gmail.com>
Thu, 10 Sep 2009 23:09:23 +0000 (01:09 +0200)
committerFrederic Weisbecker <fweisbec@gmail.com>
Thu, 10 Sep 2009 23:09:23 +0000 (01:09 +0200)
Conflicts:
kernel/trace/trace_export.c
kernel/trace/trace_kprobe.c

Merge reason: This topic branch lacks an important
build fix in tracing/core:

0dd7b74787eaf7858c6c573353a83c3e2766e674:
tracing: Fix double CPP substitution in TRACE_EVENT_FN

that prevents from multiple tracepoint headers inclusion crashes.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
34 files changed:
Documentation/trace/kprobetrace.txt [new file with mode: 0644]
arch/x86/Kconfig.debug
arch/x86/Makefile
arch/x86/include/asm/inat.h [new file with mode: 0644]
arch/x86/include/asm/inat_types.h [new file with mode: 0644]
arch/x86/include/asm/insn.h [new file with mode: 0644]
arch/x86/include/asm/ptrace.h
arch/x86/kernel/entry_64.S
arch/x86/kernel/kprobes.c
arch/x86/kernel/ptrace.c
arch/x86/lib/Makefile
arch/x86/lib/inat.c [new file with mode: 0644]
arch/x86/lib/insn.c [new file with mode: 0644]
arch/x86/lib/x86-opcode-map.txt [new file with mode: 0644]
arch/x86/mm/fault.c
arch/x86/tools/Makefile [new file with mode: 0644]
arch/x86/tools/distill.awk [new file with mode: 0644]
arch/x86/tools/gen-insn-attr-x86.awk [new file with mode: 0644]
arch/x86/tools/test_get_len.c [new file with mode: 0644]
include/linux/ftrace_event.h
include/linux/kprobes.h
include/linux/syscalls.h
include/trace/ftrace.h
include/trace/syscall.h
kernel/kprobes.c
kernel/notifier.c
kernel/trace/Kconfig
kernel/trace/Makefile
kernel/trace/trace.h
kernel/trace/trace_event_types.h
kernel/trace/trace_events.c
kernel/trace/trace_export.c
kernel/trace/trace_kprobe.c [new file with mode: 0644]
kernel/trace/trace_syscalls.c

diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
new file mode 100644 (file)
index 0000000..3de7517
--- /dev/null
@@ -0,0 +1,148 @@
+                         Kprobe-based Event Tracer
+                         =========================
+
+                 Documentation is written by Masami Hiramatsu
+
+
+Overview
+--------
+This tracer is similar to the events tracer which is based on Tracepoint
+infrastructure. Instead of Tracepoint, this tracer is based on kprobes(kprobe
+and kretprobe). It probes anywhere where kprobes can probe(this means, all
+functions body except for __kprobes functions).
+
+Unlike the function tracer, this tracer can probe instructions inside of
+kernel functions. It allows you to check which instruction has been executed.
+
+Unlike the Tracepoint based events tracer, this tracer can add and remove
+probe points on the fly.
+
+Similar to the events tracer, this tracer doesn't need to be activated via
+current_tracer, instead of that, just set probe points via
+/sys/kernel/debug/tracing/kprobe_events. And you can set filters on each
+probe events via /sys/kernel/debug/tracing/events/kprobes/<EVENT>/filter.
+
+
+Synopsis of kprobe_events
+-------------------------
+  p[:EVENT] SYMBOL[+offs|-offs]|MEMADDR [FETCHARGS]    : Set a probe
+  r[:EVENT] SYMBOL[+0] [FETCHARGS]                     : Set a return probe
+
+ EVENT                 : Event name. If omitted, the event name is generated
+                         based on SYMBOL+offs or MEMADDR.
+ SYMBOL[+offs|-offs]   : Symbol+offset where the probe is inserted.
+ MEMADDR               : Address where the probe is inserted.
+
+ FETCHARGS             : Arguments. Each probe can have up to 128 args.
+  %REG : Fetch register REG
+  sN   : Fetch Nth entry of stack (N >= 0)
+  sa   : Fetch stack address.
+  @ADDR        : Fetch memory at ADDR (ADDR should be in kernel)
+  @SYM[+|-offs]        : Fetch memory at SYM +|- offs (SYM should be a data symbol)
+  aN   : Fetch function argument. (N >= 0)(*)
+  rv   : Fetch return value.(**)
+  ra   : Fetch return address.(**)
+  +|-offs(FETCHARG) : fetch memory at FETCHARG +|- offs address.(***)
+
+  (*) aN may not correct on asmlinkaged functions and at the middle of
+      function body.
+  (**) only for return probe.
+  (***) this is useful for fetching a field of data structures.
+
+
+Per-Probe Event Filtering
+-------------------------
+ Per-probe event filtering feature allows you to set different filter on each
+probe and gives you what arguments will be shown in trace buffer. If an event
+name is specified right after 'p:' or 'r:' in kprobe_events, the tracer adds
+an event under tracing/events/kprobes/<EVENT>, at the directory you can see
+'id', 'enabled', 'format' and 'filter'.
+
+enabled:
+  You can enable/disable the probe by writing 1 or 0 on it.
+
+format:
+  It shows the format of this probe event. It also shows aliases of arguments
+ which you specified to kprobe_events.
+
+filter:
+  You can write filtering rules of this event. And you can use both of aliase
+ names and field names for describing filters.
+
+
+Event Profiling
+---------------
+ You can check the total number of probe hits and probe miss-hits via
+/sys/kernel/debug/tracing/kprobe_profile.
+ The first column is event name, the second is the number of probe hits,
+the third is the number of probe miss-hits.
+
+
+Usage examples
+--------------
+To add a probe as a new event, write a new definition to kprobe_events
+as below.
+
+  echo p:myprobe do_sys_open a0 a1 a2 a3 > /sys/kernel/debug/tracing/kprobe_events
+
+ This sets a kprobe on the top of do_sys_open() function with recording
+1st to 4th arguments as "myprobe" event.
+
+  echo r:myretprobe do_sys_open rv ra >> /sys/kernel/debug/tracing/kprobe_events
+
+ This sets a kretprobe on the return point of do_sys_open() function with
+recording return value and return address as "myretprobe" event.
+ You can see the format of these events via
+/sys/kernel/debug/tracing/events/kprobes/<EVENT>/format.
+
+  cat /sys/kernel/debug/tracing/events/kprobes/myprobe/format
+name: myprobe
+ID: 23
+format:
+       field:unsigned short common_type;       offset:0;       size:2;
+       field:unsigned char common_flags;       offset:2;       size:1;
+       field:unsigned char common_preempt_count;       offset:3;       size:1;
+       field:int common_pid;   offset:4;       size:4;
+       field:int common_tgid;  offset:8;       size:4;
+
+       field: unsigned long ip;        offset:16;tsize:8;
+       field: int nargs;       offset:24;tsize:4;
+       field: unsigned long arg0;      offset:32;tsize:8;
+       field: unsigned long arg1;      offset:40;tsize:8;
+       field: unsigned long arg2;      offset:48;tsize:8;
+       field: unsigned long arg3;      offset:56;tsize:8;
+
+       alias: a0;      original: arg0;
+       alias: a1;      original: arg1;
+       alias: a2;      original: arg2;
+       alias: a3;      original: arg3;
+
+print fmt: "%lx: 0x%lx 0x%lx 0x%lx 0x%lx", ip, arg0, arg1, arg2, arg3
+
+
+ You can see that the event has 4 arguments and alias expressions
+corresponding to it.
+
+  echo > /sys/kernel/debug/tracing/kprobe_events
+
+ This clears all probe points. and you can see the traced information via
+/sys/kernel/debug/tracing/trace.
+
+  cat /sys/kernel/debug/tracing/trace
+# tracer: nop
+#
+#           TASK-PID    CPU#    TIMESTAMP  FUNCTION
+#              | |       |          |         |
+           <...>-1447  [001] 1038282.286875: do_sys_open+0x0/0xd6: 0x3 0x7fffd1ec4440 0x8000 0x0
+           <...>-1447  [001] 1038282.286878: sys_openat+0xc/0xe <- do_sys_open: 0xfffffffffffffffe 0xffffffff81367a3a
+           <...>-1447  [001] 1038282.286885: do_sys_open+0x0/0xd6: 0xffffff9c 0x40413c 0x8000 0x1b6
+           <...>-1447  [001] 1038282.286915: sys_open+0x1b/0x1d <- do_sys_open: 0x3 0xffffffff81367a3a
+           <...>-1447  [001] 1038282.286969: do_sys_open+0x0/0xd6: 0xffffff9c 0x4041c6 0x98800 0x10
+           <...>-1447  [001] 1038282.286976: sys_open+0x1b/0x1d <- do_sys_open: 0x3 0xffffffff81367a3a
+
+
+ Each line shows when the kernel hits a probe, and <- SYMBOL means kernel
+returns from SYMBOL(e.g. "sys_open+0x1b/0x1d <- do_sys_open" means kernel
+returns from do_sys_open to sys_open+0x1b).
+
+
index d105f29..7d0b681 100644 (file)
@@ -186,6 +186,15 @@ config X86_DS_SELFTEST
 config HAVE_MMIOTRACE_SUPPORT
        def_bool y
 
+config X86_DECODER_SELFTEST
+     bool "x86 instruction decoder selftest"
+     depends on DEBUG_KERNEL
+       ---help---
+        Perform x86 instruction decoder selftests at build time.
+        This option is useful for checking the sanity of x86 instruction
+        decoder code.
+        If unsure, say "N".
+
 #
 # IO delay types:
 #
index 1b68659..5fe16bf 100644 (file)
@@ -154,6 +154,9 @@ all: bzImage
 KBUILD_IMAGE := $(boot)/bzImage
 
 bzImage: vmlinux
+ifeq ($(CONFIG_X86_DECODER_SELFTEST),y)
+       $(Q)$(MAKE) $(build)=arch/x86/tools posttest
+endif
        $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE)
        $(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot
        $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@
diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h
new file mode 100644 (file)
index 0000000..2866fdd
--- /dev/null
@@ -0,0 +1,188 @@
+#ifndef _ASM_X86_INAT_H
+#define _ASM_X86_INAT_H
+/*
+ * x86 instruction attributes
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+#include <asm/inat_types.h>
+
+/*
+ * Internal bits. Don't use bitmasks directly, because these bits are
+ * unstable. You should use checking functions.
+ */
+
+#define INAT_OPCODE_TABLE_SIZE 256
+#define INAT_GROUP_TABLE_SIZE 8
+
+/* Legacy instruction prefixes */
+#define INAT_PFX_OPNDSZ        1       /* 0x66 */ /* LPFX1 */
+#define INAT_PFX_REPNE 2       /* 0xF2 */ /* LPFX2 */
+#define INAT_PFX_REPE  3       /* 0xF3 */ /* LPFX3 */
+#define INAT_PFX_LOCK  4       /* 0xF0 */
+#define INAT_PFX_CS    5       /* 0x2E */
+#define INAT_PFX_DS    6       /* 0x3E */
+#define INAT_PFX_ES    7       /* 0x26 */
+#define INAT_PFX_FS    8       /* 0x64 */
+#define INAT_PFX_GS    9       /* 0x65 */
+#define INAT_PFX_SS    10      /* 0x36 */
+#define INAT_PFX_ADDRSZ        11      /* 0x67 */
+
+#define INAT_LPREFIX_MAX       3
+
+/* Immediate size */
+#define INAT_IMM_BYTE          1
+#define INAT_IMM_WORD          2
+#define INAT_IMM_DWORD         3
+#define INAT_IMM_QWORD         4
+#define INAT_IMM_PTR           5
+#define INAT_IMM_VWORD32       6
+#define INAT_IMM_VWORD         7
+
+/* Legacy prefix */
+#define INAT_PFX_OFFS  0
+#define INAT_PFX_BITS  4
+#define INAT_PFX_MAX    ((1 << INAT_PFX_BITS) - 1)
+#define INAT_PFX_MASK  (INAT_PFX_MAX << INAT_PFX_OFFS)
+/* Escape opcodes */
+#define INAT_ESC_OFFS  (INAT_PFX_OFFS + INAT_PFX_BITS)
+#define INAT_ESC_BITS  2
+#define INAT_ESC_MAX   ((1 << INAT_ESC_BITS) - 1)
+#define INAT_ESC_MASK  (INAT_ESC_MAX << INAT_ESC_OFFS)
+/* Group opcodes (1-16) */
+#define INAT_GRP_OFFS  (INAT_ESC_OFFS + INAT_ESC_BITS)
+#define INAT_GRP_BITS  5
+#define INAT_GRP_MAX   ((1 << INAT_GRP_BITS) - 1)
+#define INAT_GRP_MASK  (INAT_GRP_MAX << INAT_GRP_OFFS)
+/* Immediates */
+#define INAT_IMM_OFFS  (INAT_GRP_OFFS + INAT_GRP_BITS)
+#define INAT_IMM_BITS  3
+#define INAT_IMM_MASK  (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS)
+/* Flags */
+#define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS)
+#define INAT_REXPFX    (1 << INAT_FLAG_OFFS)
+#define INAT_MODRM     (1 << (INAT_FLAG_OFFS + 1))
+#define INAT_FORCE64   (1 << (INAT_FLAG_OFFS + 2))
+#define INAT_SCNDIMM   (1 << (INAT_FLAG_OFFS + 3))
+#define INAT_MOFFSET   (1 << (INAT_FLAG_OFFS + 4))
+#define INAT_VARIANT   (1 << (INAT_FLAG_OFFS + 5))
+/* Attribute making macros for attribute tables */
+#define INAT_MAKE_PREFIX(pfx)  (pfx << INAT_PFX_OFFS)
+#define INAT_MAKE_ESCAPE(esc)  (esc << INAT_ESC_OFFS)
+#define INAT_MAKE_GROUP(grp)   ((grp << INAT_GRP_OFFS) | INAT_MODRM)
+#define INAT_MAKE_IMM(imm)     (imm << INAT_IMM_OFFS)
+
+/* Attribute search APIs */
+extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
+extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode,
+                                            insn_byte_t last_pfx,
+                                            insn_attr_t esc_attr);
+extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm,
+                                           insn_byte_t last_pfx,
+                                           insn_attr_t esc_attr);
+
+/* Attribute checking functions */
+static inline int inat_is_prefix(insn_attr_t attr)
+{
+       return attr & INAT_PFX_MASK;
+}
+
+static inline int inat_is_address_size_prefix(insn_attr_t attr)
+{
+       return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ;
+}
+
+static inline int inat_is_operand_size_prefix(insn_attr_t attr)
+{
+       return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ;
+}
+
+static inline int inat_last_prefix_id(insn_attr_t attr)
+{
+       if ((attr & INAT_PFX_MASK) > INAT_LPREFIX_MAX)
+               return 0;
+       else
+               return attr & INAT_PFX_MASK;
+}
+
+static inline int inat_is_escape(insn_attr_t attr)
+{
+       return attr & INAT_ESC_MASK;
+}
+
+static inline int inat_escape_id(insn_attr_t attr)
+{
+       return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS;
+}
+
+static inline int inat_is_group(insn_attr_t attr)
+{
+       return attr & INAT_GRP_MASK;
+}
+
+static inline int inat_group_id(insn_attr_t attr)
+{
+       return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS;
+}
+
+static inline int inat_group_common_attribute(insn_attr_t attr)
+{
+       return attr & ~INAT_GRP_MASK;
+}
+
+static inline int inat_has_immediate(insn_attr_t attr)
+{
+       return attr & INAT_IMM_MASK;
+}
+
+static inline int inat_immediate_size(insn_attr_t attr)
+{
+       return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS;
+}
+
+static inline int inat_is_rex_prefix(insn_attr_t attr)
+{
+       return attr & INAT_REXPFX;
+}
+
+static inline int inat_has_modrm(insn_attr_t attr)
+{
+       return attr & INAT_MODRM;
+}
+
+static inline int inat_is_force64(insn_attr_t attr)
+{
+       return attr & INAT_FORCE64;
+}
+
+static inline int inat_has_second_immediate(insn_attr_t attr)
+{
+       return attr & INAT_SCNDIMM;
+}
+
+static inline int inat_has_moffset(insn_attr_t attr)
+{
+       return attr & INAT_MOFFSET;
+}
+
+static inline int inat_has_variant(insn_attr_t attr)
+{
+       return attr & INAT_VARIANT;
+}
+
+#endif
diff --git a/arch/x86/include/asm/inat_types.h b/arch/x86/include/asm/inat_types.h
new file mode 100644 (file)
index 0000000..cb3c20c
--- /dev/null
@@ -0,0 +1,29 @@
+#ifndef _ASM_X86_INAT_TYPES_H
+#define _ASM_X86_INAT_TYPES_H
+/*
+ * x86 instruction attributes
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+/* Instruction attributes */
+typedef unsigned int insn_attr_t;
+typedef unsigned char insn_byte_t;
+typedef signed int insn_value_t;
+
+#endif
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
new file mode 100644 (file)
index 0000000..12b4e37
--- /dev/null
@@ -0,0 +1,143 @@
+#ifndef _ASM_X86_INSN_H
+#define _ASM_X86_INSN_H
+/*
+ * x86 instruction analysis
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2009
+ */
+
+/* insn_attr_t is defined in inat.h */
+#include <asm/inat.h>
+
+struct insn_field {
+       union {
+               insn_value_t value;
+               insn_byte_t bytes[4];
+       };
+       /* !0 if we've run insn_get_xxx() for this field */
+       unsigned char got;
+       unsigned char nbytes;
+};
+
+struct insn {
+       struct insn_field prefixes;     /*
+                                        * Prefixes
+                                        * prefixes.bytes[3]: last prefix
+                                        */
+       struct insn_field rex_prefix;   /* REX prefix */
+       struct insn_field opcode;       /*
+                                        * opcode.bytes[0]: opcode1
+                                        * opcode.bytes[1]: opcode2
+                                        * opcode.bytes[2]: opcode3
+                                        */
+       struct insn_field modrm;
+       struct insn_field sib;
+       struct insn_field displacement;
+       union {
+               struct insn_field immediate;
+               struct insn_field moffset1;     /* for 64bit MOV */
+               struct insn_field immediate1;   /* for 64bit imm or off16/32 */
+       };
+       union {
+               struct insn_field moffset2;     /* for 64bit MOV */
+               struct insn_field immediate2;   /* for 64bit imm or seg16 */
+       };
+
+       insn_attr_t attr;
+       unsigned char opnd_bytes;
+       unsigned char addr_bytes;
+       unsigned char length;
+       unsigned char x86_64;
+
+       const insn_byte_t *kaddr;       /* kernel address of insn to analyze */
+       const insn_byte_t *next_byte;
+};
+
+#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6)
+#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3)
+#define X86_MODRM_RM(modrm) ((modrm) & 0x07)
+
+#define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6)
+#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3)
+#define X86_SIB_BASE(sib) ((sib) & 0x07)
+
+#define X86_REX_W(rex) ((rex) & 8)
+#define X86_REX_R(rex) ((rex) & 4)
+#define X86_REX_X(rex) ((rex) & 2)
+#define X86_REX_B(rex) ((rex) & 1)
+
+/* The last prefix is needed for two-byte and three-byte opcodes */
+static inline insn_byte_t insn_last_prefix(struct insn *insn)
+{
+       return insn->prefixes.bytes[3];
+}
+
+extern void insn_init(struct insn *insn, const void *kaddr, int x86_64);
+extern void insn_get_prefixes(struct insn *insn);
+extern void insn_get_opcode(struct insn *insn);
+extern void insn_get_modrm(struct insn *insn);
+extern void insn_get_sib(struct insn *insn);
+extern void insn_get_displacement(struct insn *insn);
+extern void insn_get_immediate(struct insn *insn);
+extern void insn_get_length(struct insn *insn);
+
+/* Attribute will be determined after getting ModRM (for opcode groups) */
+static inline void insn_get_attribute(struct insn *insn)
+{
+       insn_get_modrm(insn);
+}
+
+/* Instruction uses RIP-relative addressing */
+extern int insn_rip_relative(struct insn *insn);
+
+/* Init insn for kernel text */
+static inline void kernel_insn_init(struct insn *insn, const void *kaddr)
+{
+#ifdef CONFIG_X86_64
+       insn_init(insn, kaddr, 1);
+#else /* CONFIG_X86_32 */
+       insn_init(insn, kaddr, 0);
+#endif
+}
+
+/* Offset of each field from kaddr */
+static inline int insn_offset_rex_prefix(struct insn *insn)
+{
+       return insn->prefixes.nbytes;
+}
+static inline int insn_offset_opcode(struct insn *insn)
+{
+       return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes;
+}
+static inline int insn_offset_modrm(struct insn *insn)
+{
+       return insn_offset_opcode(insn) + insn->opcode.nbytes;
+}
+static inline int insn_offset_sib(struct insn *insn)
+{
+       return insn_offset_modrm(insn) + insn->modrm.nbytes;
+}
+static inline int insn_offset_displacement(struct insn *insn)
+{
+       return insn_offset_sib(insn) + insn->sib.nbytes;
+}
+static inline int insn_offset_immediate(struct insn *insn)
+{
+       return insn_offset_displacement(insn) + insn->displacement.nbytes;
+}
+
+#endif /* _ASM_X86_INSN_H */
index 0f0d908..a3d49dd 100644 (file)
@@ -7,6 +7,7 @@
 
 #ifdef __KERNEL__
 #include <asm/segment.h>
+#include <asm/page_types.h>
 #endif
 
 #ifndef __ASSEMBLY__
@@ -216,6 +217,67 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs)
        return regs->sp;
 }
 
+/* Query offset/name of register from its name/offset */
+extern int regs_query_register_offset(const char *name);
+extern const char *regs_query_register_name(unsigned int offset);
+#define MAX_REG_OFFSET (offsetof(struct pt_regs, ss))
+
+/**
+ * regs_get_register() - get register value from its offset
+ * @regs:      pt_regs from which register value is gotten.
+ * @offset:    offset number of the register.
+ *
+ * regs_get_register returns the value of a register whose offset from @regs
+ * is @offset. The @offset is the offset of the register in struct pt_regs.
+ * If @offset is bigger than MAX_REG_OFFSET, this returns 0.
+ */
+static inline unsigned long regs_get_register(struct pt_regs *regs,
+                                             unsigned int offset)
+{
+       if (unlikely(offset > MAX_REG_OFFSET))
+               return 0;
+       return *(unsigned long *)((unsigned long)regs + offset);
+}
+
+/**
+ * regs_within_kernel_stack() - check the address in the stack
+ * @regs:      pt_regs which contains kernel stack pointer.
+ * @addr:      address which is checked.
+ *
+ * regs_within_kenel_stack() checks @addr is within the kernel stack page(s).
+ * If @addr is within the kernel stack, it returns true. If not, returns false.
+ */
+static inline int regs_within_kernel_stack(struct pt_regs *regs,
+                                          unsigned long addr)
+{
+       return ((addr & ~(THREAD_SIZE - 1))  ==
+               (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1)));
+}
+
+/**
+ * regs_get_kernel_stack_nth() - get Nth entry of the stack
+ * @regs:      pt_regs which contains kernel stack pointer.
+ * @n:         stack entry number.
+ *
+ * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which
+ * is specifined by @regs. If the @n th entry is NOT in the kernel stack,
+ * this returns 0.
+ */
+static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
+                                                     unsigned int n)
+{
+       unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs);
+       addr += n;
+       if (regs_within_kernel_stack(regs, (unsigned long)addr))
+               return *addr;
+       else
+               return 0;
+}
+
+/* Get Nth argument at function call */
+extern unsigned long regs_get_argument_nth(struct pt_regs *regs,
+                                          unsigned int n);
+
 /*
  * These are defined as per linux/ptrace.h, which see.
  */
index c251be7..36e2ef5 100644 (file)
@@ -809,6 +809,10 @@ END(interrupt)
        call \func
        .endm
 
+/*
+ * Interrupt entry/exit should be protected against kprobes
+ */
+       .pushsection .kprobes.text, "ax"
        /*
         * The interrupt stubs push (~vector+0x80) onto the stack and
         * then jump to common_interrupt.
@@ -947,6 +951,10 @@ ENTRY(retint_kernel)
 
        CFI_ENDPROC
 END(common_interrupt)
+/*
+ * End of kprobes section
+ */
+       .popsection
 
 /*
  * APIC interrupts.
index 7b5169d..c5f1f11 100644 (file)
 #include <linux/preempt.h>
 #include <linux/module.h>
 #include <linux/kdebug.h>
+#include <linux/kallsyms.h>
 
 #include <asm/cacheflush.h>
 #include <asm/desc.h>
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
 #include <asm/alternative.h>
+#include <asm/insn.h>
 
 void jprobe_return_end(void);
 
@@ -106,50 +108,6 @@ static const u32 twobyte_is_boostable[256 / 32] = {
        /*      -----------------------------------------------         */
        /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
 };
-static const u32 onebyte_has_modrm[256 / 32] = {
-       /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
-       /*      -----------------------------------------------         */
-       W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 00 */
-       W(0x10, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 10 */
-       W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 20 */
-       W(0x30, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 30 */
-       W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */
-       W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */
-       W(0x60, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0) | /* 60 */
-       W(0x70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 70 */
-       W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
-       W(0x90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 90 */
-       W(0xa0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* a0 */
-       W(0xb0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* b0 */
-       W(0xc0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* c0 */
-       W(0xd0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
-       W(0xe0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* e0 */
-       W(0xf0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1)   /* f0 */
-       /*      -----------------------------------------------         */
-       /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
-};
-static const u32 twobyte_has_modrm[256 / 32] = {
-       /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
-       /*      -----------------------------------------------         */
-       W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1) | /* 0f */
-       W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0) , /* 1f */
-       W(0x20, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 2f */
-       W(0x30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 3f */
-       W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 4f */
-       W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 5f */
-       W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 6f */
-       W(0x70, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1) , /* 7f */
-       W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 8f */
-       W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 9f */
-       W(0xa0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) | /* af */
-       W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* bf */
-       W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* cf */
-       W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* df */
-       W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* ef */
-       W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)   /* ff */
-       /*      -----------------------------------------------         */
-       /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
-};
 #undef W
 
 struct kretprobe_blackpoint kretprobe_blacklist[] = {
@@ -244,6 +202,75 @@ retry:
        }
 }
 
+/* Recover the probed instruction at addr for further analysis. */
+static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
+{
+       struct kprobe *kp;
+       kp = get_kprobe((void *)addr);
+       if (!kp)
+               return -EINVAL;
+
+       /*
+        *  Basically, kp->ainsn.insn has an original instruction.
+        *  However, RIP-relative instruction can not do single-stepping
+        *  at different place, fix_riprel() tweaks the displacement of
+        *  that instruction. In that case, we can't recover the instruction
+        *  from the kp->ainsn.insn.
+        *
+        *  On the other hand, kp->opcode has a copy of the first byte of
+        *  the probed instruction, which is overwritten by int3. And
+        *  the instruction at kp->addr is not modified by kprobes except
+        *  for the first byte, we can recover the original instruction
+        *  from it and kp->opcode.
+        */
+       memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+       buf[0] = kp->opcode;
+       return 0;
+}
+
+/* Dummy buffers for kallsyms_lookup */
+static char __dummy_buf[KSYM_NAME_LEN];
+
+/* Check if paddr is at an instruction boundary */
+static int __kprobes can_probe(unsigned long paddr)
+{
+       int ret;
+       unsigned long addr, offset = 0;
+       struct insn insn;
+       kprobe_opcode_t buf[MAX_INSN_SIZE];
+
+       if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf))
+               return 0;
+
+       /* Decode instructions */
+       addr = paddr - offset;
+       while (addr < paddr) {
+               kernel_insn_init(&insn, (void *)addr);
+               insn_get_opcode(&insn);
+
+               /*
+                * Check if the instruction has been modified by another
+                * kprobe, in which case we replace the breakpoint by the
+                * original instruction in our buffer.
+                */
+               if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
+                       ret = recover_probed_instruction(buf, addr);
+                       if (ret)
+                               /*
+                                * Another debugging subsystem might insert
+                                * this breakpoint. In that case, we can't
+                                * recover it.
+                                */
+                               return 0;
+                       kernel_insn_init(&insn, buf);
+               }
+               insn_get_length(&insn);
+               addr += insn.length;
+       }
+
+       return (addr == paddr);
+}
+
 /*
  * Returns non-zero if opcode modifies the interrupt flag.
  */
@@ -277,68 +304,30 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
 static void __kprobes fix_riprel(struct kprobe *p)
 {
 #ifdef CONFIG_X86_64
-       u8 *insn = p->ainsn.insn;
-       s64 disp;
-       int need_modrm;
-
-       /* Skip legacy instruction prefixes.  */
-       while (1) {
-               switch (*insn) {
-               case 0x66:
-               case 0x67:
-               case 0x2e:
-               case 0x3e:
-               case 0x26:
-               case 0x64:
-               case 0x65:
-               case 0x36:
-               case 0xf0:
-               case 0xf3:
-               case 0xf2:
-                       ++insn;
-                       continue;
-               }
-               break;
-       }
+       struct insn insn;
+       kernel_insn_init(&insn, p->ainsn.insn);
 
-       /* Skip REX instruction prefix.  */
-       if (is_REX_prefix(insn))
-               ++insn;
-
-       if (*insn == 0x0f) {
-               /* Two-byte opcode.  */
-               ++insn;
-               need_modrm = test_bit(*insn,
-                                     (unsigned long *)twobyte_has_modrm);
-       } else
-               /* One-byte opcode.  */
-               need_modrm = test_bit(*insn,
-                                     (unsigned long *)onebyte_has_modrm);
-
-       if (need_modrm) {
-               u8 modrm = *++insn;
-               if ((modrm & 0xc7) == 0x05) {
-                       /* %rip+disp32 addressing mode */
-                       /* Displacement follows ModRM byte.  */
-                       ++insn;
-                       /*
-                        * The copied instruction uses the %rip-relative
-                        * addressing mode.  Adjust the displacement for the
-                        * difference between the original location of this
-                        * instruction and the location of the copy that will
-                        * actually be run.  The tricky bit here is making sure
-                        * that the sign extension happens correctly in this
-                        * calculation, since we need a signed 32-bit result to
-                        * be sign-extended to 64 bits when it's added to the
-                        * %rip value and yield the same 64-bit result that the
-                        * sign-extension of the original signed 32-bit
-                        * displacement would have given.
-                        */
-                       disp = (u8 *) p->addr + *((s32 *) insn) -
-                              (u8 *) p->ainsn.insn;
-                       BUG_ON((s64) (s32) disp != disp); /* Sanity check.  */
-                       *(s32 *)insn = (s32) disp;
-               }
+       if (insn_rip_relative(&insn)) {
+               s64 newdisp;
+               u8 *disp;
+               insn_get_displacement(&insn);
+               /*
+                * The copied instruction uses the %rip-relative addressing
+                * mode.  Adjust the displacement for the difference between
+                * the original location of this instruction and the location
+                * of the copy that will actually be run.  The tricky bit here
+                * is making sure that the sign extension happens correctly in
+                * this calculation, since we need a signed 32-bit result to
+                * be sign-extended to 64 bits when it's added to the %rip
+                * value and yield the same 64-bit result that the sign-
+                * extension of the original signed 32-bit displacement would
+                * have given.
+                */
+               newdisp = (u8 *) p->addr + (s64) insn.displacement.value -
+                         (u8 *) p->ainsn.insn;
+               BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check.  */
+               disp = (u8 *) p->ainsn.insn + insn_offset_displacement(&insn);
+               *(s32 *) disp = (s32) newdisp;
        }
 #endif
 }
@@ -359,6 +348,8 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p)
 
 int __kprobes arch_prepare_kprobe(struct kprobe *p)
 {
+       if (!can_probe((unsigned long)p->addr))
+               return -EILSEQ;
        /* insn: must be on special executable page on x86. */
        p->ainsn.insn = get_insn_slot();
        if (!p->ainsn.insn)
@@ -472,17 +463,6 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
 {
        switch (kcb->kprobe_status) {
        case KPROBE_HIT_SSDONE:
-#ifdef CONFIG_X86_64
-               /* TODO: Provide re-entrancy from post_kprobes_handler() and
-                * avoid exception stack corruption while single-stepping on
-                * the instruction of the new probe.
-                */
-               arch_disarm_kprobe(p);
-               regs->ip = (unsigned long)p->addr;
-               reset_current_kprobe();
-               preempt_enable_no_resched();
-               break;
-#endif
        case KPROBE_HIT_ACTIVE:
                save_previous_kprobe(kcb);
                set_current_kprobe(p, regs, kcb);
@@ -491,18 +471,16 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
                kcb->kprobe_status = KPROBE_REENTER;
                break;
        case KPROBE_HIT_SS:
-               if (p == kprobe_running()) {
-                       regs->flags &= ~X86_EFLAGS_TF;
-                       regs->flags |= kcb->kprobe_saved_flags;
-                       return 0;
-               } else {
-                       /* A probe has been hit in the codepath leading up
-                        * to, or just after, single-stepping of a probed
-                        * instruction. This entire codepath should strictly
-                        * reside in .kprobes.text section. Raise a warning
-                        * to highlight this peculiar case.
-                        */
-               }
+               /* A probe has been hit in the codepath leading up to, or just
+                * after, single-stepping of a probed instruction. This entire
+                * codepath should strictly reside in .kprobes.text section.
+                * Raise a BUG or we'll continue in an endless reentering loop
+                * and eventually a stack overflow.
+                */
+               printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n",
+                      p->addr);
+               dump_kprobe(p);
+               BUG();
        default:
                /* impossible cases */
                WARN_ON(1);
index 8d7d5c9..a33a17d 100644 (file)
@@ -49,6 +49,118 @@ enum x86_regset {
        REGSET_IOPERM32,
 };
 
+struct pt_regs_offset {
+       const char *name;
+       int offset;
+};
+
+#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
+#define REG_OFFSET_END {.name = NULL, .offset = 0}
+
+static const struct pt_regs_offset regoffset_table[] = {
+#ifdef CONFIG_X86_64
+       REG_OFFSET_NAME(r15),
+       REG_OFFSET_NAME(r14),
+       REG_OFFSET_NAME(r13),
+       REG_OFFSET_NAME(r12),
+       REG_OFFSET_NAME(r11),
+       REG_OFFSET_NAME(r10),
+       REG_OFFSET_NAME(r9),
+       REG_OFFSET_NAME(r8),
+#endif
+       REG_OFFSET_NAME(bx),
+       REG_OFFSET_NAME(cx),
+       REG_OFFSET_NAME(dx),
+       REG_OFFSET_NAME(si),
+       REG_OFFSET_NAME(di),
+       REG_OFFSET_NAME(bp),
+       REG_OFFSET_NAME(ax),
+#ifdef CONFIG_X86_32
+       REG_OFFSET_NAME(ds),
+       REG_OFFSET_NAME(es),
+       REG_OFFSET_NAME(fs),
+       REG_OFFSET_NAME(gs),
+#endif
+       REG_OFFSET_NAME(orig_ax),
+       REG_OFFSET_NAME(ip),
+       REG_OFFSET_NAME(cs),
+       REG_OFFSET_NAME(flags),
+       REG_OFFSET_NAME(sp),
+       REG_OFFSET_NAME(ss),
+       REG_OFFSET_END,
+};
+
+/**
+ * regs_query_register_offset() - query register offset from its name
+ * @name:      the name of a register
+ *
+ * regs_query_register_offset() returns the offset of a register in struct
+ * pt_regs from its name. If the name is invalid, this returns -EINVAL;
+ */
+int regs_query_register_offset(const char *name)
+{
+       const struct pt_regs_offset *roff;
+       for (roff = regoffset_table; roff->name != NULL; roff++)
+               if (!strcmp(roff->name, name))
+                       return roff->offset;
+       return -EINVAL;
+}
+
+/**
+ * regs_query_register_name() - query register name from its offset
+ * @offset:    the offset of a register in struct pt_regs.
+ *
+ * regs_query_register_name() returns the name of a register from its
+ * offset in struct pt_regs. If the @offset is invalid, this returns NULL;
+ */
+const char *regs_query_register_name(unsigned int offset)
+{
+       const struct pt_regs_offset *roff;
+       for (roff = regoffset_table; roff->name != NULL; roff++)
+               if (roff->offset == offset)
+                       return roff->name;
+       return NULL;
+}
+
+static const int arg_offs_table[] = {
+#ifdef CONFIG_X86_32
+       [0] = offsetof(struct pt_regs, ax),
+       [1] = offsetof(struct pt_regs, dx),
+       [2] = offsetof(struct pt_regs, cx)
+#else /* CONFIG_X86_64 */
+       [0] = offsetof(struct pt_regs, di),
+       [1] = offsetof(struct pt_regs, si),
+       [2] = offsetof(struct pt_regs, dx),
+       [3] = offsetof(struct pt_regs, cx),
+       [4] = offsetof(struct pt_regs, r8),
+       [5] = offsetof(struct pt_regs, r9)
+#endif
+};
+
+/**
+ * regs_get_argument_nth() - get Nth argument at function call
+ * @regs:      pt_regs which contains registers at function entry.
+ * @n:         argument number.
+ *
+ * regs_get_argument_nth() returns @n th argument of a function call.
+ * Since usually the kernel stack will be changed right after function entry,
+ * you must use this at function entry. If the @n th entry is NOT in the
+ * kernel stack or pt_regs, this returns 0.
+ */
+unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n)
+{
+       if (n < ARRAY_SIZE(arg_offs_table))
+               return *((unsigned long *)regs + arg_offs_table[n]);
+       else {
+               /*
+                * The typical case: arg n is on the stack.
+                * (Note: stack[0] = return address, so skip it)
+                */
+               n -= ARRAY_SIZE(arg_offs_table);
+               return regs_get_kernel_stack_nth(regs, 1 + n);
+       }
+}
+
 /*
  * does not yet catch signals sent when the child dies.
  * in exit.c or in signal.c.
index 07c3189..c77f8a7 100644 (file)
@@ -2,12 +2,25 @@
 # Makefile for x86 specific library files.
 #
 
+inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk
+inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt
+quiet_cmd_inat_tables = GEN     $@
+      cmd_inat_tables = $(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@
+
+$(obj)/inat-tables.c: $(inat_tables_script) $(inat_tables_maps)
+       $(call cmd,inat_tables)
+
+$(obj)/inat.o: $(obj)/inat-tables.c
+
+clean-files := inat-tables.c
+
 obj-$(CONFIG_SMP) := msr.o
 
 lib-y := delay.o
 lib-y += thunk_$(BITS).o
 lib-y += usercopy_$(BITS).o getuser.o putuser.o
 lib-y += memcpy_$(BITS).o
+lib-y += insn.o inat.o
 
 ifeq ($(CONFIG_X86_32),y)
         obj-y += atomic64_32.o
diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c
new file mode 100644 (file)
index 0000000..054656a
--- /dev/null
@@ -0,0 +1,78 @@
+/*
+ * x86 instruction attribute tables
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+#include <asm/insn.h>
+
+/* Attribute tables are generated from opcode map */
+#include "inat-tables.c"
+
+/* Attribute search APIs */
+insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode)
+{
+       return inat_primary_table[opcode];
+}
+
+insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, insn_byte_t last_pfx,
+                                     insn_attr_t esc_attr)
+{
+       const insn_attr_t *table;
+       insn_attr_t lpfx_attr;
+       int n, m = 0;
+
+       n = inat_escape_id(esc_attr);
+       if (last_pfx) {
+               lpfx_attr = inat_get_opcode_attribute(last_pfx);
+               m = inat_last_prefix_id(lpfx_attr);
+       }
+       table = inat_escape_tables[n][0];
+       if (!table)
+               return 0;
+       if (inat_has_variant(table[opcode]) && m) {
+               table = inat_escape_tables[n][m];
+               if (!table)
+                       return 0;
+       }
+       return table[opcode];
+}
+
+insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx,
+                                    insn_attr_t grp_attr)
+{
+       const insn_attr_t *table;
+       insn_attr_t lpfx_attr;
+       int n, m = 0;
+
+       n = inat_group_id(grp_attr);
+       if (last_pfx) {
+               lpfx_attr = inat_get_opcode_attribute(last_pfx);
+               m = inat_last_prefix_id(lpfx_attr);
+       }
+       table = inat_group_tables[n][0];
+       if (!table)
+               return inat_group_common_attribute(grp_attr);
+       if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && m) {
+               table = inat_escape_tables[n][m];
+               if (!table)
+                       return inat_group_common_attribute(grp_attr);
+       }
+       return table[X86_MODRM_REG(modrm)] |
+              inat_group_common_attribute(grp_attr);
+}
+
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
new file mode 100644 (file)
index 0000000..dfd56a3
--- /dev/null
@@ -0,0 +1,464 @@
+/*
+ * x86 instruction analysis
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004, 2009
+ */
+
+#include <linux/string.h>
+#include <asm/inat.h>
+#include <asm/insn.h>
+
+#define get_next(t, insn)      \
+       ({t r; r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; })
+
+#define peek_next(t, insn)     \
+       ({t r; r = *(t*)insn->next_byte; r; })
+
+/**
+ * insn_init() - initialize struct insn
+ * @insn:      &struct insn to be initialized
+ * @kaddr:     address (in kernel memory) of instruction (or copy thereof)
+ * @x86_64:    !0 for 64-bit kernel or 64-bit app
+ */
+void insn_init(struct insn *insn, const void *kaddr, int x86_64)
+{
+       memset(insn, 0, sizeof(*insn));
+       insn->kaddr = kaddr;
+       insn->next_byte = kaddr;
+       insn->x86_64 = x86_64 ? 1 : 0;
+       insn->opnd_bytes = 4;
+       if (x86_64)
+               insn->addr_bytes = 8;
+       else
+               insn->addr_bytes = 4;
+}
+
+/**
+ * insn_get_prefixes - scan x86 instruction prefix bytes
+ * @insn:      &struct insn containing instruction
+ *
+ * Populates the @insn->prefixes bitmap, and updates @insn->next_byte
+ * to point to the (first) opcode.  No effect if @insn->prefixes.got
+ * is already set.
+ */
+void insn_get_prefixes(struct insn *insn)
+{
+       struct insn_field *prefixes = &insn->prefixes;
+       insn_attr_t attr;
+       insn_byte_t b, lb;
+       int i, nb;
+
+       if (prefixes->got)
+               return;
+
+       nb = 0;
+       lb = 0;
+       b = peek_next(insn_byte_t, insn);
+       attr = inat_get_opcode_attribute(b);
+       while (inat_is_prefix(attr)) {
+               /* Skip if same prefix */
+               for (i = 0; i < nb; i++)
+                       if (prefixes->bytes[i] == b)
+                               goto found;
+               if (nb == 4)
+                       /* Invalid instruction */
+                       break;
+               prefixes->bytes[nb++] = b;
+               if (inat_is_address_size_prefix(attr)) {
+                       /* address size switches 2/4 or 4/8 */
+                       if (insn->x86_64)
+                               insn->addr_bytes ^= 12;
+                       else
+                               insn->addr_bytes ^= 6;
+               } else if (inat_is_operand_size_prefix(attr)) {
+                       /* oprand size switches 2/4 */
+                       insn->opnd_bytes ^= 6;
+               }
+found:
+               prefixes->nbytes++;
+               insn->next_byte++;
+               lb = b;
+               b = peek_next(insn_byte_t, insn);
+               attr = inat_get_opcode_attribute(b);
+       }
+       /* Set the last prefix */
+       if (lb && lb != insn->prefixes.bytes[3]) {
+               if (unlikely(insn->prefixes.bytes[3])) {
+                       /* Swap the last prefix */
+                       b = insn->prefixes.bytes[3];
+                       for (i = 0; i < nb; i++)
+                               if (prefixes->bytes[i] == lb)
+                                       prefixes->bytes[i] = b;
+               }
+               insn->prefixes.bytes[3] = lb;
+       }
+
+       if (insn->x86_64) {
+               b = peek_next(insn_byte_t, insn);
+               attr = inat_get_opcode_attribute(b);
+               if (inat_is_rex_prefix(attr)) {
+                       insn->rex_prefix.value = b;
+                       insn->rex_prefix.nbytes = 1;
+                       insn->next_byte++;
+                       if (X86_REX_W(b))
+                               /* REX.W overrides opnd_size */
+                               insn->opnd_bytes = 8;
+               }
+       }
+       insn->rex_prefix.got = 1;
+       prefixes->got = 1;
+       return;
+}
+
+/**
+ * insn_get_opcode - collect opcode(s)
+ * @insn:      &struct insn containing instruction
+ *
+ * Populates @insn->opcode, updates @insn->next_byte to point past the
+ * opcode byte(s), and set @insn->attr (except for groups).
+ * If necessary, first collects any preceding (prefix) bytes.
+ * Sets @insn->opcode.value = opcode1.  No effect if @insn->opcode.got
+ * is already 1.
+ */
+void insn_get_opcode(struct insn *insn)
+{
+       struct insn_field *opcode = &insn->opcode;
+       insn_byte_t op, pfx;
+       if (opcode->got)
+               return;
+       if (!insn->prefixes.got)
+               insn_get_prefixes(insn);
+
+       /* Get first opcode */
+       op = get_next(insn_byte_t, insn);
+       opcode->bytes[0] = op;
+       opcode->nbytes = 1;
+       insn->attr = inat_get_opcode_attribute(op);
+       while (inat_is_escape(insn->attr)) {
+               /* Get escaped opcode */
+               op = get_next(insn_byte_t, insn);
+               opcode->bytes[opcode->nbytes++] = op;
+               pfx = insn_last_prefix(insn);
+               insn->attr = inat_get_escape_attribute(op, pfx, insn->attr);
+       }
+       opcode->got = 1;
+}
+
+/**
+ * insn_get_modrm - collect ModRM byte, if any
+ * @insn:      &struct insn containing instruction
+ *
+ * Populates @insn->modrm and updates @insn->next_byte to point past the
+ * ModRM byte, if any.  If necessary, first collects the preceding bytes
+ * (prefixes and opcode(s)).  No effect if @insn->modrm.got is already 1.
+ */
+void insn_get_modrm(struct insn *insn)
+{
+       struct insn_field *modrm = &insn->modrm;
+       insn_byte_t pfx, mod;
+       if (modrm->got)
+               return;
+       if (!insn->opcode.got)
+               insn_get_opcode(insn);
+
+       if (inat_has_modrm(insn->attr)) {
+               mod = get_next(insn_byte_t, insn);
+               modrm->value = mod;
+               modrm->nbytes = 1;
+               if (inat_is_group(insn->attr)) {
+                       pfx = insn_last_prefix(insn);
+                       insn->attr = inat_get_group_attribute(mod, pfx,
+                                                             insn->attr);
+               }
+       }
+
+       if (insn->x86_64 && inat_is_force64(insn->attr))
+               insn->opnd_bytes = 8;
+       modrm->got = 1;
+}
+
+
+/**
+ * insn_rip_relative() - Does instruction use RIP-relative addressing mode?
+ * @insn:      &struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * ModRM byte.  No effect if @insn->x86_64 is 0.
+ */
+int insn_rip_relative(struct insn *insn)
+{
+       struct insn_field *modrm = &insn->modrm;
+
+       if (!insn->x86_64)
+               return 0;
+       if (!modrm->got)
+               insn_get_modrm(insn);
+       /*
+        * For rip-relative instructions, the mod field (top 2 bits)
+        * is zero and the r/m field (bottom 3 bits) is 0x5.
+        */
+       return (modrm->nbytes && (modrm->value & 0xc7) == 0x5);
+}
+
+/**
+ * insn_get_sib() - Get the SIB byte of instruction
+ * @insn:      &struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * ModRM byte.
+ */
+void insn_get_sib(struct insn *insn)
+{
+       insn_byte_t modrm;
+
+       if (insn->sib.got)
+               return;
+       if (!insn->modrm.got)
+               insn_get_modrm(insn);
+       if (insn->modrm.nbytes) {
+               modrm = (insn_byte_t)insn->modrm.value;
+               if (insn->addr_bytes != 2 &&
+                   X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) {
+                       insn->sib.value = get_next(insn_byte_t, insn);
+                       insn->sib.nbytes = 1;
+               }
+       }
+       insn->sib.got = 1;
+}
+
+
+/**
+ * insn_get_displacement() - Get the displacement of instruction
+ * @insn:      &struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * SIB byte.
+ * Displacement value is sign-expanded.
+ */
+void insn_get_displacement(struct insn *insn)
+{
+       insn_byte_t mod, rm, base;
+
+       if (insn->displacement.got)
+               return;
+       if (!insn->sib.got)
+               insn_get_sib(insn);
+       if (insn->modrm.nbytes) {
+               /*
+                * Interpreting the modrm byte:
+                * mod = 00 - no displacement fields (exceptions below)
+                * mod = 01 - 1-byte displacement field
+                * mod = 10 - displacement field is 4 bytes, or 2 bytes if
+                *      address size = 2 (0x67 prefix in 32-bit mode)
+                * mod = 11 - no memory operand
+                *
+                * If address size = 2...
+                * mod = 00, r/m = 110 - displacement field is 2 bytes
+                *
+                * If address size != 2...
+                * mod != 11, r/m = 100 - SIB byte exists
+                * mod = 00, SIB base = 101 - displacement field is 4 bytes
+                * mod = 00, r/m = 101 - rip-relative addressing, displacement
+                *      field is 4 bytes
+                */
+               mod = X86_MODRM_MOD(insn->modrm.value);
+               rm = X86_MODRM_RM(insn->modrm.value);
+               base = X86_SIB_BASE(insn->sib.value);
+               if (mod == 3)
+                       goto out;
+               if (mod == 1) {
+                       insn->displacement.value = get_next(char, insn);
+                       insn->displacement.nbytes = 1;
+               } else if (insn->addr_bytes == 2) {
+                       if ((mod == 0 && rm == 6) || mod == 2) {
+                               insn->displacement.value =
+                                        get_next(short, insn);
+                               insn->displacement.nbytes = 2;
+                       }
+               } else {
+                       if ((mod == 0 && rm == 5) || mod == 2 ||
+                           (mod == 0 && base == 5)) {
+                               insn->displacement.value = get_next(int, insn);
+                               insn->displacement.nbytes = 4;
+                       }
+               }
+       }
+out:
+       insn->displacement.got = 1;
+}
+
+/* Decode moffset16/32/64 */
+static void __get_moffset(struct insn *insn)
+{
+       switch (insn->addr_bytes) {
+       case 2:
+               insn->moffset1.value = get_next(short, insn);
+               insn->moffset1.nbytes = 2;
+               break;
+       case 4:
+               insn->moffset1.value = get_next(int, insn);
+               insn->moffset1.nbytes = 4;
+               break;
+       case 8:
+               insn->moffset1.value = get_next(int, insn);
+               insn->moffset1.nbytes = 4;
+               insn->moffset2.value = get_next(int, insn);
+               insn->moffset2.nbytes = 4;
+               break;
+       }
+       insn->moffset1.got = insn->moffset2.got = 1;
+}
+
+/* Decode imm v32(Iz) */
+static void __get_immv32(struct insn *insn)
+{
+       switch (insn->opnd_bytes) {
+       case 2:
+               insn->immediate.value = get_next(short, insn);
+               insn->immediate.nbytes = 2;
+               break;
+       case 4:
+       case 8:
+               insn->immediate.value = get_next(int, insn);
+               insn->immediate.nbytes = 4;
+               break;
+       }
+}
+
+/* Decode imm v64(Iv/Ov) */
+static void __get_immv(struct insn *insn)
+{
+       switch (insn->opnd_bytes) {
+       case 2:
+               insn->immediate1.value = get_next(short, insn);
+               insn->immediate1.nbytes = 2;
+               break;
+       case 4:
+               insn->immediate1.value = get_next(int, insn);
+               insn->immediate1.nbytes = 4;
+               break;
+       case 8:
+               insn->immediate1.value = get_next(int, insn);
+               insn->immediate1.nbytes = 4;
+               insn->immediate2.value = get_next(int, insn);
+               insn->immediate2.nbytes = 4;
+               break;
+       }
+       insn->immediate1.got = insn->immediate2.got = 1;
+}
+
+/* Decode ptr16:16/32(Ap) */
+static void __get_immptr(struct insn *insn)
+{
+       switch (insn->opnd_bytes) {
+       case 2:
+               insn->immediate1.value = get_next(short, insn);
+               insn->immediate1.nbytes = 2;
+               break;
+       case 4:
+               insn->immediate1.value = get_next(int, insn);
+               insn->immediate1.nbytes = 4;
+               break;
+       case 8:
+               /* ptr16:64 is not exist (no segment) */
+               return;
+       }
+       insn->immediate2.value = get_next(unsigned short, insn);
+       insn->immediate2.nbytes = 2;
+       insn->immediate1.got = insn->immediate2.got = 1;
+}
+
+/**
+ * insn_get_immediate() - Get the immediates of instruction
+ * @insn:      &struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * displacement bytes.
+ * Basically, most of immediates are sign-expanded. Unsigned-value can be
+ * get by bit masking with ((1 << (nbytes * 8)) - 1)
+ */
+void insn_get_immediate(struct insn *insn)
+{
+       if (insn->immediate.got)
+               return;
+       if (!insn->displacement.got)
+               insn_get_displacement(insn);
+
+       if (inat_has_moffset(insn->attr)) {
+               __get_moffset(insn);
+               goto done;
+       }
+
+       if (!inat_has_immediate(insn->attr))
+               /* no immediates */
+               goto done;
+
+       switch (inat_immediate_size(insn->attr)) {
+       case INAT_IMM_BYTE:
+               insn->immediate.value = get_next(char, insn);
+               insn->immediate.nbytes = 1;
+               break;
+       case INAT_IMM_WORD:
+               insn->immediate.value = get_next(short, insn);
+               insn->immediate.nbytes = 2;
+               break;
+       case INAT_IMM_DWORD:
+               insn->immediate.value = get_next(int, insn);
+               insn->immediate.nbytes = 4;
+               break;
+       case INAT_IMM_QWORD:
+               insn->immediate1.value = get_next(int, insn);
+               insn->immediate1.nbytes = 4;
+               insn->immediate2.value = get_next(int, insn);
+               insn->immediate2.nbytes = 4;
+               break;
+       case INAT_IMM_PTR:
+               __get_immptr(insn);
+               break;
+       case INAT_IMM_VWORD32:
+               __get_immv32(insn);
+               break;
+       case INAT_IMM_VWORD:
+               __get_immv(insn);
+               break;
+       default:
+               break;
+       }
+       if (inat_has_second_immediate(insn->attr)) {
+               insn->immediate2.value = get_next(char, insn);
+               insn->immediate2.nbytes = 1;
+       }
+done:
+       insn->immediate.got = 1;
+}
+
+/**
+ * insn_get_length() - Get the length of instruction
+ * @insn:      &struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * immediates bytes.
+ */
+void insn_get_length(struct insn *insn)
+{
+       if (insn->length)
+               return;
+       if (!insn->immediate.got)
+               insn_get_immediate(insn);
+       insn->length = (unsigned char)((unsigned long)insn->next_byte
+                                    - (unsigned long)insn->kaddr);
+}
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
new file mode 100644 (file)
index 0000000..083dd59
--- /dev/null
@@ -0,0 +1,719 @@
+# x86 Opcode Maps
+#
+#<Opcode maps>
+# Table: table-name
+# Referrer: escaped-name
+# opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...]
+# (or)
+# opcode: escape # escaped-name
+# EndTable
+#
+#<group maps>
+# GrpTable: GrpXXX
+# reg:  mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...]
+# EndTable
+#
+
+Table: one byte opcode
+Referrer:
+# 0x00 - 0x0f
+00: ADD Eb,Gb
+01: ADD Ev,Gv
+02: ADD Gb,Eb
+03: ADD Gv,Ev
+04: ADD AL,Ib
+05: ADD rAX,Iz
+06: PUSH ES (i64)
+07: POP ES (i64)
+08: OR Eb,Gb
+09: OR Ev,Gv
+0a: OR Gb,Eb
+0b: OR Gv,Ev
+0c: OR AL,Ib
+0d: OR rAX,Iz
+0e: PUSH CS (i64)
+0f: escape # 2-byte escape
+# 0x10 - 0x1f
+10: ADC Eb,Gb
+11: ADC Ev,Gv
+12: ADC Gb,Eb
+13: ADC Gv,Ev
+14: ADC AL,Ib
+15: ADC rAX,Iz
+16: PUSH SS (i64)
+17: POP SS (i64)
+18: SBB Eb,Gb
+19: SBB Ev,Gv
+1a: SBB Gb,Eb
+1b: SBB Gv,Ev
+1c: SBB AL,Ib
+1d: SBB rAX,Iz
+1e: PUSH DS (i64)
+1f: POP DS (i64)
+# 0x20 - 0x2f
+20: AND Eb,Gb
+21: AND Ev,Gv
+22: AND Gb,Eb
+23: AND Gv,Ev
+24: AND AL,Ib
+25: AND rAx,Iz
+26: SEG=ES (Prefix)
+27: DAA (i64)
+28: SUB Eb,Gb
+29: SUB Ev,Gv
+2a: SUB Gb,Eb
+2b: SUB Gv,Ev
+2c: SUB AL,Ib
+2d: SUB rAX,Iz
+2e: SEG=CS (Prefix)
+2f: DAS (i64)
+# 0x30 - 0x3f
+30: XOR Eb,Gb
+31: XOR Ev,Gv
+32: XOR Gb,Eb
+33: XOR Gv,Ev
+34: XOR AL,Ib
+35: XOR rAX,Iz
+36: SEG=SS (Prefix)
+37: AAA (i64)
+38: CMP Eb,Gb
+39: CMP Ev,Gv
+3a: CMP Gb,Eb
+3b: CMP Gv,Ev
+3c: CMP AL,Ib
+3d: CMP rAX,Iz
+3e: SEG=DS (Prefix)
+3f: AAS (i64)
+# 0x40 - 0x4f
+40: INC eAX (i64) | REX (o64)
+41: INC eCX (i64) | REX.B (o64)
+42: INC eDX (i64) | REX.X (o64)
+43: INC eBX (i64) | REX.XB (o64)
+44: INC eSP (i64) | REX.R (o64)
+45: INC eBP (i64) | REX.RB (o64)
+46: INC eSI (i64) | REX.RX (o64)
+47: INC eDI (i64) | REX.RXB (o64)
+48: DEC eAX (i64) | REX.W (o64)
+49: DEC eCX (i64) | REX.WB (o64)
+4a: DEC eDX (i64) | REX.WX (o64)
+4b: DEC eBX (i64) | REX.WXB (o64)
+4c: DEC eSP (i64) | REX.WR (o64)
+4d: DEC eBP (i64) | REX.WRB (o64)
+4e: DEC eSI (i64) | REX.WRX (o64)
+4f: DEC eDI (i64) | REX.WRXB (o64)
+# 0x50 - 0x5f
+50: PUSH rAX/r8 (d64)
+51: PUSH rCX/r9 (d64)
+52: PUSH rDX/r10 (d64)
+53: PUSH rBX/r11 (d64)
+54: PUSH rSP/r12 (d64)
+55: PUSH rBP/r13 (d64)
+56: PUSH rSI/r14 (d64)
+57: PUSH rDI/r15 (d64)
+58: POP rAX/r8 (d64)
+59: POP rCX/r9 (d64)
+5a: POP rDX/r10 (d64)
+5b: POP rBX/r11 (d64)
+5c: POP rSP/r12 (d64)
+5d: POP rBP/r13 (d64)
+5e: POP rSI/r14 (d64)
+5f: POP rDI/r15 (d64)
+# 0x60 - 0x6f
+60: PUSHA/PUSHAD (i64)
+61: POPA/POPAD (i64)
+62: BOUND Gv,Ma (i64)
+63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64)
+64: SEG=FS (Prefix)
+65: SEG=GS (Prefix)
+66: Operand-Size (Prefix)
+67: Address-Size (Prefix)
+68: PUSH Iz (d64)
+69: IMUL Gv,Ev,Iz
+6a: PUSH Ib (d64)
+6b: IMUL Gv,Ev,Ib
+6c: INS/INSB Yb,DX
+6d: INS/INSW/INSD Yz,DX
+6e: OUTS/OUTSB DX,Xb
+6f: OUTS/OUTSW/OUTSD DX,Xz
+# 0x70 - 0x7f
+70: JO Jb
+71: JNO Jb
+72: JB/JNAE/JC Jb
+73: JNB/JAE/JNC Jb
+74: JZ/JE Jb
+75: JNZ/JNE Jb
+76: JBE/JNA Jb
+77: JNBE/JA Jb
+78: JS Jb
+79: JNS Jb
+7a: JP/JPE Jb
+7b: JNP/JPO Jb
+7c: JL/JNGE Jb
+7d: JNL/JGE Jb
+7e: JLE/JNG Jb
+7f: JNLE/JG Jb
+# 0x80 - 0x8f
+80: Grp1 Eb,Ib (1A)
+81: Grp1 Ev,Iz (1A)
+82: Grp1 Eb,Ib (1A),(i64)
+83: Grp1 Ev,Ib (1A)
+84: TEST Eb,Gb
+85: TEST Ev,Gv
+86: XCHG Eb,Gb
+87: XCHG Ev,Gv
+88: MOV Eb,Gb
+89: MOV Ev,Gv
+8a: MOV Gb,Eb
+8b: MOV Gv,Ev
+8c: MOV Ev,Sw
+8d: LEA Gv,M
+8e: MOV Sw,Ew
+8f: Grp1A (1A) | POP Ev (d64)
+# 0x90 - 0x9f
+90: NOP | PAUSE (F3) | XCHG r8,rAX
+91: XCHG rCX/r9,rAX
+92: XCHG rDX/r10,rAX
+93: XCHG rBX/r11,rAX
+94: XCHG rSP/r12,rAX
+95: XCHG rBP/r13,rAX
+96: XCHG rSI/r14,rAX
+97: XCHG rDI/r15,rAX
+98: CBW/CWDE/CDQE
+99: CWD/CDQ/CQO
+9a: CALLF Ap (i64)
+9b: FWAIT/WAIT
+9c: PUSHF/D/Q Fv (d64)
+9d: POPF/D/Q Fv (d64)
+9e: SAHF
+9f: LAHF
+# 0xa0 - 0xaf
+a0: MOV AL,Ob
+a1: MOV rAX,Ov
+a2: MOV Ob,AL
+a3: MOV Ov,rAX
+a4: MOVS/B Xb,Yb
+a5: MOVS/W/D/Q Xv,Yv
+a6: CMPS/B Xb,Yb
+a7: CMPS/W/D Xv,Yv
+a8: TEST AL,Ib
+a9: TEST rAX,Iz
+aa: STOS/B Yb,AL
+ab: STOS/W/D/Q Yv,rAX
+ac: LODS/B AL,Xb
+ad: LODS/W/D/Q rAX,Xv
+ae: SCAS/B AL,Yb
+af: SCAS/W/D/Q rAX,Xv
+# 0xb0 - 0xbf
+b0: MOV AL/R8L,Ib
+b1: MOV CL/R9L,Ib
+b2: MOV DL/R10L,Ib
+b3: MOV BL/R11L,Ib
+b4: MOV AH/R12L,Ib
+b5: MOV CH/R13L,Ib
+b6: MOV DH/R14L,Ib
+b7: MOV BH/R15L,Ib
+b8: MOV rAX/r8,Iv
+b9: MOV rCX/r9,Iv
+ba: MOV rDX/r10,Iv
+bb: MOV rBX/r11,Iv
+bc: MOV rSP/r12,Iv
+bd: MOV rBP/r13,Iv
+be: MOV rSI/r14,Iv
+bf: MOV rDI/r15,Iv
+# 0xc0 - 0xcf
+c0: Grp2 Eb,Ib (1A)
+c1: Grp2 Ev,Ib (1A)
+c2: RETN Iw (f64)
+c3: RETN
+c4: LES Gz,Mp (i64)
+c5: LDS Gz,Mp (i64)
+c6: Grp11 Eb,Ib (1A)
+c7: Grp11 Ev,Iz (1A)
+c8: ENTER Iw,Ib
+c9: LEAVE (d64)
+ca: RETF Iw
+cb: RETF
+cc: INT3
+cd: INT Ib
+ce: INTO (i64)
+cf: IRET/D/Q
+# 0xd0 - 0xdf
+d0: Grp2 Eb,1 (1A)
+d1: Grp2 Ev,1 (1A)
+d2: Grp2 Eb,CL (1A)
+d3: Grp2 Ev,CL (1A)
+d4: AAM Ib (i64)
+d5: AAD Ib (i64)
+d6:
+d7: XLAT/XLATB
+d8: ESC
+d9: ESC
+da: ESC
+db: ESC
+dc: ESC
+dd: ESC
+de: ESC
+df: ESC
+# 0xe0 - 0xef
+e0: LOOPNE/LOOPNZ Jb (f64)
+e1: LOOPE/LOOPZ Jb (f64)
+e2: LOOP Jb (f64)
+e3: JrCXZ Jb (f64)
+e4: IN AL,Ib
+e5: IN eAX,Ib
+e6: OUT Ib,AL
+e7: OUT Ib,eAX
+e8: CALL Jz (f64)
+e9: JMP-near Jz (f64)
+ea: JMP-far Ap (i64)
+eb: JMP-short Jb (f64)
+ec: IN AL,DX
+ed: IN eAX,DX
+ee: OUT DX,AL
+ef: OUT DX,eAX
+# 0xf0 - 0xff
+f0: LOCK (Prefix)
+f1:
+f2: REPNE (Prefix)
+f3: REP/REPE (Prefix)
+f4: HLT
+f5: CMC
+f6: Grp3_1 Eb (1A)
+f7: Grp3_2 Ev (1A)
+f8: CLC
+f9: STC
+fa: CLI
+fb: STI
+fc: CLD
+fd: STD
+fe: Grp4 (1A)
+ff: Grp5 (1A)
+EndTable
+
+Table: 2-byte opcode # First Byte is 0x0f
+Referrer: 2-byte escape
+# 0x0f 0x00-0x0f
+00: Grp6 (1A)
+01: Grp7 (1A)
+02: LAR Gv,Ew
+03: LSL Gv,Ew
+04:
+05: SYSCALL (o64)
+06: CLTS
+07: SYSRET (o64)
+08: INVD
+09: WBINVD
+0a:
+0b: UD2 (1B)
+0c:
+0d: NOP Ev
+0e:
+0f:
+# 0x0f 0x10-0x1f
+10:
+11:
+12:
+13:
+14:
+15:
+16:
+17:
+18: Grp16 (1A)
+19:
+1a:
+1b:
+1c:
+1d:
+1e:
+1f: NOP Ev
+# 0x0f 0x20-0x2f
+20: MOV Rd,Cd
+21: MOV Rd,Dd
+22: MOV Cd,Rd
+23: MOV Dd,Rd
+24:
+25:
+26:
+27:
+28: movaps Vps,Wps | movapd Vpd,Wpd (66)
+29: movaps Wps,Vps | movapd Wpd,Vpd (66)
+2a:
+2b:
+2c:
+2d:
+2e:
+2f:
+# 0x0f 0x30-0x3f
+30: WRMSR
+31: RDTSC
+32: RDMSR
+33: RDPMC
+34: SYSENTER
+35: SYSEXIT
+36:
+37: GETSEC
+38: escape # 3-byte escape 1
+39:
+3a: escape # 3-byte escape 2
+3b:
+3c:
+3d:
+3e:
+3f:
+# 0x0f 0x40-0x4f
+40: CMOVO Gv,Ev
+41: CMOVNO Gv,Ev
+42: CMOVB/C/NAE Gv,Ev
+43: CMOVAE/NB/NC Gv,Ev
+44: CMOVE/Z Gv,Ev
+45: CMOVNE/NZ Gv,Ev
+46: CMOVBE/NA Gv,Ev
+47: CMOVA/NBE Gv,Ev
+48: CMOVS Gv,Ev
+49: CMOVNS Gv,Ev
+4a: CMOVP/PE Gv,Ev
+4b: CMOVNP/PO Gv,Ev
+4c: CMOVL/NGE Gv,Ev
+4d: CMOVNL/GE Gv,Ev
+4e: CMOVLE/NG Gv,Ev
+4f: CMOVNLE/G Gv,Ev
+# 0x0f 0x50-0x5f
+50:
+51:
+52:
+53:
+54:
+55:
+56:
+57:
+58:
+59:
+5a:
+5b:
+5c:
+5d:
+5e:
+5f:
+# 0x0f 0x60-0x6f
+60:
+61:
+62:
+63:
+64:
+65:
+66:
+67:
+68:
+69:
+6a:
+6b:
+6c:
+6d:
+6e:
+6f:
+# 0x0f 0x70-0x7f
+70:
+71: Grp12 (1A)
+72: Grp13 (1A)
+73: Grp14 (1A)
+74:
+75:
+76:
+77:
+78: VMREAD Ed/q,Gd/q
+79: VMWRITE Gd/q,Ed/q
+7a:
+7b:
+7c:
+7d:
+7e:
+7f:
+# 0x0f 0x80-0x8f
+80: JO Jz (f64)
+81: JNO Jz (f64)
+82: JB/JNAE/JC Jz (f64)
+83: JNB/JAE/JNC Jz (f64)
+84: JZ/JE Jz (f64)
+85: JNZ/JNE Jz (f64)
+86: JBE/JNA Jz (f64)
+87: JNBE/JA Jz (f64)
+88: JS Jz (f64)
+89: JNS Jz (f64)
+8a: JP/JPE Jz (f64)
+8b: JNP/JPO Jz (f64)
+8c: JL/JNGE Jz (f64)
+8d: JNL/JGE Jz (f64)
+8e: JLE/JNG Jz (f64)
+8f: JNLE/JG Jz (f64)
+# 0x0f 0x90-0x9f
+90: SETO Eb
+91: SETNO Eb
+92: SETB/C/NAE Eb
+93: SETAE/NB/NC Eb
+94: SETE/Z Eb
+95: SETNE/NZ Eb
+96: SETBE/NA Eb
+97: SETA/NBE Eb
+98: SETS Eb
+99: SETNS Eb
+9a: SETP/PE Eb
+9b: SETNP/PO Eb
+9c: SETL/NGE Eb
+9d: SETNL/GE Eb
+9e: SETLE/NG Eb
+9f: SETNLE/G Eb
+# 0x0f 0xa0-0xaf
+a0: PUSH FS (d64)
+a1: POP FS (d64)
+a2: CPUID
+a3: BT Ev,Gv
+a4: SHLD Ev,Gv,Ib
+a5: SHLD Ev,Gv,CL
+a6:
+a7: GrpRNG
+a8: PUSH GS (d64)
+a9: POP GS (d64)
+aa: RSM
+ab: BTS Ev,Gv
+ac: SHRD Ev,Gv,Ib
+ad: SHRD Ev,Gv,CL
+ae: Grp15 (1A),(1C)
+af: IMUL Gv,Ev
+# 0x0f 0xb0-0xbf
+b0: CMPXCHG Eb,Gb
+b1: CMPXCHG Ev,Gv
+b2: LSS Gv,Mp
+b3: BTR Ev,Gv
+b4: LFS Gv,Mp
+b5: LGS Gv,Mp
+b6: MOVZX Gv,Eb
+b7: MOVZX Gv,Ew
+b8: JMPE | POPCNT Gv,Ev (F3)
+b9: Grp10 (1A)
+ba: Grp8 Ev,Ib (1A)
+bb: BTC Ev,Gv
+bc: BSF Gv,Ev
+bd: BSR Gv,Ev
+be: MOVSX Gv,Eb
+bf: MOVSX Gv,Ew
+# 0x0f 0xc0-0xcf
+c0: XADD Eb,Gb
+c1: XADD Ev,Gv
+c2:
+c3: movnti Md/q,Gd/q
+c4:
+c5:
+c6:
+c7: Grp9 (1A)
+c8: BSWAP RAX/EAX/R8/R8D
+c9: BSWAP RCX/ECX/R9/R9D
+ca: BSWAP RDX/EDX/R10/R10D
+cb: BSWAP RBX/EBX/R11/R11D
+cc: BSWAP RSP/ESP/R12/R12D
+cd: BSWAP RBP/EBP/R13/R13D
+ce: BSWAP RSI/ESI/R14/R14D
+cf: BSWAP RDI/EDI/R15/R15D
+# 0x0f 0xd0-0xdf
+d0:
+d1:
+d2:
+d3:
+d4:
+d5:
+d6:
+d7:
+d8:
+d9:
+da:
+db:
+dc:
+dd:
+de:
+df:
+# 0x0f 0xe0-0xef
+e0:
+e1:
+e2:
+e3:
+e4:
+e5:
+e6:
+e7:
+e8:
+e9:
+ea:
+eb:
+ec:
+ed:
+ee:
+ef:
+# 0x0f 0xf0-0xff
+f0:
+f1:
+f2:
+f3:
+f4:
+f5:
+f6:
+f7:
+f8:
+f9:
+fa:
+fb:
+fc:
+fd:
+fe:
+ff:
+EndTable
+
+Table: 3-byte opcode 1
+Referrer: 3-byte escape 1
+80: INVEPT Gd/q,Mdq (66)
+81: INVPID Gd/q,Mdq (66)
+f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2)
+f1: MOVBE Mv,Gv | CRC32 Gd,Ev (F2)
+EndTable
+
+Table: 3-byte opcode 2
+Referrer: 3-byte escape 2
+# all opcode is for SSE
+EndTable
+
+GrpTable: Grp1
+0: ADD
+1: OR
+2: ADC
+3: SBB
+4: AND
+5: SUB
+6: XOR
+7: CMP
+EndTable
+
+GrpTable: Grp1A
+0: POP
+EndTable
+
+GrpTable: Grp2
+0: ROL
+1: ROR
+2: RCL
+3: RCR
+4: SHL/SAL
+5: SHR
+6:
+7: SAR
+EndTable
+
+GrpTable: Grp3_1
+0: TEST Eb,Ib
+1:
+2: NOT Eb
+3: NEG Eb
+4: MUL AL,Eb
+5: IMUL AL,Eb
+6: DIV AL,Eb
+7: IDIV AL,Eb
+EndTable
+
+GrpTable: Grp3_2
+0: TEST Ev,Iz
+1:
+2: NOT Ev
+3: NEG Ev
+4: MUL rAX,Ev
+5: IMUL rAX,Ev
+6: DIV rAX,Ev
+7: IDIV rAX,Ev
+EndTable
+
+GrpTable: Grp4
+0: INC Eb
+1: DEC Eb
+EndTable
+
+GrpTable: Grp5
+0: INC Ev
+1: DEC Ev
+2: CALLN Ev (f64)
+3: CALLF Ep
+4: JMPN Ev (f64)
+5: JMPF Ep
+6: PUSH Ev (d64)
+7:
+EndTable
+
+GrpTable: Grp6
+0: SLDT Rv/Mw
+1: STR Rv/Mw
+2: LLDT Ew
+3: LTR Ew
+4: VERR Ew
+5: VERW Ew
+EndTable
+
+GrpTable: Grp7
+0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B)
+1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001)
+2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B)
+3: LIDT Ms
+4: SMSW Mw/Rv
+5:
+6: LMSW Ew
+7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B)
+EndTable
+
+GrpTable: Grp8
+4: BT
+5: BTS
+6: BTR
+7: BTC
+EndTable
+
+GrpTable: Grp9
+1: CMPXCHG8B/16B Mq/Mdq
+6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3)
+7: VMPTRST Mq
+EndTable
+
+GrpTable: Grp10
+EndTable
+
+GrpTable: Grp11
+0: MOV
+EndTable
+
+GrpTable: Grp12
+EndTable
+
+GrpTable: Grp13
+EndTable
+
+GrpTable: Grp14
+EndTable
+
+GrpTable: Grp15
+0: fxsave
+1: fxstor
+2: ldmxcsr
+3: stmxcsr
+4: XSAVE
+5: XRSTOR | lfence (11B)
+6: mfence (11B)
+7: clflush | sfence (11B)
+EndTable
+
+GrpTable: Grp16
+0: prefetch NTA
+1: prefetch T0
+2: prefetch T1
+3: prefetch T2
+EndTable
+
+GrpTable: GrpRNG
+0: xstore-rng
+1: xcrypt-ecb
+2: xcrypt-cbc
+4: xcrypt-cfb
+5: xcrypt-ofb
+EndTable
index bfae139..c322e59 100644 (file)
@@ -38,7 +38,8 @@ enum x86_pf_error_code {
  * Returns 0 if mmiotrace is disabled, or if the fault is not
  * handled by mmiotrace:
  */
-static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
+static inline int __kprobes
+kmmio_fault(struct pt_regs *regs, unsigned long addr)
 {
        if (unlikely(is_kmmio_active()))
                if (kmmio_handler(regs, addr) == 1)
@@ -46,7 +47,7 @@ static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
        return 0;
 }
 
-static inline int notify_page_fault(struct pt_regs *regs)
+static inline int __kprobes notify_page_fault(struct pt_regs *regs)
 {
        int ret = 0;
 
@@ -239,7 +240,7 @@ void vmalloc_sync_all(void)
  *
  *   Handle a fault on the vmalloc or module mapping area
  */
-static noinline int vmalloc_fault(unsigned long address)
+static noinline __kprobes int vmalloc_fault(unsigned long address)
 {
        unsigned long pgd_paddr;
        pmd_t *pmd_k;
@@ -361,7 +362,7 @@ void vmalloc_sync_all(void)
  *
  * This assumes no large pages in there.
  */
-static noinline int vmalloc_fault(unsigned long address)
+static noinline __kprobes int vmalloc_fault(unsigned long address)
 {
        pgd_t *pgd, *pgd_ref;
        pud_t *pud, *pud_ref;
@@ -858,7 +859,7 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte)
  * There are no security implications to leaving a stale TLB when
  * increasing the permissions on a page.
  */
-static noinline int
+static noinline __kprobes int
 spurious_fault(unsigned long error_code, unsigned long address)
 {
        pgd_t *pgd;
diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile
new file mode 100644 (file)
index 0000000..1bd006c
--- /dev/null
@@ -0,0 +1,15 @@
+PHONY += posttest
+quiet_cmd_posttest = TEST    $@
+      cmd_posttest = $(OBJDUMP) -d -j .text $(objtree)/vmlinux | awk -f $(srctree)/arch/x86/tools/distill.awk | $(obj)/test_get_len $(CONFIG_64BIT)
+
+posttest: $(obj)/test_get_len vmlinux
+       $(call cmd,posttest)
+
+hostprogs-y    := test_get_len
+
+# -I needed for generated C source and C source which in the kernel tree.
+HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/
+
+# Dependancies are also needed.
+$(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c
+
diff --git a/arch/x86/tools/distill.awk b/arch/x86/tools/distill.awk
new file mode 100644 (file)
index 0000000..d433619
--- /dev/null
@@ -0,0 +1,42 @@
+#!/bin/awk -f
+# Usage: objdump -d a.out | awk -f distill.awk | ./test_get_len
+# Distills the disassembly as follows:
+# - Removes all lines except the disassembled instructions.
+# - For instructions that exceed 1 line (7 bytes), crams all the hex bytes
+# into a single line.
+# - Remove bad(or prefix only) instructions
+
+BEGIN {
+       prev_addr = ""
+       prev_hex = ""
+       prev_mnemonic = ""
+       bad_expr = "(\\(bad\\)|^rex|^.byte|^rep(z|nz)$|^lock$|^es$|^cs$|^ss$|^ds$|^fs$|^gs$|^data(16|32)$|^addr(16|32|64))"
+       fwait_expr = "^9b "
+       fwait_str="9b\tfwait"
+}
+
+/^ *[0-9a-f]+:/ {
+       if (split($0, field, "\t") < 3) {
+               # This is a continuation of the same insn.
+               prev_hex = prev_hex field[2]
+       } else {
+               # Skip bad instructions
+               if (match(prev_mnemonic, bad_expr))
+                       prev_addr = ""
+               # Split fwait from other f* instructions
+               if (match(prev_hex, fwait_expr) && prev_mnemonic != "fwait") {
+                       printf "%s\t%s\n", prev_addr, fwait_str
+                       sub(fwait_expr, "", prev_hex)
+               }
+               if (prev_addr != "")
+                       printf "%s\t%s\t%s\n", prev_addr, prev_hex, prev_mnemonic
+               prev_addr = field[1]
+               prev_hex = field[2]
+               prev_mnemonic = field[3]
+       }
+}
+
+END {
+       if (prev_addr != "")
+               printf "%s\t%s\t%s\n", prev_addr, prev_hex, prev_mnemonic
+}
diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk
new file mode 100644 (file)
index 0000000..19ba096
--- /dev/null
@@ -0,0 +1,334 @@
+#!/bin/awk -f
+# gen-insn-attr-x86.awk: Instruction attribute table generator
+# Written by Masami Hiramatsu <mhiramat@redhat.com>
+#
+# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c
+
+# Awk implementation sanity check
+function check_awk_implement() {
+       if (!match("abc", "[[:lower:]]+"))
+               return "Your awk doesn't support charactor-class."
+       if (sprintf("%x", 0) != "0")
+               return "Your awk has a printf-format problem."
+       return ""
+}
+
+BEGIN {
+       # Implementation error checking
+       awkchecked = check_awk_implement()
+       if (awkchecked != "") {
+               print "Error: " awkchecked > "/dev/stderr"
+               print "Please try to use gawk." > "/dev/stderr"
+               exit 1
+       }
+
+       # Setup generating tables
+       print "/* x86 opcode map generated from x86-opcode-map.txt */"
+       print "/* Do not change this code. */"
+       ggid = 1
+       geid = 1
+
+       opnd_expr = "^[[:alpha:]]"
+       ext_expr = "^\\("
+       sep_expr = "^\\|$"
+       group_expr = "^Grp[[:alnum:]]+"
+
+       imm_expr = "^[IJAO][[:lower:]]"
+       imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
+       imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
+       imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
+       imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)"
+       imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)"
+       imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)"
+       imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
+       imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
+       imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
+       imm_flag["Ob"] = "INAT_MOFFSET"
+       imm_flag["Ov"] = "INAT_MOFFSET"
+
+       modrm_expr = "^([CDEGMNPQRSUVW][[:lower:]]+|NTA|T[012])"
+       force64_expr = "\\([df]64\\)"
+       rex_expr = "^REX(\\.[XRWB]+)*"
+       fpu_expr = "^ESC" # TODO
+
+       lprefix1_expr = "\\(66\\)"
+       delete lptable1
+       lprefix2_expr = "\\(F2\\)"
+       delete lptable2
+       lprefix3_expr = "\\(F3\\)"
+       delete lptable3
+       max_lprefix = 4
+
+       prefix_expr = "\\(Prefix\\)"
+       prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
+       prefix_num["REPNE"] = "INAT_PFX_REPNE"
+       prefix_num["REP/REPE"] = "INAT_PFX_REPE"
+       prefix_num["LOCK"] = "INAT_PFX_LOCK"
+       prefix_num["SEG=CS"] = "INAT_PFX_CS"
+       prefix_num["SEG=DS"] = "INAT_PFX_DS"
+       prefix_num["SEG=ES"] = "INAT_PFX_ES"
+       prefix_num["SEG=FS"] = "INAT_PFX_FS"
+       prefix_num["SEG=GS"] = "INAT_PFX_GS"
+       prefix_num["SEG=SS"] = "INAT_PFX_SS"
+       prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
+
+       delete table
+       delete etable
+       delete gtable
+       eid = -1
+       gid = -1
+}
+
+function semantic_error(msg) {
+       print "Semantic error at " NR ": " msg > "/dev/stderr"
+       exit 1
+}
+
+function debug(msg) {
+       print "DEBUG: " msg
+}
+
+function array_size(arr,   i,c) {
+       c = 0
+       for (i in arr)
+               c++
+       return c
+}
+
+/^Table:/ {
+       print "/* " $0 " */"
+}
+
+/^Referrer:/ {
+       if (NF == 1) {
+               # primary opcode table
+               tname = "inat_primary_table"
+               eid = -1
+       } else {
+               # escape opcode table
+               ref = ""
+               for (i = 2; i <= NF; i++)
+                       ref = ref $i
+               eid = escape[ref]
+               tname = sprintf("inat_escape_table_%d", eid)
+       }
+}
+
+/^GrpTable:/ {
+       print "/* " $0 " */"
+       if (!($2 in group))
+               semantic_error("No group: " $2 )
+       gid = group[$2]
+       tname = "inat_group_table_" gid
+}
+
+function print_table(tbl,name,fmt,n)
+{
+       print "const insn_attr_t " name " = {"
+       for (i = 0; i < n; i++) {
+               id = sprintf(fmt, i)
+               if (tbl[id])
+                       print " [" id "] = " tbl[id] ","
+       }
+       print "};"
+}
+
+/^EndTable/ {
+       if (gid != -1) {
+               # print group tables
+               if (array_size(table) != 0) {
+                       print_table(table, tname "[INAT_GROUP_TABLE_SIZE]",
+                                   "0x%x", 8)
+                       gtable[gid,0] = tname
+               }
+               if (array_size(lptable1) != 0) {
+                       print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]",
+                                   "0x%x", 8)
+                       gtable[gid,1] = tname "_1"
+               }
+               if (array_size(lptable2) != 0) {
+                       print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]",
+                                   "0x%x", 8)
+                       gtable[gid,2] = tname "_2"
+               }
+               if (array_size(lptable3) != 0) {
+                       print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]",
+                                   "0x%x", 8)
+                       gtable[gid,3] = tname "_3"
+               }
+       } else {
+               # print primary/escaped tables
+               if (array_size(table) != 0) {
+                       print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]",
+                                   "0x%02x", 256)
+                       etable[eid,0] = tname
+               }
+               if (array_size(lptable1) != 0) {
+                       print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]",
+                                   "0x%02x", 256)
+                       etable[eid,1] = tname "_1"
+               }
+               if (array_size(lptable2) != 0) {
+                       print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]",
+                                   "0x%02x", 256)
+                       etable[eid,2] = tname "_2"
+               }
+               if (array_size(lptable3) != 0) {
+                       print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]",
+                                   "0x%02x", 256)
+                       etable[eid,3] = tname "_3"
+               }
+       }
+       print ""
+       delete table
+       delete lptable1
+       delete lptable2
+       delete lptable3
+       gid = -1
+       eid = -1
+}
+
+function add_flags(old,new) {
+       if (old && new)
+               return old " | " new
+       else if (old)
+               return old
+       else
+               return new
+}
+
+# convert operands to flags.
+function convert_operands(opnd,       i,imm,mod)
+{
+       imm = null
+       mod = null
+       for (i in opnd) {
+               i  = opnd[i]
+               if (match(i, imm_expr) == 1) {
+                       if (!imm_flag[i])
+                               semantic_error("Unknown imm opnd: " i)
+                       if (imm) {
+                               if (i != "Ib")
+                                       semantic_error("Second IMM error")
+                               imm = add_flags(imm, "INAT_SCNDIMM")
+                       } else
+                               imm = imm_flag[i]
+               } else if (match(i, modrm_expr))
+                       mod = "INAT_MODRM"
+       }
+       return add_flags(imm, mod)
+}
+
+/^[0-9a-f]+\:/ {
+       if (NR == 1)
+               next
+       # get index
+       idx = "0x" substr($1, 1, index($1,":") - 1)
+       if (idx in table)
+               semantic_error("Redefine " idx " in " tname)
+
+       # check if escaped opcode
+       if ("escape" == $2) {
+               if ($3 != "#")
+                       semantic_error("No escaped name")
+               ref = ""
+               for (i = 4; i <= NF; i++)
+                       ref = ref $i
+               if (ref in escape)
+                       semantic_error("Redefine escape (" ref ")")
+               escape[ref] = geid
+               geid++
+               table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")"
+               next
+       }
+
+       variant = null
+       # converts
+       i = 2
+       while (i <= NF) {
+               opcode = $(i++)
+               delete opnds
+               ext = null
+               flags = null
+               opnd = null
+               # parse one opcode
+               if (match($i, opnd_expr)) {
+                       opnd = $i
+                       split($(i++), opnds, ",")
+                       flags = convert_operands(opnds)
+               }
+               if (match($i, ext_expr))
+                       ext = $(i++)
+               if (match($i, sep_expr))
+                       i++
+               else if (i < NF)
+                       semantic_error($i " is not a separator")
+
+               # check if group opcode
+               if (match(opcode, group_expr)) {
+                       if (!(opcode in group)) {
+                               group[opcode] = ggid
+                               ggid++
+                       }
+                       flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")")
+               }
+               # check force(or default) 64bit
+               if (match(ext, force64_expr))
+                       flags = add_flags(flags, "INAT_FORCE64")
+
+               # check REX prefix
+               if (match(opcode, rex_expr))
+                       flags = add_flags(flags, "INAT_REXPFX")
+
+               # check coprocessor escape : TODO
+               if (match(opcode, fpu_expr))
+                       flags = add_flags(flags, "INAT_MODRM")
+
+               # check prefixes
+               if (match(ext, prefix_expr)) {
+                       if (!prefix_num[opcode])
+                               semantic_error("Unknown prefix: " opcode)
+                       flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")")
+               }
+               if (length(flags) == 0)
+                       continue
+               # check if last prefix
+               if (match(ext, lprefix1_expr)) {
+                       lptable1[idx] = add_flags(lptable1[idx],flags)
+                       variant = "INAT_VARIANT"
+               } else if (match(ext, lprefix2_expr)) {
+                       lptable2[idx] = add_flags(lptable2[idx],flags)
+                       variant = "INAT_VARIANT"
+               } else if (match(ext, lprefix3_expr)) {
+                       lptable3[idx] = add_flags(lptable3[idx],flags)
+                       variant = "INAT_VARIANT"
+               } else {
+                       table[idx] = add_flags(table[idx],flags)
+               }
+       }
+       if (variant)
+               table[idx] = add_flags(table[idx],variant)
+}
+
+END {
+       if (awkchecked != "")
+               exit 1
+       # print escape opcode map's array
+       print "/* Escape opcode map array */"
+       print "const insn_attr_t const *inat_escape_tables[INAT_ESC_MAX + 1]" \
+             "[INAT_LPREFIX_MAX + 1] = {"
+       for (i = 0; i < geid; i++)
+               for (j = 0; j < max_lprefix; j++)
+                       if (etable[i,j])
+                               print " ["i"]["j"] = "etable[i,j]","
+       print "};\n"
+       # print group opcode map's array
+       print "/* Group opcode map array */"
+       print "const insn_attr_t const *inat_group_tables[INAT_GRP_MAX + 1]"\
+             "[INAT_LPREFIX_MAX + 1] = {"
+       for (i = 0; i < ggid; i++)
+               for (j = 0; j < max_lprefix; j++)
+                       if (gtable[i,j])
+                               print " ["i"]["j"] = "gtable[i,j]","
+       print "};"
+}
diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c
new file mode 100644 (file)
index 0000000..376d338
--- /dev/null
@@ -0,0 +1,108 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2009
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#define unlikely(cond) (cond)
+
+#include <asm/insn.h>
+#include <inat.c>
+#include <insn.c>
+
+/*
+ * Test of instruction analysis in general and insn_get_length() in
+ * particular.  See if insn_get_length() and the disassembler agree
+ * on the length of each instruction in an elf disassembly.
+ *
+ * Usage: objdump -d a.out | awk -f distill.awk | ./test_get_len
+ */
+
+const char *prog;
+
+static void usage(void)
+{
+       fprintf(stderr, "Usage: objdump -d a.out | awk -f distill.awk |"
+               " %s [y|n](64bit flag)\n", prog);
+       exit(1);
+}
+
+static void malformed_line(const char *line, int line_nr)
+{
+       fprintf(stderr, "%s: malformed line %d:\n%s", prog, line_nr, line);
+       exit(3);
+}
+
+#define BUFSIZE 256
+
+int main(int argc, char **argv)
+{
+       char line[BUFSIZE];
+       unsigned char insn_buf[16];
+       struct insn insn;
+       int insns = 0;
+       int x86_64 = 0;
+
+       prog = argv[0];
+       if (argc > 2)
+               usage();
+
+       if (argc == 2 && argv[1][0] == 'y')
+               x86_64 = 1;
+
+       while (fgets(line, BUFSIZE, stdin)) {
+               char copy[BUFSIZE], *s, *tab1, *tab2;
+               int nb = 0;
+               unsigned int b;
+
+               insns++;
+               memset(insn_buf, 0, 16);
+               strcpy(copy, line);
+               tab1 = strchr(copy, '\t');
+               if (!tab1)
+                       malformed_line(line, insns);
+               s = tab1 + 1;
+               s += strspn(s, " ");
+               tab2 = strchr(s, '\t');
+               if (!tab2)
+                       malformed_line(line, insns);
+               *tab2 = '\0';   /* Characters beyond tab2 aren't examined */
+               while (s < tab2) {
+                       if (sscanf(s, "%x", &b) == 1) {
+                               insn_buf[nb++] = (unsigned char) b;
+                               s += 3;
+                       } else
+                               break;
+               }
+               /* Decode an instruction */
+               insn_init(&insn, insn_buf, x86_64);
+               insn_get_length(&insn);
+               if (insn.length != nb) {
+                       fprintf(stderr, "Error: %s", line);
+                       fprintf(stderr, "Error: objdump says %d bytes, but "
+                               "insn_get_length() says %d (attr:%x)\n", nb,
+                               insn.length, insn.attr);
+                       exit(2);
+               }
+       }
+       fprintf(stderr, "Succeed: decoded and checked %d instructions\n",
+               insns);
+       return 0;
+}
index 23f7179..a256c8f 100644 (file)
@@ -116,12 +116,12 @@ struct ftrace_event_call {
        struct dentry           *dir;
        struct trace_event      *event;
        int                     enabled;
-       int                     (*regfunc)(void *);
-       void                    (*unregfunc)(void *);
+       int                     (*regfunc)(struct ftrace_event_call *);
+       void                    (*unregfunc)(struct ftrace_event_call *);
        int                     id;
-       int                     (*raw_init)(void);
-       int                     (*show_format)(struct ftrace_event_call *call,
-                                              struct trace_seq *s);
+       int                     (*raw_init)(struct ftrace_event_call *);
+       int                     (*show_format)(struct ftrace_event_call *,
+                                              struct trace_seq *);
        int                     (*define_fields)(struct ftrace_event_call *);
        struct list_head        fields;
        int                     filter_active;
@@ -151,11 +151,12 @@ enum {
        FILTER_PTR_STRING,
 };
 
-extern int trace_define_field(struct ftrace_event_call *call,
-                             const char *type, const char *name,
-                             int offset, int size, int is_signed,
-                             int filter_type);
 extern int trace_define_common_fields(struct ftrace_event_call *call);
+extern int trace_define_field(struct ftrace_event_call *call, const char *type,
+                             const char *name, int offset, int size,
+                             int is_signed, int filter_type);
+extern int trace_add_event_call(struct ftrace_event_call *call);
+extern void trace_remove_event_call(struct ftrace_event_call *call);
 
 #define is_signed_type(type)   (((type)(-1)) < 0)
 
index bcd9c07..87eb79c 100644 (file)
@@ -296,6 +296,8 @@ void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head);
 int disable_kprobe(struct kprobe *kp);
 int enable_kprobe(struct kprobe *kp);
 
+void dump_kprobe(struct kprobe *kp);
+
 #else /* !CONFIG_KPROBES: */
 
 static inline int kprobes_built_in(void)
index a8e3782..317d913 100644 (file)
@@ -165,7 +165,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \
        struct trace_event enter_syscall_print_##sname = {              \
                .trace                  = print_syscall_enter,          \
        };                                                              \
-       static int init_enter_##sname(void)                             \
+       static int init_enter_##sname(struct ftrace_event_call *call)   \
        {                                                               \
                int num, id;                                            \
                num = syscall_name_to_nr("sys"#sname);                  \
@@ -201,7 +201,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \
        struct trace_event exit_syscall_print_##sname = {               \
                .trace                  = print_syscall_exit,           \
        };                                                              \
-       static int init_exit_##sname(void)                              \
+       static int init_exit_##sname(struct ftrace_event_call *call)    \
        {                                                               \
                int num, id;                                            \
                num = syscall_name_to_nr("sys"#sname);                  \
index 308bafd..5d3df2a 100644 (file)
@@ -435,7 +435,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
  *     event_trace_printk(_RET_IP_, "<call>: " <fmt>);
  * }
  *
- * static int ftrace_reg_event_<call>(void)
+ * static int ftrace_reg_event_<call>(struct ftrace_event_call *unused)
  * {
  *     int ret;
  *
@@ -446,7 +446,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
  *     return ret;
  * }
  *
- * static void ftrace_unreg_event_<call>(void)
+ * static void ftrace_unreg_event_<call>(struct ftrace_event_call *unused)
  * {
  *     unregister_trace_<call>(ftrace_event_<call>);
  * }
@@ -481,7 +481,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
  *     trace_current_buffer_unlock_commit(buffer, event, irq_flags, pc);
  * }
  *
- * static int ftrace_raw_reg_event_<call>(void)
+ * static int ftrace_raw_reg_event_<call>(struct ftrace_event_call *unused)
  * {
  *     int ret;
  *
@@ -492,7 +492,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
  *     return ret;
  * }
  *
- * static void ftrace_unreg_event_<call>(void)
+ * static void ftrace_unreg_event_<call>(struct ftrace_event_call *unused)
  * {
  *     unregister_trace_<call>(ftrace_raw_event_<call>);
  * }
@@ -501,7 +501,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
  *     .trace                  = ftrace_raw_output_<call>, <-- stage 2
  * };
  *
- * static int ftrace_raw_init_event_<call>(void)
+ * static int ftrace_raw_init_event_<call>(struct ftrace_event_call *unused)
  * {
  *     int id;
  *
@@ -598,7 +598,7 @@ static void ftrace_raw_event_##call(proto)                          \
                                                  event, irq_flags, pc); \
 }                                                                      \
                                                                        \
-static int ftrace_raw_reg_event_##call(void *ptr)                      \
+static int ftrace_raw_reg_event_##call(struct ftrace_event_call *unused)\
 {                                                                      \
        int ret;                                                        \
                                                                        \
@@ -609,7 +609,7 @@ static int ftrace_raw_reg_event_##call(void *ptr)                   \
        return ret;                                                     \
 }                                                                      \
                                                                        \
-static void ftrace_raw_unreg_event_##call(void *ptr)                   \
+static void ftrace_raw_unreg_event_##call(struct ftrace_event_call *unused)\
 {                                                                      \
        unregister_trace_##call(ftrace_raw_event_##call);               \
 }                                                                      \
@@ -618,7 +618,7 @@ static struct trace_event ftrace_event_type_##call = {                      \
        .trace                  = ftrace_raw_output_##call,             \
 };                                                                     \
                                                                        \
-static int ftrace_raw_init_event_##call(void)                          \
+static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\
 {                                                                      \
        int id;                                                         \
                                                                        \
index 5dc283b..e290b86 100644 (file)
@@ -39,16 +39,19 @@ void set_syscall_enter_id(int num, int id);
 void set_syscall_exit_id(int num, int id);
 extern struct trace_event event_syscall_enter;
 extern struct trace_event event_syscall_exit;
-extern int reg_event_syscall_enter(void *ptr);
-extern void unreg_event_syscall_enter(void *ptr);
-extern int reg_event_syscall_exit(void *ptr);
-extern void unreg_event_syscall_exit(void *ptr);
+
 extern int syscall_enter_format(struct ftrace_event_call *call,
                                struct trace_seq *s);
 extern int syscall_exit_format(struct ftrace_event_call *call,
                                struct trace_seq *s);
 extern int syscall_enter_define_fields(struct ftrace_event_call *call);
 extern int syscall_exit_define_fields(struct ftrace_event_call *call);
+extern int reg_event_syscall_enter(struct ftrace_event_call *call);
+extern void unreg_event_syscall_enter(struct ftrace_event_call *call);
+extern int reg_event_syscall_exit(struct ftrace_event_call *call);
+extern void unreg_event_syscall_exit(struct ftrace_event_call *call);
+extern int
+ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s);
 enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags);
 enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags);
 #endif
index ef177d6..3267d90 100644 (file)
@@ -90,6 +90,7 @@ static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
  */
 static struct kprobe_blackpoint kprobe_blacklist[] = {
        {"preempt_schedule",},
+       {"native_get_debugreg",},
        {NULL}    /* Terminator */
 };
 
@@ -1141,6 +1142,13 @@ static void __kprobes kill_kprobe(struct kprobe *p)
        arch_remove_kprobe(p);
 }
 
+void __kprobes dump_kprobe(struct kprobe *kp)
+{
+       printk(KERN_WARNING "Dumping kprobe:\n");
+       printk(KERN_WARNING "Name: %s\nAddress: %p\nOffset: %x\n",
+              kp->symbol_name, kp->addr, kp->offset);
+}
+
 /* Module notifier call back, checking kprobes on the module */
 static int __kprobes kprobes_module_callback(struct notifier_block *nb,
                                             unsigned long val, void *data)
index 61d5aa5..acd24e7 100644 (file)
@@ -558,7 +558,7 @@ EXPORT_SYMBOL(unregister_reboot_notifier);
 
 static ATOMIC_NOTIFIER_HEAD(die_chain);
 
-int notrace notify_die(enum die_val val, const char *str,
+int notrace __kprobes notify_die(enum die_val val, const char *str,
               struct pt_regs *regs, long err, int trap, int sig)
 {
        struct die_args args = {
index 1ea0d12..e78dcbd 100644 (file)
@@ -418,6 +418,18 @@ config BLK_DEV_IO_TRACE
 
          If unsure, say N.
 
+config KPROBE_TRACER
+       depends on KPROBES
+       depends on X86
+       bool "Trace kprobes"
+       select TRACING
+       select GENERIC_TRACER
+       help
+         This tracer probes everywhere where kprobes can probe it, and
+         records various registers and memories specified by user.
+         This also allows you to trace kprobe probe points as a dynamic
+         defined events. It provides per-probe event filtering interface.
+
 config DYNAMIC_FTRACE
        bool "enable/disable ftrace tracepoints dynamically"
        depends on FUNCTION_TRACER
index 844164d..7c00a1e 100644 (file)
@@ -54,5 +54,6 @@ obj-$(CONFIG_EVENT_TRACING) += trace_export.o
 obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
 obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
 obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
+obj-$(CONFIG_KPROBE_TRACER) += trace_kprobe.o
 
 libftrace-y := ftrace.o
index fa1dccb..8210649 100644 (file)
@@ -205,6 +205,30 @@ struct syscall_trace_exit {
        unsigned long           ret;
 };
 
+struct kprobe_trace_entry {
+       struct trace_entry      ent;
+       unsigned long           ip;
+       int                     nargs;
+       unsigned long           args[];
+};
+
+#define SIZEOF_KPROBE_TRACE_ENTRY(n)                   \
+       (offsetof(struct kprobe_trace_entry, args) +    \
+       (sizeof(unsigned long) * (n)))
+
+struct kretprobe_trace_entry {
+       struct trace_entry      ent;
+       unsigned long           func;
+       unsigned long           ret_ip;
+       int                     nargs;
+       unsigned long           args[];
+};
+
+#define SIZEOF_KRETPROBE_TRACE_ENTRY(n)                        \
+       (offsetof(struct kretprobe_trace_entry, args) + \
+       (sizeof(unsigned long) * (n)))
+
+
 
 /*
  * trace_flag_type is an enumeration that holds different
index 6db005e..e74f090 100644 (file)
@@ -109,7 +109,7 @@ TRACE_EVENT_FORMAT(bprint, TRACE_BPRINT, bprint_entry, ignore,
        TRACE_STRUCT(
                TRACE_FIELD(unsigned long, ip, ip)
                TRACE_FIELD(char *, fmt, fmt)
-               TRACE_FIELD_ZERO_CHAR(buf)
+               TRACE_FIELD_ZERO(char, buf)
        ),
        TP_RAW_FMT("%08lx (%d) fmt:%p %s")
 );
@@ -117,7 +117,7 @@ TRACE_EVENT_FORMAT(bprint, TRACE_BPRINT, bprint_entry, ignore,
 TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore,
        TRACE_STRUCT(
                TRACE_FIELD(unsigned long, ip, ip)
-               TRACE_FIELD_ZERO_CHAR(buf)
+               TRACE_FIELD_ZERO(char, buf)
        ),
        TP_RAW_FMT("%08lx (%d) fmt:%p %s")
 );
index 78b1ed2..ba34920 100644 (file)
@@ -92,9 +92,7 @@ int trace_define_common_fields(struct ftrace_event_call *call)
 }
 EXPORT_SYMBOL_GPL(trace_define_common_fields);
 
-#ifdef CONFIG_MODULES
-
-static void trace_destroy_fields(struct ftrace_event_call *call)
+void trace_destroy_fields(struct ftrace_event_call *call)
 {
        struct ftrace_event_field *field, *next;
 
@@ -106,8 +104,6 @@ static void trace_destroy_fields(struct ftrace_event_call *call)
        }
 }
 
-#endif /* CONFIG_MODULES */
-
 static void ftrace_event_enable_disable(struct ftrace_event_call *call,
                                        int enable)
 {
@@ -116,14 +112,14 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call,
                if (call->enabled) {
                        call->enabled = 0;
                        tracing_stop_cmdline_record();
-                       call->unregfunc(call->data);
+                       call->unregfunc(call);
                }
                break;
        case 1:
                if (!call->enabled) {
                        call->enabled = 1;
                        tracing_start_cmdline_record();
-                       call->regfunc(call->data);
+                       call->regfunc(call);
                }
                break;
        }
@@ -991,27 +987,43 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
        return 0;
 }
 
-#define for_each_event(event, start, end)                      \
-       for (event = start;                                     \
-            (unsigned long)event < (unsigned long)end;         \
-            event++)
+static int __trace_add_event_call(struct ftrace_event_call *call)
+{
+       struct dentry *d_events;
+       int ret;
 
-#ifdef CONFIG_MODULES
+       if (!call->name)
+               return -EINVAL;
 
-static LIST_HEAD(ftrace_module_file_list);
+       if (call->raw_init) {
+               ret = call->raw_init(call);
+               if (ret < 0) {
+                       if (ret != -ENOSYS)
+                               pr_warning("Could not initialize trace "
+                               "events/%s\n", call->name);
+                       return ret;
+               }
+       }
 
-/*
- * Modules must own their file_operations to keep up with
- * reference counting.
- */
-struct ftrace_module_file_ops {
-       struct list_head                list;
-       struct module                   *mod;
-       struct file_operations          id;
-       struct file_operations          enable;
-       struct file_operations          format;
-       struct file_operations          filter;
-};
+       d_events = event_trace_events_dir();
+       if (!d_events)
+               return -ENOENT;
+
+       list_add(&call->list, &ftrace_events);
+       return event_create_dir(call, d_events, &ftrace_event_id_fops,
+                               &ftrace_enable_fops, &ftrace_event_filter_fops,
+                               &ftrace_event_format_fops);
+}
+
+/* Add an additional event_call dynamically */
+int trace_add_event_call(struct ftrace_event_call *call)
+{
+       int ret;
+       mutex_lock(&event_mutex);
+       ret = __trace_add_event_call(call);
+       mutex_unlock(&event_mutex);
+       return ret;
+}
 
 static void remove_subsystem_dir(const char *name)
 {
@@ -1039,6 +1051,48 @@ static void remove_subsystem_dir(const char *name)
        }
 }
 
+static void __trace_remove_event_call(struct ftrace_event_call *call)
+{
+       ftrace_event_enable_disable(call, 0);
+       if (call->event)
+               __unregister_ftrace_event(call->event);
+       debugfs_remove_recursive(call->dir);
+       list_del(&call->list);
+       trace_destroy_fields(call);
+       destroy_preds(call);
+       remove_subsystem_dir(call->system);
+}
+
+/* Remove an event_call */
+void trace_remove_event_call(struct ftrace_event_call *call)
+{
+       mutex_lock(&event_mutex);
+       __trace_remove_event_call(call);
+       mutex_unlock(&event_mutex);
+}
+
+#define for_each_event(event, start, end)                      \
+       for (event = start;                                     \
+            (unsigned long)event < (unsigned long)end;         \
+            event++)
+
+#ifdef CONFIG_MODULES
+
+static LIST_HEAD(ftrace_module_file_list);
+
+/*
+ * Modules must own their file_operations to keep up with
+ * reference counting.
+ */
+struct ftrace_module_file_ops {
+       struct list_head                list;
+       struct module                   *mod;
+       struct file_operations          id;
+       struct file_operations          enable;
+       struct file_operations          format;
+       struct file_operations          filter;
+};
+
 static struct ftrace_module_file_ops *
 trace_create_file_ops(struct module *mod)
 {
@@ -1096,7 +1150,7 @@ static void trace_module_add_events(struct module *mod)
                if (!call->name)
                        continue;
                if (call->raw_init) {
-                       ret = call->raw_init();
+                       ret = call->raw_init(call);
                        if (ret < 0) {
                                if (ret != -ENOSYS)
                                        pr_warning("Could not initialize trace "
@@ -1131,14 +1185,7 @@ static void trace_module_remove_events(struct module *mod)
        list_for_each_entry_safe(call, p, &ftrace_events, list) {
                if (call->mod == mod) {
                        found = true;
-                       ftrace_event_enable_disable(call, 0);
-                       if (call->event)
-                               __unregister_ftrace_event(call->event);
-                       debugfs_remove_recursive(call->dir);
-                       list_del(&call->list);
-                       trace_destroy_fields(call);
-                       destroy_preds(call);
-                       remove_subsystem_dir(call->system);
+                       __trace_remove_event_call(call);
                }
        }
 
@@ -1256,7 +1303,7 @@ static __init int event_trace_init(void)
                if (!call->name)
                        continue;
                if (call->raw_init) {
-                       ret = call->raw_init();
+                       ret = call->raw_init(call);
                        if (ret < 0) {
                                if (ret != -ENOSYS)
                                        pr_warning("Could not initialize trace "
index df1bf6e..a79ef6f 100644 (file)
@@ -42,9 +42,9 @@ extern void __bad_type_size(void);
        if (!ret)                                                       \
                return 0;
 
-#undef TRACE_FIELD_ZERO_CHAR
-#define TRACE_FIELD_ZERO_CHAR(item)                                    \
-       ret = trace_seq_printf(s, "\tfield:char " #item ";\t"           \
+#undef TRACE_FIELD_ZERO
+#define TRACE_FIELD_ZERO(type, item)                                   \
+       ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"      \
                               "offset:%u;\tsize:0;\n",                 \
                               (unsigned int)offsetof(typeof(field), item)); \
        if (!ret)                                                       \
@@ -92,9 +92,6 @@ ftrace_format_##call(struct ftrace_event_call *unused,                        \
 
 #include "trace_event_types.h"
 
-#undef TRACE_ZERO_CHAR
-#define TRACE_ZERO_CHAR(arg)
-
 #undef TRACE_FIELD
 #define TRACE_FIELD(type, item, assign)\
        entry->item = assign;
@@ -107,6 +104,9 @@ ftrace_format_##call(struct ftrace_event_call *unused,                      \
 #define TRACE_FIELD_SIGN(type, item, assign, is_signed)        \
        TRACE_FIELD(type, item, assign)
 
+#undef TRACE_FIELD_ZERO
+#define TRACE_FIELD_ZERO(type, item)
+
 #undef TP_CMD
 #define TP_CMD(cmd...) cmd
 
@@ -117,10 +117,16 @@ ftrace_format_##call(struct ftrace_event_call *unused,                    \
 #define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \
        cmd;
 
+static int ftrace_raw_init_event(struct ftrace_event_call *event_call)
+{
+       INIT_LIST_HEAD(&event_call->fields);
+
+       return 0;
+}
+
 #undef TRACE_EVENT_FORMAT
 #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt)     \
 int ftrace_define_fields_##call(struct ftrace_event_call *event_call); \
-static int ftrace_raw_init_event_##call(void);                         \
                                                                        \
 struct ftrace_event_call __used                                                \
 __attribute__((__aligned__(4)))                                                \
@@ -128,15 +134,10 @@ __attribute__((section("_ftrace_events"))) event_##call = {               \
        .name                   = #call,                                \
        .id                     = proto,                                \
        .system                 = __stringify(TRACE_SYSTEM),            \
-       .raw_init               = ftrace_raw_init_event_##call,         \
+       .raw_init               = ftrace_raw_init_event,                \
        .show_format            = ftrace_format_##call,                 \
        .define_fields          = ftrace_define_fields_##call,          \
 };                                                                     \
-static int ftrace_raw_init_event_##call(void)                          \
-{                                                                      \
-       INIT_LIST_HEAD(&event_##call.fields);                           \
-       return 0;                                                       \
-}                                                                      \
 
 #undef TRACE_EVENT_FORMAT_NOFILTER
 #define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct,   \
@@ -179,8 +180,8 @@ __attribute__((section("_ftrace_events"))) event_##call = {         \
        if (ret)                                                        \
                return ret;
 
-#undef TRACE_FIELD_ZERO_CHAR
-#define TRACE_FIELD_ZERO_CHAR(item)
+#undef TRACE_FIELD_ZERO
+#define TRACE_FIELD_ZERO(type, item)
 
 #undef TRACE_EVENT_FORMAT
 #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt)     \
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
new file mode 100644 (file)
index 0000000..19a6de6
--- /dev/null
@@ -0,0 +1,1233 @@
+/*
+ * kprobe based kernel tracer
+ *
+ * Created by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/kprobes.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/debugfs.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/ptrace.h>
+
+#include "trace.h"
+#include "trace_output.h"
+
+#define MAX_TRACE_ARGS 128
+#define MAX_ARGSTR_LEN 63
+#define MAX_EVENT_NAME_LEN 64
+
+/* currently, trace_kprobe only supports X86. */
+
+struct fetch_func {
+       unsigned long (*func)(struct pt_regs *, void *);
+       void *data;
+};
+
+static __kprobes unsigned long call_fetch(struct fetch_func *f,
+                                         struct pt_regs *regs)
+{
+       return f->func(regs, f->data);
+}
+
+/* fetch handlers */
+static __kprobes unsigned long fetch_register(struct pt_regs *regs,
+                                             void *offset)
+{
+       return regs_get_register(regs, (unsigned int)((unsigned long)offset));
+}
+
+static __kprobes unsigned long fetch_stack(struct pt_regs *regs,
+                                          void *num)
+{
+       return regs_get_kernel_stack_nth(regs,
+                                        (unsigned int)((unsigned long)num));
+}
+
+static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr)
+{
+       unsigned long retval;
+
+       if (probe_kernel_address(addr, retval))
+               return 0;
+       return retval;
+}
+
+static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num)
+{
+       return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num));
+}
+
+static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs,
+                                             void *dummy)
+{
+       return regs_return_value(regs);
+}
+
+static __kprobes unsigned long fetch_ip(struct pt_regs *regs, void *dummy)
+{
+       return instruction_pointer(regs);
+}
+
+static __kprobes unsigned long fetch_stack_address(struct pt_regs *regs,
+                                                  void *dummy)
+{
+       return kernel_stack_pointer(regs);
+}
+
+/* Memory fetching by symbol */
+struct symbol_cache {
+       char *symbol;
+       long offset;
+       unsigned long addr;
+};
+
+static unsigned long update_symbol_cache(struct symbol_cache *sc)
+{
+       sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);
+       if (sc->addr)
+               sc->addr += sc->offset;
+       return sc->addr;
+}
+
+static void free_symbol_cache(struct symbol_cache *sc)
+{
+       kfree(sc->symbol);
+       kfree(sc);
+}
+
+static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
+{
+       struct symbol_cache *sc;
+
+       if (!sym || strlen(sym) == 0)
+               return NULL;
+       sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
+       if (!sc)
+               return NULL;
+
+       sc->symbol = kstrdup(sym, GFP_KERNEL);
+       if (!sc->symbol) {
+               kfree(sc);
+               return NULL;
+       }
+       sc->offset = offset;
+
+       update_symbol_cache(sc);
+       return sc;
+}
+
+static __kprobes unsigned long fetch_symbol(struct pt_regs *regs, void *data)
+{
+       struct symbol_cache *sc = data;
+
+       if (sc->addr)
+               return fetch_memory(regs, (void *)sc->addr);
+       else
+               return 0;
+}
+
+/* Special indirect memory access interface */
+struct indirect_fetch_data {
+       struct fetch_func orig;
+       long offset;
+};
+
+static __kprobes unsigned long fetch_indirect(struct pt_regs *regs, void *data)
+{
+       struct indirect_fetch_data *ind = data;
+       unsigned long addr;
+
+       addr = call_fetch(&ind->orig, regs);
+       if (addr) {
+               addr += ind->offset;
+               return fetch_memory(regs, (void *)addr);
+       } else
+               return 0;
+}
+
+static __kprobes void free_indirect_fetch_data(struct indirect_fetch_data *data)
+{
+       if (data->orig.func == fetch_indirect)
+               free_indirect_fetch_data(data->orig.data);
+       else if (data->orig.func == fetch_symbol)
+               free_symbol_cache(data->orig.data);
+       kfree(data);
+}
+
+/**
+ * kprobe_trace_core
+ */
+
+struct trace_probe {
+       struct list_head        list;
+       union {
+               struct kprobe           kp;
+               struct kretprobe        rp;
+       };
+       unsigned long           nhit;
+       const char              *symbol;        /* symbol name */
+       struct ftrace_event_call        call;
+       struct trace_event              event;
+       unsigned int            nr_args;
+       struct fetch_func       args[];
+};
+
+#define SIZEOF_TRACE_PROBE(n)                  \
+       (offsetof(struct trace_probe, args) +   \
+       (sizeof(struct fetch_func) * (n)))
+
+static int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs);
+static int kretprobe_trace_func(struct kretprobe_instance *ri,
+                               struct pt_regs *regs);
+
+static __kprobes int probe_is_return(struct trace_probe *tp)
+{
+       return (tp->rp.handler == kretprobe_trace_func);
+}
+
+static __kprobes const char *probe_symbol(struct trace_probe *tp)
+{
+       return tp->symbol ? tp->symbol : "unknown";
+}
+
+static __kprobes long probe_offset(struct trace_probe *tp)
+{
+       return (probe_is_return(tp)) ? tp->rp.kp.offset : tp->kp.offset;
+}
+
+static __kprobes void *probe_address(struct trace_probe *tp)
+{
+       return (probe_is_return(tp)) ? tp->rp.kp.addr : tp->kp.addr;
+}
+
+static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff)
+{
+       int ret = -EINVAL;
+
+       if (ff->func == fetch_argument)
+               ret = snprintf(buf, n, "a%lu", (unsigned long)ff->data);
+       else if (ff->func == fetch_register) {
+               const char *name;
+               name = regs_query_register_name((unsigned int)((long)ff->data));
+               ret = snprintf(buf, n, "%%%s", name);
+       } else if (ff->func == fetch_stack)
+               ret = snprintf(buf, n, "s%lu", (unsigned long)ff->data);
+       else if (ff->func == fetch_memory)
+               ret = snprintf(buf, n, "@0x%p", ff->data);
+       else if (ff->func == fetch_symbol) {
+               struct symbol_cache *sc = ff->data;
+               ret = snprintf(buf, n, "@%s%+ld", sc->symbol, sc->offset);
+       } else if (ff->func == fetch_retvalue)
+               ret = snprintf(buf, n, "rv");
+       else if (ff->func == fetch_ip)
+               ret = snprintf(buf, n, "ra");
+       else if (ff->func == fetch_stack_address)
+               ret = snprintf(buf, n, "sa");
+       else if (ff->func == fetch_indirect) {
+               struct indirect_fetch_data *id = ff->data;
+               size_t l = 0;
+               ret = snprintf(buf, n, "%+ld(", id->offset);
+               if (ret >= n)
+                       goto end;
+               l += ret;
+               ret = probe_arg_string(buf + l, n - l, &id->orig);
+               if (ret < 0)
+                       goto end;
+               l += ret;
+               ret = snprintf(buf + l, n - l, ")");
+               ret += l;
+       }
+end:
+       if (ret >= n)
+               return -ENOSPC;
+       return ret;
+}
+
+static int register_probe_event(struct trace_probe *tp);
+static void unregister_probe_event(struct trace_probe *tp);
+
+static DEFINE_MUTEX(probe_lock);
+static LIST_HEAD(probe_list);
+
+static struct trace_probe *alloc_trace_probe(const char *symbol,
+                                            const char *event, int nargs)
+{
+       struct trace_probe *tp;
+
+       tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL);
+       if (!tp)
+               return ERR_PTR(-ENOMEM);
+
+       if (symbol) {
+               tp->symbol = kstrdup(symbol, GFP_KERNEL);
+               if (!tp->symbol)
+                       goto error;
+       }
+       if (!event)
+               goto error;
+       tp->call.name = kstrdup(event, GFP_KERNEL);
+       if (!tp->call.name)
+               goto error;
+
+       INIT_LIST_HEAD(&tp->list);
+       return tp;
+error:
+       kfree(tp->symbol);
+       kfree(tp);
+       return ERR_PTR(-ENOMEM);
+}
+
+static void free_trace_probe(struct trace_probe *tp)
+{
+       int i;
+
+       for (i = 0; i < tp->nr_args; i++)
+               if (tp->args[i].func == fetch_symbol)
+                       free_symbol_cache(tp->args[i].data);
+               else if (tp->args[i].func == fetch_indirect)
+                       free_indirect_fetch_data(tp->args[i].data);
+
+       kfree(tp->call.name);
+       kfree(tp->symbol);
+       kfree(tp);
+}
+
+static struct trace_probe *find_probe_event(const char *event)
+{
+       struct trace_probe *tp;
+
+       list_for_each_entry(tp, &probe_list, list)
+               if (!strcmp(tp->call.name, event))
+                       return tp;
+       return NULL;
+}
+
+static void __unregister_trace_probe(struct trace_probe *tp)
+{
+       if (probe_is_return(tp))
+               unregister_kretprobe(&tp->rp);
+       else
+               unregister_kprobe(&tp->kp);
+}
+
+/* Unregister a trace_probe and probe_event: call with locking probe_lock */
+static void unregister_trace_probe(struct trace_probe *tp)
+{
+       unregister_probe_event(tp);
+       __unregister_trace_probe(tp);
+       list_del(&tp->list);
+}
+
+/* Register a trace_probe and probe_event */
+static int register_trace_probe(struct trace_probe *tp)
+{
+       struct trace_probe *old_tp;
+       int ret;
+
+       mutex_lock(&probe_lock);
+
+       if (probe_is_return(tp))
+               ret = register_kretprobe(&tp->rp);
+       else
+               ret = register_kprobe(&tp->kp);
+
+       if (ret) {
+               pr_warning("Could not insert probe(%d)\n", ret);
+               if (ret == -EILSEQ) {
+                       pr_warning("Probing address(0x%p) is not an "
+                                  "instruction boundary.\n",
+                                  probe_address(tp));
+                       ret = -EINVAL;
+               }
+               goto end;
+       }
+       /* register as an event */
+       old_tp = find_probe_event(tp->call.name);
+       if (old_tp) {
+               /* delete old event */
+               unregister_trace_probe(old_tp);
+               free_trace_probe(old_tp);
+       }
+       ret = register_probe_event(tp);
+       if (ret) {
+               pr_warning("Faild to register probe event(%d)\n", ret);
+               __unregister_trace_probe(tp);
+       }
+       list_add_tail(&tp->list, &probe_list);
+end:
+       mutex_unlock(&probe_lock);
+       return ret;
+}
+
+/* Split symbol and offset. */
+static int split_symbol_offset(char *symbol, long *offset)
+{
+       char *tmp;
+       int ret;
+
+       if (!offset)
+               return -EINVAL;
+
+       tmp = strchr(symbol, '+');
+       if (!tmp)
+               tmp = strchr(symbol, '-');
+
+       if (tmp) {
+               /* skip sign because strict_strtol doesn't accept '+' */
+               ret = strict_strtol(tmp + 1, 0, offset);
+               if (ret)
+                       return ret;
+               if (*tmp == '-')
+                       *offset = -(*offset);
+               *tmp = '\0';
+       } else
+               *offset = 0;
+       return 0;
+}
+
+#define PARAM_MAX_ARGS 16
+#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
+
+static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
+{
+       int ret = 0;
+       unsigned long param;
+       long offset;
+       char *tmp;
+
+       switch (arg[0]) {
+       case 'a':       /* argument */
+               ret = strict_strtoul(arg + 1, 10, &param);
+               if (ret || param > PARAM_MAX_ARGS)
+                       ret = -EINVAL;
+               else {
+                       ff->func = fetch_argument;
+                       ff->data = (void *)param;
+               }
+               break;
+       case 'r':       /* retval or retaddr */
+               if (is_return && arg[1] == 'v') {
+                       ff->func = fetch_retvalue;
+                       ff->data = NULL;
+               } else if (is_return && arg[1] == 'a') {
+                       ff->func = fetch_ip;
+                       ff->data = NULL;
+               } else
+                       ret = -EINVAL;
+               break;
+       case '%':       /* named register */
+               ret = regs_query_register_offset(arg + 1);
+               if (ret >= 0) {
+                       ff->func = fetch_register;
+                       ff->data = (void *)(unsigned long)ret;
+                       ret = 0;
+               }
+               break;
+       case 's':       /* stack */
+               if (arg[1] == 'a') {
+                       ff->func = fetch_stack_address;
+                       ff->data = NULL;
+               } else {
+                       ret = strict_strtoul(arg + 1, 10, &param);
+                       if (ret || param > PARAM_MAX_STACK)
+                               ret = -EINVAL;
+                       else {
+                               ff->func = fetch_stack;
+                               ff->data = (void *)param;
+                       }
+               }
+               break;
+       case '@':       /* memory or symbol */
+               if (isdigit(arg[1])) {
+                       ret = strict_strtoul(arg + 1, 0, &param);
+                       if (ret)
+                               break;
+                       ff->func = fetch_memory;
+                       ff->data = (void *)param;
+               } else {
+                       ret = split_symbol_offset(arg + 1, &offset);
+                       if (ret)
+                               break;
+                       ff->data = alloc_symbol_cache(arg + 1,
+                                                             offset);
+                       if (ff->data)
+                               ff->func = fetch_symbol;
+                       else
+                               ret = -EINVAL;
+               }
+               break;
+       case '+':       /* indirect memory */
+       case '-':
+               tmp = strchr(arg, '(');
+               if (!tmp) {
+                       ret = -EINVAL;
+                       break;
+               }
+               *tmp = '\0';
+               ret = strict_strtol(arg + 1, 0, &offset);
+               if (ret)
+                       break;
+               if (arg[0] == '-')
+                       offset = -offset;
+               arg = tmp + 1;
+               tmp = strrchr(arg, ')');
+               if (tmp) {
+                       struct indirect_fetch_data *id;
+                       *tmp = '\0';
+                       id = kzalloc(sizeof(struct indirect_fetch_data),
+                                    GFP_KERNEL);
+                       if (!id)
+                               return -ENOMEM;
+                       id->offset = offset;
+                       ret = parse_probe_arg(arg, &id->orig, is_return);
+                       if (ret)
+                               kfree(id);
+                       else {
+                               ff->func = fetch_indirect;
+                               ff->data = (void *)id;
+                       }
+               } else
+                       ret = -EINVAL;
+               break;
+       default:
+               /* TODO: support custom handler */
+               ret = -EINVAL;
+       }
+       return ret;
+}
+
+static int create_trace_probe(int argc, char **argv)
+{
+       /*
+        * Argument syntax:
+        *  - Add kprobe: p[:EVENT] SYMBOL[+OFFS|-OFFS]|ADDRESS [FETCHARGS]
+        *  - Add kretprobe: r[:EVENT] SYMBOL[+0] [FETCHARGS]
+        * Fetch args:
+        *  aN  : fetch Nth of function argument. (N:0-)
+        *  rv  : fetch return value
+        *  ra  : fetch return address
+        *  sa  : fetch stack address
+        *  sN  : fetch Nth of stack (N:0-)
+        *  @ADDR       : fetch memory at ADDR (ADDR should be in kernel)
+        *  @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
+        *  %REG        : fetch register REG
+        * Indirect memory fetch:
+        *  +|-offs(ARG) : fetch memory at ARG +|- offs address.
+        */
+       struct trace_probe *tp;
+       struct kprobe *kp;
+       int i, ret = 0;
+       int is_return = 0;
+       char *symbol = NULL, *event = NULL;
+       long offset = 0;
+       void *addr = NULL;
+
+       if (argc < 2)
+               return -EINVAL;
+
+       if (argv[0][0] == 'p')
+               is_return = 0;
+       else if (argv[0][0] == 'r')
+               is_return = 1;
+       else
+               return -EINVAL;
+
+       if (argv[0][1] == ':') {
+               event = &argv[0][2];
+               if (strlen(event) == 0) {
+                       pr_info("Event name is not specifiled\n");
+                       return -EINVAL;
+               }
+       }
+
+       if (isdigit(argv[1][0])) {
+               if (is_return)
+                       return -EINVAL;
+               /* an address specified */
+               ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr);
+               if (ret)
+                       return ret;
+       } else {
+               /* a symbol specified */
+               symbol = argv[1];
+               /* TODO: support .init module functions */
+               ret = split_symbol_offset(symbol, &offset);
+               if (ret)
+                       return ret;
+               if (offset && is_return)
+                       return -EINVAL;
+       }
+       argc -= 2; argv += 2;
+
+       /* setup a probe */
+       if (!event) {
+               /* Make a new event name */
+               char buf[MAX_EVENT_NAME_LEN];
+               if (symbol)
+                       snprintf(buf, MAX_EVENT_NAME_LEN, "%c@%s%+ld",
+                                is_return ? 'r' : 'p', symbol, offset);
+               else
+                       snprintf(buf, MAX_EVENT_NAME_LEN, "%c@0x%p",
+                                is_return ? 'r' : 'p', addr);
+               tp = alloc_trace_probe(symbol, buf, argc);
+       } else
+               tp = alloc_trace_probe(symbol, event, argc);
+       if (IS_ERR(tp))
+               return PTR_ERR(tp);
+
+       if (is_return) {
+               kp = &tp->rp.kp;
+               tp->rp.handler = kretprobe_trace_func;
+       } else {
+               kp = &tp->kp;
+               tp->kp.pre_handler = kprobe_trace_func;
+       }
+
+       if (tp->symbol) {
+               kp->symbol_name = tp->symbol;
+               kp->offset = offset;
+       } else
+               kp->addr = addr;
+
+       /* parse arguments */
+       ret = 0;
+       for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
+               if (strlen(argv[i]) > MAX_ARGSTR_LEN) {
+                       pr_info("Argument%d(%s) is too long.\n", i, argv[i]);
+                       ret = -ENOSPC;
+                       goto error;
+               }
+               ret = parse_probe_arg(argv[i], &tp->args[i], is_return);
+               if (ret)
+                       goto error;
+       }
+       tp->nr_args = i;
+
+       ret = register_trace_probe(tp);
+       if (ret)
+               goto error;
+       return 0;
+
+error:
+       free_trace_probe(tp);
+       return ret;
+}
+
+static void cleanup_all_probes(void)
+{
+       struct trace_probe *tp;
+
+       mutex_lock(&probe_lock);
+       /* TODO: Use batch unregistration */
+       while (!list_empty(&probe_list)) {
+               tp = list_entry(probe_list.next, struct trace_probe, list);
+               unregister_trace_probe(tp);
+               free_trace_probe(tp);
+       }
+       mutex_unlock(&probe_lock);
+}
+
+
+/* Probes listing interfaces */
+static void *probes_seq_start(struct seq_file *m, loff_t *pos)
+{
+       mutex_lock(&probe_lock);
+       return seq_list_start(&probe_list, *pos);
+}
+
+static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+       return seq_list_next(v, &probe_list, pos);
+}
+
+static void probes_seq_stop(struct seq_file *m, void *v)
+{
+       mutex_unlock(&probe_lock);
+}
+
+static int probes_seq_show(struct seq_file *m, void *v)
+{
+       struct trace_probe *tp = v;
+       int i, ret;
+       char buf[MAX_ARGSTR_LEN + 1];
+
+       seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p');
+       seq_printf(m, ":%s", tp->call.name);
+
+       if (tp->symbol)
+               seq_printf(m, " %s%+ld", probe_symbol(tp), probe_offset(tp));
+       else
+               seq_printf(m, " 0x%p", probe_address(tp));
+
+       for (i = 0; i < tp->nr_args; i++) {
+               ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]);
+               if (ret < 0) {
+                       pr_warning("Argument%d decoding error(%d).\n", i, ret);
+                       return ret;
+               }
+               seq_printf(m, " %s", buf);
+       }
+       seq_printf(m, "\n");
+       return 0;
+}
+
+static const struct seq_operations probes_seq_op = {
+       .start  = probes_seq_start,
+       .next   = probes_seq_next,
+       .stop   = probes_seq_stop,
+       .show   = probes_seq_show
+};
+
+static int probes_open(struct inode *inode, struct file *file)
+{
+       if ((file->f_mode & FMODE_WRITE) &&
+           (file->f_flags & O_TRUNC))
+               cleanup_all_probes();
+
+       return seq_open(file, &probes_seq_op);
+}
+
+static int command_trace_probe(const char *buf)
+{
+       char **argv;
+       int argc = 0, ret = 0;
+
+       argv = argv_split(GFP_KERNEL, buf, &argc);
+       if (!argv)
+               return -ENOMEM;
+
+       if (argc)
+               ret = create_trace_probe(argc, argv);
+
+       argv_free(argv);
+       return ret;
+}
+
+#define WRITE_BUFSIZE 128
+
+static ssize_t probes_write(struct file *file, const char __user *buffer,
+                           size_t count, loff_t *ppos)
+{
+       char *kbuf, *tmp;
+       int ret;
+       size_t done;
+       size_t size;
+
+       kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
+       if (!kbuf)
+               return -ENOMEM;
+
+       ret = done = 0;
+       while (done < count) {
+               size = count - done;
+               if (size >= WRITE_BUFSIZE)
+                       size = WRITE_BUFSIZE - 1;
+               if (copy_from_user(kbuf, buffer + done, size)) {
+                       ret = -EFAULT;
+                       goto out;
+               }
+               kbuf[size] = '\0';
+               tmp = strchr(kbuf, '\n');
+               if (tmp) {
+                       *tmp = '\0';
+                       size = tmp - kbuf + 1;
+               } else if (done + size < count) {
+                       pr_warning("Line length is too long: "
+                                  "Should be less than %d.", WRITE_BUFSIZE);
+                       ret = -EINVAL;
+                       goto out;
+               }
+               done += size;
+               /* Remove comments */
+               tmp = strchr(kbuf, '#');
+               if (tmp)
+                       *tmp = '\0';
+
+               ret = command_trace_probe(kbuf);
+               if (ret)
+                       goto out;
+       }
+       ret = done;
+out:
+       kfree(kbuf);
+       return ret;
+}
+
+static const struct file_operations kprobe_events_ops = {
+       .owner          = THIS_MODULE,
+       .open           = probes_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = seq_release,
+       .write          = probes_write,
+};
+
+/* Probes profiling interfaces */
+static int probes_profile_seq_show(struct seq_file *m, void *v)
+{
+       struct trace_probe *tp = v;
+
+       seq_printf(m, "  %-44s %15lu %15lu\n", tp->call.name, tp->nhit,
+                  probe_is_return(tp) ? tp->rp.kp.nmissed : tp->kp.nmissed);
+
+       return 0;
+}
+
+static const struct seq_operations profile_seq_op = {
+       .start  = probes_seq_start,
+       .next   = probes_seq_next,
+       .stop   = probes_seq_stop,
+       .show   = probes_profile_seq_show
+};
+
+static int profile_open(struct inode *inode, struct file *file)
+{
+       return seq_open(file, &profile_seq_op);
+}
+
+static const struct file_operations kprobe_profile_ops = {
+       .owner          = THIS_MODULE,
+       .open           = profile_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = seq_release,
+};
+
+/* Kprobe handler */
+static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
+{
+       struct trace_probe *tp = container_of(kp, struct trace_probe, kp);
+       struct kprobe_trace_entry *entry;
+       struct ring_buffer_event *event;
+       struct ring_buffer *buffer;
+       int size, i, pc;
+       unsigned long irq_flags;
+       struct ftrace_event_call *call = &tp->call;
+
+       tp->nhit++;
+
+       local_save_flags(irq_flags);
+       pc = preempt_count();
+
+       size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
+
+       event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
+                                                 irq_flags, pc);
+       if (!event)
+               return 0;
+
+       entry = ring_buffer_event_data(event);
+       entry->nargs = tp->nr_args;
+       entry->ip = (unsigned long)kp->addr;
+       for (i = 0; i < tp->nr_args; i++)
+               entry->args[i] = call_fetch(&tp->args[i], regs);
+
+       if (!filter_current_check_discard(buffer, call, entry, event))
+               trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
+       return 0;
+}
+
+/* Kretprobe handler */
+static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
+                                         struct pt_regs *regs)
+{
+       struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
+       struct kretprobe_trace_entry *entry;
+       struct ring_buffer_event *event;
+       struct ring_buffer *buffer;
+       int size, i, pc;
+       unsigned long irq_flags;
+       struct ftrace_event_call *call = &tp->call;
+
+       local_save_flags(irq_flags);
+       pc = preempt_count();
+
+       size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
+
+       event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
+                                                 irq_flags, pc);
+       if (!event)
+               return 0;
+
+       entry = ring_buffer_event_data(event);
+       entry->nargs = tp->nr_args;
+       entry->func = (unsigned long)probe_address(tp);
+       entry->ret_ip = (unsigned long)ri->ret_addr;
+       for (i = 0; i < tp->nr_args; i++)
+               entry->args[i] = call_fetch(&tp->args[i], regs);
+
+       if (!filter_current_check_discard(buffer, call, entry, event))
+               trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
+
+       return 0;
+}
+
+/* Event entry printers */
+enum print_line_t
+print_kprobe_event(struct trace_iterator *iter, int flags)
+{
+       struct kprobe_trace_entry *field;
+       struct trace_seq *s = &iter->seq;
+       int i;
+
+       field = (struct kprobe_trace_entry *)iter->ent;
+
+       if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
+               goto partial;
+
+       if (!trace_seq_puts(s, ":"))
+               goto partial;
+
+       for (i = 0; i < field->nargs; i++)
+               if (!trace_seq_printf(s, " 0x%lx", field->args[i]))
+                       goto partial;
+
+       if (!trace_seq_puts(s, "\n"))
+               goto partial;
+
+       return TRACE_TYPE_HANDLED;
+partial:
+       return TRACE_TYPE_PARTIAL_LINE;
+}
+
+enum print_line_t
+print_kretprobe_event(struct trace_iterator *iter, int flags)
+{
+       struct kretprobe_trace_entry *field;
+       struct trace_seq *s = &iter->seq;
+       int i;
+
+       field = (struct kretprobe_trace_entry *)iter->ent;
+
+       if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
+               goto partial;
+
+       if (!trace_seq_puts(s, " <- "))
+               goto partial;
+
+       if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
+               goto partial;
+
+       if (!trace_seq_puts(s, ":"))
+               goto partial;
+
+       for (i = 0; i < field->nargs; i++)
+               if (!trace_seq_printf(s, " 0x%lx", field->args[i]))
+                       goto partial;
+
+       if (!trace_seq_puts(s, "\n"))
+               goto partial;
+
+       return TRACE_TYPE_HANDLED;
+partial:
+       return TRACE_TYPE_PARTIAL_LINE;
+}
+
+static int probe_event_enable(struct ftrace_event_call *call)
+{
+       struct trace_probe *tp = (struct trace_probe *)call->data;
+
+       if (probe_is_return(tp))
+               return enable_kretprobe(&tp->rp);
+       else
+               return enable_kprobe(&tp->kp);
+}
+
+static void probe_event_disable(struct ftrace_event_call *call)
+{
+       struct trace_probe *tp = (struct trace_probe *)call->data;
+
+       if (probe_is_return(tp))
+               disable_kretprobe(&tp->rp);
+       else
+               disable_kprobe(&tp->kp);
+}
+
+static int probe_event_raw_init(struct ftrace_event_call *event_call)
+{
+       INIT_LIST_HEAD(&event_call->fields);
+
+       return 0;
+}
+
+#undef DEFINE_FIELD
+#define DEFINE_FIELD(type, item, name, is_signed)                      \
+       do {                                                            \
+               ret = trace_define_field(event_call, #type, name,       \
+                                        offsetof(typeof(field), item), \
+                                        sizeof(field.item), is_signed, \
+                                        FILTER_OTHER);                 \
+               if (ret)                                                \
+                       return ret;                                     \
+       } while (0)
+
+static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
+{
+       int ret, i;
+       struct kprobe_trace_entry field;
+       char buf[MAX_ARGSTR_LEN + 1];
+       struct trace_probe *tp = (struct trace_probe *)event_call->data;
+
+       ret = trace_define_common_fields(event_call);
+       if (!ret)
+               return ret;
+
+       DEFINE_FIELD(unsigned long, ip, "ip", 0);
+       DEFINE_FIELD(int, nargs, "nargs", 1);
+       for (i = 0; i < tp->nr_args; i++) {
+               /* Set argN as a field */
+               sprintf(buf, "arg%d", i);
+               DEFINE_FIELD(unsigned long, args[i], buf, 0);
+               /* Set argument string as an alias field */
+               ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]);
+               if (ret < 0)
+                       return ret;
+               DEFINE_FIELD(unsigned long, args[i], buf, 0);
+       }
+       return 0;
+}
+
+static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
+{
+       int ret, i;
+       struct kretprobe_trace_entry field;
+       char buf[MAX_ARGSTR_LEN + 1];
+       struct trace_probe *tp = (struct trace_probe *)event_call->data;
+
+       ret = trace_define_common_fields(event_call);
+       if (!ret)
+               return ret;
+
+       DEFINE_FIELD(unsigned long, func, "func", 0);
+       DEFINE_FIELD(unsigned long, ret_ip, "ret_ip", 0);
+       DEFINE_FIELD(int, nargs, "nargs", 1);
+       for (i = 0; i < tp->nr_args; i++) {
+               /* Set argN as a field */
+               sprintf(buf, "arg%d", i);
+               DEFINE_FIELD(unsigned long, args[i], buf, 0);
+               /* Set argument string as an alias field */
+               ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]);
+               if (ret < 0)
+                       return ret;
+               DEFINE_FIELD(unsigned long, args[i], buf, 0);
+       }
+       return 0;
+}
+
+static int __probe_event_show_format(struct trace_seq *s,
+                                    struct trace_probe *tp, const char *fmt,
+                                    const char *arg)
+{
+       int i, ret;
+       char buf[MAX_ARGSTR_LEN + 1];
+
+       /* Show aliases */
+       for (i = 0; i < tp->nr_args; i++) {
+               ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]);
+               if (ret < 0)
+                       return ret;
+               if (!trace_seq_printf(s, "\talias: %s;\toriginal: arg%d;\n",
+                                     buf, i))
+                       return 0;
+       }
+       /* Show format */
+       if (!trace_seq_printf(s, "\nprint fmt: \"%s", fmt))
+               return 0;
+
+       for (i = 0; i < tp->nr_args; i++)
+               if (!trace_seq_puts(s, " 0x%lx"))
+                       return 0;
+
+       if (!trace_seq_printf(s, "\", %s", arg))
+               return 0;
+
+       for (i = 0; i < tp->nr_args; i++)
+               if (!trace_seq_printf(s, ", arg%d", i))
+                       return 0;
+
+       return trace_seq_puts(s, "\n");
+}
+
+#undef SHOW_FIELD
+#define SHOW_FIELD(type, item, name)                                   \
+       do {                                                            \
+               ret = trace_seq_printf(s, "\tfield: " #type " %s;\t"    \
+                               "offset:%u;\tsize:%u;\n", name,         \
+                               (unsigned int)offsetof(typeof(field), item),\
+                               (unsigned int)sizeof(type));            \
+               if (!ret)                                               \
+                       return 0;                                       \
+       } while (0)
+
+static int kprobe_event_show_format(struct ftrace_event_call *call,
+                                   struct trace_seq *s)
+{
+       struct kprobe_trace_entry field __attribute__((unused));
+       int ret, i;
+       char buf[8];
+       struct trace_probe *tp = (struct trace_probe *)call->data;
+
+       SHOW_FIELD(unsigned long, ip, "ip");
+       SHOW_FIELD(int, nargs, "nargs");
+
+       /* Show fields */
+       for (i = 0; i < tp->nr_args; i++) {
+               sprintf(buf, "arg%d", i);
+               SHOW_FIELD(unsigned long, args[i], buf);
+       }
+       trace_seq_puts(s, "\n");
+
+       return __probe_event_show_format(s, tp, "%lx:", "ip");
+}
+
+static int kretprobe_event_show_format(struct ftrace_event_call *call,
+                                      struct trace_seq *s)
+{
+       struct kretprobe_trace_entry field __attribute__((unused));
+       int ret, i;
+       char buf[8];
+       struct trace_probe *tp = (struct trace_probe *)call->data;
+
+       SHOW_FIELD(unsigned long, func, "func");
+       SHOW_FIELD(unsigned long, ret_ip, "ret_ip");
+       SHOW_FIELD(int, nargs, "nargs");
+
+       /* Show fields */
+       for (i = 0; i < tp->nr_args; i++) {
+               sprintf(buf, "arg%d", i);
+               SHOW_FIELD(unsigned long, args[i], buf);
+       }
+       trace_seq_puts(s, "\n");
+
+       return __probe_event_show_format(s, tp, "%lx <- %lx:",
+                                         "func, ret_ip");
+}
+
+static int register_probe_event(struct trace_probe *tp)
+{
+       struct ftrace_event_call *call = &tp->call;
+       int ret;
+
+       /* Initialize ftrace_event_call */
+       call->system = "kprobes";
+       if (probe_is_return(tp)) {
+               tp->event.trace = print_kretprobe_event;
+               call->raw_init = probe_event_raw_init;
+               call->show_format = kretprobe_event_show_format;
+               call->define_fields = kretprobe_event_define_fields;
+       } else {
+               tp->event.trace = print_kprobe_event;
+               call->raw_init = probe_event_raw_init;
+               call->show_format = kprobe_event_show_format;
+               call->define_fields = kprobe_event_define_fields;
+       }
+       call->event = &tp->event;
+       call->id = register_ftrace_event(&tp->event);
+       if (!call->id)
+               return -ENODEV;
+       call->enabled = 1;
+       call->regfunc = probe_event_enable;
+       call->unregfunc = probe_event_disable;
+       call->data = tp;
+       ret = trace_add_event_call(call);
+       if (ret) {
+               pr_info("Failed to register kprobe event: %s\n", call->name);
+               unregister_ftrace_event(&tp->event);
+       }
+       return ret;
+}
+
+static void unregister_probe_event(struct trace_probe *tp)
+{
+       /* tp->event is unregistered in trace_remove_event_call() */
+       trace_remove_event_call(&tp->call);
+}
+
+/* Make a debugfs interface for controling probe points */
+static __init int init_kprobe_trace(void)
+{
+       struct dentry *d_tracer;
+       struct dentry *entry;
+
+       d_tracer = tracing_init_dentry();
+       if (!d_tracer)
+               return 0;
+
+       entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
+                                   NULL, &kprobe_events_ops);
+
+       /* Event list interface */
+       if (!entry)
+               pr_warning("Could not create debugfs "
+                          "'kprobe_events' entry\n");
+
+       /* Profile interface */
+       entry = debugfs_create_file("kprobe_profile", 0444, d_tracer,
+                                   NULL, &kprobe_profile_ops);
+
+       if (!entry)
+               pr_warning("Could not create debugfs "
+                          "'kprobe_profile' entry\n");
+       return 0;
+}
+fs_initcall(init_kprobe_trace);
+
+
+#ifdef CONFIG_FTRACE_STARTUP_TEST
+
+static int kprobe_trace_selftest_target(int a1, int a2, int a3,
+                                       int a4, int a5, int a6)
+{
+       return a1 + a2 + a3 + a4 + a5 + a6;
+}
+
+static __init int kprobe_trace_self_tests_init(void)
+{
+       int ret;
+       int (*target)(int, int, int, int, int, int);
+
+       target = kprobe_trace_selftest_target;
+
+       pr_info("Testing kprobe tracing: ");
+
+       ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
+                                 "a1 a2 a3 a4 a5 a6");
+       if (WARN_ON_ONCE(ret))
+               pr_warning("error enabling function entry\n");
+
+       ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
+                                 "ra rv");
+       if (WARN_ON_ONCE(ret))
+               pr_warning("error enabling function return\n");
+
+       ret = target(1, 2, 3, 4, 5, 6);
+
+       cleanup_all_probes();
+
+       pr_cont("OK\n");
+       return 0;
+}
+
+late_initcall(kprobe_trace_self_tests_init);
+
+#endif
index 8712ce3..dfc55fe 100644 (file)
@@ -285,13 +285,13 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret)
                trace_current_buffer_unlock_commit(buffer, event, 0, 0);
 }
 
-int reg_event_syscall_enter(void *ptr)
+int reg_event_syscall_enter(struct ftrace_event_call *call)
 {
        int ret = 0;
        int num;
        char *name;
 
-       name = (char *)ptr;
+       name = (char *)call->data;
        num = syscall_name_to_nr(name);
        if (num < 0 || num >= NR_syscalls)
                return -ENOSYS;
@@ -309,12 +309,12 @@ int reg_event_syscall_enter(void *ptr)
        return ret;
 }
 
-void unreg_event_syscall_enter(void *ptr)
+void unreg_event_syscall_enter(struct ftrace_event_call *call)
 {
        int num;
        char *name;
 
-       name = (char *)ptr;
+       name = (char *)call->data;
        num = syscall_name_to_nr(name);
        if (num < 0 || num >= NR_syscalls)
                return;
@@ -326,13 +326,13 @@ void unreg_event_syscall_enter(void *ptr)
        mutex_unlock(&syscall_trace_lock);
 }
 
-int reg_event_syscall_exit(void *ptr)
+int reg_event_syscall_exit(struct ftrace_event_call *call)
 {
        int ret = 0;
        int num;
        char *name;
 
-       name = (char *)ptr;
+       name = call->data;
        num = syscall_name_to_nr(name);
        if (num < 0 || num >= NR_syscalls)
                return -ENOSYS;
@@ -350,12 +350,12 @@ int reg_event_syscall_exit(void *ptr)
        return ret;
 }
 
-void unreg_event_syscall_exit(void *ptr)
+void unreg_event_syscall_exit(struct ftrace_event_call *call)
 {
        int num;
        char *name;
 
-       name = (char *)ptr;
+       name = call->data;
        num = syscall_name_to_nr(name);
        if (num < 0 || num >= NR_syscalls)
                return;