Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git...
authorIngo Molnar <mingo@elte.hu>
Thu, 26 Jan 2012 11:11:41 +0000 (12:11 +0100)
committerIngo Molnar <mingo@elte.hu>
Thu, 26 Jan 2012 11:11:41 +0000 (12:11 +0100)
Pull in the latest perf/core improvements and fixes.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
37 files changed:
tools/perf/Documentation/perf-record.txt
tools/perf/Documentation/perf-top.txt
tools/perf/Makefile
tools/perf/bench/bench.h
tools/perf/bench/mem-memcpy-x86-64-asm-def.h
tools/perf/bench/mem-memcpy-x86-64-asm.S
tools/perf/bench/mem-memcpy.c
tools/perf/bench/mem-memset-arch.h [new file with mode: 0644]
tools/perf/bench/mem-memset-x86-64-asm-def.h [new file with mode: 0644]
tools/perf/bench/mem-memset-x86-64-asm.S [new file with mode: 0644]
tools/perf/bench/mem-memset.c [new file with mode: 0644]
tools/perf/builtin-bench.c
tools/perf/builtin-probe.c
tools/perf/builtin-record.c
tools/perf/builtin-stat.c
tools/perf/builtin-test.c
tools/perf/builtin-top.c
tools/perf/perf.h
tools/perf/util/cpumap.c
tools/perf/util/cpumap.h
tools/perf/util/evlist.c
tools/perf/util/evlist.h
tools/perf/util/header.c
tools/perf/util/hist.h
tools/perf/util/include/asm/dwarf2.h
tools/perf/util/probe-event.c
tools/perf/util/python.c
tools/perf/util/symbol.c
tools/perf/util/thread_map.c
tools/perf/util/thread_map.h
tools/perf/util/top.c
tools/perf/util/top.h
tools/perf/util/trace-event-parse.c
tools/perf/util/ui/browsers/hists.c
tools/perf/util/ui/helpline.c
tools/perf/util/usage.c
tools/perf/util/util.h

index 2937f7e..ff9a66e 100644 (file)
@@ -58,6 +58,10 @@ OPTIONS
 --tid=::
         Record events on existing thread ID.
 
+-u::
+--uid=::
+        Record events in threads owned by uid. Name or number.
+
 -r::
 --realtime=::
        Collect data with this RT SCHED_FIFO priority.
index b1a5bbb..ab1454e 100644 (file)
@@ -78,6 +78,10 @@ Default is to monitor all CPUS.
 --tid=<tid>::
         Profile events on existing thread ID.
 
+-u::
+--uid=::
+        Profile events in threads owned by uid. Name or number.
+
 -r <priority>::
 --realtime=<priority>::
        Collect data with this RT SCHED_FIFO priority.
index ac86d67..d64f581 100644 (file)
@@ -61,7 +61,7 @@ ifeq ($(ARCH),x86_64)
        ifeq (${IS_X86_64}, 1)
                RAW_ARCH := x86_64
                ARCH_CFLAGS := -DARCH_X86_64
-               ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S
+               ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S
        endif
 endif
 
@@ -104,7 +104,7 @@ endif
 
 CFLAGS = -fno-omit-frame-pointer -ggdb3 -Wall -Wextra -std=gnu99 $(CFLAGS_WERROR) $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
 EXTLIBS = -lpthread -lrt -lelf -lm
-ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
+ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
 ALL_LDFLAGS = $(LDFLAGS)
 STRIP ?= strip
 
@@ -168,10 +168,7 @@ endif
 
 ### --- END CONFIGURATION SECTION ---
 
-# Those must not be GNU-specific; they are shared with perl/ which may
-# be built by a different compiler. (Note that this is an artifact now
-# but it still might be nice to keep that distinction.)
-BASIC_CFLAGS = -Iutil/include -Iarch/$(ARCH)/include
+BASIC_CFLAGS = -Iutil/include -Iarch/$(ARCH)/include -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
 BASIC_LDFLAGS =
 
 # Guard against environment variables
@@ -362,8 +359,10 @@ BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o
 BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o
 ifeq ($(RAW_ARCH),x86_64)
 BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o
+BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o
 endif
 BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o
+BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o
 
 BUILTIN_OBJS += $(OUTPUT)builtin-diff.o
 BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o
index f7781c6..a09bece 100644 (file)
@@ -4,6 +4,7 @@
 extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
 extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
 extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used);
+extern int bench_mem_memset(int argc, const char **argv, const char *prefix);
 
 #define BENCH_FORMAT_DEFAULT_STR       "default"
 #define BENCH_FORMAT_DEFAULT           0
index d588b87..d66ab79 100644 (file)
@@ -2,3 +2,11 @@
 MEMCPY_FN(__memcpy,
        "x86-64-unrolled",
        "unrolled memcpy() in arch/x86/lib/memcpy_64.S")
+
+MEMCPY_FN(memcpy_c,
+       "x86-64-movsq",
+       "movsq-based memcpy() in arch/x86/lib/memcpy_64.S")
+
+MEMCPY_FN(memcpy_c_e,
+       "x86-64-movsb",
+       "movsb-based memcpy() in arch/x86/lib/memcpy_64.S")
index a57b66e..a20780b 100644 (file)
@@ -1,2 +1,6 @@
-
+#define memcpy MEMCPY /* don't hide glibc's memcpy() */
+#define altinstr_replacement text
+#define globl p2align 4; .globl
+#define Lmemcpy_c globl memcpy_c; memcpy_c
+#define Lmemcpy_c_e globl memcpy_c_e; memcpy_c_e
 #include "../../../arch/x86/lib/memcpy_64.S"
index db82021..6ad2b1c 100644 (file)
@@ -24,6 +24,7 @@
 
 static const char      *length_str     = "1MB";
 static const char      *routine        = "default";
+static int             iterations      = 1;
 static bool            use_clock;
 static int             clock_fd;
 static bool            only_prefault;
@@ -35,6 +36,8 @@ static const struct option options[] = {
                    "available unit: B, MB, GB (upper and lower)"),
        OPT_STRING('r', "routine", &routine, "default",
                    "Specify routine to copy"),
+       OPT_INTEGER('i', "iterations", &iterations,
+                   "repeat memcpy() invocation this number of times"),
        OPT_BOOLEAN('c', "clock", &use_clock,
                    "Use CPU clock for measuring"),
        OPT_BOOLEAN('o', "only-prefault", &only_prefault,
@@ -121,6 +124,7 @@ static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault)
 {
        u64 clock_start = 0ULL, clock_end = 0ULL;
        void *src = NULL, *dst = NULL;
+       int i;
 
        alloc_mem(&src, &dst, len);
 
@@ -128,7 +132,8 @@ static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault)
                fn(dst, src, len);
 
        clock_start = get_clock();
-       fn(dst, src, len);
+       for (i = 0; i < iterations; ++i)
+               fn(dst, src, len);
        clock_end = get_clock();
 
        free(src);
@@ -140,6 +145,7 @@ static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
 {
        struct timeval tv_start, tv_end, tv_diff;
        void *src = NULL, *dst = NULL;
+       int i;
 
        alloc_mem(&src, &dst, len);
 
@@ -147,7 +153,8 @@ static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
                fn(dst, src, len);
 
        BUG_ON(gettimeofday(&tv_start, NULL));
-       fn(dst, src, len);
+       for (i = 0; i < iterations; ++i)
+               fn(dst, src, len);
        BUG_ON(gettimeofday(&tv_end, NULL));
 
        timersub(&tv_end, &tv_start, &tv_diff);
diff --git a/tools/perf/bench/mem-memset-arch.h b/tools/perf/bench/mem-memset-arch.h
new file mode 100644 (file)
index 0000000..a040fa7
--- /dev/null
@@ -0,0 +1,12 @@
+
+#ifdef ARCH_X86_64
+
+#define MEMSET_FN(fn, name, desc)              \
+       extern void *fn(void *, int, size_t);
+
+#include "mem-memset-x86-64-asm-def.h"
+
+#undef MEMSET_FN
+
+#endif
+
diff --git a/tools/perf/bench/mem-memset-x86-64-asm-def.h b/tools/perf/bench/mem-memset-x86-64-asm-def.h
new file mode 100644 (file)
index 0000000..a71dff9
--- /dev/null
@@ -0,0 +1,12 @@
+
+/*
+ * One MEMSET_FN(symbol, name, description) entry per assembly routine.
+ * The including file defines MEMSET_FN to pick the fields it needs.
+ */
+MEMSET_FN(__memset,
+       "x86-64-unrolled",
+       "unrolled memset() in arch/x86/lib/memset_64.S")
+
+MEMSET_FN(memset_c,
+       "x86-64-stosq",
+       "stosq-based memset() in arch/x86/lib/memset_64.S")
+
+MEMSET_FN(memset_c_e,
+       "x86-64-stosb",
+       "stosb-based memset() in arch/x86/lib/memset_64.S")
diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S
new file mode 100644 (file)
index 0000000..cb92170
--- /dev/null
@@ -0,0 +1,6 @@
+#define memset MEMSET /* don't hide glibc's memset() */
+#define altinstr_replacement text
+#define globl p2align 4; .globl
+#define Lmemset_c globl memset_c; memset_c
+#define Lmemset_c_e globl memset_c_e; memset_c_e
+#include "../../../arch/x86/lib/memset_64.S"
diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c
new file mode 100644 (file)
index 0000000..59d4933
--- /dev/null
@@ -0,0 +1,298 @@
+/*
+ * mem-memset.c
+ *
+ * memset: Simple memory set in various ways
+ *
+ * Trivial clone of mem-memcpy.c.
+ */
+#include <ctype.h>
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/parse-options.h"
+#include "../util/header.h"
+#include "bench.h"
+#include "mem-memset-arch.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <errno.h>
+
+#define K 1024
+
+static const char      *length_str     = "1MB";
+static const char      *routine        = "default";
+static int             iterations      = 1;
+static bool            use_clock;
+static int             clock_fd;
+static bool            only_prefault;
+static bool            no_prefault;
+
+/* Command line options accepted by "perf bench mem memset". */
+static const struct option options[] = {
+       OPT_STRING('l', "length", &length_str, "1MB",
+                   "Specify length of memory to set. "
+                   "available unit: B, MB, GB (upper and lower)"),
+       OPT_STRING('r', "routine", &routine, "default",
+                   "Specify memset() routine to use"),
+       OPT_INTEGER('i', "iterations", &iterations,
+                   "repeat memset() invocation this number of times"),
+       OPT_BOOLEAN('c', "clock", &use_clock,
+                   "Use CPU clock for measuring"),
+       OPT_BOOLEAN('o', "only-prefault", &only_prefault,
+                   "Show only the result with page faults before memset()"),
+       OPT_BOOLEAN('n', "no-prefault", &no_prefault,
+                   "Show only the result without page faults before memset()"),
+       OPT_END()
+};
+
+/* Signature shared by every benchmarked routine; same shape as memset(). */
+typedef void *(*memset_t)(void *, int, size_t);
+
+/* One selectable memset() implementation. */
+struct routine {
+       const char *name;       /* value matched against the --routine option */
+       const char *desc;       /* text shown when listing available routines */
+       memset_t fn;            /* function that gets benchmarked */
+};
+
+/* Table of selectable routines; the entry with a NULL name ends the table. */
+static const struct routine routines[] = {
+       { "default",
+         "Default memset() provided by glibc",
+         memset },
+#ifdef ARCH_X86_64
+
+/* Turn each MEMSET_FN() entry of the included header into a table row. */
+#define MEMSET_FN(fn, name, desc) { name, desc, fn },
+#include "mem-memset-x86-64-asm-def.h"
+#undef MEMSET_FN
+
+#endif
+
+       { NULL,
+         NULL,
+         NULL   }
+};
+
+static const char * const bench_mem_memset_usage[] = {
+       "perf bench mem memset <options>",
+       NULL
+};
+
+static struct perf_event_attr clock_attr = {
+       .type           = PERF_TYPE_HARDWARE,
+       .config         = PERF_COUNT_HW_CPU_CYCLES
+};
+
+/*
+ * Open the counter described by clock_attr (hardware CPU cycles) for the
+ * calling process and keep its descriptor in clock_fd for get_clock().
+ */
+static void init_clock(void)
+{
+       clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0);
+       if (clock_fd >= 0)
+               return;
+
+       /* ENOSYS gets a readable hint; any other failure trips BUG_ON(). */
+       if (errno == ENOSYS)
+               die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
+       else
+               BUG_ON(clock_fd < 0);
+}
+
+/* Read the current value of the counter opened by init_clock(). */
+static u64 get_clock(void)
+{
+       u64 cycles;
+       int ret = read(clock_fd, &cycles, sizeof(u64));
+
+       /* Anything but a full u64 would leave 'cycles' meaningless. */
+       BUG_ON(ret != sizeof(u64));
+
+       return cycles;
+}
+
+/* Convert a struct timeval into seconds expressed as a double. */
+static double timeval2double(struct timeval *ts)
+{
+       double whole = (double)ts->tv_sec;
+       double frac = (double)ts->tv_usec / (double)1000000;
+
+       return whole + frac;
+}
+
+/*
+ * Allocate @length bytes with zalloc() and store the buffer in *@dst.
+ * Calls die() when the allocation fails.
+ */
+static void alloc_mem(void **dst, size_t length)
+{
+       *dst = zalloc(length);
+       /* Test the buffer itself: @dst points at a caller variable, never NULL. */
+       if (!*dst)
+               die("memory allocation failed - maybe length is too large?\n");
+}
+
+/*
+ * Measure @fn with the counter read by get_clock().
+ *
+ * @fn:       routine under test
+ * @len:      number of bytes set per call
+ * @prefault: when true, touch the buffer once before the timed loop
+ *
+ * Returns the counter delta of the timed loop averaged over 'iterations',
+ * i.e. the cost of a single @len byte pass, so a caller dividing by @len
+ * gets a per-byte figure whatever the iteration count. Returns 0 when
+ * 'iterations' is not positive, since nothing was measured.
+ */
+static u64 do_memset_clock(memset_t fn, size_t len, bool prefault)
+{
+       u64 clock_start = 0ULL, clock_end = 0ULL;
+       void *dst = NULL;
+       int i;
+
+       alloc_mem(&dst, len);
+
+       if (prefault)
+               fn(dst, -1, len);
+
+       clock_start = get_clock();
+       for (i = 0; i < iterations; ++i)
+               fn(dst, i, len);
+       clock_end = get_clock();
+
+       free(dst);
+
+       if (iterations <= 0)
+               return 0;
+
+       return (clock_end - clock_start) / iterations;
+}
+
+/*
+ * Measure @fn with gettimeofday().
+ *
+ * @fn:       routine under test
+ * @len:      number of bytes set per call
+ * @prefault: when true, touch the buffer once before the timed loop
+ *
+ * Returns the throughput in bytes per second, counting every byte written
+ * by all 'iterations' passes of the timed loop.
+ */
+static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault)
+{
+       struct timeval tv_start, tv_end, tv_diff;
+       void *dst = NULL;
+       double total_len;
+       int i;
+
+       alloc_mem(&dst, len);
+
+       if (prefault)
+               fn(dst, -1, len);
+
+       BUG_ON(gettimeofday(&tv_start, NULL));
+       for (i = 0; i < iterations; ++i)
+               fn(dst, i, len);
+       BUG_ON(gettimeofday(&tv_end, NULL));
+
+       timersub(&tv_end, &tv_start, &tv_diff);
+
+       free(dst);
+
+       /* The timed loop wrote len bytes once per iteration, not just once. */
+       total_len = (double)len * (iterations > 0 ? iterations : 0);
+
+       return total_len / timeval2double(&tv_diff);
+}
+
+#define pf (no_prefault ? 0 : 1)
+
+#define print_bps(x) do {                                      \
+               if (x < K)                                      \
+                       printf(" %14lf B/Sec", x);              \
+               else if (x < K * K)                             \
+                       printf(" %14lfd KB/Sec", x / K);        \
+               else if (x < K * K * K)                         \
+                       printf(" %14lf MB/Sec", x / K / K);     \
+               else                                            \
+                       printf(" %14lf GB/Sec", x / K / K / K); \
+       } while (0)
+
+/*
+ * Entry point of "perf bench mem memset".
+ *
+ * Parses the command line, looks up the routine named by --routine, runs
+ * it without and/or with a prefault pass and prints either a throughput
+ * (gettimeofday based) or a Clock/Byte figure (--clock) in the format
+ * selected by bench_format.
+ *
+ * Returns 0 on success and 1 for an invalid length or an unknown routine.
+ */
+int bench_mem_memset(int argc, const char **argv,
+                    const char *prefix __used)
+{
+       int i;
+       size_t len;
+       double result_bps[2];
+       u64 result_clock[2];
+
+       argc = parse_options(argc, argv, options,
+                            bench_mem_memset_usage, 0);
+
+       if (use_clock)
+               init_clock();
+
+       len = (size_t)perf_atoll((char *)length_str);
+
+       /* Slot 0 holds the run without prefault, slot 1 the run with it. */
+       result_clock[0] = result_clock[1] = 0ULL;
+       result_bps[0] = result_bps[1] = 0.0;
+
+       if ((s64)len <= 0) {
+               fprintf(stderr, "Invalid length:%s\n", length_str);
+               return 1;
+       }
+
+       /*
+        * Asking for both --only-prefault and --no-prefault is treated the
+        * same as asking for neither: both results are shown.
+        */
+       if (only_prefault && no_prefault)
+               only_prefault = no_prefault = false;
+
+       /* Look the requested routine up by name. */
+       for (i = 0; routines[i].name; i++) {
+               if (!strcmp(routines[i].name, routine))
+                       break;
+       }
+       if (!routines[i].name) {
+               printf("Unknown routine:%s\n", routine);
+               printf("Available routines...\n");
+               for (i = 0; routines[i].name; i++) {
+                       printf("\t%s ... %s\n",
+                              routines[i].name, routines[i].desc);
+               }
+               return 1;
+       }
+
+       if (bench_format == BENCH_FORMAT_DEFAULT)
+               printf("# Setting %s Bytes ...\n\n", length_str);
+
+       if (!only_prefault && !no_prefault) {
+               /* show both of results */
+               if (use_clock) {
+                       result_clock[0] =
+                               do_memset_clock(routines[i].fn, len, false);
+                       result_clock[1] =
+                               do_memset_clock(routines[i].fn, len, true);
+               } else {
+                       result_bps[0] =
+                               do_memset_gettimeofday(routines[i].fn,
+                                               len, false);
+                       result_bps[1] =
+                               do_memset_gettimeofday(routines[i].fn,
+                                               len, true);
+               }
+       } else {
+               /* single run; pf selects the slot matching the chosen mode */
+               if (use_clock) {
+                       result_clock[pf] =
+                               do_memset_clock(routines[i].fn,
+                                               len, only_prefault);
+               } else {
+                       result_bps[pf] =
+                               do_memset_gettimeofday(routines[i].fn,
+                                               len, only_prefault);
+               }
+       }
+
+       switch (bench_format) {
+       case BENCH_FORMAT_DEFAULT:
+               if (!only_prefault && !no_prefault) {
+                       if (use_clock) {
+                               printf(" %14lf Clock/Byte\n",
+                                       (double)result_clock[0]
+                                       / (double)len);
+                               printf(" %14lf Clock/Byte (with prefault)\n",
+                                       (double)result_clock[1]
+                                       / (double)len);
+                       } else {
+                               print_bps(result_bps[0]);
+                               printf("\n");
+                               print_bps(result_bps[1]);
+                               printf(" (with prefault)\n");
+                       }
+               } else {
+                       if (use_clock) {
+                               printf(" %14lf Clock/Byte",
+                                       (double)result_clock[pf]
+                                       / (double)len);
+                       } else
+                               print_bps(result_bps[pf]);
+
+                       printf("%s\n", only_prefault ? " (with prefault)" : "");
+               }
+               break;
+       case BENCH_FORMAT_SIMPLE:
+               if (!only_prefault && !no_prefault) {
+                       if (use_clock) {
+                               printf("%lf %lf\n",
+                                       (double)result_clock[0] / (double)len,
+                                       (double)result_clock[1] / (double)len);
+                       } else {
+                               printf("%lf %lf\n",
+                                       result_bps[0], result_bps[1]);
+                       }
+               } else {
+                       if (use_clock) {
+                               printf("%lf\n", (double)result_clock[pf]
+                                       / (double)len);
+                       } else
+                               printf("%lf\n", result_bps[pf]);
+               }
+               break;
+       default:
+               /* reaching this means there's some disaster: */
+               die("unknown format: %d\n", bench_format);
+               break;
+       }
+
+       return 0;
+}
index fcb9626..b0e74ab 100644 (file)
@@ -52,6 +52,9 @@ static struct bench_suite mem_suites[] = {
        { "memcpy",
          "Simple memory copy in various ways",
          bench_mem_memcpy },
+       { "memset",
+         "Simple memory set in various ways",
+         bench_mem_memset },
        suite_all,
        { NULL,
          NULL,
index 59d43ab..fb85661 100644 (file)
@@ -20,7 +20,6 @@
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  *
  */
-#define _GNU_SOURCE
 #include <sys/utsname.h>
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -31,7 +30,6 @@
 #include <stdlib.h>
 #include <string.h>
 
-#undef _GNU_SOURCE
 #include "perf.h"
 #include "builtin.h"
 #include "util/util.h"
index 0abfb18..32870ee 100644 (file)
@@ -44,6 +44,7 @@ struct perf_record {
        struct perf_evlist      *evlist;
        struct perf_session     *session;
        const char              *progname;
+       const char              *uid_str;
        int                     output;
        unsigned int            page_size;
        int                     realtime_prio;
@@ -727,6 +728,7 @@ const struct option record_options[] = {
        OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
                     "monitor event in cgroup name only",
                     parse_cgroups),
+       OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"),
        OPT_END()
 };
 
@@ -748,7 +750,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
        argc = parse_options(argc, argv, record_options, record_usage,
                            PARSE_OPT_STOP_AT_NON_OPTION);
        if (!argc && rec->opts.target_pid == -1 && rec->opts.target_tid == -1 &&
-               !rec->opts.system_wide && !rec->opts.cpu_list)
+               !rec->opts.system_wide && !rec->opts.cpu_list && !rec->uid_str)
                usage_with_options(record_usage, record_options);
 
        if (rec->force && rec->append_file) {
@@ -788,11 +790,17 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
                goto out_symbol_exit;
        }
 
+       rec->opts.uid = parse_target_uid(rec->uid_str, rec->opts.target_tid,
+                                        rec->opts.target_pid);
+       if (rec->uid_str != NULL && rec->opts.uid == UINT_MAX - 1)
+               goto out_free_fd;
+
        if (rec->opts.target_pid != -1)
                rec->opts.target_tid = rec->opts.target_pid;
 
        if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid,
-                                    rec->opts.target_tid, rec->opts.cpu_list) < 0)
+                                    rec->opts.target_tid, rec->opts.uid,
+                                    rec->opts.cpu_list) < 0)
                usage_with_options(record_usage, record_options);
 
        list_for_each_entry(pos, &evsel_list->entries, node) {
index f5d2a63..459b862 100644 (file)
@@ -1201,7 +1201,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
        if (target_pid != -1)
                target_tid = target_pid;
 
-       evsel_list->threads = thread_map__new(target_pid, target_tid);
+       evsel_list->threads = thread_map__new(target_pid, target_tid, UINT_MAX);
        if (evsel_list->threads == NULL) {
                pr_err("Problems finding threads of monitor\n");
                usage_with_options(stat_usage, options);
index 3854e86..3ce709e 100644 (file)
@@ -276,7 +276,7 @@ static int test__open_syscall_event(void)
                return -1;
        }
 
-       threads = thread_map__new(-1, getpid());
+       threads = thread_map__new(-1, getpid(), UINT_MAX);
        if (threads == NULL) {
                pr_debug("thread_map__new\n");
                return -1;
@@ -342,7 +342,7 @@ static int test__open_syscall_event_on_all_cpus(void)
                return -1;
        }
 
-       threads = thread_map__new(-1, getpid());
+       threads = thread_map__new(-1, getpid(), UINT_MAX);
        if (threads == NULL) {
                pr_debug("thread_map__new\n");
                return -1;
@@ -490,7 +490,7 @@ static int test__basic_mmap(void)
                expected_nr_events[i] = random() % 257;
        }
 
-       threads = thread_map__new(-1, getpid());
+       threads = thread_map__new(-1, getpid(), UINT_MAX);
        if (threads == NULL) {
                pr_debug("thread_map__new\n");
                return -1;
@@ -1054,7 +1054,7 @@ static int test__PERF_RECORD(void)
         * we're monitoring, the one forked there.
         */
        err = perf_evlist__create_maps(evlist, opts.target_pid,
-                                      opts.target_tid, opts.cpu_list);
+                                      opts.target_tid, UINT_MAX, opts.cpu_list);
        if (err < 0) {
                pr_debug("Not enough memory to create thread/cpu maps\n");
                goto out_delete_evlist;
index 8f80df8..e8b033c 100644 (file)
@@ -64,7 +64,6 @@
 #include <linux/unistd.h>
 #include <linux/types.h>
 
-
 void get_term_dimensions(struct winsize *ws)
 {
        char *s = getenv("LINES");
@@ -537,10 +536,20 @@ static void perf_top__sort_new_samples(void *arg)
 
 static void *display_thread_tui(void *arg)
 {
+       struct perf_evsel *pos;
        struct perf_top *top = arg;
        const char *help = "For a higher level overview, try: perf top --sort comm,dso";
 
        perf_top__sort_new_samples(top);
+
+       /*
+        * Initialize the uid_filter_str. In the future the TUI will allow
+        * zooming in/out of UIDs; for now just use whatever the user passed
+        * via --uid.
+        */
+       list_for_each_entry(pos, &top->evlist->entries, node)
+               pos->hists.uid_filter_str = top->uid_str;
+
        perf_evlist__tui_browse_hists(top->evlist, help,
                                      perf_top__sort_new_samples,
                                      top, top->delay_secs);
@@ -949,7 +958,7 @@ static int __cmd_top(struct perf_top *top)
        if (ret)
                goto out_delete;
 
-       if (top->target_tid != -1)
+       if (top->target_tid != -1 || top->uid != UINT_MAX)
                perf_event__synthesize_thread_map(&top->tool, top->evlist->threads,
                                                  perf_event__process,
                                                  &top->session->host_machine);
@@ -1089,6 +1098,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
                .delay_secs          = 2,
                .target_pid          = -1,
                .target_tid          = -1,
+               .uid                 = UINT_MAX,
                .freq                = 1000, /* 1 KHz */
                .sample_id_all_avail = true,
                .mmap_pages          = 128,
@@ -1162,6 +1172,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
                    "Display raw encoding of assembly instructions (default)"),
        OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
                   "Specify disassembler style (e.g. -M intel for intel syntax)"),
+       OPT_STRING('u', "uid", &top.uid_str, "user", "user to profile"),
        OPT_END()
        };
 
@@ -1187,6 +1198,10 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
 
        setup_browser(false);
 
+       top.uid = parse_target_uid(top.uid_str, top.target_tid, top.target_pid);
+       if (top.uid_str != NULL && top.uid == UINT_MAX - 1)
+               goto out_delete_evlist;
+
        /* CPU and PID are mutually exclusive */
        if (top.target_tid > 0 && top.cpu_list) {
                printf("WARNING: PID switch overriding CPU\n");
@@ -1198,7 +1213,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
                top.target_tid = top.target_pid;
 
        if (perf_evlist__create_maps(top.evlist, top.target_pid,
-                                    top.target_tid, top.cpu_list) < 0)
+                                    top.target_tid, top.uid, top.cpu_list) < 0)
                usage_with_options(top_usage, options);
 
        if (!top.evlist->nr_entries &&
@@ -1262,6 +1277,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
 
        status = __cmd_top(&top);
 
+out_delete_evlist:
        perf_evlist__delete(top.evlist);
 
        return status;
index 64f8bee..92af168 100644 (file)
@@ -188,6 +188,7 @@ void pthread__unblock_sigwinch(void);
 struct perf_record_opts {
        pid_t        target_pid;
        pid_t        target_tid;
+       uid_t        uid;
        bool         call_graph;
        bool         group;
        bool         inherit_stat;
index 6893eec..adc72f0 100644 (file)
@@ -166,6 +166,17 @@ out:
        return cpus;
 }
 
+/*
+ * Print @map to @fp as "<nr> cpu(s): a, b, c" followed by a newline.
+ * Returns the sum of the fprintf() return values.
+ */
+size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp)
+{
+       const char *plural = map->nr > 1 ? "s" : "";
+       size_t printed = fprintf(fp, "%d cpu%s: ", map->nr, plural);
+       int idx = 0;
+
+       while (idx < map->nr) {
+               /* every entry but the first is preceded by a separator */
+               const char *sep = idx ? ", " : "";
+
+               printed += fprintf(fp, "%s%d", sep, map->map[idx]);
+               idx++;
+       }
+
+       printed += fprintf(fp, "\n");
+       return printed;
+}
+
 struct cpu_map *cpu_map__dummy_new(void)
 {
        struct cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int));
index 072c0a3..c415185 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef __PERF_CPUMAP_H
 #define __PERF_CPUMAP_H
 
+#include <stdio.h>
+
 struct cpu_map {
        int nr;
        int map[];
@@ -10,4 +12,6 @@ struct cpu_map *cpu_map__new(const char *cpu_list);
 struct cpu_map *cpu_map__dummy_new(void);
 void cpu_map__delete(struct cpu_map *map);
 
+size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp);
+
 #endif /* __PERF_CPUMAP_H */
index 3f16e08..a6d50e3 100644 (file)
@@ -594,14 +594,14 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
 }
 
 int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid,
-                            pid_t target_tid, const char *cpu_list)
+                            pid_t target_tid, uid_t uid, const char *cpu_list)
 {
-       evlist->threads = thread_map__new(target_pid, target_tid);
+       evlist->threads = thread_map__new(target_pid, target_tid, uid);
 
        if (evlist->threads == NULL)
                return -1;
 
-       if (cpu_list == NULL && target_tid != -1)
+       if (uid != UINT_MAX || (cpu_list == NULL && target_tid != -1))
                evlist->cpus = cpu_map__dummy_new();
        else
                evlist->cpus = cpu_map__new(cpu_list);
index 8922aee..9c51660 100644 (file)
@@ -107,7 +107,7 @@ static inline void perf_evlist__set_maps(struct perf_evlist *evlist,
 }
 
 int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid,
-                            pid_t target_tid, const char *cpu_list);
+                            pid_t tid, uid_t uid, const char *cpu_list);
 void perf_evlist__delete_maps(struct perf_evlist *evlist);
 int perf_evlist__set_filters(struct perf_evlist *evlist);
 
index 3e7e0b0..ecd7f4d 100644 (file)
@@ -2105,7 +2105,7 @@ int perf_event__synthesize_event_type(struct perf_tool *tool,
        strncpy(ev.event_type.event_type.name, name, MAX_EVENT_NAME - 1);
 
        ev.event_type.header.type = PERF_RECORD_HEADER_EVENT_TYPE;
-       size = strlen(name);
+       size = strlen(ev.event_type.event_type.name);
        size = ALIGN(size, sizeof(u64));
        ev.event_type.header.size = sizeof(ev.event_type) -
                (sizeof(ev.event_type.event_type.name) - size);
index f55f0a8..0d48613 100644 (file)
@@ -55,6 +55,7 @@ struct hists {
        u64                     nr_entries;
        const struct thread     *thread_filter;
        const struct dso        *dso_filter;
+       const char              *uid_filter_str;
        pthread_mutex_t         lock;
        struct events_stats     stats;
        u64                     event_stream;
index bb4198e..afe3819 100644 (file)
@@ -2,10 +2,12 @@
 #ifndef PERF_DWARF2_H
 #define PERF_DWARF2_H
 
-/* dwarf2.h ... dummy header file for including arch/x86/lib/memcpy_64.S */
+/* dwarf2.h ... dummy header file for including arch/x86/lib/mem{cpy,set}_64.S */
 
 #define CFI_STARTPROC
 #define CFI_ENDPROC
+#define CFI_REMEMBER_STATE
+#define CFI_RESTORE_STATE
 
 #endif /* PERF_DWARF2_H */
 
index eb25900..b9bbdd2 100644 (file)
@@ -19,7 +19,6 @@
  *
  */
 
-#define _GNU_SOURCE
 #include <sys/utsname.h>
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -33,7 +32,6 @@
 #include <limits.h>
 #include <elf.h>
 
-#undef _GNU_SOURCE
 #include "util.h"
 #include "event.h"
 #include "string.h"
@@ -1731,7 +1729,7 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
        }
 
        ret = 0;
-       printf("Add new event%s\n", (ntevs > 1) ? "s:" : ":");
+       printf("Added new event%s\n", (ntevs > 1) ? "s:" : ":");
        for (i = 0; i < ntevs; i++) {
                tev = &tevs[i];
                if (pev->event)
@@ -1786,7 +1784,7 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
 
        if (ret >= 0) {
                /* Show how to use the event. */
-               printf("\nYou can now use it on all perf tools, such as:\n\n");
+               printf("\nYou can now use it in all perf tools, such as:\n\n");
                printf("\tperf record -e %s:%s -aR sleep 1\n\n", tev->group,
                         tev->event);
        }
@@ -1961,7 +1959,7 @@ static int __del_trace_probe_event(int fd, struct str_node *ent)
                goto error;
        }
 
-       printf("Remove event: %s\n", ent->s);
+       printf("Removed event: %s\n", ent->s);
        return 0;
 error:
        pr_warning("Failed to delete event: %s\n", strerror(-ret));
index 9dd47a4..e03b58a 100644 (file)
@@ -425,14 +425,14 @@ struct pyrf_thread_map {
 static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads,
                                 PyObject *args, PyObject *kwargs)
 {
-       static char *kwlist[] = { "pid", "tid", NULL };
-       int pid = -1, tid = -1;
+       static char *kwlist[] = { "pid", "tid", "uid", NULL };
+       int pid = -1, tid = -1, uid = UINT_MAX;
 
-       if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii",
-                                        kwlist, &pid, &tid))
+       if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iii",
+                                        kwlist, &pid, &tid, &uid))
                return -1;
 
-       pthreads->threads = thread_map__new(pid, tid);
+       pthreads->threads = thread_map__new(pid, tid, uid);
        if (pthreads->threads == NULL)
                return -1;
        return 0;
index 215d50f..0975438 100644 (file)
@@ -1,4 +1,3 @@
-#define _GNU_SOURCE
 #include <ctype.h>
 #include <dirent.h>
 #include <errno.h>
index a5df131..3d4b6c5 100644 (file)
@@ -1,6 +1,11 @@
 #include <dirent.h>
+#include <limits.h>
+#include <stdbool.h>
 #include <stdlib.h>
 #include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
 #include "thread_map.h"
 
 /* Skip "." and ".." directories */
@@ -23,7 +28,7 @@ struct thread_map *thread_map__new_by_pid(pid_t pid)
        sprintf(name, "/proc/%d/task", pid);
        items = scandir(name, &namelist, filter, NULL);
        if (items <= 0)
-                return NULL;
+               return NULL;
 
        threads = malloc(sizeof(*threads) + sizeof(pid_t) * items);
        if (threads != NULL) {
@@ -51,10 +56,99 @@ struct thread_map *thread_map__new_by_tid(pid_t tid)
        return threads;
 }
 
-struct thread_map *thread_map__new(pid_t pid, pid_t tid)
+/*
+ * Build a thread map holding every thread of every process owned by @uid.
+ *
+ * Walks the numeric entries of /proc, keeps those whose directory is owned
+ * by @uid and appends the tids found under /proc/<pid>/task.
+ *
+ * Returns a map that the caller releases with thread_map__delete(); it has
+ * nr == 0 when no matching thread was found.  Returns NULL when memory runs
+ * out or /proc cannot be opened.
+ */
+struct thread_map *thread_map__new_by_uid(uid_t uid)
+{
+       DIR *proc;
+       int max_threads = 32, items, i;
+       char path[256];
+       struct dirent dirent, *next, **namelist = NULL;
+       struct thread_map *threads = malloc(sizeof(*threads) +
+                                           max_threads * sizeof(pid_t));
+       if (threads == NULL)
+               goto out;
+
+       proc = opendir("/proc");
+       if (proc == NULL)
+               goto out_free_threads;
+
+       threads->nr = 0;
+
+       while (!readdir_r(proc, &dirent, &next) && next) {
+               char *end;
+               bool grow = false;
+               struct stat st;
+               pid_t pid = strtol(dirent.d_name, &end, 10);
+
+               if (*end) /* only interested in proper numerical dirents */
+                       continue;
+
+               snprintf(path, sizeof(path), "/proc/%s", dirent.d_name);
+
+               if (stat(path, &st) != 0)
+                       continue;
+
+               if (st.st_uid != uid)
+                       continue;
+
+               snprintf(path, sizeof(path), "/proc/%d/task", pid);
+               /* Reset so the skip path below never frees a stale pointer */
+               namelist = NULL;
+               items = scandir(path, &namelist, filter, NULL);
+               if (items <= 0) {
+                       /*
+                        * The process may have exited since the stat() above:
+                        * skip it instead of throwing the whole map away.
+                        */
+                       free(namelist);
+                       continue;
+               }
+
+               /* Double the capacity until the new tids fit */
+               while (threads->nr + items >= max_threads) {
+                       max_threads *= 2;
+                       grow = true;
+               }
+
+               if (grow) {
+                       struct thread_map *tmp;
+
+                       tmp = realloc(threads, (sizeof(*threads) +
+                                               max_threads * sizeof(pid_t)));
+                       if (tmp == NULL)
+                               goto out_free_namelist;
+
+                       threads = tmp;
+               }
+
+               for (i = 0; i < items; i++)
+                       threads->map[threads->nr + i] = atoi(namelist[i]->d_name);
+
+               for (i = 0; i < items; i++)
+                       free(namelist[i]);
+               free(namelist);
+
+               threads->nr += items;
+       }
+
+out_closedir:
+       closedir(proc);
+out:
+       return threads;
+
+out_free_threads:
+       free(threads);
+       return NULL;
+
+out_free_namelist:
+       /* realloc() failed: drop the pending scandir() result and the map */
+       for (i = 0; i < items; i++)
+               free(namelist[i]);
+       free(namelist);
+       free(threads);
+       threads = NULL;
+       goto out_closedir;
+}
+
+struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid)
 {
        if (pid != -1)
                return thread_map__new_by_pid(pid);
+
+       if (tid == -1 && uid != UINT_MAX)
+               return thread_map__new_by_uid(uid);
+
        return thread_map__new_by_tid(tid);
 }
 
@@ -62,3 +156,14 @@ void thread_map__delete(struct thread_map *threads)
 {
        free(threads);
 }
+
+/*
+ * Print "<nr> thread(s): tid, tid, ...\n" for @threads on @fp.
+ *
+ * Returns the sum of the values returned by the fprintf() calls.
+ */
+size_t thread_map__fprintf(struct thread_map *threads, FILE *fp)
+{
+       const char *plural = threads->nr > 1 ? "s" : "";
+       size_t nr_chars = fprintf(fp, "%d thread%s: ", threads->nr, plural);
+       int idx = 0;
+
+       while (idx < threads->nr) {
+               /* every entry but the first is preceded by a separator */
+               const char *sep = idx ? ", " : "";
+
+               nr_chars += fprintf(fp, "%s%d", sep, threads->map[idx]);
+               idx++;
+       }
+
+       return nr_chars + fprintf(fp, "\n");
+}
index 3cb9073..c75ddba 100644 (file)
@@ -2,6 +2,7 @@
 #define __PERF_THREAD_MAP_H
 
 #include <sys/types.h>
+#include <stdio.h>
 
 struct thread_map {
        int nr;
@@ -10,6 +11,10 @@ struct thread_map {
 
 struct thread_map *thread_map__new_by_pid(pid_t pid);
 struct thread_map *thread_map__new_by_tid(pid_t tid);
-struct thread_map *thread_map__new(pid_t pid, pid_t tid);
+struct thread_map *thread_map__new_by_uid(uid_t uid);
+struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid);
 void thread_map__delete(struct thread_map *threads);
+
+size_t thread_map__fprintf(struct thread_map *threads, FILE *fp);
+
 #endif /* __PERF_THREAD_MAP_H */
index 500471d..e4370ca 100644 (file)
@@ -75,6 +75,9 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
        else if (top->target_tid != -1)
                ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %d",
                                top->target_tid);
+       else if (top->uid_str != NULL)
+               ret += SNPRINTF(bf + ret, size - ret, " (uid: %s",
+                               top->uid_str);
        else
                ret += SNPRINTF(bf + ret, size - ret, " (all");
 
index a248f3c..def3e53 100644 (file)
@@ -24,6 +24,7 @@ struct perf_top {
        int                print_entries, count_filter, delay_secs;
        int                freq;
        pid_t              target_pid, target_tid;
+       uid_t              uid;
        bool               hide_kernel_symbols, hide_user_symbols, zero;
        bool               system_wide;
        bool               use_tui, use_stdio;
@@ -45,6 +46,7 @@ struct perf_top {
        int                realtime_prio;
        int                sym_pcnt_filter;
        const char         *sym_filter;
+       const char         *uid_str;
 };
 
 size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size);
index 6c164dc..1a8d4dc 100644 (file)
  *  The parts for function graph printing was taken and modified from the
  *  Linux Kernel that were written by Frederic Weisbecker.
  */
-#define _GNU_SOURCE
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <ctype.h>
 #include <errno.h>
 
-#undef _GNU_SOURCE
 #include "../perf.h"
 #include "util.h"
 #include "trace-event.h"
index 1212a38..bfba049 100644 (file)
@@ -1,6 +1,4 @@
-#define _GNU_SOURCE
 #include <stdio.h>
-#undef _GNU_SOURCE
 #include "../libslang.h"
 #include <stdlib.h>
 #include <string.h>
@@ -841,6 +839,9 @@ static int hists__browser_title(struct hists *self, char *bf, size_t size,
        nr_events = convert_unit(nr_events, &unit);
        printed = snprintf(bf, size, "Events: %lu%c %s", nr_events, unit, ev_name);
 
+       if (self->uid_filter_str)
+               printed += snprintf(bf + printed, size - printed,
+                                   ", UID: %s", self->uid_filter_str);
        if (thread)
                printed += snprintf(bf + printed, size - printed,
                                    ", Thread: %s(%d)",
index 6ef3c56..4f48f59 100644 (file)
@@ -1,4 +1,3 @@
-#define _GNU_SOURCE
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
index d76d1c0..d0c0139 100644 (file)
@@ -7,6 +7,7 @@
  * Copyright (C) Linus Torvalds, 2005
  */
 #include "util.h"
+#include "debug.h"
 
 static void report(const char *prefix, const char *err, va_list params)
 {
@@ -81,3 +82,41 @@ void warning(const char *warn, ...)
        warn_routine(warn, params);
        va_end(params);
 }
+
+/*
+ * Resolve a user given by name or by decimal number to a uid_t.
+ *
+ * @str: user name or decimal uid, NULL when no user was requested
+ * @tid: thread target, overrides @str when > 0
+ * @pid: process target, overrides @str when > 0
+ *
+ * Returns UINT_MAX when no uid applies (@str is NULL, or a pid/tid target
+ * was given), UINT_MAX - 1 after reporting an error for a malformed or
+ * unknown user, and the pw_uid of the matching passwd entry otherwise.
+ */
+uid_t parse_target_uid(const char *str, pid_t tid, pid_t pid)
+{
+       struct passwd pwd, *result;
+       char buf[1024];
+
+       if (str == NULL)
+               return UINT_MAX;
+
+       /* UID and PID/TID are mutually exclusive, PID/TID takes precedence */
+       if (tid > 0 || pid > 0) {
+               ui__warning("PID/TID switch overriding UID\n");
+               sleep(1);
+               return UINT_MAX;
+       }
+
+       /* Try @str as a user name first; result stays NULL when not found */
+       getpwnam_r(str, &pwd, buf, sizeof(buf), &result);
+
+       if (result == NULL) {
+               char *endptr;
+               long uid = strtol(str, &endptr, 10);
+
+               /*
+                * endptr == str means no digit was consumed, so "" must not
+                * silently become uid 0.  Negative values and values that
+                * collide with the UINT_MAX / UINT_MAX - 1 sentinels (or
+                * would wrap when narrowed to uid_t) are rejected as well.
+                */
+               if (endptr == str || *endptr != '\0' || uid < 0 ||
+                   (unsigned long)uid >= UINT_MAX - 1) {
+                       ui__error("Invalid user %s\n", str);
+                       return UINT_MAX - 1;
+               }
+
+               getpwuid_r((uid_t)uid, &pwd, buf, sizeof(buf), &result);
+
+               if (result == NULL) {
+                       ui__error("Problems obtaining information for user %s\n",
+                                 str);
+                       return UINT_MAX - 1;
+               }
+       }
+
+       return result->pw_uid;
+}
index b9c530c..232d17e 100644 (file)
@@ -40,7 +40,6 @@
 #define decimal_length(x)      ((int)(sizeof(x) * 2.56 + 0.5) + 1)
 
 #define _ALL_SOURCE 1
-#define _GNU_SOURCE 1
 #define _BSD_SOURCE 1
 #define HAS_BOOL
 
@@ -246,6 +245,8 @@ struct perf_event_attr;
 
 void event_attr_init(struct perf_event_attr *attr);
 
+uid_t parse_target_uid(const char *str, pid_t tid, pid_t pid);
+
 #define _STR(x) #x
 #define STR(x) _STR(x)