tools/perf/util/evsel.c

   1 /*
   2  * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
   3  *
   4  * Parts came from builtin-{top,stat,record}.c, see those files for further
   5  * copyright notes.
   6  *
   7  * Released under the GPL v2. (and only v2, not any later version)
   8  */
   9
  10 #include <byteswap.h>
  11 #include "asm/bug.h"
  12 #include "evsel.h"
  13 #include "evlist.h"
  14 #include "util.h"
  15 #include "cpumap.h"
  16 #include "thread_map.h"
  17
  18 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
  19
  20 int __perf_evsel__sample_size(u64 sample_type)
  21 {
  22         u64 mask = sample_type & PERF_SAMPLE_MASK;
  23         int size = 0;
  24         int i;
  25
  26         for (i = 0; i < 64; i++) {
  27                 if (mask & (1ULL << i))
  28                         size++;
  29         }
  30
  31         size *= sizeof(u64);
  32
  33         return size;
  34 }
  35
  36 void perf_evsel__init(struct perf_evsel *evsel,
  37                       struct perf_event_attr *attr, int idx)
  38 {
  39         evsel->idx         = idx;
  40         evsel->attr        = *attr;
  41         INIT_LIST_HEAD(&evsel->node);
  42 }
  43
  44 struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx)
  45 {
  46         struct perf_evsel *evsel = zalloc(sizeof(*evsel));
  47
  48         if (evsel != NULL)
  49                 perf_evsel__init(evsel, attr, idx);
  50
  51         return evsel;
  52 }
  53
  54 int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
  55 {
  56         int cpu, thread;
  57         evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));
  58
  59         if (evsel->fd) {
  60                 for (cpu = 0; cpu < ncpus; cpu++) {
  61                         for (thread = 0; thread < nthreads; thread++) {
  62                                 FD(evsel, cpu, thread) = -1;
  63                         }
  64                 }
  65         }
  66
  67         return evsel->fd != NULL ? 0 : -ENOMEM;
  68 }
  69
  70 int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
  71 {
  72         evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
  73         if (evsel->sample_id == NULL)
  74                 return -ENOMEM;
  75
  76         evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
  77         if (evsel->id == NULL) {
  78                 xyarray__delete(evsel->sample_id);
  79                 evsel->sample_id = NULL;
  80                 return -ENOMEM;
  81         }
  82
  83         return 0;
  84 }
  85
  86 int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
  87 {
  88         evsel->counts = zalloc((sizeof(*evsel->counts) +
  89                                 (ncpus * sizeof(struct perf_counts_values))));
  90         return evsel->counts != NULL ? 0 : -ENOMEM;
  91 }
  92
/*
 * Release the file descriptor table and reset the pointer to NULL.
 * Only the table is deleted here; this function does not call close() on
 * the descriptors stored in it.
 */
void perf_evsel__free_fd(struct perf_evsel *evsel)
{
        xyarray__delete(evsel->fd);
        evsel->fd = NULL;
}
  98
  99 void perf_evsel__free_id(struct perf_evsel *evsel)
 100 {
 101         xyarray__delete(evsel->sample_id);
 102         evsel->sample_id = NULL;
 103         free(evsel->id);
 104         evsel->id = NULL;
 105 }
 106
 107 void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
 108 {
 109         int cpu, thread;
 110
 111         for (cpu = 0; cpu < ncpus; cpu++)
 112                 for (thread = 0; thread < nthreads; ++thread) {
 113                         close(FD(evsel, cpu, thread));
 114                         FD(evsel, cpu, thread) = -1;
 115                 }
 116 }
 117
 118 void perf_evsel__exit(struct perf_evsel *evsel)
 119 {
 120         assert(list_empty(&evsel->node));
 121         xyarray__delete(evsel->fd);
 122         xyarray__delete(evsel->sample_id);
 123         free(evsel->id);
 124 }
 125
 126 void perf_evsel__delete(struct perf_evsel *evsel)
 127 {
 128         perf_evsel__exit(evsel);
 129         close_cgroup(evsel->cgrp);
 130         free(evsel->name);
 131         free(evsel);
 132 }
 133
 134 int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
 135                               int cpu, int thread, bool scale)
 136 {
 137         struct perf_counts_values count;
 138         size_t nv = scale ? 3 : 1;
 139
 140         if (FD(evsel, cpu, thread) < 0)
 141                 return -EINVAL;
 142
 143         if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1) < 0)
 144                 return -ENOMEM;
 145
 146         if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0)
 147                 return -errno;
 148
 149         if (scale) {
 150                 if (count.run == 0)
 151                         count.val = 0;
 152                 else if (count.run < count.ena)
 153                         count.val = (u64)((double)count.val * count.ena / count.run + 0.5);
 154         } else
 155                 count.ena = count.run = 0;
 156
 157         evsel->counts->cpu[cpu] = count;
 158         return 0;
 159 }
 160
 161 int __perf_evsel__read(struct perf_evsel *evsel,
 162                        int ncpus, int nthreads, bool scale)
 163 {
 164         size_t nv = scale ? 3 : 1;
 165         int cpu, thread;
 166         struct perf_counts_values *aggr = &evsel->counts->aggr, count;
 167
 168         aggr->val = aggr->ena = aggr->run = 0;
 169
 170         for (cpu = 0; cpu < ncpus; cpu++) {
 171                 for (thread = 0; thread < nthreads; thread++) {
 172                         if (FD(evsel, cpu, thread) < 0)
 173                                 continue;
 174
 175                         if (readn(FD(evsel, cpu, thread),
 176                                   &count, nv * sizeof(u64)) < 0)
 177                                 return -errno;
 178
 179                         aggr->val += count.val;
 180                         if (scale) {
 181                                 aggr->ena += count.ena;
 182                                 aggr->run += count.run;
 183                         }
 184                 }
 185         }
 186
 187         evsel->counts->scaled = 0;
 188         if (scale) {
 189                 if (aggr->run == 0) {
 190                         evsel->counts->scaled = -1;
 191                         aggr->val = 0;
 192                         return 0;
 193                 }
 194
 195                 if (aggr->run < aggr->ena) {
 196                         evsel->counts->scaled = 1;
 197                         aggr->val = (u64)((double)aggr->val * aggr->ena / aggr->run + 0.5);
 198                 }
 199         } else
 200                 aggr->ena = aggr->run = 0;
 201
 202         return 0;
 203 }
 204
 205 static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
 206                               struct thread_map *threads, bool group)
 207 {
 208         int cpu, thread;
 209         unsigned long flags = 0;
 210         int pid = -1;
 211
 212         if (evsel->fd == NULL &&
 213             perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
 214                 return -1;
 215
 216         if (evsel->cgrp) {
 217                 flags = PERF_FLAG_PID_CGROUP;
 218                 pid = evsel->cgrp->fd;
 219         }
 220
 221         for (cpu = 0; cpu < cpus->nr; cpu++) {
 222                 int group_fd = -1;
 223
 224                 for (thread = 0; thread < threads->nr; thread++) {
 225
 226                         if (!evsel->cgrp)
 227                                 pid = threads->map[thread];
 228
 229                         FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
 230                                                                      pid,
 231                                                                      cpus->map[cpu],
 232                                                                      group_fd, flags);
 233                         if (FD(evsel, cpu, thread) < 0)
 234                                 goto out_close;
 235
 236                         if (group && group_fd == -1)
 237                                 group_fd = FD(evsel, cpu, thread);
 238                 }
 239         }
 240
 241         return 0;
 242
 243 out_close:
 244         do {
 245                 while (--thread >= 0) {
 246                         close(FD(evsel, cpu, thread));
 247                         FD(evsel, cpu, thread) = -1;
 248                 }
 249                 thread = threads->nr;
 250         } while (--cpu >= 0);
 251         return -1;
 252 }
 253
/*
 * Statically allocated cpu map with exactly one entry, -1.  It is used by
 * perf_evsel__open() when the caller passes no cpu map and by
 * perf_evsel__open_per_thread().
 *
 * NOTE(review): `cpus` presumably provides the storage for the array that
 * struct cpu_map ends with, so that map.map[0] reads -1 - confirm against
 * the struct cpu_map definition.
 */
static struct {
        struct cpu_map map;
        int cpus[1];
} empty_cpu_map = {
        .map.nr = 1,
        .cpus   = { -1, },
};
 261
/*
 * Statically allocated thread map with exactly one entry, -1.  It is used
 * by perf_evsel__open() when the caller passes no thread map and by
 * perf_evsel__open_per_cpu().
 *
 * NOTE(review): `threads` presumably provides the storage for the array
 * that struct thread_map ends with, so that map.map[0] reads -1 - confirm
 * against the struct thread_map definition.
 */
static struct {
        struct thread_map map;
        int threads[1];
} empty_thread_map = {
        .map.nr  = 1,
        .threads = { -1, },
};
 269
 270 int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
 271                      struct thread_map *threads, bool group)
 272 {
 273         if (cpus == NULL) {
 274                 /* Work around old compiler warnings about strict aliasing */
 275                 cpus = &empty_cpu_map.map;
 276         }
 277
 278         if (threads == NULL)
 279                 threads = &empty_thread_map.map;
 280
 281         return __perf_evsel__open(evsel, cpus, threads, group);
 282 }
 283
 284 int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
 285                              struct cpu_map *cpus, bool group)
 286 {
 287         return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group);
 288 }
 289
 290 int perf_evsel__open_per_thread(struct perf_evsel *evsel,
 291                                 struct thread_map *threads, bool group)
 292 {
 293         return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group);
 294 }
 295
 296 static int perf_event__parse_id_sample(const union perf_event *event, u64 type,
 297                                        struct perf_sample *sample)
 298 {
 299         const u64 *array = event->sample.array;
 300
 301         array += ((event->header.size -
 302                    sizeof(event->header)) / sizeof(u64)) - 1;
 303
 304         if (type & PERF_SAMPLE_CPU) {
 305                 u32 *p = (u32 *)array;
 306                 sample->cpu = *p;
 307                 array--;
 308         }
 309
 310         if (type & PERF_SAMPLE_STREAM_ID) {
 311                 sample->stream_id = *array;
 312                 array--;
 313         }
 314
 315         if (type & PERF_SAMPLE_ID) {
 316                 sample->id = *array;
 317                 array--;
 318         }
 319
 320         if (type & PERF_SAMPLE_TIME) {
 321                 sample->time = *array;
 322                 array--;
 323         }
 324
 325         if (type & PERF_SAMPLE_TID) {
 326                 u32 *p = (u32 *)array;
 327                 sample->pid = p[0];
 328                 sample->tid = p[1];
 329         }
 330
 331         return 0;
 332 }
 333
 334 static bool sample_overlap(const union perf_event *event,
 335                            const void *offset, u64 size)
 336 {
 337         const void *base = event;
 338
 339         if (offset + size > base + event->header.size)
 340                 return true;
 341
 342         return false;
 343 }
 344
 345 int perf_event__parse_sample(const union perf_event *event, u64 type,
 346                              int sample_size, bool sample_id_all,
 347                              struct perf_sample *data, bool swapped)
 348 {
 349         const u64 *array;
 350
 351         /*
 352          * used for cross-endian analysis. See git commit 65014ab3
 353          * for why this goofiness is needed.
 354          */
 355         union {
 356                 u64 val64;
 357                 u32 val32[2];
 358         } u;
 359
 360
 361         data->cpu = data->pid = data->tid = -1;
 362         data->stream_id = data->id = data->time = -1ULL;
 363
 364         if (event->header.type != PERF_RECORD_SAMPLE) {
 365                 if (!sample_id_all)
 366                         return 0;
 367                 return perf_event__parse_id_sample(event, type, data);
 368         }
 369
 370         array = event->sample.array;
 371
 372         if (sample_size + sizeof(event->header) > event->header.size)
 373                 return -EFAULT;
 374
 375         if (type & PERF_SAMPLE_IP) {
 376                 data->ip = event->ip.ip;
 377                 array++;
 378         }
 379
 380         if (type & PERF_SAMPLE_TID) {
 381                 u.val64 = *array;
 382                 if (swapped) {
 383                         /* undo swap of u64, then swap on individual u32s */
 384                         u.val64 = bswap_64(u.val64);
 385                         u.val32[0] = bswap_32(u.val32[0]);
 386                         u.val32[1] = bswap_32(u.val32[1]);
 387                 }
 388
 389                 data->pid = u.val32[0];
 390                 data->tid = u.val32[1];
 391                 array++;
 392         }
 393
 394         if (type & PERF_SAMPLE_TIME) {
 395                 data->time = *array;
 396                 array++;
 397         }
 398
 399         data->addr = 0;
 400         if (type & PERF_SAMPLE_ADDR) {
 401                 data->addr = *array;
 402                 array++;
 403         }
 404
 405         data->id = -1ULL;
 406         if (type & PERF_SAMPLE_ID) {
 407                 data->id = *array;
 408                 array++;
 409         }
 410
 411         if (type & PERF_SAMPLE_STREAM_ID) {
 412                 data->stream_id = *array;
 413                 array++;
 414         }
 415
 416         if (type & PERF_SAMPLE_CPU) {
 417
 418                 u.val64 = *array;
 419                 if (swapped) {
 420                         /* undo swap of u64, then swap on individual u32s */
 421                         u.val64 = bswap_64(u.val64);
 422                         u.val32[0] = bswap_32(u.val32[0]);
 423                 }
 424
 425                 data->cpu = u.val32[0];
 426                 array++;
 427         }
 428
 429         if (type & PERF_SAMPLE_PERIOD) {
 430                 data->period = *array;
 431                 array++;
 432         }
 433
 434         if (type & PERF_SAMPLE_READ) {
 435                 fprintf(stderr, "PERF_SAMPLE_READ is unsuported for now\n");
 436                 return -1;
 437         }
 438
 439         if (type & PERF_SAMPLE_CALLCHAIN) {
 440                 if (sample_overlap(event, array, sizeof(data->callchain->nr)))
 441                         return -EFAULT;
 442
 443                 data->callchain = (struct ip_callchain *)array;
 444
 445                 if (sample_overlap(event, array, data->callchain->nr))
 446                         return -EFAULT;
 447
 448                 array += 1 + data->callchain->nr;
 449         }
 450
 451         if (type & PERF_SAMPLE_RAW) {
 452                 const u64 *pdata;
 453
 454                 u.val64 = *array;
 455                 if (WARN_ONCE(swapped,
 456                               "Endianness of raw data not corrected!\n")) {
 457                         /* undo swap of u64, then swap on individual u32s */
 458                         u.val64 = bswap_64(u.val64);
 459                         u.val32[0] = bswap_32(u.val32[0]);
 460                         u.val32[1] = bswap_32(u.val32[1]);
 461                 }
 462
 463                 if (sample_overlap(event, array, sizeof(u32)))
 464                         return -EFAULT;
 465
 466                 data->raw_size = u.val32[0];
 467                 pdata = (void *) array + sizeof(u32);
 468
 469                 if (sample_overlap(event, pdata, data->raw_size))
 470                         return -EFAULT;
 471
 472                 data->raw_data = (void *) pdata;
 473         }
 474
 475         return 0;
 476 }