[pandora-kernel.git] arch/x86/kernel/cpu/perf_event.c
1 /*
2  * Performance events x86 architecture code
3  *
4  *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
5  *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
6  *  Copyright (C) 2009 Jaswinder Singh Rajput
7  *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
8  *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
9  *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
10  *  Copyright (C) 2009 Google, Inc., Stephane Eranian
11  *
12  *  For licencing details see kernel-base/COPYING
13  */
14
15 #include <linux/perf_event.h>
16 #include <linux/capability.h>
17 #include <linux/notifier.h>
18 #include <linux/hardirq.h>
19 #include <linux/kprobes.h>
20 #include <linux/module.h>
21 #include <linux/kdebug.h>
22 #include <linux/sched.h>
23 #include <linux/uaccess.h>
24 #include <linux/slab.h>
25 #include <linux/cpu.h>
26 #include <linux/bitops.h>
27
28 #include <asm/apic.h>
29 #include <asm/stacktrace.h>
30 #include <asm/nmi.h>
31 #include <asm/compat.h>
32 #include <asm/smp.h>
33 #include <asm/alternative.h>
34
35 #include "perf_event.h"
36
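/*
 * Debugging aid: flipping the "#if 0" below to "#if 1" replaces wrmsrl()
 * with a variant that also emits a trace_printk() for every MSR write
 * done by the PMU code.
 */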
37 #if 0
38 #undef wrmsrl
39 #define wrmsrl(msr, val)                                        \
40 do {                                                            \
41         trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\
42                         (unsigned long)(val));                  \
43         native_write_msr((msr), (u32)((u64)(val)),              \
44                         (u32)((u64)(val) >> 32));               \
45 } while (0)
46 #endif
47
48 struct x86_pmu x86_pmu __read_mostly;
49
50 DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
51         .enabled = 1,
52 };
53
54 u64 __read_mostly hw_cache_event_ids
55                                 [PERF_COUNT_HW_CACHE_MAX]
56                                 [PERF_COUNT_HW_CACHE_OP_MAX]
57                                 [PERF_COUNT_HW_CACHE_RESULT_MAX];
58 u64 __read_mostly hw_cache_extra_regs
59                                 [PERF_COUNT_HW_CACHE_MAX]
60                                 [PERF_COUNT_HW_CACHE_OP_MAX]
61                                 [PERF_COUNT_HW_CACHE_RESULT_MAX];
62
63 /*
64  * Propagate event elapsed time into the generic event.
65  * Can only be executed on the CPU where the event is active.
66  * Returns the new raw count value (the computed delta is added to the event).
67  */
68 u64 x86_perf_event_update(struct perf_event *event)
69 {
70         struct hw_perf_event *hwc = &event->hw;
71         int shift = 64 - x86_pmu.cntval_bits;
72         u64 prev_raw_count, new_raw_count;
73         int idx = hwc->idx;
74         s64 delta;
75
76         if (idx == X86_PMC_IDX_FIXED_BTS)
77                 return 0;
78
79         /*
80          * Careful: an NMI might modify the previous event value.
81          *
82          * Our tactic to handle this is to first atomically read and
83          * exchange a new raw count - then add that new-prev delta
84          * count to the generic event atomically:
85          */
86 again:
87         prev_raw_count = local64_read(&hwc->prev_count);
88         rdmsrl(hwc->event_base, new_raw_count);
89
90         if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
91                                         new_raw_count) != prev_raw_count)
92                 goto again;
93
94         /*
95          * Now we have the new raw value and have updated the prev
96          * timestamp already. We can now calculate the elapsed delta
97          * (event-)time and add that to the generic event.
98          *
99          * Careful, not all hw sign-extends above the physical width
100          * of the count.
101          */
102         delta = (new_raw_count << shift) - (prev_raw_count << shift);
103         delta >>= shift;
104
105         local64_add(delta, &event->count);
106         local64_sub(delta, &hwc->period_left);
107
108         return new_raw_count;
109 }
110
111 /*
112  * Find and validate any extra registers to set up.
113  */
114 static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
115 {
116         struct hw_perf_event_extra *reg;
117         struct extra_reg *er;
118
119         reg = &event->hw.extra_reg;
120
121         if (!x86_pmu.extra_regs)
122                 return 0;
123
124         for (er = x86_pmu.extra_regs; er->msr; er++) {
125                 if (er->event != (config & er->config_mask))
126                         continue;
127                 if (event->attr.config1 & ~er->valid_mask)
128                         return -EINVAL;
129
130                 reg->idx = er->idx;
131                 reg->config = event->attr.config1;
132                 reg->reg = er->msr;
133                 break;
134         }
135         return 0;
136 }
137
138 static atomic_t active_events;
139 static DEFINE_MUTEX(pmc_reserve_mutex);
140
141 #ifdef CONFIG_X86_LOCAL_APIC
142
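/*
 * Reserve the counter and event-select MSRs through
 * reserve_perfctr_nmi()/reserve_evntsel_nmi(); on failure, release
 * whatever was already reserved and return false.
 */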
143 static bool reserve_pmc_hardware(void)
144 {
145         int i;
146
147         for (i = 0; i < x86_pmu.num_counters; i++) {
148                 if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
149                         goto perfctr_fail;
150         }
151
152         for (i = 0; i < x86_pmu.num_counters; i++) {
153                 if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
154                         goto eventsel_fail;
155         }
156
157         return true;
158
159 eventsel_fail:
160         for (i--; i >= 0; i--)
161                 release_evntsel_nmi(x86_pmu_config_addr(i));
162
163         i = x86_pmu.num_counters;
164
165 perfctr_fail:
166         for (i--; i >= 0; i--)
167                 release_perfctr_nmi(x86_pmu_event_addr(i));
168
169         return false;
170 }
171
172 static void release_pmc_hardware(void)
173 {
174         int i;
175
176         for (i = 0; i < x86_pmu.num_counters; i++) {
177                 release_perfctr_nmi(x86_pmu_event_addr(i));
178                 release_evntsel_nmi(x86_pmu_config_addr(i));
179         }
180 }
181
182 #else
183
184 static bool reserve_pmc_hardware(void) { return true; }
185 static void release_pmc_hardware(void) {}
186
187 #endif
188
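/*
 * Sanity-check that the PMU is usable: complain (but continue) if the
 * BIOS has left counters enabled, and fall back to software events if a
 * test write/read of counter 0 does not stick (e.g. under emulators that
 * silently ignore the MSRs).
 */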
189 static bool check_hw_exists(void)
190 {
191         u64 val, val_new = 0;
192         int i, reg, ret = 0;
193
194         /*
195          * Check to see if the BIOS enabled any of the counters; if so,
196          * complain and bail.
197          */
198         for (i = 0; i < x86_pmu.num_counters; i++) {
199                 reg = x86_pmu_config_addr(i);
200                 ret = rdmsrl_safe(reg, &val);
201                 if (ret)
202                         goto msr_fail;
203                 if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
204                         goto bios_fail;
205         }
206
207         if (x86_pmu.num_counters_fixed) {
208                 reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
209                 ret = rdmsrl_safe(reg, &val);
210                 if (ret)
211                         goto msr_fail;
212                 for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
213                         if (val & (0x03 << i*4))
214                                 goto bios_fail;
215                 }
216         }
217
218         /*
219          * Now write a value and read it back to see if it matches;
220          * this is needed to detect certain hardware emulators (qemu/kvm)
221          * that don't trap on the MSR access and always return 0s.
222          */
223         val = 0xabcdUL;
224         ret = checking_wrmsrl(x86_pmu_event_addr(0), val);
225         ret |= rdmsrl_safe(x86_pmu_event_addr(0), &val_new);
226         if (ret || val != val_new)
227                 goto msr_fail;
228
229         return true;
230
231 bios_fail:
232         /*
233          * We still allow the PMU driver to operate:
234          */
235         printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n");
236         printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val);
237
238         return true;
239
240 msr_fail:
241         printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
242
243         return false;
244 }
245
246 static void hw_perf_event_destroy(struct perf_event *event)
247 {
248         if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
249                 release_pmc_hardware();
250                 release_ds_buffers();
251                 mutex_unlock(&pmc_reserve_mutex);
252         }
253 }
254
255 static inline int x86_pmu_initialized(void)
256 {
257         return x86_pmu.handle_irq != NULL;
258 }
259
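/*
 * Decode a PERF_TYPE_HW_CACHE config (cache type, op and result packed
 * into one byte each) into the raw event code from hw_cache_event_ids[]
 * and the matching extra-register value from hw_cache_extra_regs[].
 */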
260 static inline int
261 set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
262 {
263         struct perf_event_attr *attr = &event->attr;
264         unsigned int cache_type, cache_op, cache_result;
265         u64 config, val;
266
267         config = attr->config;
268
269         cache_type = (config >>  0) & 0xff;
270         if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
271                 return -EINVAL;
272
273         cache_op = (config >>  8) & 0xff;
274         if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
275                 return -EINVAL;
276
277         cache_result = (config >> 16) & 0xff;
278         if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
279                 return -EINVAL;
280
281         val = hw_cache_event_ids[cache_type][cache_op][cache_result];
282
283         if (val == 0)
284                 return -ENOENT;
285
286         if (val == -1)
287                 return -EINVAL;
288
289         hwc->config |= val;
290         attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result];
291         return x86_pmu_extra_regs(val, event);
292 }
293
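/*
 * Set up the sample period and translate the generic attribute into an
 * event code: RAW events get their extra registers validated, HW_CACHE
 * events are decoded via set_ext_hw_attr(), generic hardware events go
 * through x86_pmu.event_map(), and BTS-style branch tracing is given
 * special treatment.
 */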
294 int x86_setup_perfctr(struct perf_event *event)
295 {
296         struct perf_event_attr *attr = &event->attr;
297         struct hw_perf_event *hwc = &event->hw;
298         u64 config;
299
300         if (!is_sampling_event(event)) {
301                 hwc->sample_period = x86_pmu.max_period;
302                 hwc->last_period = hwc->sample_period;
303                 local64_set(&hwc->period_left, hwc->sample_period);
304         } else {
305                 /*
306                  * If we have a PMU initialized but no APIC
307                  * interrupts, we cannot sample hardware
308                  * events (user-space has to fall back and
309                  * sample via a hrtimer based software event):
310                  */
311                 if (!x86_pmu.apic)
312                         return -EOPNOTSUPP;
313         }
314
315         if (attr->type == PERF_TYPE_RAW)
316                 return x86_pmu_extra_regs(event->attr.config, event);
317
318         if (attr->type == PERF_TYPE_HW_CACHE)
319                 return set_ext_hw_attr(hwc, event);
320
321         if (attr->config >= x86_pmu.max_events)
322                 return -EINVAL;
323
324         /*
325          * The generic map:
326          */
327         config = x86_pmu.event_map(attr->config);
328
329         if (config == 0)
330                 return -ENOENT;
331
332         if (config == -1LL)
333                 return -EINVAL;
334
335         /*
336          * Branch tracing:
337          */
338         if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
339             !attr->freq && hwc->sample_period == 1) {
340                 /* BTS is not supported by this architecture. */
341                 if (!x86_pmu.bts_active)
342                         return -EOPNOTSUPP;
343
344                 /* BTS is currently only allowed for user-mode. */
345                 if (!attr->exclude_kernel)
346                         return -EOPNOTSUPP;
347         }
348
349         hwc->config |= config;
350
351         return 0;
352 }
353
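/*
 * Common hw_config step: validate the requested precise_ip level against
 * PEBS/LBR support, build the base event-select bits (INT, USR, OS and,
 * for RAW events, the raw event mask), then hand off to
 * x86_setup_perfctr().
 */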
354 int x86_pmu_hw_config(struct perf_event *event)
355 {
356         if (event->attr.precise_ip) {
357                 int precise = 0;
358
359                 /* Support for constant skid */
360                 if (x86_pmu.pebs_active) {
361                         precise++;
362
363                         /* Support for IP fixup */
364                         if (x86_pmu.lbr_nr)
365                                 precise++;
366                 }
367
368                 if (event->attr.precise_ip > precise)
369                         return -EOPNOTSUPP;
370         }
371
372         /*
373          * Generate PMC IRQs:
374          * (keep 'enabled' bit clear for now)
375          */
376         event->hw.config = ARCH_PERFMON_EVENTSEL_INT;
377
378         /*
379          * Count user and OS events unless requested not to
380          */
381         if (!event->attr.exclude_user)
382                 event->hw.config |= ARCH_PERFMON_EVENTSEL_USR;
383         if (!event->attr.exclude_kernel)
384                 event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;
385
386         if (event->attr.type == PERF_TYPE_RAW)
387                 event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
388
389         return x86_setup_perfctr(event);
390 }
391
392 /*
393  * Setup the hardware configuration for a given attr_type
394  */
395 static int __x86_pmu_event_init(struct perf_event *event)
396 {
397         int err;
398
399         if (!x86_pmu_initialized())
400                 return -ENODEV;
401
402         err = 0;
403         if (!atomic_inc_not_zero(&active_events)) {
404                 mutex_lock(&pmc_reserve_mutex);
405                 if (atomic_read(&active_events) == 0) {
406                         if (!reserve_pmc_hardware())
407                                 err = -EBUSY;
408                         else
409                                 reserve_ds_buffers();
410                 }
411                 if (!err)
412                         atomic_inc(&active_events);
413                 mutex_unlock(&pmc_reserve_mutex);
414         }
415         if (err)
416                 return err;
417
418         event->destroy = hw_perf_event_destroy;
419
420         event->hw.idx = -1;
421         event->hw.last_cpu = -1;
422         event->hw.last_tag = ~0ULL;
423
424         /* mark unused */
425         event->hw.extra_reg.idx = EXTRA_REG_NONE;
426
427         return x86_pmu.hw_config(event);
428 }
429
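/*
 * Clear the enable bit in the event-select MSR of every counter that is
 * active on this CPU.
 */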
430 void x86_pmu_disable_all(void)
431 {
432         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
433         int idx;
434
435         for (idx = 0; idx < x86_pmu.num_counters; idx++) {
436                 u64 val;
437
438                 if (!test_bit(idx, cpuc->active_mask))
439                         continue;
440                 rdmsrl(x86_pmu_config_addr(idx), val);
441                 if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
442                         continue;
443                 val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
444                 wrmsrl(x86_pmu_config_addr(idx), val);
445         }
446 }
447
448 static void x86_pmu_disable(struct pmu *pmu)
449 {
450         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
451
452         if (!x86_pmu_initialized())
453                 return;
454
455         if (!cpuc->enabled)
456                 return;
457
458         cpuc->n_added = 0;
459         cpuc->enabled = 0;
460         barrier();
461
462         x86_pmu.disable_all();
463 }
464
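/*
 * Re-enable every counter marked active on this CPU by writing its
 * event-select with the enable bit set.
 */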
465 void x86_pmu_enable_all(int added)
466 {
467         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
468         int idx;
469
470         for (idx = 0; idx < x86_pmu.num_counters; idx++) {
471                 struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
472
473                 if (!test_bit(idx, cpuc->active_mask))
474                         continue;
475
476                 __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
477         }
478 }
479
480 static struct pmu pmu;
481
482 static inline int is_x86_event(struct perf_event *event)
483 {
484         return event->pmu == &pmu;
485 }
486
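/*
 * Assign events to counters while respecting their constraints.  First
 * try the fastpath of keeping each event on the counter it already used;
 * if that fails, redo the assignment from scratch, handing out counters
 * to the most constrained (lowest weight) events first.  Returns 0 on
 * success and fills in *assign when it is non-NULL.
 */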
487 int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
488 {
489         struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
490         unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
491         int i, j, w, wmax, num = 0;
492         struct hw_perf_event *hwc;
493
494         bitmap_zero(used_mask, X86_PMC_IDX_MAX);
495
496         for (i = 0; i < n; i++) {
497                 c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
498                 constraints[i] = c;
499         }
500
501         /*
502          * fastpath, try to reuse previous register
503          */
504         for (i = 0; i < n; i++) {
505                 hwc = &cpuc->event_list[i]->hw;
506                 c = constraints[i];
507
508                 /* never assigned */
509                 if (hwc->idx == -1)
510                         break;
511
512                 /* constraint still honored */
513                 if (!test_bit(hwc->idx, c->idxmsk))
514                         break;
515
516                 /* not already used */
517                 if (test_bit(hwc->idx, used_mask))
518                         break;
519
520                 __set_bit(hwc->idx, used_mask);
521                 if (assign)
522                         assign[i] = hwc->idx;
523         }
524         if (i == n)
525                 goto done;
526
527         /*
528          * begin slow path
529          */
530
531         bitmap_zero(used_mask, X86_PMC_IDX_MAX);
532
533         /*
534          * weight = number of possible counters
535          *
536          * 1    = most constrained, only works on one counter
537          * wmax = least constrained, works on any counter
538          *
539          * assign events to counters starting with most
540          * constrained events.
541          */
542         wmax = x86_pmu.num_counters;
543
544         /*
545          * when fixed event counters are present,
546          * wmax is incremented by 1 to account
547          * for one more choice
548          */
549         if (x86_pmu.num_counters_fixed)
550                 wmax++;
551
552         for (w = 1, num = n; num && w <= wmax; w++) {
553                 /* for each event */
554                 for (i = 0; num && i < n; i++) {
555                         c = constraints[i];
556                         hwc = &cpuc->event_list[i]->hw;
557
558                         if (c->weight != w)
559                                 continue;
560
561                         for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
562                                 if (!test_bit(j, used_mask))
563                                         break;
564                         }
565
566                         if (j == X86_PMC_IDX_MAX)
567                                 break;
568
569                         __set_bit(j, used_mask);
570
571                         if (assign)
572                                 assign[i] = j;
573                         num--;
574                 }
575         }
576 done:
577         /*
578          * scheduling failed or is just a simulation,
579          * free resources if necessary
580          */
581         if (!assign || num) {
582                 for (i = 0; i < n; i++) {
583                         if (x86_pmu.put_event_constraints)
584                                 x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
585                 }
586         }
587         return num ? -EINVAL : 0;
588 }
589
590 /*
591  * dogrp: true if we must collect sibling events (group)
592  * returns the total number of events or a negative error code
593  */
594 static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
595 {
596         struct perf_event *event;
597         int n, max_count;
598
599         max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed;
600
601         /* current number of events already accepted */
602         n = cpuc->n_events;
603
604         if (is_x86_event(leader)) {
605                 if (n >= max_count)
606                         return -EINVAL;
607                 cpuc->event_list[n] = leader;
608                 n++;
609         }
610         if (!dogrp)
611                 return n;
612
613         list_for_each_entry(event, &leader->sibling_list, group_entry) {
614                 if (!is_x86_event(event) ||
615                     event->state <= PERF_EVENT_STATE_OFF)
616                         continue;
617
618                 if (n >= max_count)
619                         return -EINVAL;
620
621                 cpuc->event_list[n] = event;
622                 n++;
623         }
624         return n;
625 }
626
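/*
 * Record which counter an event was assigned to and derive the
 * config/event MSR addresses for that counter (BTS, fixed or
 * general-purpose).
 */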
627 static inline void x86_assign_hw_event(struct perf_event *event,
628                                 struct cpu_hw_events *cpuc, int i)
629 {
630         struct hw_perf_event *hwc = &event->hw;
631
632         hwc->idx = cpuc->assign[i];
633         hwc->last_cpu = smp_processor_id();
634         hwc->last_tag = ++cpuc->tags[i];
635
636         if (hwc->idx == X86_PMC_IDX_FIXED_BTS) {
637                 hwc->config_base = 0;
638                 hwc->event_base = 0;
639         } else if (hwc->idx >= X86_PMC_IDX_FIXED) {
640                 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
641                 hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - X86_PMC_IDX_FIXED);
642         } else {
643                 hwc->config_base = x86_pmu_config_addr(hwc->idx);
644                 hwc->event_base  = x86_pmu_event_addr(hwc->idx);
645         }
646 }
647
648 static inline int match_prev_assignment(struct hw_perf_event *hwc,
649                                         struct cpu_hw_events *cpuc,
650                                         int i)
651 {
652         return hwc->idx == cpuc->assign[i] &&
653                 hwc->last_cpu == smp_processor_id() &&
654                 hwc->last_tag == cpuc->tags[i];
655 }
656
657 static void x86_pmu_start(struct perf_event *event, int flags);
658
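/*
 * pmu::pmu_enable callback: re-program any events that were moved to a
 * different counter by the last scheduling pass, then enable all
 * counters.
 */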
659 static void x86_pmu_enable(struct pmu *pmu)
660 {
661         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
662         struct perf_event *event;
663         struct hw_perf_event *hwc;
664         int i, added = cpuc->n_added;
665
666         if (!x86_pmu_initialized())
667                 return;
668
669         if (cpuc->enabled)
670                 return;
671
672         if (cpuc->n_added) {
673                 int n_running = cpuc->n_events - cpuc->n_added;
674                 /*
675                  * apply assignment obtained either from
676                  * hw_perf_group_sched_in() or x86_pmu_enable()
677                  *
678                  * step1: save events moving to new counters
679                  * step2: reprogram moved events into new counters
680                  */
681                 for (i = 0; i < n_running; i++) {
682                         event = cpuc->event_list[i];
683                         hwc = &event->hw;
684
685                         /*
686                          * we can avoid reprogramming counter if:
687                          * - assigned same counter as last time
688                          * - running on same CPU as last time
689                          * - no other event has used the counter since
690                          */
691                         if (hwc->idx == -1 ||
692                             match_prev_assignment(hwc, cpuc, i))
693                                 continue;
694
695                         /*
696                          * Ensure we don't accidentally enable a stopped
697                          * counter simply because we rescheduled.
698                          */
699                         if (hwc->state & PERF_HES_STOPPED)
700                                 hwc->state |= PERF_HES_ARCH;
701
702                         x86_pmu_stop(event, PERF_EF_UPDATE);
703                 }
704
705                 for (i = 0; i < cpuc->n_events; i++) {
706                         event = cpuc->event_list[i];
707                         hwc = &event->hw;
708
709                         if (!match_prev_assignment(hwc, cpuc, i))
710                                 x86_assign_hw_event(event, cpuc, i);
711                         else if (i < n_running)
712                                 continue;
713
714                         if (hwc->state & PERF_HES_ARCH)
715                                 continue;
716
717                         x86_pmu_start(event, PERF_EF_RELOAD);
718                 }
719                 cpuc->n_added = 0;
720                 perf_events_lapic_init();
721         }
722
723         cpuc->enabled = 1;
724         barrier();
725
726         x86_pmu.enable_all(added);
727 }
728
729 static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
730
731 /*
732  * Set the next IRQ period, based on the hwc->period_left value.
733  * To be called with the event disabled in hw:
734  */
735 int x86_perf_event_set_period(struct perf_event *event)
736 {
737         struct hw_perf_event *hwc = &event->hw;
738         s64 left = local64_read(&hwc->period_left);
739         s64 period = hwc->sample_period;
740         int ret = 0, idx = hwc->idx;
741
742         if (idx == X86_PMC_IDX_FIXED_BTS)
743                 return 0;
744
745         /*
746          * If we are way outside a reasonable range then just skip forward:
747          */
748         if (unlikely(left <= -period)) {
749                 left = period;
750                 local64_set(&hwc->period_left, left);
751                 hwc->last_period = period;
752                 ret = 1;
753         }
754
755         if (unlikely(left <= 0)) {
756                 left += period;
757                 local64_set(&hwc->period_left, left);
758                 hwc->last_period = period;
759                 ret = 1;
760         }
761         /*
762          * Quirk: certain CPUs don't like it if just 1 hw_event is left:
763          */
764         if (unlikely(left < 2))
765                 left = 2;
766
767         if (left > x86_pmu.max_period)
768                 left = x86_pmu.max_period;
769
770         per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
771
772         /*
773          * The hw event starts counting from this event offset,
774          * mark it to be able to extract future deltas:
775          */
776         local64_set(&hwc->prev_count, (u64)-left);
777
778         wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
779
780         /*
781          * Due to an erratum on certain CPUs we need
782          * a second write to be sure the register
783          * is updated properly
784          */
785         if (x86_pmu.perfctr_second_write) {
786                 wrmsrl(hwc->event_base,
787                         (u64)(-left) & x86_pmu.cntval_mask);
788         }
789
790         perf_event_update_userpage(event);
791
792         return ret;
793 }
794
795 void x86_pmu_enable_event(struct perf_event *event)
796 {
797         if (__this_cpu_read(cpu_hw_events.enabled))
798                 __x86_pmu_enable_event(&event->hw,
799                                        ARCH_PERFMON_EVENTSEL_ENABLE);
800 }
801
802 /*
803  * Add a single event to the PMU.
804  *
805  * The event is added to the group of enabled events
806  * but only if it can be scheduled with existing events.
807  */
808 static int x86_pmu_add(struct perf_event *event, int flags)
809 {
810         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
811         struct hw_perf_event *hwc;
812         int assign[X86_PMC_IDX_MAX];
813         int n, n0, ret;
814
815         hwc = &event->hw;
816
817         perf_pmu_disable(event->pmu);
818         n0 = cpuc->n_events;
819         ret = n = collect_events(cpuc, event, false);
820         if (ret < 0)
821                 goto out;
822
823         hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
824         if (!(flags & PERF_EF_START))
825                 hwc->state |= PERF_HES_ARCH;
826
827         /*
828          * If group events scheduling transaction was started,
829          * skip the schedulability test here, it will be performed
830          * at commit time (->commit_txn) as a whole
831          */
832         if (cpuc->group_flag & PERF_EVENT_TXN)
833                 goto done_collect;
834
835         ret = x86_pmu.schedule_events(cpuc, n, assign);
836         if (ret)
837                 goto out;
838         /*
839          * Copy the new assignment now that we know it is possible;
840          * it will be used by hw_perf_enable().
841          */
842         memcpy(cpuc->assign, assign, n*sizeof(int));
843
844 done_collect:
845         cpuc->n_events = n;
846         cpuc->n_added += n - n0;
847         cpuc->n_txn += n - n0;
848
849         ret = 0;
850 out:
851         perf_pmu_enable(event->pmu);
852         return ret;
853 }
854
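/*
 * pmu::start callback: (re)program the sample period if PERF_EF_RELOAD
 * is set, then activate the event on its assigned counter.
 */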
855 static void x86_pmu_start(struct perf_event *event, int flags)
856 {
857         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
858         int idx = event->hw.idx;
859
860         if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
861                 return;
862
863         if (WARN_ON_ONCE(idx == -1))
864                 return;
865
866         if (flags & PERF_EF_RELOAD) {
867                 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
868                 x86_perf_event_set_period(event);
869         }
870
871         event->hw.state = 0;
872
873         cpuc->events[idx] = event;
874         __set_bit(idx, cpuc->active_mask);
875         __set_bit(idx, cpuc->running);
876         x86_pmu.enable(event);
877         perf_event_update_userpage(event);
878 }
879
880 void perf_event_print_debug(void)
881 {
882         u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
883         u64 pebs;
884         struct cpu_hw_events *cpuc;
885         unsigned long flags;
886         int cpu, idx;
887
888         if (!x86_pmu.num_counters)
889                 return;
890
891         local_irq_save(flags);
892
893         cpu = smp_processor_id();
894         cpuc = &per_cpu(cpu_hw_events, cpu);
895
896         if (x86_pmu.version >= 2) {
897                 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
898                 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
899                 rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
900                 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
901                 rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);
902
903                 pr_info("\n");
904                 pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
905                 pr_info("CPU#%d: status:     %016llx\n", cpu, status);
906                 pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
907                 pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
908                 pr_info("CPU#%d: pebs:       %016llx\n", cpu, pebs);
909         }
910         pr_info("CPU#%d: active:     %016llx\n", cpu, *(u64 *)cpuc->active_mask);
911
912         for (idx = 0; idx < x86_pmu.num_counters; idx++) {
913                 rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
914                 rdmsrl(x86_pmu_event_addr(idx), pmc_count);
915
916                 prev_left = per_cpu(pmc_prev_left[idx], cpu);
917
918                 pr_info("CPU#%d:   gen-PMC%d ctrl:  %016llx\n",
919                         cpu, idx, pmc_ctrl);
920                 pr_info("CPU#%d:   gen-PMC%d count: %016llx\n",
921                         cpu, idx, pmc_count);
922                 pr_info("CPU#%d:   gen-PMC%d left:  %016llx\n",
923                         cpu, idx, prev_left);
924         }
925         for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
926                 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
927
928                 pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
929                         cpu, idx, pmc_count);
930         }
931         local_irq_restore(flags);
932 }
933
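/*
 * pmu::stop callback: disable the counter and, with PERF_EF_UPDATE, fold
 * the remaining count into the generic event.
 */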
934 void x86_pmu_stop(struct perf_event *event, int flags)
935 {
936         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
937         struct hw_perf_event *hwc = &event->hw;
938
939         if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) {
940                 x86_pmu.disable(event);
941                 cpuc->events[hwc->idx] = NULL;
942                 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
943                 hwc->state |= PERF_HES_STOPPED;
944         }
945
946         if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
947                 /*
948                  * Drain the remaining delta count out of an event
949                  * that we are disabling:
950                  */
951                 x86_perf_event_update(event);
952                 hwc->state |= PERF_HES_UPTODATE;
953         }
954 }
955
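/*
 * pmu::del callback: stop the event and remove it from the per-cpu event
 * list (unless we are inside a transaction, where ->cancel_txn will do
 * the truncation).
 */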
956 static void x86_pmu_del(struct perf_event *event, int flags)
957 {
958         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
959         int i;
960
961         /*
962          * If we're called during a txn, we don't need to do anything.
963          * The events never got scheduled and ->cancel_txn will truncate
964          * the event_list.
965          */
966         if (cpuc->group_flag & PERF_EVENT_TXN)
967                 return;
968
969         x86_pmu_stop(event, PERF_EF_UPDATE);
970
971         for (i = 0; i < cpuc->n_events; i++) {
972                 if (event == cpuc->event_list[i]) {
973
974                         if (x86_pmu.put_event_constraints)
975                                 x86_pmu.put_event_constraints(cpuc, event);
976
977                         while (++i < cpuc->n_events)
978                                 cpuc->event_list[i-1] = cpuc->event_list[i];
979
980                         --cpuc->n_events;
981                         break;
982                 }
983         }
984         perf_event_update_userpage(event);
985 }
986
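/*
 * Generic PMI handler: check every active counter for overflow, update
 * the overflowed events, re-arm their periods and push samples out via
 * perf_event_overflow().
 */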
987 int x86_pmu_handle_irq(struct pt_regs *regs)
988 {
989         struct perf_sample_data data;
990         struct cpu_hw_events *cpuc;
991         struct perf_event *event;
992         int idx, handled = 0;
993         u64 val;
994
995         perf_sample_data_init(&data, 0);
996
997         cpuc = &__get_cpu_var(cpu_hw_events);
998
999         /*
1000          * Some chipsets need to unmask the LVTPC in a particular spot
1001          * inside the nmi handler.  As a result, the unmasking was pushed
1002          * into all the nmi handlers.
1003          *
1004          * This generic handler doesn't seem to have any issues where the
1005          * unmasking occurs so it was left at the top.
1006          */
1007         apic_write(APIC_LVTPC, APIC_DM_NMI);
1008
1009         for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1010                 if (!test_bit(idx, cpuc->active_mask)) {
1011                         /*
1012                          * Though we deactivated the counter, some CPUs
1013                          * might still deliver spurious interrupts that are
1014                          * still in flight. Catch them:
1015                          */
1016                         if (__test_and_clear_bit(idx, cpuc->running))
1017                                 handled++;
1018                         continue;
1019                 }
1020
1021                 event = cpuc->events[idx];
1022
1023                 val = x86_perf_event_update(event);
1024                 if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
1025                         continue;
1026
1027                 /*
1028                  * event overflow
1029                  */
1030                 handled++;
1031                 data.period     = event->hw.last_period;
1032
1033                 if (!x86_perf_event_set_period(event))
1034                         continue;
1035
1036                 if (perf_event_overflow(event, &data, regs))
1037                         x86_pmu_stop(event, 0);
1038         }
1039
1040         if (handled)
1041                 inc_irq_stat(apic_perf_irqs);
1042
1043         return handled;
1044 }
1045
1046 void perf_events_lapic_init(void)
1047 {
1048         if (!x86_pmu.apic || !x86_pmu_initialized())
1049                 return;
1050
1051         /*
1052          * Always use NMI for PMU
1053          */
1054         apic_write(APIC_LVTPC, APIC_DM_NMI);
1055 }
1056
1057 static int __kprobes
1058 perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
1059 {
1060         if (!atomic_read(&active_events))
1061                 return NMI_DONE;
1062
1063         return x86_pmu.handle_irq(regs);
1064 }
1065
1066 struct event_constraint emptyconstraint;
1067 struct event_constraint unconstrained;
1068
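/*
 * CPU hotplug notifier: forward the relevant hotplug phases to the
 * vendor-specific x86_pmu callbacks and free any memory queued on
 * kfree_on_online once the CPU is fully up.
 */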
1069 static int __cpuinit
1070 x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
1071 {
1072         unsigned int cpu = (long)hcpu;
1073         struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
1074         int ret = NOTIFY_OK;
1075
1076         switch (action & ~CPU_TASKS_FROZEN) {
1077         case CPU_UP_PREPARE:
1078                 cpuc->kfree_on_online = NULL;
1079                 if (x86_pmu.cpu_prepare)
1080                         ret = x86_pmu.cpu_prepare(cpu);
1081                 break;
1082
1083         case CPU_STARTING:
1084                 if (x86_pmu.cpu_starting)
1085                         x86_pmu.cpu_starting(cpu);
1086                 break;
1087
1088         case CPU_ONLINE:
1089                 kfree(cpuc->kfree_on_online);
1090                 break;
1091
1092         case CPU_DYING:
1093                 if (x86_pmu.cpu_dying)
1094                         x86_pmu.cpu_dying(cpu);
1095                 break;
1096
1097         case CPU_UP_CANCELED:
1098         case CPU_DEAD:
1099                 if (x86_pmu.cpu_dead)
1100                         x86_pmu.cpu_dead(cpu);
1101                 break;
1102
1103         default:
1104                 break;
1105         }
1106
1107         return ret;
1108 }
1109
1110 static void __init pmu_check_apic(void)
1111 {
1112         if (cpu_has_apic)
1113                 return;
1114
1115         x86_pmu.apic = 0;
1116         pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
1117         pr_info("no hardware sampling interrupt available.\n");
1118 }
1119
1120 static int __init init_hw_perf_events(void)
1121 {
1122         struct event_constraint *c;
1123         int err;
1124
1125         pr_info("Performance Events: ");
1126
1127         switch (boot_cpu_data.x86_vendor) {
1128         case X86_VENDOR_INTEL:
1129                 err = intel_pmu_init();
1130                 break;
1131         case X86_VENDOR_AMD:
1132                 err = amd_pmu_init();
1133                 break;
1134         default:
1135                 return 0;
1136         }
1137         if (err != 0) {
1138                 pr_cont("no PMU driver, software events only.\n");
1139                 return 0;
1140         }
1141
1142         pmu_check_apic();
1143
1144         /* sanity check that the hardware exists or is emulated */
1145         if (!check_hw_exists())
1146                 return 0;
1147
1148         pr_cont("%s PMU driver.\n", x86_pmu.name);
1149
1150         if (x86_pmu.quirks)
1151                 x86_pmu.quirks();
1152
1153         if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
1154                 WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
1155                      x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
1156                 x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
1157         }
1158         x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
1159
1160         if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
1161                 WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
1162                      x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
1163                 x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
1164         }
1165
1166         x86_pmu.intel_ctrl |=
1167                 ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
1168
1169         perf_events_lapic_init();
1170         register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI");
1171
1172         unconstrained = (struct event_constraint)
1173                 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
1174                                    0, x86_pmu.num_counters);
1175
1176         if (x86_pmu.event_constraints) {
1177                 for_each_event_constraint(c, x86_pmu.event_constraints) {
1178                         if (c->cmask != X86_RAW_EVENT_MASK)
1179                                 continue;
1180
1181                         c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
1182                         c->weight += x86_pmu.num_counters;
1183                 }
1184         }
1185
1186         pr_info("... version:                %d\n",     x86_pmu.version);
1187         pr_info("... bit width:              %d\n",     x86_pmu.cntval_bits);
1188         pr_info("... generic registers:      %d\n",     x86_pmu.num_counters);
1189         pr_info("... value mask:             %016Lx\n", x86_pmu.cntval_mask);
1190         pr_info("... max period:             %016Lx\n", x86_pmu.max_period);
1191         pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_counters_fixed);
1192         pr_info("... event mask:             %016Lx\n", x86_pmu.intel_ctrl);
1193
1194         perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
1195         perf_cpu_notifier(x86_pmu_notifier);
1196
1197         return 0;
1198 }
1199 early_initcall(init_hw_perf_events);
1200
1201 static inline void x86_pmu_read(struct perf_event *event)
1202 {
1203         x86_perf_event_update(event);
1204 }
1205
1206 /*
1207  * Start group events scheduling transaction
1208  * Set the flag to make pmu::enable() not perform the
1209  * schedulability test; it will be performed at commit time
1210  */
1211 static void x86_pmu_start_txn(struct pmu *pmu)
1212 {
1213         perf_pmu_disable(pmu);
1214         __this_cpu_or(cpu_hw_events.group_flag, PERF_EVENT_TXN);
1215         __this_cpu_write(cpu_hw_events.n_txn, 0);
1216 }
1217
1218 /*
1219  * Stop group events scheduling transaction
1220  * Clear the flag and pmu::enable() will perform the
1221  * schedulability test.
1222  */
1223 static void x86_pmu_cancel_txn(struct pmu *pmu)
1224 {
1225         __this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN);
1226         /*
1227          * Truncate the collected events.
1228          */
1229         __this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn));
1230         __this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn));
1231         perf_pmu_enable(pmu);
1232 }
1233
1234 /*
1235  * Commit group events scheduling transaction
1236  * Perform the group schedulability test as a whole
1237  * Return 0 on success
1238  */
1239 static int x86_pmu_commit_txn(struct pmu *pmu)
1240 {
1241         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1242         int assign[X86_PMC_IDX_MAX];
1243         int n, ret;
1244
1245         n = cpuc->n_events;
1246
1247         if (!x86_pmu_initialized())
1248                 return -EAGAIN;
1249
1250         ret = x86_pmu.schedule_events(cpuc, n, assign);
1251         if (ret)
1252                 return ret;
1253
1254         /*
1255          * Copy the new assignment now that we know it is possible;
1256          * it will be used by hw_perf_enable().
1257          */
1258         memcpy(cpuc->assign, assign, n*sizeof(int));
1259
1260         cpuc->group_flag &= ~PERF_EVENT_TXN;
1261         perf_pmu_enable(pmu);
1262         return 0;
1263 }
1264 /*
1265  * a fake_cpuc is used to validate event groups. Due to
1266  * the extra reg logic, we need to also allocate a fake
1267  * per_core and per_cpu structure. Otherwise, group events
1268  * using extra reg may conflict without the kernel being
1269  * able to catch this when the last event gets added to
1270  * the group.
1271  */
1272 static void free_fake_cpuc(struct cpu_hw_events *cpuc)
1273 {
1274         kfree(cpuc->shared_regs);
1275         kfree(cpuc);
1276 }
1277
1278 static struct cpu_hw_events *allocate_fake_cpuc(void)
1279 {
1280         struct cpu_hw_events *cpuc;
1281         int cpu = raw_smp_processor_id();
1282
1283         cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL);
1284         if (!cpuc)
1285                 return ERR_PTR(-ENOMEM);
1286
1287         /* only needed if we have extra_regs */
1288         if (x86_pmu.extra_regs) {
1289                 cpuc->shared_regs = allocate_shared_regs(cpu);
1290                 if (!cpuc->shared_regs)
1291                         goto error;
1292         }
1293         return cpuc;
1294 error:
1295         free_fake_cpuc(cpuc);
1296         return ERR_PTR(-ENOMEM);
1297 }
1298
1299 /*
1300  * validate that we can schedule this event
1301  */
1302 static int validate_event(struct perf_event *event)
1303 {
1304         struct cpu_hw_events *fake_cpuc;
1305         struct event_constraint *c;
1306         int ret = 0;
1307
1308         fake_cpuc = allocate_fake_cpuc();
1309         if (IS_ERR(fake_cpuc))
1310                 return PTR_ERR(fake_cpuc);
1311
1312         c = x86_pmu.get_event_constraints(fake_cpuc, event);
1313
1314         if (!c || !c->weight)
1315                 ret = -EINVAL;
1316
1317         if (x86_pmu.put_event_constraints)
1318                 x86_pmu.put_event_constraints(fake_cpuc, event);
1319
1320         free_fake_cpuc(fake_cpuc);
1321
1322         return ret;
1323 }
1324
1325 /*
1326  * validate a single event group
1327  *
1328  * validation includes:
1329  *      - check events are compatible with each other
1330  *      - events do not compete for the same counter
1331  *      - number of events <= number of counters
1332  *
1333  * validation ensures the group can be loaded onto the
1334  * PMU if it was the only group available.
1335  */
1336 static int validate_group(struct perf_event *event)
1337 {
1338         struct perf_event *leader = event->group_leader;
1339         struct cpu_hw_events *fake_cpuc;
1340         int ret = -EINVAL, n;
1341
1342         fake_cpuc = allocate_fake_cpuc();
1343         if (IS_ERR(fake_cpuc))
1344                 return PTR_ERR(fake_cpuc);
1345         /*
1346          * the event is not yet connected with its
1347          * siblings therefore we must first collect
1348          * existing siblings, then add the new event
1349          * before we can simulate the scheduling
1350          */
1351         n = collect_events(fake_cpuc, leader, true);
1352         if (n < 0)
1353                 goto out;
1354
1355         fake_cpuc->n_events = n;
1356         n = collect_events(fake_cpuc, event, false);
1357         if (n < 0)
1358                 goto out;
1359
1360         fake_cpuc->n_events = n;
1361
1362         ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
1363
1364 out:
1365         free_fake_cpuc(fake_cpuc);
1366         return ret;
1367 }
1368
1369 static int x86_pmu_event_init(struct perf_event *event)
1370 {
1371         struct pmu *tmp;
1372         int err;
1373
1374         switch (event->attr.type) {
1375         case PERF_TYPE_RAW:
1376         case PERF_TYPE_HARDWARE:
1377         case PERF_TYPE_HW_CACHE:
1378                 break;
1379
1380         default:
1381                 return -ENOENT;
1382         }
1383
1384         err = __x86_pmu_event_init(event);
1385         if (!err) {
1386                 /*
1387                  * we temporarily connect event to its pmu
1388                  * such that validate_group() can classify
1389                  * it as an x86 event using is_x86_event()
1390                  */
1391                 tmp = event->pmu;
1392                 event->pmu = &pmu;
1393
1394                 if (event->group_leader != event)
1395                         err = validate_group(event);
1396                 else
1397                         err = validate_event(event);
1398
1399                 event->pmu = tmp;
1400         }
1401         if (err) {
1402                 if (event->destroy)
1403                         event->destroy(event);
1404         }
1405
1406         return err;
1407 }
1408
1409 static struct pmu pmu = {
1410         .pmu_enable     = x86_pmu_enable,
1411         .pmu_disable    = x86_pmu_disable,
1412
1413         .event_init     = x86_pmu_event_init,
1414
1415         .add            = x86_pmu_add,
1416         .del            = x86_pmu_del,
1417         .start          = x86_pmu_start,
1418         .stop           = x86_pmu_stop,
1419         .read           = x86_pmu_read,
1420
1421         .start_txn      = x86_pmu_start_txn,
1422         .cancel_txn     = x86_pmu_cancel_txn,
1423         .commit_txn     = x86_pmu_commit_txn,
1424 };
1425
1426 /*
1427  * callchain support
1428  */
1429
1430 static int backtrace_stack(void *data, char *name)
1431 {
1432         return 0;
1433 }
1434
1435 static void backtrace_address(void *data, unsigned long addr, int reliable)
1436 {
1437         struct perf_callchain_entry *entry = data;
1438
1439         perf_callchain_store(entry, addr);
1440 }
1441
1442 static const struct stacktrace_ops backtrace_ops = {
1443         .stack                  = backtrace_stack,
1444         .address                = backtrace_address,
1445         .walk_stack             = print_context_stack_bp,
1446 };
1447
1448 void
1449 perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
1450 {
1451         if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1452                 /* TODO: We don't support guest os callchain now */
1453                 return;
1454         }
1455
1456         perf_callchain_store(entry, regs->ip);
1457
1458         dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
1459 }
1460
1461 #ifdef CONFIG_COMPAT
1462 static inline int
1463 perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
1464 {
1465         /* 32-bit process in 64-bit kernel. */
1466         struct stack_frame_ia32 frame;
1467         const void __user *fp;
1468
1469         if (!test_thread_flag(TIF_IA32))
1470                 return 0;
1471
1472         fp = compat_ptr(regs->bp);
1473         while (entry->nr < PERF_MAX_STACK_DEPTH) {
1474                 unsigned long bytes;
1475                 frame.next_frame     = 0;
1476                 frame.return_address = 0;
1477
1478                 bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
1479                 if (bytes != sizeof(frame))
1480                         break;
1481
1482                 if (fp < compat_ptr(regs->sp))
1483                         break;
1484
1485                 perf_callchain_store(entry, frame.return_address);
1486                 fp = compat_ptr(frame.next_frame);
1487         }
1488         return 1;
1489 }
1490 #else
1491 static inline int
1492 perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
1493 {
1494         return 0;
1495 }
1496 #endif
1497
1498 void
1499 perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
1500 {
1501         struct stack_frame frame;
1502         const void __user *fp;
1503
1504         if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1505                 /* TODO: We don't support guest os callchain now */
1506                 return;
1507         }
1508
1509         fp = (void __user *)regs->bp;
1510
1511         perf_callchain_store(entry, regs->ip);
1512
1513         if (!current->mm)
1514                 return;
1515
1516         if (perf_callchain_user32(regs, entry))
1517                 return;
1518
1519         while (entry->nr < PERF_MAX_STACK_DEPTH) {
1520                 unsigned long bytes;
1521                 frame.next_frame     = NULL;
1522                 frame.return_address = 0;
1523
1524                 bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
1525                 if (bytes != sizeof(frame))
1526                         break;
1527
1528                 if ((unsigned long)fp < regs->sp)
1529                         break;
1530
1531                 perf_callchain_store(entry, frame.return_address);
1532                 fp = frame.next_frame;
1533         }
1534 }
1535
1536 unsigned long perf_instruction_pointer(struct pt_regs *regs)
1537 {
1538         unsigned long ip;
1539
1540         if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
1541                 ip = perf_guest_cbs->get_guest_ip();
1542         else
1543                 ip = instruction_pointer(regs);
1544
1545         return ip;
1546 }
1547
1548 unsigned long perf_misc_flags(struct pt_regs *regs)
1549 {
1550         int misc = 0;
1551
1552         if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1553                 if (perf_guest_cbs->is_user_mode())
1554                         misc |= PERF_RECORD_MISC_GUEST_USER;
1555                 else
1556                         misc |= PERF_RECORD_MISC_GUEST_KERNEL;
1557         } else {
1558                 if (user_mode(regs))
1559                         misc |= PERF_RECORD_MISC_USER;
1560                 else
1561                         misc |= PERF_RECORD_MISC_KERNEL;
1562         }
1563
1564         if (regs->flags & PERF_EFLAGS_EXACT)
1565                 misc |= PERF_RECORD_MISC_EXACT_IP;
1566
1567         return misc;
1568 }