arch/arm/kernel/perf_event.c [pandora-kernel.git]
1 #undef DEBUG
2
3 /*
4  * ARM performance counter support.
5  *
6  * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
7  *
8  * ARMv7 support: Jean Pihet <jpihet@mvista.com>
9  * 2010 (c) MontaVista Software, LLC.
10  *
11  * This code is based on the sparc64 perf event code, which is in turn based
12  * on the x86 code. Callchain code is based on the ARM OProfile backtrace
13  * code.
14  */
15 #define pr_fmt(fmt) "hw perfevents: " fmt
16
17 #include <linux/interrupt.h>
18 #include <linux/kernel.h>
19 #include <linux/module.h>
20 #include <linux/perf_event.h>
21 #include <linux/platform_device.h>
22 #include <linux/spinlock.h>
23 #include <linux/uaccess.h>
24
25 #include <asm/cputype.h>
26 #include <asm/irq.h>
27 #include <asm/irq_regs.h>
28 #include <asm/pmu.h>
29 #include <asm/stacktrace.h>
30
31 static struct platform_device *pmu_device;
32
33 /*
34  * Hardware lock to serialize accesses to PMU registers. Needed for the
35  * read/modify/write sequences.
36  */
37 DEFINE_SPINLOCK(pmu_lock);
38
39 /*
40  * ARMv6 supports a maximum of 3 events, starting from index 1. If we add
41  * another platform that supports more, we need to increase this to be the
42  * largest of all platforms.
43  *
44  * ARMv7 supports up to 32 events:
45  *  cycle counter CCNT + 31 event counters CNT0..30.
46  *  Cortex-A8 has 1+4 counters, Cortex-A9 has 1+6 counters.
47  */
48 #define ARMPMU_MAX_HWEVENTS             33
49
50 /* The events for a given CPU. */
51 struct cpu_hw_events {
52         /*
53          * The events that are active on the CPU for the given index. Index 0
54          * is reserved.
55          */
56         struct perf_event       *events[ARMPMU_MAX_HWEVENTS];
57
58         /*
59          * A 1 bit for an index indicates that the counter is being used for
60          * an event. A 0 means that the counter can be used.
61          */
62         unsigned long           used_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
63
64         /*
65          * A 1 bit for an index indicates that the counter is actively being
66          * used.
67          */
68         unsigned long           active_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
69 };
70 DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
71
72 /* PMU names. */
73 static const char *arm_pmu_names[] = {
74         [ARM_PERF_PMU_ID_XSCALE1] = "xscale1",
75         [ARM_PERF_PMU_ID_XSCALE2] = "xscale2",
76         [ARM_PERF_PMU_ID_V6]      = "v6",
77         [ARM_PERF_PMU_ID_V6MP]    = "v6mpcore",
78         [ARM_PERF_PMU_ID_CA8]     = "ARMv7 Cortex-A8",
79         [ARM_PERF_PMU_ID_CA9]     = "ARMv7 Cortex-A9",
80 };
81
82 struct arm_pmu {
83         enum arm_perf_pmu_ids id;
84         irqreturn_t     (*handle_irq)(int irq_num, void *dev);
85         void            (*enable)(struct hw_perf_event *evt, int idx);
86         void            (*disable)(struct hw_perf_event *evt, int idx);
87         int             (*event_map)(int evt);
88         u64             (*raw_event)(u64);
89         int             (*get_event_idx)(struct cpu_hw_events *cpuc,
90                                          struct hw_perf_event *hwc);
91         u32             (*read_counter)(int idx);
92         void            (*write_counter)(int idx, u32 val);
93         void            (*start)(void);
94         void            (*stop)(void);
95         int             num_events;
96         u64             max_period;
97 };
98
99 /* Set at runtime when we know what CPU type we are. */
100 static const struct arm_pmu *armpmu;
101
102 enum arm_perf_pmu_ids
103 armpmu_get_pmu_id(void)
104 {
105         int id = -ENODEV;
106
107         if (armpmu != NULL)
108                 id = armpmu->id;
109
110         return id;
111 }
112 EXPORT_SYMBOL_GPL(armpmu_get_pmu_id);
113
114 int
115 armpmu_get_max_events(void)
116 {
117         int max_events = 0;
118
119         if (armpmu != NULL)
120                 max_events = armpmu->num_events;
121
122         return max_events;
123 }
124 EXPORT_SYMBOL_GPL(armpmu_get_max_events);
125
126 int perf_num_counters(void)
127 {
128         return armpmu_get_max_events();
129 }
130 EXPORT_SYMBOL_GPL(perf_num_counters);
131
132 #define HW_OP_UNSUPPORTED               0xFFFF
133
134 #define C(_x) \
135         PERF_COUNT_HW_CACHE_##_x
136
137 #define CACHE_OP_UNSUPPORTED            0xFFFF
138
139 static unsigned armpmu_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
140                                      [PERF_COUNT_HW_CACHE_OP_MAX]
141                                      [PERF_COUNT_HW_CACHE_RESULT_MAX];
142
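/*
 * For PERF_TYPE_HW_CACHE events the generic perf ABI packs the cache
 * descriptor into attr.config as three bytes: type in bits 0-7, op in
 * bits 8-15 and result in bits 16-23. armpmu_map_cache_event() below
 * unpacks exactly that layout. For example, an L1-data read miss would
 * be requested as:
 *
 *   attr.config = PERF_COUNT_HW_CACHE_L1D |
 *                 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
 *                 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);   (== 0x10000)
 */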
143 static int
144 armpmu_map_cache_event(u64 config)
145 {
146         unsigned int cache_type, cache_op, cache_result, ret;
147
148         cache_type = (config >>  0) & 0xff;
149         if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
150                 return -EINVAL;
151
152         cache_op = (config >>  8) & 0xff;
153         if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
154                 return -EINVAL;
155
156         cache_result = (config >> 16) & 0xff;
157         if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
158                 return -EINVAL;
159
160         ret = (int)armpmu_perf_cache_map[cache_type][cache_op][cache_result];
161
162         if (ret == CACHE_OP_UNSUPPORTED)
163                 return -ENOENT;
164
165         return ret;
166 }
167
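/*
 * Program the counter so that it overflows after the requested number of
 * events: the remaining period is clamped to the PMU's max_period and the
 * counter is preloaded with its two's complement, so the overflow
 * interrupt fires once 'left' events have been counted. Returns non-zero
 * when a new sampling period was started.
 */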
168 static int
169 armpmu_event_set_period(struct perf_event *event,
170                         struct hw_perf_event *hwc,
171                         int idx)
172 {
173         s64 left = local64_read(&hwc->period_left);
174         s64 period = hwc->sample_period;
175         int ret = 0;
176
177         if (unlikely(left <= -period)) {
178                 left = period;
179                 local64_set(&hwc->period_left, left);
180                 hwc->last_period = period;
181                 ret = 1;
182         }
183
184         if (unlikely(left <= 0)) {
185                 left += period;
186                 local64_set(&hwc->period_left, left);
187                 hwc->last_period = period;
188                 ret = 1;
189         }
190
191         if (left > (s64)armpmu->max_period)
192                 left = armpmu->max_period;
193
194         local64_set(&hwc->prev_count, (u64)-left);
195
196         armpmu->write_counter(idx, (u64)(-left) & 0xffffffff);
197
198         perf_event_update_userpage(event);
199
200         return ret;
201 }
202
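/*
 * Fold the current hardware counter value into the generic event count.
 * The counters are only 32 bits wide, so the delta since the last read is
 * computed modulo 2^32 (the shift by 32 discards the high half and keeps
 * wraparound arithmetic correct). The cmpxchg loop lets this race safely
 * with an update from the overflow interrupt handler.
 */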
203 static u64
204 armpmu_event_update(struct perf_event *event,
205                     struct hw_perf_event *hwc,
206                     int idx)
207 {
208         int shift = 64 - 32;
209         s64 prev_raw_count, new_raw_count;
210         u64 delta;
211
212 again:
213         prev_raw_count = local64_read(&hwc->prev_count);
214         new_raw_count = armpmu->read_counter(idx);
215
216         if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
217                              new_raw_count) != prev_raw_count)
218                 goto again;
219
220         delta = (new_raw_count << shift) - (prev_raw_count << shift);
221         delta >>= shift;
222
223         local64_add(delta, &event->count);
224         local64_sub(delta, &hwc->period_left);
225
226         return new_raw_count;
227 }
228
229 static void
230 armpmu_read(struct perf_event *event)
231 {
232         struct hw_perf_event *hwc = &event->hw;
233
234         /* Don't read disabled counters! */
235         if (hwc->idx < 0)
236                 return;
237
238         armpmu_event_update(event, hwc, hwc->idx);
239 }
240
241 static void
242 armpmu_stop(struct perf_event *event, int flags)
243 {
244         struct hw_perf_event *hwc = &event->hw;
245
246         if (!armpmu)
247                 return;
248
249         /*
250          * ARM pmu always has to update the counter, so ignore
251          * PERF_EF_UPDATE, see comments in armpmu_start().
252          */
253         if (!(hwc->state & PERF_HES_STOPPED)) {
254                 armpmu->disable(hwc, hwc->idx);
255                 barrier(); /* why? */
256                 armpmu_event_update(event, hwc, hwc->idx);
257                 hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
258         }
259 }
260
261 static void
262 armpmu_start(struct perf_event *event, int flags)
263 {
264         struct hw_perf_event *hwc = &event->hw;
265
266         if (!armpmu)
267                 return;
268
269         /*
270          * ARM pmu always has to reprogram the period, so ignore
271          * PERF_EF_RELOAD, see the comment below.
272          */
273         if (flags & PERF_EF_RELOAD)
274                 WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
275
276         hwc->state = 0;
277         /*
278          * Set the period again. Some counters can't be stopped, so when we
279          * were stopped we simply disabled the IRQ source and the counter
280          * may have been left counting. If we don't do this step then we may
281          * get an interrupt too soon or *way* too late if the overflow has
282          * happened since disabling.
283          */
284         armpmu_event_set_period(event, hwc, hwc->idx);
285         armpmu->enable(hwc, hwc->idx);
286 }
287
288 static void
289 armpmu_del(struct perf_event *event, int flags)
290 {
291         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
292         struct hw_perf_event *hwc = &event->hw;
293         int idx = hwc->idx;
294
295         WARN_ON(idx < 0);
296
297         clear_bit(idx, cpuc->active_mask);
298         armpmu_stop(event, PERF_EF_UPDATE);
299         cpuc->events[idx] = NULL;
300         clear_bit(idx, cpuc->used_mask);
301
302         perf_event_update_userpage(event);
303 }
304
305 static int
306 armpmu_add(struct perf_event *event, int flags)
307 {
308         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
309         struct hw_perf_event *hwc = &event->hw;
310         int idx;
311         int err = 0;
312
313         perf_pmu_disable(event->pmu);
314
315         /* If we don't have a space for the counter then finish early. */
316         idx = armpmu->get_event_idx(cpuc, hwc);
317         if (idx < 0) {
318                 err = idx;
319                 goto out;
320         }
321
322         /*
323          * If there is an event in the counter we are going to use then make
324          * sure it is disabled.
325          */
326         event->hw.idx = idx;
327         armpmu->disable(hwc, idx);
328         cpuc->events[idx] = event;
329         set_bit(idx, cpuc->active_mask);
330
331         hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
332         if (flags & PERF_EF_START)
333                 armpmu_start(event, PERF_EF_RELOAD);
334
335         /* Propagate our changes to the userspace mapping. */
336         perf_event_update_userpage(event);
337
338 out:
339         perf_pmu_enable(event->pmu);
340         return err;
341 }
342
343 static struct pmu pmu;
344
345 static int
346 validate_event(struct cpu_hw_events *cpuc,
347                struct perf_event *event)
348 {
349         struct hw_perf_event fake_event = event->hw;
350
351         if (event->pmu != &pmu || event->state <= PERF_EVENT_STATE_OFF)
352                 return 1;
353
354         return armpmu->get_event_idx(cpuc, &fake_event) >= 0;
355 }
356
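/*
 * When an event is created as part of a group, check that the whole group
 * could be scheduled onto the PMU at once: replay the counter allocation
 * for the leader, every sibling and the new event against an empty, fake
 * cpu_hw_events structure. Events that are off or belong to another PMU
 * (e.g. software events) are ignored by validate_event() above.
 */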
357 static int
358 validate_group(struct perf_event *event)
359 {
360         struct perf_event *sibling, *leader = event->group_leader;
361         struct cpu_hw_events fake_pmu;
362
363         memset(&fake_pmu, 0, sizeof(fake_pmu));
364
365         if (!validate_event(&fake_pmu, leader))
366                 return -ENOSPC;
367
368         list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
369                 if (!validate_event(&fake_pmu, sibling))
370                         return -ENOSPC;
371         }
372
373         if (!validate_event(&fake_pmu, event))
374                 return -ENOSPC;
375
376         return 0;
377 }
378
379 static int
380 armpmu_reserve_hardware(void)
381 {
382         int i, err = -ENODEV, irq;
383
384         pmu_device = reserve_pmu(ARM_PMU_DEVICE_CPU);
385         if (IS_ERR(pmu_device)) {
386                 pr_warning("unable to reserve pmu\n");
387                 return PTR_ERR(pmu_device);
388         }
389
390         init_pmu(ARM_PMU_DEVICE_CPU);
391
392         if (pmu_device->num_resources < 1) {
393                 pr_err("no irqs for PMUs defined\n");
394                 return -ENODEV;
395         }
396
397         for (i = 0; i < pmu_device->num_resources; ++i) {
398                 irq = platform_get_irq(pmu_device, i);
399                 if (irq < 0)
400                         continue;
401
402                 err = request_irq(irq, armpmu->handle_irq,
403                                   IRQF_DISABLED | IRQF_NOBALANCING,
404                                   "armpmu", NULL);
405                 if (err) {
406                         pr_warning("unable to request IRQ%d for ARM perf "
407                                 "counters\n", irq);
408                         break;
409                 }
410         }
411
412         if (err) {
413                 for (i = i - 1; i >= 0; --i) {
414                         irq = platform_get_irq(pmu_device, i);
415                         if (irq >= 0)
416                                 free_irq(irq, NULL);
417                 }
418                 release_pmu(pmu_device);
419                 pmu_device = NULL;
420         }
421
422         return err;
423 }
424
425 static void
426 armpmu_release_hardware(void)
427 {
428         int i, irq;
429
430         for (i = pmu_device->num_resources - 1; i >= 0; --i) {
431                 irq = platform_get_irq(pmu_device, i);
432                 if (irq >= 0)
433                         free_irq(irq, NULL);
434         }
435         armpmu->stop();
436
437         release_pmu(pmu_device);
438         pmu_device = NULL;
439 }
440
441 static atomic_t active_events = ATOMIC_INIT(0);
442 static DEFINE_MUTEX(pmu_reserve_mutex);
443
444 static void
445 hw_perf_event_destroy(struct perf_event *event)
446 {
447         if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) {
448                 armpmu_release_hardware();
449                 mutex_unlock(&pmu_reserve_mutex);
450         }
451 }
452
453 static int
454 __hw_perf_event_init(struct perf_event *event)
455 {
456         struct hw_perf_event *hwc = &event->hw;
457         int mapping, err;
458
459         /* Decode the generic type into an ARM event identifier. */
460         if (PERF_TYPE_HARDWARE == event->attr.type) {
461                 mapping = armpmu->event_map(event->attr.config);
462         } else if (PERF_TYPE_HW_CACHE == event->attr.type) {
463                 mapping = armpmu_map_cache_event(event->attr.config);
464         } else if (PERF_TYPE_RAW == event->attr.type) {
465                 mapping = armpmu->raw_event(event->attr.config);
466         } else {
467                 pr_debug("event type %x not supported\n", event->attr.type);
468                 return -EOPNOTSUPP;
469         }
470
471         if (mapping < 0) {
472                 pr_debug("event %x:%llx not supported\n", event->attr.type,
473                          event->attr.config);
474                 return mapping;
475         }
476
477         /*
478          * Check whether we need to exclude the counter from certain modes.
479          * The ARM performance counters are on all of the time so if someone
480          * has asked us for some excludes then we have to fail.
481          */
482         if (event->attr.exclude_kernel || event->attr.exclude_user ||
483             event->attr.exclude_hv || event->attr.exclude_idle) {
484                 pr_debug("ARM performance counters do not support "
485                          "mode exclusion\n");
486                 return -EPERM;
487         }
488
489         /*
490          * We don't assign an index until we actually place the event onto
491          * hardware. Use -1 to signify that we haven't decided where to put it
492          * yet. For SMP systems, each core has its own PMU so we can't do any
493          * clever allocation or constraints checking at this point.
494          */
495         hwc->idx = -1;
496
497         /*
498          * Store the event encoding into the config_base field. config and
499          * event_base are unused as the only 2 things we need to know are
500          * the event mapping and the counter to use. The counter to use is
501          * also the index and the config_base is the event type.
502          */
503         hwc->config_base            = (unsigned long)mapping;
504         hwc->config                 = 0;
505         hwc->event_base             = 0;
506
507         if (!hwc->sample_period) {
508                 hwc->sample_period  = armpmu->max_period;
509                 hwc->last_period    = hwc->sample_period;
510                 local64_set(&hwc->period_left, hwc->sample_period);
511         }
512
513         err = 0;
514         if (event->group_leader != event) {
515                 err = validate_group(event);
516                 if (err)
517                         return -EINVAL;
518         }
519
520         return err;
521 }
522
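/*
 * Event creation entry point. The first active event reserves the PMU
 * hardware and requests its interrupts via armpmu_reserve_hardware();
 * hw_perf_event_destroy() drops that reservation again when the last
 * event goes away. active_events and pmu_reserve_mutex serialise the
 * 0 <-> 1 transitions.
 */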
523 static int armpmu_event_init(struct perf_event *event)
524 {
525         int err = 0;
526
527         switch (event->attr.type) {
528         case PERF_TYPE_RAW:
529         case PERF_TYPE_HARDWARE:
530         case PERF_TYPE_HW_CACHE:
531                 break;
532
533         default:
534                 return -ENOENT;
535         }
536
537         if (!armpmu)
538                 return -ENODEV;
539
540         event->destroy = hw_perf_event_destroy;
541
542         if (!atomic_inc_not_zero(&active_events)) {
543                 if (atomic_read(&active_events) > armpmu->num_events) {
544                         atomic_dec(&active_events);
545                         return -ENOSPC;
546                 }
547
548                 mutex_lock(&pmu_reserve_mutex);
549                 if (atomic_read(&active_events) == 0) {
550                         err = armpmu_reserve_hardware();
551                 }
552
553                 if (!err)
554                         atomic_inc(&active_events);
555                 mutex_unlock(&pmu_reserve_mutex);
556         }
557
558         if (err)
559                 return err;
560
561         err = __hw_perf_event_init(event);
562         if (err)
563                 hw_perf_event_destroy(event);
564
565         return err;
566 }
567
568 static void armpmu_enable(struct pmu *pmu)
569 {
570         /* Enable all of the perf events on hardware. */
571         int idx;
572         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
573
574         if (!armpmu)
575                 return;
576
577         for (idx = 0; idx <= armpmu->num_events; ++idx) {
578                 struct perf_event *event = cpuc->events[idx];
579
580                 if (!event)
581                         continue;
582
583                 armpmu->enable(&event->hw, idx);
584         }
585
586         armpmu->start();
587 }
588
589 static void armpmu_disable(struct pmu *pmu)
590 {
591         if (armpmu)
592                 armpmu->stop();
593 }
594
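/*
 * The struct pmu handed to the perf core. Each callback dispatches
 * through the CPU-specific arm_pmu backend installed in 'armpmu' at
 * runtime.
 */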
595 static struct pmu pmu = {
596         .pmu_enable     = armpmu_enable,
597         .pmu_disable    = armpmu_disable,
598         .event_init     = armpmu_event_init,
599         .add            = armpmu_add,
600         .del            = armpmu_del,
601         .start          = armpmu_start,
602         .stop           = armpmu_stop,
603         .read           = armpmu_read,
604 };
605
606 /*
607  * ARMv6 Performance counter handling code.
608  *
609  * ARMv6 has 2 configurable performance counters and a single cycle counter.
610  * They all share a single reset bit but can be written to zero so we can use
611  * that for a reset.
612  *
613  * The counters can't be individually enabled or disabled so when we remove
614  * one event and replace it with another we could get spurious counts from the
615  * wrong event. However, we can take advantage of the fact that the
616  * performance counters can export events to the event bus, and the event bus
617  * itself can be monitored. This requires that we *don't* export the events to
618  * the event bus. The procedure for disabling a configurable counter is:
619  *      - change the counter to count the ETMEXTOUT[0] signal (0x20). This
620  *        effectively stops the counter from counting.
621  *      - disable the counter's interrupt generation (each counter has its
622  *        own interrupt enable bit).
623  * Once stopped, the counter value can be written as 0 to reset.
624  *
625  * To enable a counter:
626  *      - enable the counter's interrupt generation.
627  *      - set the new event type.
628  *
629  * Note: the dedicated cycle counter only counts cycles and can't be
630  * enabled/disabled independently of the others. When we want to disable the
631  * cycle counter, we have to just disable the interrupt reporting and start
632  * ignoring that counter. When re-enabling, we have to reset the value and
633  * enable the interrupt.
634  */
635
636 enum armv6_perf_types {
637         ARMV6_PERFCTR_ICACHE_MISS           = 0x0,
638         ARMV6_PERFCTR_IBUF_STALL            = 0x1,
639         ARMV6_PERFCTR_DDEP_STALL            = 0x2,
640         ARMV6_PERFCTR_ITLB_MISS             = 0x3,
641         ARMV6_PERFCTR_DTLB_MISS             = 0x4,
642         ARMV6_PERFCTR_BR_EXEC               = 0x5,
643         ARMV6_PERFCTR_BR_MISPREDICT         = 0x6,
644         ARMV6_PERFCTR_INSTR_EXEC            = 0x7,
645         ARMV6_PERFCTR_DCACHE_HIT            = 0x9,
646         ARMV6_PERFCTR_DCACHE_ACCESS         = 0xA,
647         ARMV6_PERFCTR_DCACHE_MISS           = 0xB,
648         ARMV6_PERFCTR_DCACHE_WBACK          = 0xC,
649         ARMV6_PERFCTR_SW_PC_CHANGE          = 0xD,
650         ARMV6_PERFCTR_MAIN_TLB_MISS         = 0xF,
651         ARMV6_PERFCTR_EXPL_D_ACCESS         = 0x10,
652         ARMV6_PERFCTR_LSU_FULL_STALL        = 0x11,
653         ARMV6_PERFCTR_WBUF_DRAINED          = 0x12,
654         ARMV6_PERFCTR_CPU_CYCLES            = 0xFF,
655         ARMV6_PERFCTR_NOP                   = 0x20,
656 };
657
658 enum armv6_counters {
659         ARMV6_CYCLE_COUNTER = 1,
660         ARMV6_COUNTER0,
661         ARMV6_COUNTER1,
662 };
663
664 /*
665  * The hardware events that we support. We do support cache operations but
666  * we have Harvard caches and no way to combine instruction and data
667  * accesses/misses in hardware.
668  */
669 static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
670         [PERF_COUNT_HW_CPU_CYCLES]          = ARMV6_PERFCTR_CPU_CYCLES,
671         [PERF_COUNT_HW_INSTRUCTIONS]        = ARMV6_PERFCTR_INSTR_EXEC,
672         [PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
673         [PERF_COUNT_HW_CACHE_MISSES]        = HW_OP_UNSUPPORTED,
674         [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
675         [PERF_COUNT_HW_BRANCH_MISSES]       = ARMV6_PERFCTR_BR_MISPREDICT,
676         [PERF_COUNT_HW_BUS_CYCLES]          = HW_OP_UNSUPPORTED,
677 };
678
679 static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
680                                           [PERF_COUNT_HW_CACHE_OP_MAX]
681                                           [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
682         [C(L1D)] = {
683                 /*
684                  * The performance counters don't differentiate between read
685                  * and write accesses/misses so this isn't strictly correct,
686                  * but it's the best we can do. Writes and reads get
687                  * combined.
688                  */
689                 [C(OP_READ)] = {
690                         [C(RESULT_ACCESS)]      = ARMV6_PERFCTR_DCACHE_ACCESS,
691                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_DCACHE_MISS,
692                 },
693                 [C(OP_WRITE)] = {
694                         [C(RESULT_ACCESS)]      = ARMV6_PERFCTR_DCACHE_ACCESS,
695                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_DCACHE_MISS,
696                 },
697                 [C(OP_PREFETCH)] = {
698                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
699                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
700                 },
701         },
702         [C(L1I)] = {
703                 [C(OP_READ)] = {
704                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
705                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_ICACHE_MISS,
706                 },
707                 [C(OP_WRITE)] = {
708                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
709                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_ICACHE_MISS,
710                 },
711                 [C(OP_PREFETCH)] = {
712                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
713                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
714                 },
715         },
716         [C(LL)] = {
717                 [C(OP_READ)] = {
718                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
719                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
720                 },
721                 [C(OP_WRITE)] = {
722                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
723                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
724                 },
725                 [C(OP_PREFETCH)] = {
726                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
727                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
728                 },
729         },
730         [C(DTLB)] = {
731                 /*
732                  * The ARM performance counters can count micro DTLB misses,
733                  * micro ITLB misses and main TLB misses. There isn't an event
734                  * for TLB misses, so use the micro misses here and if users
735                  * want the main TLB misses they can use a raw counter.
736                  */
737                 [C(OP_READ)] = {
738                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
739                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_DTLB_MISS,
740                 },
741                 [C(OP_WRITE)] = {
742                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
743                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_DTLB_MISS,
744                 },
745                 [C(OP_PREFETCH)] = {
746                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
747                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
748                 },
749         },
750         [C(ITLB)] = {
751                 [C(OP_READ)] = {
752                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
753                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_ITLB_MISS,
754                 },
755                 [C(OP_WRITE)] = {
756                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
757                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_ITLB_MISS,
758                 },
759                 [C(OP_PREFETCH)] = {
760                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
761                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
762                 },
763         },
764         [C(BPU)] = {
765                 [C(OP_READ)] = {
766                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
767                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
768                 },
769                 [C(OP_WRITE)] = {
770                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
771                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
772                 },
773                 [C(OP_PREFETCH)] = {
774                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
775                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
776                 },
777         },
778 };
779
780 enum armv6mpcore_perf_types {
781         ARMV6MPCORE_PERFCTR_ICACHE_MISS     = 0x0,
782         ARMV6MPCORE_PERFCTR_IBUF_STALL      = 0x1,
783         ARMV6MPCORE_PERFCTR_DDEP_STALL      = 0x2,
784         ARMV6MPCORE_PERFCTR_ITLB_MISS       = 0x3,
785         ARMV6MPCORE_PERFCTR_DTLB_MISS       = 0x4,
786         ARMV6MPCORE_PERFCTR_BR_EXEC         = 0x5,
787         ARMV6MPCORE_PERFCTR_BR_NOTPREDICT   = 0x6,
788         ARMV6MPCORE_PERFCTR_BR_MISPREDICT   = 0x7,
789         ARMV6MPCORE_PERFCTR_INSTR_EXEC      = 0x8,
790         ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA,
791         ARMV6MPCORE_PERFCTR_DCACHE_RDMISS   = 0xB,
792         ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC,
793         ARMV6MPCORE_PERFCTR_DCACHE_WRMISS   = 0xD,
794         ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE,
795         ARMV6MPCORE_PERFCTR_SW_PC_CHANGE    = 0xF,
796         ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS   = 0x10,
797         ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11,
798         ARMV6MPCORE_PERFCTR_LSU_FULL_STALL  = 0x12,
799         ARMV6MPCORE_PERFCTR_WBUF_DRAINED    = 0x13,
800         ARMV6MPCORE_PERFCTR_CPU_CYCLES      = 0xFF,
801 };
802
803 /*
804  * The hardware events that we support. We do support cache operations but
805  * we have Harvard caches and no way to combine instruction and data
806  * accesses/misses in hardware.
807  */
808 static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
809         [PERF_COUNT_HW_CPU_CYCLES]          = ARMV6MPCORE_PERFCTR_CPU_CYCLES,
810         [PERF_COUNT_HW_INSTRUCTIONS]        = ARMV6MPCORE_PERFCTR_INSTR_EXEC,
811         [PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
812         [PERF_COUNT_HW_CACHE_MISSES]        = HW_OP_UNSUPPORTED,
813         [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC,
814         [PERF_COUNT_HW_BRANCH_MISSES]       = ARMV6MPCORE_PERFCTR_BR_MISPREDICT,
815         [PERF_COUNT_HW_BUS_CYCLES]          = HW_OP_UNSUPPORTED,
816 };
817
818 static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
819                                         [PERF_COUNT_HW_CACHE_OP_MAX]
820                                         [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
821         [C(L1D)] = {
822                 [C(OP_READ)] = {
823                         [C(RESULT_ACCESS)]  =
824                                 ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
825                         [C(RESULT_MISS)]    =
826                                 ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
827                 },
828                 [C(OP_WRITE)] = {
829                         [C(RESULT_ACCESS)]  =
830                                 ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
831                         [C(RESULT_MISS)]    =
832                                 ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
833                 },
834                 [C(OP_PREFETCH)] = {
835                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
836                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
837                 },
838         },
839         [C(L1I)] = {
840                 [C(OP_READ)] = {
841                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
842                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
843                 },
844                 [C(OP_WRITE)] = {
845                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
846                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
847                 },
848                 [C(OP_PREFETCH)] = {
849                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
850                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
851                 },
852         },
853         [C(LL)] = {
854                 [C(OP_READ)] = {
855                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
856                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
857                 },
858                 [C(OP_WRITE)] = {
859                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
860                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
861                 },
862                 [C(OP_PREFETCH)] = {
863                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
864                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
865                 },
866         },
867         [C(DTLB)] = {
868                 /*
869                  * The ARM performance counters can count micro DTLB misses,
870                  * micro ITLB misses and main TLB misses. There isn't an event
871                  * for TLB misses, so use the micro misses here and if users
872                  * want the main TLB misses they can use a raw counter.
873                  */
874                 [C(OP_READ)] = {
875                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
876                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_DTLB_MISS,
877                 },
878                 [C(OP_WRITE)] = {
879                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
880                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_DTLB_MISS,
881                 },
882                 [C(OP_PREFETCH)] = {
883                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
884                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
885                 },
886         },
887         [C(ITLB)] = {
888                 [C(OP_READ)] = {
889                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
890                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ITLB_MISS,
891                 },
892                 [C(OP_WRITE)] = {
893                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
894                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ITLB_MISS,
895                 },
896                 [C(OP_PREFETCH)] = {
897                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
898                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
899                 },
900         },
901         [C(BPU)] = {
902                 [C(OP_READ)] = {
903                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
904                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
905                 },
906                 [C(OP_WRITE)] = {
907                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
908                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
909                 },
910                 [C(OP_PREFETCH)] = {
911                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
912                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
913                 },
914         },
915 };
916
917 static inline unsigned long
918 armv6_pmcr_read(void)
919 {
920         u32 val;
921         asm volatile("mrc   p15, 0, %0, c15, c12, 0" : "=r"(val));
922         return val;
923 }
924
925 static inline void
926 armv6_pmcr_write(unsigned long val)
927 {
928         asm volatile("mcr   p15, 0, %0, c15, c12, 0" : : "r"(val));
929 }
930
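/*
 * ARMv6 PMCR bit layout, as encoded by the definitions below: a global
 * enable (bit 0), reset bits for the event counters and the cycle counter
 * (bits 1-2), a cycle-counter divider (bit 3), per-counter interrupt
 * enables (bits 4-6), overflow flags (bits 8-10) and the event selectors
 * for counter1 (bits 12-19) and counter0 (bits 20-27).
 */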
931 #define ARMV6_PMCR_ENABLE               (1 << 0)
932 #define ARMV6_PMCR_CTR01_RESET          (1 << 1)
933 #define ARMV6_PMCR_CCOUNT_RESET         (1 << 2)
934 #define ARMV6_PMCR_CCOUNT_DIV           (1 << 3)
935 #define ARMV6_PMCR_COUNT0_IEN           (1 << 4)
936 #define ARMV6_PMCR_COUNT1_IEN           (1 << 5)
937 #define ARMV6_PMCR_CCOUNT_IEN           (1 << 6)
938 #define ARMV6_PMCR_COUNT0_OVERFLOW      (1 << 8)
939 #define ARMV6_PMCR_COUNT1_OVERFLOW      (1 << 9)
940 #define ARMV6_PMCR_CCOUNT_OVERFLOW      (1 << 10)
941 #define ARMV6_PMCR_EVT_COUNT0_SHIFT     20
942 #define ARMV6_PMCR_EVT_COUNT0_MASK      (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
943 #define ARMV6_PMCR_EVT_COUNT1_SHIFT     12
944 #define ARMV6_PMCR_EVT_COUNT1_MASK      (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)
945
946 #define ARMV6_PMCR_OVERFLOWED_MASK \
947         (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \
948          ARMV6_PMCR_CCOUNT_OVERFLOW)
949
950 static inline int
951 armv6_pmcr_has_overflowed(unsigned long pmcr)
952 {
953         return (pmcr & ARMV6_PMCR_OVERFLOWED_MASK);
954 }
955
956 static inline int
957 armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
958                                   enum armv6_counters counter)
959 {
960         int ret = 0;
961
962         if (ARMV6_CYCLE_COUNTER == counter)
963                 ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
964         else if (ARMV6_COUNTER0 == counter)
965                 ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
966         else if (ARMV6_COUNTER1 == counter)
967                 ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
968         else
969                 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
970
971         return ret;
972 }
973
974 static inline u32
975 armv6pmu_read_counter(int counter)
976 {
977         unsigned long value = 0;
978
979         if (ARMV6_CYCLE_COUNTER == counter)
980                 asm volatile("mrc   p15, 0, %0, c15, c12, 1" : "=r"(value));
981         else if (ARMV6_COUNTER0 == counter)
982                 asm volatile("mrc   p15, 0, %0, c15, c12, 2" : "=r"(value));
983         else if (ARMV6_COUNTER1 == counter)
984                 asm volatile("mrc   p15, 0, %0, c15, c12, 3" : "=r"(value));
985         else
986                 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
987
988         return value;
989 }
990
991 static inline void
992 armv6pmu_write_counter(int counter,
993                        u32 value)
994 {
995         if (ARMV6_CYCLE_COUNTER == counter)
996                 asm volatile("mcr   p15, 0, %0, c15, c12, 1" : : "r"(value));
997         else if (ARMV6_COUNTER0 == counter)
998                 asm volatile("mcr   p15, 0, %0, c15, c12, 2" : : "r"(value));
999         else if (ARMV6_COUNTER1 == counter)
1000                 asm volatile("mcr   p15, 0, %0, c15, c12, 3" : : "r"(value));
1001         else
1002                 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
1003 }
1004
1005 void
1006 armv6pmu_enable_event(struct hw_perf_event *hwc,
1007                       int idx)
1008 {
1009         unsigned long val, mask, evt, flags;
1010
1011         if (ARMV6_CYCLE_COUNTER == idx) {
1012                 mask    = 0;
1013                 evt     = ARMV6_PMCR_CCOUNT_IEN;
1014         } else if (ARMV6_COUNTER0 == idx) {
1015                 mask    = ARMV6_PMCR_EVT_COUNT0_MASK;
1016                 evt     = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
1017                           ARMV6_PMCR_COUNT0_IEN;
1018         } else if (ARMV6_COUNTER1 == idx) {
1019                 mask    = ARMV6_PMCR_EVT_COUNT1_MASK;
1020                 evt     = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
1021                           ARMV6_PMCR_COUNT1_IEN;
1022         } else {
1023                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
1024                 return;
1025         }
1026
1027         /*
1028          * Mask out the current event and set the counter to count the event
1029          * that we're interested in.
1030          */
1031         spin_lock_irqsave(&pmu_lock, flags);
1032         val = armv6_pmcr_read();
1033         val &= ~mask;
1034         val |= evt;
1035         armv6_pmcr_write(val);
1036         spin_unlock_irqrestore(&pmu_lock, flags);
1037 }
1038
1039 static irqreturn_t
1040 armv6pmu_handle_irq(int irq_num,
1041                     void *dev)
1042 {
1043         unsigned long pmcr = armv6_pmcr_read();
1044         struct perf_sample_data data;
1045         struct cpu_hw_events *cpuc;
1046         struct pt_regs *regs;
1047         int idx;
1048
1049         if (!armv6_pmcr_has_overflowed(pmcr))
1050                 return IRQ_NONE;
1051
1052         regs = get_irq_regs();
1053
1054         /*
1055          * The interrupts are cleared by writing the overflow flags back to
1056          * the control register. All of the other bits don't have any effect
1057          * if they are rewritten, so write the whole value back.
1058          */
1059         armv6_pmcr_write(pmcr);
1060
1061         perf_sample_data_init(&data, 0);
1062
1063         cpuc = &__get_cpu_var(cpu_hw_events);
1064         for (idx = 0; idx <= armpmu->num_events; ++idx) {
1065                 struct perf_event *event = cpuc->events[idx];
1066                 struct hw_perf_event *hwc;
1067
1068                 if (!test_bit(idx, cpuc->active_mask))
1069                         continue;
1070
1071                 /*
1072                  * We have a single interrupt for all counters. Check that
1073                  * each counter has overflowed before we process it.
1074                  */
1075                 if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
1076                         continue;
1077
1078                 hwc = &event->hw;
1079                 armpmu_event_update(event, hwc, idx);
1080                 data.period = event->hw.last_period;
1081                 if (!armpmu_event_set_period(event, hwc, idx))
1082                         continue;
1083
1084                 if (perf_event_overflow(event, 0, &data, regs))
1085                         armpmu->disable(hwc, idx);
1086         }
1087
1088         /*
1089          * Handle the pending perf events.
1090          *
1091          * Note: this call *must* be run with interrupts disabled. For
1092          * platforms that can have the PMU interrupts raised as an NMI, this
1093          * will not work.
1094          */
1095         irq_work_run();
1096
1097         return IRQ_HANDLED;
1098 }
1099
1100 static void
1101 armv6pmu_start(void)
1102 {
1103         unsigned long flags, val;
1104
1105         spin_lock_irqsave(&pmu_lock, flags);
1106         val = armv6_pmcr_read();
1107         val |= ARMV6_PMCR_ENABLE;
1108         armv6_pmcr_write(val);
1109         spin_unlock_irqrestore(&pmu_lock, flags);
1110 }
1111
1112 void
1113 armv6pmu_stop(void)
1114 {
1115         unsigned long flags, val;
1116
1117         spin_lock_irqsave(&pmu_lock, flags);
1118         val = armv6_pmcr_read();
1119         val &= ~ARMV6_PMCR_ENABLE;
1120         armv6_pmcr_write(val);
1121         spin_unlock_irqrestore(&pmu_lock, flags);
1122 }
1123
1124 static inline int
1125 armv6pmu_event_map(int config)
1126 {
1127         int mapping = armv6_perf_map[config];
1128         if (HW_OP_UNSUPPORTED == mapping)
1129                 mapping = -EOPNOTSUPP;
1130         return mapping;
1131 }
1132
1133 static inline int
1134 armv6mpcore_pmu_event_map(int config)
1135 {
1136         int mapping = armv6mpcore_perf_map[config];
1137         if (HW_OP_UNSUPPORTED == mapping)
1138                 mapping = -EOPNOTSUPP;
1139         return mapping;
1140 }
1141
1142 static u64
1143 armv6pmu_raw_event(u64 config)
1144 {
1145         return config & 0xff;
1146 }
1147
1148 static int
1149 armv6pmu_get_event_idx(struct cpu_hw_events *cpuc,
1150                        struct hw_perf_event *event)
1151 {
1152         /* Always place a cycle counter into the cycle counter. */
1153         if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) {
1154                 if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask))
1155                         return -EAGAIN;
1156
1157                 return ARMV6_CYCLE_COUNTER;
1158         } else {
1159                 /*
1160                  * For anything other than a cycle counter, try and use
1161                  * counter0 and counter1.
1162                  */
1163                 if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) {
1164                         return ARMV6_COUNTER1;
1165                 }
1166
1167                 if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) {
1168                         return ARMV6_COUNTER0;
1169                 }
1170
1171                 /* The counters are all in use. */
1172                 return -EAGAIN;
1173         }
1174 }
1175
1176 static void
1177 armv6pmu_disable_event(struct hw_perf_event *hwc,
1178                        int idx)
1179 {
1180         unsigned long val, mask, evt, flags;
1181
1182         if (ARMV6_CYCLE_COUNTER == idx) {
1183                 mask    = ARMV6_PMCR_CCOUNT_IEN;
1184                 evt     = 0;
1185         } else if (ARMV6_COUNTER0 == idx) {
1186                 mask    = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK;
1187                 evt     = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
1188         } else if (ARMV6_COUNTER1 == idx) {
1189                 mask    = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK;
1190                 evt     = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT;
1191         } else {
1192                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
1193                 return;
1194         }
1195
1196         /*
1197          * Mask out the current event and set the counter to count the number
1198          * of ETM bus signal assertion cycles. The external reporting should
1199          * be disabled and so this should never increment.
1200          */
1201         spin_lock_irqsave(&pmu_lock, flags);
1202         val = armv6_pmcr_read();
1203         val &= ~mask;
1204         val |= evt;
1205         armv6_pmcr_write(val);
1206         spin_unlock_irqrestore(&pmu_lock, flags);
1207 }
1208
1209 static void
1210 armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc,
1211                               int idx)
1212 {
1213         unsigned long val, mask, flags, evt = 0;
1214
1215         if (ARMV6_CYCLE_COUNTER == idx) {
1216                 mask    = ARMV6_PMCR_CCOUNT_IEN;
1217         } else if (ARMV6_COUNTER0 == idx) {
1218                 mask    = ARMV6_PMCR_COUNT0_IEN;
1219         } else if (ARMV6_COUNTER1 == idx) {
1220                 mask    = ARMV6_PMCR_COUNT1_IEN;
1221         } else {
1222                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
1223                 return;
1224         }
1225
1226         /*
1227          * Unlike UP ARMv6, we don't have a way of stopping the counters. We
1228          * simply disable the interrupt reporting.
1229          */
1230         spin_lock_irqsave(&pmu_lock, flags);
1231         val = armv6_pmcr_read();
1232         val &= ~mask;
1233         val |= evt;
1234         armv6_pmcr_write(val);
1235         spin_unlock_irqrestore(&pmu_lock, flags);
1236 }
1237
1238 static const struct arm_pmu armv6pmu = {
1239         .id                     = ARM_PERF_PMU_ID_V6,
1240         .handle_irq             = armv6pmu_handle_irq,
1241         .enable                 = armv6pmu_enable_event,
1242         .disable                = armv6pmu_disable_event,
1243         .event_map              = armv6pmu_event_map,
1244         .raw_event              = armv6pmu_raw_event,
1245         .read_counter           = armv6pmu_read_counter,
1246         .write_counter          = armv6pmu_write_counter,
1247         .get_event_idx          = armv6pmu_get_event_idx,
1248         .start                  = armv6pmu_start,
1249         .stop                   = armv6pmu_stop,
1250         .num_events             = 3,
1251         .max_period             = (1LLU << 32) - 1,
1252 };
1253
1254 /*
1255  * ARMv6mpcore is almost identical to single core ARMv6 with the exception
1256  * that some of the events have different enumerations and that there is no
1257  * *hack* to stop the programmable counters. To stop the counters we simply
1258  * disable the interrupt reporting and update the event. When unthrottling we
1259  * reset the period and enable the interrupt reporting.
1260  */
1261 static const struct arm_pmu armv6mpcore_pmu = {
1262         .id                     = ARM_PERF_PMU_ID_V6MP,
1263         .handle_irq             = armv6pmu_handle_irq,
1264         .enable                 = armv6pmu_enable_event,
1265         .disable                = armv6mpcore_pmu_disable_event,
1266         .event_map              = armv6mpcore_pmu_event_map,
1267         .raw_event              = armv6pmu_raw_event,
1268         .read_counter           = armv6pmu_read_counter,
1269         .write_counter          = armv6pmu_write_counter,
1270         .get_event_idx          = armv6pmu_get_event_idx,
1271         .start                  = armv6pmu_start,
1272         .stop                   = armv6pmu_stop,
1273         .num_events             = 3,
1274         .max_period             = (1LLU << 32) - 1,
1275 };
1276
1277 /*
1278  * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code.
1279  *
1280  * Copied from ARMv6 code, with the low level code inspired
1281  *  by the ARMv7 Oprofile code.
1282  *
1283  * Cortex-A8 has up to 4 configurable performance counters and
1284  *  a single cycle counter.
1285  * Cortex-A9 has up to 31 configurable performance counters and
1286  *  a single cycle counter.
1287  *
1288  * All counters can be enabled/disabled and IRQ masked separately. The cycle
1289  *  counter can be reset on its own and the event counters can be reset as a group.
1290  */
1291
1292 /* Common ARMv7 event types */
1293 enum armv7_perf_types {
1294         ARMV7_PERFCTR_PMNC_SW_INCR              = 0x00,
1295         ARMV7_PERFCTR_IFETCH_MISS               = 0x01,
1296         ARMV7_PERFCTR_ITLB_MISS                 = 0x02,
1297         ARMV7_PERFCTR_DCACHE_REFILL             = 0x03,
1298         ARMV7_PERFCTR_DCACHE_ACCESS             = 0x04,
1299         ARMV7_PERFCTR_DTLB_REFILL               = 0x05,
1300         ARMV7_PERFCTR_DREAD                     = 0x06,
1301         ARMV7_PERFCTR_DWRITE                    = 0x07,
1302
1303         ARMV7_PERFCTR_EXC_TAKEN                 = 0x09,
1304         ARMV7_PERFCTR_EXC_EXECUTED              = 0x0A,
1305         ARMV7_PERFCTR_CID_WRITE                 = 0x0B,
1306         /* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS.
1307          * It counts:
1308          *  - all branch instructions,
1309          *  - instructions that explicitly write the PC,
1310          *  - exception generating instructions.
1311          */
1312         ARMV7_PERFCTR_PC_WRITE                  = 0x0C,
1313         ARMV7_PERFCTR_PC_IMM_BRANCH             = 0x0D,
1314         ARMV7_PERFCTR_UNALIGNED_ACCESS          = 0x0F,
1315         ARMV7_PERFCTR_PC_BRANCH_MIS_PRED        = 0x10,
1316         ARMV7_PERFCTR_CLOCK_CYCLES              = 0x11,
1317
1318         ARMV7_PERFCTR_PC_BRANCH_MIS_USED        = 0x12,
1319
1320         ARMV7_PERFCTR_CPU_CYCLES                = 0xFF
1321 };
1322
1323 /* ARMv7 Cortex-A8 specific event types */
1324 enum armv7_a8_perf_types {
1325         ARMV7_PERFCTR_INSTR_EXECUTED            = 0x08,
1326
1327         ARMV7_PERFCTR_PC_PROC_RETURN            = 0x0E,
1328
1329         ARMV7_PERFCTR_WRITE_BUFFER_FULL         = 0x40,
1330         ARMV7_PERFCTR_L2_STORE_MERGED           = 0x41,
1331         ARMV7_PERFCTR_L2_STORE_BUFF             = 0x42,
1332         ARMV7_PERFCTR_L2_ACCESS                 = 0x43,
1333         ARMV7_PERFCTR_L2_CACH_MISS              = 0x44,
1334         ARMV7_PERFCTR_AXI_READ_CYCLES           = 0x45,
1335         ARMV7_PERFCTR_AXI_WRITE_CYCLES          = 0x46,
1336         ARMV7_PERFCTR_MEMORY_REPLAY             = 0x47,
1337         ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY   = 0x48,
1338         ARMV7_PERFCTR_L1_DATA_MISS              = 0x49,
1339         ARMV7_PERFCTR_L1_INST_MISS              = 0x4A,
1340         ARMV7_PERFCTR_L1_DATA_COLORING          = 0x4B,
1341         ARMV7_PERFCTR_L1_NEON_DATA              = 0x4C,
1342         ARMV7_PERFCTR_L1_NEON_CACH_DATA         = 0x4D,
1343         ARMV7_PERFCTR_L2_NEON                   = 0x4E,
1344         ARMV7_PERFCTR_L2_NEON_HIT               = 0x4F,
1345         ARMV7_PERFCTR_L1_INST                   = 0x50,
1346         ARMV7_PERFCTR_PC_RETURN_MIS_PRED        = 0x51,
1347         ARMV7_PERFCTR_PC_BRANCH_FAILED          = 0x52,
1348         ARMV7_PERFCTR_PC_BRANCH_TAKEN           = 0x53,
1349         ARMV7_PERFCTR_PC_BRANCH_EXECUTED        = 0x54,
1350         ARMV7_PERFCTR_OP_EXECUTED               = 0x55,
1351         ARMV7_PERFCTR_CYCLES_INST_STALL         = 0x56,
1352         ARMV7_PERFCTR_CYCLES_INST               = 0x57,
1353         ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL    = 0x58,
1354         ARMV7_PERFCTR_CYCLES_NEON_INST_STALL    = 0x59,
1355         ARMV7_PERFCTR_NEON_CYCLES               = 0x5A,
1356
1357         ARMV7_PERFCTR_PMU0_EVENTS               = 0x70,
1358         ARMV7_PERFCTR_PMU1_EVENTS               = 0x71,
1359         ARMV7_PERFCTR_PMU_EVENTS                = 0x72,
1360 };
1361
1362 /* ARMv7 Cortex-A9 specific event types */
1363 enum armv7_a9_perf_types {
1364         ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC     = 0x40,
1365         ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC     = 0x41,
1366         ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC       = 0x42,
1367
1368         ARMV7_PERFCTR_COHERENT_LINE_MISS        = 0x50,
1369         ARMV7_PERFCTR_COHERENT_LINE_HIT         = 0x51,
1370
1371         ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES   = 0x60,
1372         ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES   = 0x61,
1373         ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES = 0x62,
1374         ARMV7_PERFCTR_STREX_EXECUTED_PASSED     = 0x63,
1375         ARMV7_PERFCTR_STREX_EXECUTED_FAILED     = 0x64,
1376         ARMV7_PERFCTR_DATA_EVICTION             = 0x65,
1377         ARMV7_PERFCTR_ISSUE_STAGE_NO_INST       = 0x66,
1378         ARMV7_PERFCTR_ISSUE_STAGE_EMPTY         = 0x67,
1379         ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE  = 0x68,
1380
1381         ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS = 0x6E,
1382
1383         ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST   = 0x70,
1384         ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST = 0x71,
1385         ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST  = 0x72,
1386         ARMV7_PERFCTR_FP_EXECUTED_INST          = 0x73,
1387         ARMV7_PERFCTR_NEON_EXECUTED_INST        = 0x74,
1388
1389         ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES = 0x80,
1390         ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES  = 0x81,
1391         ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES        = 0x82,
1392         ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES        = 0x83,
1393         ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES  = 0x84,
1394         ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES  = 0x85,
1395         ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES      = 0x86,
1396
1397         ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES  = 0x8A,
1398         ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES = 0x8B,
1399
1400         ARMV7_PERFCTR_ISB_INST                  = 0x90,
1401         ARMV7_PERFCTR_DSB_INST                  = 0x91,
1402         ARMV7_PERFCTR_DMB_INST                  = 0x92,
1403         ARMV7_PERFCTR_EXT_INTERRUPTS            = 0x93,
1404
1405         ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED     = 0xA0,
1406         ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED       = 0xA1,
1407         ARMV7_PERFCTR_PLE_FIFO_FLUSH            = 0xA2,
1408         ARMV7_PERFCTR_PLE_RQST_COMPLETED        = 0xA3,
1409         ARMV7_PERFCTR_PLE_FIFO_OVERFLOW         = 0xA4,
1410         ARMV7_PERFCTR_PLE_RQST_PROG             = 0xA5
1411 };
1412
1413 /*
1414  * Cortex-A8 HW events mapping
1415  *
1416  * The hardware events that we support. We do support cache operations but
1417  * we have Harvard caches and no way to combine instruction and data
1418  * accesses/misses in hardware.
1419  */
1420 static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = {
1421         [PERF_COUNT_HW_CPU_CYCLES]          = ARMV7_PERFCTR_CPU_CYCLES,
1422         [PERF_COUNT_HW_INSTRUCTIONS]        = ARMV7_PERFCTR_INSTR_EXECUTED,
1423         [PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
1424         [PERF_COUNT_HW_CACHE_MISSES]        = HW_OP_UNSUPPORTED,
1425         [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
1426         [PERF_COUNT_HW_BRANCH_MISSES]       = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1427         [PERF_COUNT_HW_BUS_CYCLES]          = ARMV7_PERFCTR_CLOCK_CYCLES,
1428 };
1429
1430 static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
1431                                           [PERF_COUNT_HW_CACHE_OP_MAX]
1432                                           [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
1433         [C(L1D)] = {
1434                 /*
1435                  * The performance counters don't differentiate between read
1436                  * and write accesses/misses so this isn't strictly correct,
1437                  * but it's the best we can do. Writes and reads get
1438                  * combined.
1439                  */
1440                 [C(OP_READ)] = {
1441                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_DCACHE_ACCESS,
1442                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DCACHE_REFILL,
1443                 },
1444                 [C(OP_WRITE)] = {
1445                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_DCACHE_ACCESS,
1446                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DCACHE_REFILL,
1447                 },
1448                 [C(OP_PREFETCH)] = {
1449                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1450                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1451                 },
1452         },
1453         [C(L1I)] = {
1454                 [C(OP_READ)] = {
1455                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_L1_INST,
1456                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_L1_INST_MISS,
1457                 },
1458                 [C(OP_WRITE)] = {
1459                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_L1_INST,
1460                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_L1_INST_MISS,
1461                 },
1462                 [C(OP_PREFETCH)] = {
1463                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1464                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1465                 },
1466         },
1467         [C(LL)] = {
1468                 [C(OP_READ)] = {
1469                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_L2_ACCESS,
1470                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_L2_CACH_MISS,
1471                 },
1472                 [C(OP_WRITE)] = {
1473                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_L2_ACCESS,
1474                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_L2_CACH_MISS,
1475                 },
1476                 [C(OP_PREFETCH)] = {
1477                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1478                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1479                 },
1480         },
1481         [C(DTLB)] = {
1482                 /*
1483                  * Only ITLB misses and DTLB refills are supported.
1484                  * If users want the DTLB refill misses, a raw counter
1485                  * must be used.
1486                  */
1487                 [C(OP_READ)] = {
1488                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1489                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DTLB_REFILL,
1490                 },
1491                 [C(OP_WRITE)] = {
1492                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1493                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DTLB_REFILL,
1494                 },
1495                 [C(OP_PREFETCH)] = {
1496                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1497                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1498                 },
1499         },
1500         [C(ITLB)] = {
1501                 [C(OP_READ)] = {
1502                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1503                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_ITLB_MISS,
1504                 },
1505                 [C(OP_WRITE)] = {
1506                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1507                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_ITLB_MISS,
1508                 },
1509                 [C(OP_PREFETCH)] = {
1510                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1511                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1512                 },
1513         },
1514         [C(BPU)] = {
1515                 [C(OP_READ)] = {
1516                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_PC_WRITE,
1517                         [C(RESULT_MISS)]
1518                                         = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1519                 },
1520                 [C(OP_WRITE)] = {
1521                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_PC_WRITE,
1522                         [C(RESULT_MISS)]
1523                                         = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1524                 },
1525                 [C(OP_PREFETCH)] = {
1526                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1527                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1528                 },
1529         },
1530 };
1531
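/*
 * Illustrative sketch of how the table above is consumed (the generic
 * cache-map helper earlier in this file performs the equivalent lookup):
 * a PERF_TYPE_HW_CACHE config encodes a (cache, op, result) triple, so an
 * L1D read miss resolves as
 *
 *      armv7_a8_perf_cache_map[C(L1D)][C(OP_READ)][C(RESULT_MISS)]
 *              == ARMV7_PERFCTR_DCACHE_REFILL
 *
 * while entries set to CACHE_OP_UNSUPPORTED cause the event to be
 * rejected at open time.
 */
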
1532 /*
1533  * Cortex-A9 HW events mapping
1534  */
1535 static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = {
1536         [PERF_COUNT_HW_CPU_CYCLES]          = ARMV7_PERFCTR_CPU_CYCLES,
1537         [PERF_COUNT_HW_INSTRUCTIONS]        =
1538                                         ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE,
1539         [PERF_COUNT_HW_CACHE_REFERENCES]    = ARMV7_PERFCTR_COHERENT_LINE_HIT,
1540         [PERF_COUNT_HW_CACHE_MISSES]        = ARMV7_PERFCTR_COHERENT_LINE_MISS,
1541         [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
1542         [PERF_COUNT_HW_BRANCH_MISSES]       = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1543         [PERF_COUNT_HW_BUS_CYCLES]          = ARMV7_PERFCTR_CLOCK_CYCLES,
1544 };
1545
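/*
 * Note: the A9 mapping above uses the "instructions out of the rename
 * stage" event (0x68) for PERF_COUNT_HW_INSTRUCTIONS, presumably as the
 * closest available approximation of retired instructions on this core,
 * and the coherent-line hit/miss events as stand-ins for generic cache
 * references/misses.
 */
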
1546 static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
1547                                           [PERF_COUNT_HW_CACHE_OP_MAX]
1548                                           [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
1549         [C(L1D)] = {
1550                 /*
1551                  * The performance counters don't differentiate between read
1552                  * and write accesses/misses so this isn't strictly correct,
1553                  * but it's the best we can do. Writes and reads get
1554                  * combined.
1555                  */
1556                 [C(OP_READ)] = {
1557                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_DCACHE_ACCESS,
1558                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DCACHE_REFILL,
1559                 },
1560                 [C(OP_WRITE)] = {
1561                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_DCACHE_ACCESS,
1562                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DCACHE_REFILL,
1563                 },
1564                 [C(OP_PREFETCH)] = {
1565                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1566                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1567                 },
1568         },
1569         [C(L1I)] = {
1570                 [C(OP_READ)] = {
1571                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1572                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_IFETCH_MISS,
1573                 },
1574                 [C(OP_WRITE)] = {
1575                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1576                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_IFETCH_MISS,
1577                 },
1578                 [C(OP_PREFETCH)] = {
1579                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1580                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1581                 },
1582         },
1583         [C(LL)] = {
1584                 [C(OP_READ)] = {
1585                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1586                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1587                 },
1588                 [C(OP_WRITE)] = {
1589                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1590                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1591                 },
1592                 [C(OP_PREFETCH)] = {
1593                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1594                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1595                 },
1596         },
1597         [C(DTLB)] = {
1598                 /*
1599                  * Only ITLB misses and DTLB refills are supported.
1600                  * If users want the DTLB refill misses, a raw counter
1601                  * must be used.
1602                  */
1603                 [C(OP_READ)] = {
1604                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1605                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DTLB_REFILL,
1606                 },
1607                 [C(OP_WRITE)] = {
1608                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1609                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DTLB_REFILL,
1610                 },
1611                 [C(OP_PREFETCH)] = {
1612                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1613                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1614                 },
1615         },
1616         [C(ITLB)] = {
1617                 [C(OP_READ)] = {
1618                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1619                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_ITLB_MISS,
1620                 },
1621                 [C(OP_WRITE)] = {
1622                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1623                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_ITLB_MISS,
1624                 },
1625                 [C(OP_PREFETCH)] = {
1626                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1627                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1628                 },
1629         },
1630         [C(BPU)] = {
1631                 [C(OP_READ)] = {
1632                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_PC_WRITE,
1633                         [C(RESULT_MISS)]
1634                                         = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1635                 },
1636                 [C(OP_WRITE)] = {
1637                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_PC_WRITE,
1638                         [C(RESULT_MISS)]
1639                                         = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1640                 },
1641                 [C(OP_PREFETCH)] = {
1642                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1643                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1644                 },
1645         },
1646 };
1647
1648 /*
1649  * Perf Events counters
1650  */
1651 enum armv7_counters {
1652         ARMV7_CYCLE_COUNTER             = 1,    /* Cycle counter */
1653         ARMV7_COUNTER0                  = 2,    /* First event counter */
1654 };
1655
1656 /*
1657  * The cycle counter is ARMV7_CYCLE_COUNTER.
1658  * The first event counter is ARMV7_COUNTER0.
1659  * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1).
1660  */
1661 #define ARMV7_COUNTER_LAST      (ARMV7_COUNTER0 + armpmu->num_events - 1)
1662
1663 /*
1664  * ARMv7 low level PMNC access
1665  */
1666
1667 /*
1668  * Per-CPU PMNC: config reg
1669  */
1670 #define ARMV7_PMNC_E            (1 << 0) /* Enable all counters */
1671 #define ARMV7_PMNC_P            (1 << 1) /* Reset all counters */
1672 #define ARMV7_PMNC_C            (1 << 2) /* Cycle counter reset */
1673 #define ARMV7_PMNC_D            (1 << 3) /* CCNT counts every 64th cpu cycle */
1674 #define ARMV7_PMNC_X            (1 << 4) /* Export to ETM */
1675 #define ARMV7_PMNC_DP           (1 << 5) /* Disable CCNT if non-invasive debug */
1676 #define ARMV7_PMNC_N_SHIFT      11       /* Number of counters supported */
1677 #define ARMV7_PMNC_N_MASK       0x1f
1678 #define ARMV7_PMNC_MASK         0x3f     /* Mask for writable bits */
1679
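/*
 * Worked example (illustrative only, using a hypothetical PMNC value):
 * if the PMNC reads 0x41002000, the counter-count field is
 *
 *      (0x41002000 >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK == 4
 *
 * i.e. four event counters in addition to the cycle counter, while writes
 * only ever touch the low control bits covered by ARMV7_PMNC_MASK.
 */
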
1680 /*
1681  * Available counters
1682  */
1683 #define ARMV7_CNT0              0       /* First event counter */
1684 #define ARMV7_CCNT              31      /* Cycle counter */
1685
1686 /* Perf Event to low level counters mapping */
1687 #define ARMV7_EVENT_CNT_TO_CNTx (ARMV7_COUNTER0 - ARMV7_CNT0)
1688
1689 /*
1690  * CNTENS: counters enable reg
1691  */
1692 #define ARMV7_CNTENS_P(idx)     (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1693 #define ARMV7_CNTENS_C          (1 << ARMV7_CCNT)
1694
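/*
 * Illustrative sketch of the index translation used by the CNTENS/CNTENC/
 * INTENS/INTENC/FLAG macros: with ARMV7_COUNTER0 == 2 and ARMV7_CNT0 == 0,
 * ARMV7_EVENT_CNT_TO_CNTx is 2, so perf index 2 drives hardware counter
 * CNT0, index 3 drives CNT1, and so on:
 *
 *      ARMV7_CNTENS_P(2) == (1 << 0)    enable bit for CNT0
 *      ARMV7_CNTENS_P(3) == (1 << 1)    enable bit for CNT1
 *      ARMV7_CNTENS_C    == (1 << 31)   enable bit for the cycle counter
 */
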
1695 /*
1696  * CNTENC: counters disable reg
1697  */
1698 #define ARMV7_CNTENC_P(idx)     (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1699 #define ARMV7_CNTENC_C          (1 << ARMV7_CCNT)
1700
1701 /*
1702  * INTENS: counters overflow interrupt enable reg
1703  */
1704 #define ARMV7_INTENS_P(idx)     (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1705 #define ARMV7_INTENS_C          (1 << ARMV7_CCNT)
1706
1707 /*
1708  * INTENC: counters overflow interrupt disable reg
1709  */
1710 #define ARMV7_INTENC_P(idx)     (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1711 #define ARMV7_INTENC_C          (1 << ARMV7_CCNT)
1712
1713 /*
1714  * EVTSEL: Event selection reg
1715  */
1716 #define ARMV7_EVTSEL_MASK       0xff            /* Mask for writable bits */
1717
1718 /*
1719  * SELECT: Counter selection reg
1720  */
1721 #define ARMV7_SELECT_MASK       0x1f            /* Mask for writable bits */
1722
1723 /*
1724  * FLAG: counters overflow flag status reg
1725  */
1726 #define ARMV7_FLAG_P(idx)       (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1727 #define ARMV7_FLAG_C            (1 << ARMV7_CCNT)
1728 #define ARMV7_FLAG_MASK         0xffffffff      /* Mask for writable bits */
1729 #define ARMV7_OVERFLOWED_MASK   ARMV7_FLAG_MASK
1730
1731 static inline unsigned long armv7_pmnc_read(void)
1732 {
1733         u32 val;
1734         asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
1735         return val;
1736 }
1737
1738 static inline void armv7_pmnc_write(unsigned long val)
1739 {
1740         val &= ARMV7_PMNC_MASK;
1741         asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val));
1742 }
1743
1744 static inline int armv7_pmnc_has_overflowed(unsigned long pmnc)
1745 {
1746         return pmnc & ARMV7_OVERFLOWED_MASK;
1747 }
1748
1749 static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc,
1750                                         enum armv7_counters counter)
1751 {
1752         int ret = 0;
1753
1754         if (counter == ARMV7_CYCLE_COUNTER)
1755                 ret = pmnc & ARMV7_FLAG_C;
1756         else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST))
1757                 ret = pmnc & ARMV7_FLAG_P(counter);
1758         else
1759                 pr_err("CPU%u checking wrong counter %d overflow status\n",
1760                         smp_processor_id(), counter);
1761
1762         return ret;
1763 }
1764
1765 static inline int armv7_pmnc_select_counter(unsigned int idx)
1766 {
1767         u32 val;
1768
1769         if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) {
1770                 pr_err("CPU%u selecting wrong PMNC counter"
1771                         " %d\n", smp_processor_id(), idx);
1772                 return -1;
1773         }
1774
1775         val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK;
1776         asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val));
1777
1778         return idx;
1779 }
1780
1781 static inline u32 armv7pmu_read_counter(int idx)
1782 {
1783         unsigned long value = 0;
1784
1785         if (idx == ARMV7_CYCLE_COUNTER)
1786                 asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
1787         else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
1788                 if (armv7_pmnc_select_counter(idx) == idx)
1789                         asm volatile("mrc p15, 0, %0, c9, c13, 2"
1790                                      : "=r" (value));
1791         } else
1792                 pr_err("CPU%u reading wrong counter %d\n",
1793                         smp_processor_id(), idx);
1794
1795         return value;
1796 }
1797
1798 static inline void armv7pmu_write_counter(int idx, u32 value)
1799 {
1800         if (idx == ARMV7_CYCLE_COUNTER)
1801                 asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
1802         else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
1803                 if (armv7_pmnc_select_counter(idx) == idx)
1804                         asm volatile("mcr p15, 0, %0, c9, c13, 2"
1805                                      : : "r" (value));
1806         } else
1807                 pr_err("CPU%u writing wrong counter %d\n",
1808                         smp_processor_id(), idx);
1809 }
1810
1811 static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val)
1812 {
1813         if (armv7_pmnc_select_counter(idx) == idx) {
1814                 val &= ARMV7_EVTSEL_MASK;
1815                 asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
1816         }
1817 }
1818
1819 static inline u32 armv7_pmnc_enable_counter(unsigned int idx)
1820 {
1821         u32 val;
1822
1823         if ((idx != ARMV7_CYCLE_COUNTER) &&
1824             ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1825                 pr_err("CPU%u enabling wrong PMNC counter"
1826                         " %d\n", smp_processor_id(), idx);
1827                 return -1;
1828         }
1829
1830         if (idx == ARMV7_CYCLE_COUNTER)
1831                 val = ARMV7_CNTENS_C;
1832         else
1833                 val = ARMV7_CNTENS_P(idx);
1834
1835         asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));
1836
1837         return idx;
1838 }
1839
1840 static inline u32 armv7_pmnc_disable_counter(unsigned int idx)
1841 {
1842         u32 val;
1843
1845         if ((idx != ARMV7_CYCLE_COUNTER) &&
1846             ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1847                 pr_err("CPU%u disabling wrong PMNC counter"
1848                         " %d\n", smp_processor_id(), idx);
1849                 return -1;
1850         }
1851
1852         if (idx == ARMV7_CYCLE_COUNTER)
1853                 val = ARMV7_CNTENC_C;
1854         else
1855                 val = ARMV7_CNTENC_P(idx);
1856
1857         asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val));
1858
1859         return idx;
1860 }
1861
1862 static inline u32 armv7_pmnc_enable_intens(unsigned int idx)
1863 {
1864         u32 val;
1865
1866         if ((idx != ARMV7_CYCLE_COUNTER) &&
1867             ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1868                 pr_err("CPU%u enabling wrong PMNC counter"
1869                         " interrupt enable %d\n", smp_processor_id(), idx);
1870                 return -1;
1871         }
1872
1873         if (idx == ARMV7_CYCLE_COUNTER)
1874                 val = ARMV7_INTENS_C;
1875         else
1876                 val = ARMV7_INTENS_P(idx);
1877
1878         asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val));
1879
1880         return idx;
1881 }
1882
1883 static inline u32 armv7_pmnc_disable_intens(unsigned int idx)
1884 {
1885         u32 val;
1886
1887         if ((idx != ARMV7_CYCLE_COUNTER) &&
1888             ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1889                 pr_err("CPU%u disabling wrong PMNC counter"
1890                         " interrupt enable %d\n", smp_processor_id(), idx);
1891                 return -1;
1892         }
1893
1894         if (idx == ARMV7_CYCLE_COUNTER)
1895                 val = ARMV7_INTENC_C;
1896         else
1897                 val = ARMV7_INTENC_P(idx);
1898
1899         asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val));
1900
1901         return idx;
1902 }
1903
1904 static inline u32 armv7_pmnc_getreset_flags(void)
1905 {
1906         u32 val;
1907
1908         /* Read */
1909         asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
1910
1911         /* Write to clear flags */
1912         val &= ARMV7_FLAG_MASK;
1913         asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val));
1914
1915         return val;
1916 }
1917
1918 #ifdef DEBUG
1919 static void armv7_pmnc_dump_regs(void)
1920 {
1921         u32 val;
1922         unsigned int cnt;
1923
1924         printk(KERN_INFO "PMNC registers dump:\n");
1925
1926         asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
1927         printk(KERN_INFO "PMNC  =0x%08x\n", val);
1928
1929         asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val));
1930         printk(KERN_INFO "CNTENS=0x%08x\n", val);
1931
1932         asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val));
1933         printk(KERN_INFO "INTENS=0x%08x\n", val);
1934
1935         asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
1936         printk(KERN_INFO "FLAGS =0x%08x\n", val);
1937
1938         asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val));
1939         printk(KERN_INFO "SELECT=0x%08x\n", val);
1940
1941         asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
1942         printk(KERN_INFO "CCNT  =0x%08x\n", val);
1943
1944         for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) {
1945                 armv7_pmnc_select_counter(cnt);
1946                 asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
1947                 printk(KERN_INFO "CNT[%d] count =0x%08x\n",
1948                         cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
1949                 asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val));
1950                 printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n",
1951                         cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
1952         }
1953 }
1954 #endif
1955
1956 static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx)
1957 {
1958         unsigned long flags;
1959
1960         /*
1961          * Enable counter and interrupt, and set the counter to count
1962          * the event that we're interested in.
1963          */
1964         spin_lock_irqsave(&pmu_lock, flags);
1965
1966         /*
1967          * Disable counter
1968          */
1969         armv7_pmnc_disable_counter(idx);
1970
1971         /*
1972          * Set event (if destined for PMNx counters)
1973          * We don't need to set the event if it's a cycle count
1974          */
1975         if (idx != ARMV7_CYCLE_COUNTER)
1976                 armv7_pmnc_write_evtsel(idx, hwc->config_base);
1977
1978         /*
1979          * Enable interrupt for this counter
1980          */
1981         armv7_pmnc_enable_intens(idx);
1982
1983         /*
1984          * Enable counter
1985          */
1986         armv7_pmnc_enable_counter(idx);
1987
1988         spin_unlock_irqrestore(&pmu_lock, flags);
1989 }
1990
1991 static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx)
1992 {
1993         unsigned long flags;
1994
1995         /*
1996          * Disable counter and interrupt
1997          */
1998         spin_lock_irqsave(&pmu_lock, flags);
1999
2000         /*
2001          * Disable counter
2002          */
2003         armv7_pmnc_disable_counter(idx);
2004
2005         /*
2006          * Disable interrupt for this counter
2007          */
2008         armv7_pmnc_disable_intens(idx);
2009
2010         spin_unlock_irqrestore(&pmu_lock, flags);
2011 }
2012
2013 static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
2014 {
2015         unsigned long pmnc;
2016         struct perf_sample_data data;
2017         struct cpu_hw_events *cpuc;
2018         struct pt_regs *regs;
2019         int idx;
2020
2021         /*
2022          * Get and reset the IRQ flags
2023          */
2024         pmnc = armv7_pmnc_getreset_flags();
2025
2026         /*
2027          * Did an overflow occur?
2028          */
2029         if (!armv7_pmnc_has_overflowed(pmnc))
2030                 return IRQ_NONE;
2031
2032         /*
2033          * Handle the counter(s) overflow(s)
2034          */
2035         regs = get_irq_regs();
2036
2037         perf_sample_data_init(&data, 0);
2038
2039         cpuc = &__get_cpu_var(cpu_hw_events);
2040         for (idx = 0; idx <= armpmu->num_events; ++idx) {
2041                 struct perf_event *event = cpuc->events[idx];
2042                 struct hw_perf_event *hwc;
2043
2044                 if (!test_bit(idx, cpuc->active_mask))
2045                         continue;
2046
2047                 /*
2048                  * We have a single interrupt for all counters. Check that
2049                  * each counter has overflowed before we process it.
2050                  */
2051                 if (!armv7_pmnc_counter_has_overflowed(pmnc, idx))
2052                         continue;
2053
2054                 hwc = &event->hw;
2055                 armpmu_event_update(event, hwc, idx);
2056                 data.period = event->hw.last_period;
2057                 if (!armpmu_event_set_period(event, hwc, idx))
2058                         continue;
2059
2060                 if (perf_event_overflow(event, 0, &data, regs))
2061                         armpmu->disable(hwc, idx);
2062         }
2063
2064         /*
2065          * Handle the pending perf events.
2066          *
2067          * Note: this call *must* be run with interrupts disabled. For
2068          * platforms that can have the PMU interrupts raised as an NMI, this
2069          * will not work.
2070          */
2071         irq_work_run();
2072
2073         return IRQ_HANDLED;
2074 }
2075
2076 static void armv7pmu_start(void)
2077 {
2078         unsigned long flags;
2079
2080         spin_lock_irqsave(&pmu_lock, flags);
2081         /* Enable all counters */
2082         armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
2083         spin_unlock_irqrestore(&pmu_lock, flags);
2084 }
2085
2086 static void armv7pmu_stop(void)
2087 {
2088         unsigned long flags;
2089
2090         spin_lock_irqsave(&pmu_lock, flags);
2091         /* Disable all counters */
2092         armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
2093         spin_unlock_irqrestore(&pmu_lock, flags);
2094 }
2095
2096 static inline int armv7_a8_pmu_event_map(int config)
2097 {
2098         int mapping = armv7_a8_perf_map[config];
2099         if (HW_OP_UNSUPPORTED == mapping)
2100                 mapping = -EOPNOTSUPP;
2101         return mapping;
2102 }
2103
2104 static inline int armv7_a9_pmu_event_map(int config)
2105 {
2106         int mapping = armv7_a9_perf_map[config];
2107         if (HW_OP_UNSUPPORTED == mapping)
2108                 mapping = -EOPNOTSUPP;
2109         return mapping;
2110 }
2111
2112 static u64 armv7pmu_raw_event(u64 config)
2113 {
2114         return config & 0xff;
2115 }
2116
2117 static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc,
2118                                   struct hw_perf_event *event)
2119 {
2120         int idx;
2121
2122         /* Always place a cycle count event into the cycle counter. */
2123         if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) {
2124                 if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask))
2125                         return -EAGAIN;
2126
2127                 return ARMV7_CYCLE_COUNTER;
2128         } else {
2129                 /*
2130                  * For anything other than a cycle counter, try to use
2131                  * one of the event counters
2132                  */
2133                 for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) {
2134                         if (!test_and_set_bit(idx, cpuc->used_mask))
2135                                 return idx;
2136                 }
2137
2138                 /* The counters are all in use. */
2139                 return -EAGAIN;
2140         }
2141 }
2142
2143 static struct arm_pmu armv7pmu = {
2144         .handle_irq             = armv7pmu_handle_irq,
2145         .enable                 = armv7pmu_enable_event,
2146         .disable                = armv7pmu_disable_event,
2147         .raw_event              = armv7pmu_raw_event,
2148         .read_counter           = armv7pmu_read_counter,
2149         .write_counter          = armv7pmu_write_counter,
2150         .get_event_idx          = armv7pmu_get_event_idx,
2151         .start                  = armv7pmu_start,
2152         .stop                   = armv7pmu_stop,
2153         .max_period             = (1LLU << 32) - 1,
2154 };
2155
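/*
 * Note that .id, .event_map and .num_events are deliberately left unset in
 * the structure above: init_hw_perf_events() later in this file fills them
 * in once the exact ARMv7 implementation is known, e.g. for the Cortex-A8:
 *
 *      armv7pmu.id         = ARM_PERF_PMU_ID_CA8;
 *      armv7pmu.event_map  = armv7_a8_pmu_event_map;
 *      armv7pmu.num_events = armv7_reset_read_pmnc();
 */
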
2156 static u32 __init armv7_reset_read_pmnc(void)
2157 {
2158         u32 nb_cnt;
2159
2160         /* Initialize & Reset PMNC: C and P bits */
2161         armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);
2162
2163         /* Read the nb of CNTx counters supported from PMNC */
2164         nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK;
2165
2166         /* Add the CPU cycles counter and return */
2167         return nb_cnt + 1;
2168 }
2169
2170 /*
2171  * ARMv5 [xscale] Performance counter handling code.
2172  *
2173  * Based on xscale OProfile code.
2174  *
2175  * There are two variants of the xscale PMU that we support:
2176  *      - xscale1pmu: 2 event counters and a cycle counter
2177  *      - xscale2pmu: 4 event counters and a cycle counter
2178  * The two variants share event definitions, but have different
2179  * PMU structures.
2180  */
2181
2182 enum xscale_perf_types {
2183         XSCALE_PERFCTR_ICACHE_MISS              = 0x00,
2184         XSCALE_PERFCTR_ICACHE_NO_DELIVER        = 0x01,
2185         XSCALE_PERFCTR_DATA_STALL               = 0x02,
2186         XSCALE_PERFCTR_ITLB_MISS                = 0x03,
2187         XSCALE_PERFCTR_DTLB_MISS                = 0x04,
2188         XSCALE_PERFCTR_BRANCH                   = 0x05,
2189         XSCALE_PERFCTR_BRANCH_MISS              = 0x06,
2190         XSCALE_PERFCTR_INSTRUCTION              = 0x07,
2191         XSCALE_PERFCTR_DCACHE_FULL_STALL        = 0x08,
2192         XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09,
2193         XSCALE_PERFCTR_DCACHE_ACCESS            = 0x0A,
2194         XSCALE_PERFCTR_DCACHE_MISS              = 0x0B,
2195         XSCALE_PERFCTR_DCACHE_WRITE_BACK        = 0x0C,
2196         XSCALE_PERFCTR_PC_CHANGED               = 0x0D,
2197         XSCALE_PERFCTR_BCU_REQUEST              = 0x10,
2198         XSCALE_PERFCTR_BCU_FULL                 = 0x11,
2199         XSCALE_PERFCTR_BCU_DRAIN                = 0x12,
2200         XSCALE_PERFCTR_BCU_ECC_NO_ELOG          = 0x14,
2201         XSCALE_PERFCTR_BCU_1_BIT_ERR            = 0x15,
2202         XSCALE_PERFCTR_RMW                      = 0x16,
2203         /* XSCALE_PERFCTR_CCNT is not hardware defined */
2204         XSCALE_PERFCTR_CCNT                     = 0xFE,
2205         XSCALE_PERFCTR_UNUSED                   = 0xFF,
2206 };
2207
2208 enum xscale_counters {
2209         XSCALE_CYCLE_COUNTER    = 1,
2210         XSCALE_COUNTER0,
2211         XSCALE_COUNTER1,
2212         XSCALE_COUNTER2,
2213         XSCALE_COUNTER3,
2214 };
2215
2216 static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = {
2217         [PERF_COUNT_HW_CPU_CYCLES]          = XSCALE_PERFCTR_CCNT,
2218         [PERF_COUNT_HW_INSTRUCTIONS]        = XSCALE_PERFCTR_INSTRUCTION,
2219         [PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
2220         [PERF_COUNT_HW_CACHE_MISSES]        = HW_OP_UNSUPPORTED,
2221         [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH,
2222         [PERF_COUNT_HW_BRANCH_MISSES]       = XSCALE_PERFCTR_BRANCH_MISS,
2223         [PERF_COUNT_HW_BUS_CYCLES]          = HW_OP_UNSUPPORTED,
2224 };
2225
2226 static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
2227                                            [PERF_COUNT_HW_CACHE_OP_MAX]
2228                                            [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
2229         [C(L1D)] = {
2230                 [C(OP_READ)] = {
2231                         [C(RESULT_ACCESS)]      = XSCALE_PERFCTR_DCACHE_ACCESS,
2232                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_DCACHE_MISS,
2233                 },
2234                 [C(OP_WRITE)] = {
2235                         [C(RESULT_ACCESS)]      = XSCALE_PERFCTR_DCACHE_ACCESS,
2236                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_DCACHE_MISS,
2237                 },
2238                 [C(OP_PREFETCH)] = {
2239                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2240                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2241                 },
2242         },
2243         [C(L1I)] = {
2244                 [C(OP_READ)] = {
2245                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2246                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_ICACHE_MISS,
2247                 },
2248                 [C(OP_WRITE)] = {
2249                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2250                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_ICACHE_MISS,
2251                 },
2252                 [C(OP_PREFETCH)] = {
2253                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2254                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2255                 },
2256         },
2257         [C(LL)] = {
2258                 [C(OP_READ)] = {
2259                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2260                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2261                 },
2262                 [C(OP_WRITE)] = {
2263                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2264                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2265                 },
2266                 [C(OP_PREFETCH)] = {
2267                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2268                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2269                 },
2270         },
2271         [C(DTLB)] = {
2272                 [C(OP_READ)] = {
2273                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2274                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_DTLB_MISS,
2275                 },
2276                 [C(OP_WRITE)] = {
2277                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2278                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_DTLB_MISS,
2279                 },
2280                 [C(OP_PREFETCH)] = {
2281                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2282                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2283                 },
2284         },
2285         [C(ITLB)] = {
2286                 [C(OP_READ)] = {
2287                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2288                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_ITLB_MISS,
2289                 },
2290                 [C(OP_WRITE)] = {
2291                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2292                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_ITLB_MISS,
2293                 },
2294                 [C(OP_PREFETCH)] = {
2295                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2296                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2297                 },
2298         },
2299         [C(BPU)] = {
2300                 [C(OP_READ)] = {
2301                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2302                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2303                 },
2304                 [C(OP_WRITE)] = {
2305                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2306                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2307                 },
2308                 [C(OP_PREFETCH)] = {
2309                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2310                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2311                 },
2312         },
2313 };
2314
2315 #define XSCALE_PMU_ENABLE       0x001
2316 #define XSCALE_PMN_RESET        0x002
2317 #define XSCALE_CCNT_RESET       0x004
2318 #define XSCALE_PMU_RESET        (XSCALE_CCNT_RESET | XSCALE_PMN_RESET)
2319 #define XSCALE_PMU_CNT64        0x008
2320
2321 static inline int
2322 xscalepmu_event_map(int config)
2323 {
2324         int mapping = xscale_perf_map[config];
2325         if (HW_OP_UNSUPPORTED == mapping)
2326                 mapping = -EOPNOTSUPP;
2327         return mapping;
2328 }
2329
2330 static u64
2331 xscalepmu_raw_event(u64 config)
2332 {
2333         return config & 0xff;
2334 }
2335
2336 #define XSCALE1_OVERFLOWED_MASK 0x700
2337 #define XSCALE1_CCOUNT_OVERFLOW 0x400
2338 #define XSCALE1_COUNT0_OVERFLOW 0x100
2339 #define XSCALE1_COUNT1_OVERFLOW 0x200
2340 #define XSCALE1_CCOUNT_INT_EN   0x040
2341 #define XSCALE1_COUNT0_INT_EN   0x010
2342 #define XSCALE1_COUNT1_INT_EN   0x020
2343 #define XSCALE1_COUNT0_EVT_SHFT 12
2344 #define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT)
2345 #define XSCALE1_COUNT1_EVT_SHFT 20
2346 #define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT)
2347
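/*
 * Worked example (illustrative only): to count XSCALE_PERFCTR_INSTRUCTION
 * (0x07) on XSCALE_COUNTER0 with its overflow interrupt enabled,
 * xscale1pmu_enable_event() below builds
 *
 *      evt = (0x07 << XSCALE1_COUNT0_EVT_SHFT) | XSCALE1_COUNT0_INT_EN
 *          = 0x7010
 *
 * and read-modify-writes it into the PMNC under pmu_lock.
 */
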
2348 static inline u32
2349 xscale1pmu_read_pmnc(void)
2350 {
2351         u32 val;
2352         asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val));
2353         return val;
2354 }
2355
2356 static inline void
2357 xscale1pmu_write_pmnc(u32 val)
2358 {
2359         /* upper 4 bits and bits 7 and 11 are write-as-0 */
2360         val &= 0xffff77f;
2361         asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val));
2362 }
2363
2364 static inline int
2365 xscale1_pmnc_counter_has_overflowed(unsigned long pmnc,
2366                                         enum xscale_counters counter)
2367 {
2368         int ret = 0;
2369
2370         switch (counter) {
2371         case XSCALE_CYCLE_COUNTER:
2372                 ret = pmnc & XSCALE1_CCOUNT_OVERFLOW;
2373                 break;
2374         case XSCALE_COUNTER0:
2375                 ret = pmnc & XSCALE1_COUNT0_OVERFLOW;
2376                 break;
2377         case XSCALE_COUNTER1:
2378                 ret = pmnc & XSCALE1_COUNT1_OVERFLOW;
2379                 break;
2380         default:
2381                 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
2382         }
2383
2384         return ret;
2385 }
2386
2387 static irqreturn_t
2388 xscale1pmu_handle_irq(int irq_num, void *dev)
2389 {
2390         unsigned long pmnc;
2391         struct perf_sample_data data;
2392         struct cpu_hw_events *cpuc;
2393         struct pt_regs *regs;
2394         int idx;
2395
2396         /*
2397          * NOTE: there's an A stepping erratum stating that if an overflow
2398          *       bit is already set and another overflow occurs, the
2399          *       previous overflow bit gets cleared. There's no workaround;
2400          *       this is fixed in the B stepping and later.
2401          */
2402         pmnc = xscale1pmu_read_pmnc();
2403
2404         /*
2405          * Write the value back to clear the overflow flags. Overflow
2406          * flags remain in pmnc for use below. We also disable the PMU
2407          * while we process the interrupt.
2408          */
2409         xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
2410
2411         if (!(pmnc & XSCALE1_OVERFLOWED_MASK))
2412                 return IRQ_NONE;
2413
2414         regs = get_irq_regs();
2415
2416         perf_sample_data_init(&data, 0);
2417
2418         cpuc = &__get_cpu_var(cpu_hw_events);
2419         for (idx = 0; idx <= armpmu->num_events; ++idx) {
2420                 struct perf_event *event = cpuc->events[idx];
2421                 struct hw_perf_event *hwc;
2422
2423                 if (!test_bit(idx, cpuc->active_mask))
2424                         continue;
2425
2426                 if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
2427                         continue;
2428
2429                 hwc = &event->hw;
2430                 armpmu_event_update(event, hwc, idx);
2431                 data.period = event->hw.last_period;
2432                 if (!armpmu_event_set_period(event, hwc, idx))
2433                         continue;
2434
2435                 if (perf_event_overflow(event, 0, &data, regs))
2436                         armpmu->disable(hwc, idx);
2437         }
2438
2439         irq_work_run();
2440
2441         /*
2442          * Re-enable the PMU.
2443          */
2444         pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE;
2445         xscale1pmu_write_pmnc(pmnc);
2446
2447         return IRQ_HANDLED;
2448 }
2449
2450 static void
2451 xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx)
2452 {
2453         unsigned long val, mask, evt, flags;
2454
2455         switch (idx) {
2456         case XSCALE_CYCLE_COUNTER:
2457                 mask = 0;
2458                 evt = XSCALE1_CCOUNT_INT_EN;
2459                 break;
2460         case XSCALE_COUNTER0:
2461                 mask = XSCALE1_COUNT0_EVT_MASK;
2462                 evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) |
2463                         XSCALE1_COUNT0_INT_EN;
2464                 break;
2465         case XSCALE_COUNTER1:
2466                 mask = XSCALE1_COUNT1_EVT_MASK;
2467                 evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) |
2468                         XSCALE1_COUNT1_INT_EN;
2469                 break;
2470         default:
2471                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2472                 return;
2473         }
2474
2475         spin_lock_irqsave(&pmu_lock, flags);
2476         val = xscale1pmu_read_pmnc();
2477         val &= ~mask;
2478         val |= evt;
2479         xscale1pmu_write_pmnc(val);
2480         spin_unlock_irqrestore(&pmu_lock, flags);
2481 }
2482
2483 static void
2484 xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx)
2485 {
2486         unsigned long val, mask, evt, flags;
2487
2488         switch (idx) {
2489         case XSCALE_CYCLE_COUNTER:
2490                 mask = XSCALE1_CCOUNT_INT_EN;
2491                 evt = 0;
2492                 break;
2493         case XSCALE_COUNTER0:
2494                 mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK;
2495                 evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT;
2496                 break;
2497         case XSCALE_COUNTER1:
2498                 mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK;
2499                 evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT;
2500                 break;
2501         default:
2502                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2503                 return;
2504         }
2505
2506         spin_lock_irqsave(&pmu_lock, flags);
2507         val = xscale1pmu_read_pmnc();
2508         val &= ~mask;
2509         val |= evt;
2510         xscale1pmu_write_pmnc(val);
2511         spin_unlock_irqrestore(&pmu_lock, flags);
2512 }
2513
2514 static int
2515 xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc,
2516                         struct hw_perf_event *event)
2517 {
2518         if (XSCALE_PERFCTR_CCNT == event->config_base) {
2519                 if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask))
2520                         return -EAGAIN;
2521
2522                 return XSCALE_CYCLE_COUNTER;
2523         } else {
2524                 if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) {
2525                         return XSCALE_COUNTER1;
2526                 }
2527
2528                 if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) {
2529                         return XSCALE_COUNTER0;
2530                 }
2531
2532                 return -EAGAIN;
2533         }
2534 }
2535
2536 static void
2537 xscale1pmu_start(void)
2538 {
2539         unsigned long flags, val;
2540
2541         spin_lock_irqsave(&pmu_lock, flags);
2542         val = xscale1pmu_read_pmnc();
2543         val |= XSCALE_PMU_ENABLE;
2544         xscale1pmu_write_pmnc(val);
2545         spin_unlock_irqrestore(&pmu_lock, flags);
2546 }
2547
2548 static void
2549 xscale1pmu_stop(void)
2550 {
2551         unsigned long flags, val;
2552
2553         spin_lock_irqsave(&pmu_lock, flags);
2554         val = xscale1pmu_read_pmnc();
2555         val &= ~XSCALE_PMU_ENABLE;
2556         xscale1pmu_write_pmnc(val);
2557         spin_unlock_irqrestore(&pmu_lock, flags);
2558 }
2559
2560 static inline u32
2561 xscale1pmu_read_counter(int counter)
2562 {
2563         u32 val = 0;
2564
2565         switch (counter) {
2566         case XSCALE_CYCLE_COUNTER:
2567                 asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val));
2568                 break;
2569         case XSCALE_COUNTER0:
2570                 asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val));
2571                 break;
2572         case XSCALE_COUNTER1:
2573                 asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val));
2574                 break;
2575         }
2576
2577         return val;
2578 }
2579
2580 static inline void
2581 xscale1pmu_write_counter(int counter, u32 val)
2582 {
2583         switch (counter) {
2584         case XSCALE_CYCLE_COUNTER:
2585                 asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val));
2586                 break;
2587         case XSCALE_COUNTER0:
2588                 asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val));
2589                 break;
2590         case XSCALE_COUNTER1:
2591                 asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val));
2592                 break;
2593         }
2594 }
2595
2596 static const struct arm_pmu xscale1pmu = {
2597         .id             = ARM_PERF_PMU_ID_XSCALE1,
2598         .handle_irq     = xscale1pmu_handle_irq,
2599         .enable         = xscale1pmu_enable_event,
2600         .disable        = xscale1pmu_disable_event,
2601         .event_map      = xscalepmu_event_map,
2602         .raw_event      = xscalepmu_raw_event,
2603         .read_counter   = xscale1pmu_read_counter,
2604         .write_counter  = xscale1pmu_write_counter,
2605         .get_event_idx  = xscale1pmu_get_event_idx,
2606         .start          = xscale1pmu_start,
2607         .stop           = xscale1pmu_stop,
2608         .num_events     = 3,
2609         .max_period     = (1LLU << 32) - 1,
2610 };
2611
2612 #define XSCALE2_OVERFLOWED_MASK 0x01f
2613 #define XSCALE2_CCOUNT_OVERFLOW 0x001
2614 #define XSCALE2_COUNT0_OVERFLOW 0x002
2615 #define XSCALE2_COUNT1_OVERFLOW 0x004
2616 #define XSCALE2_COUNT2_OVERFLOW 0x008
2617 #define XSCALE2_COUNT3_OVERFLOW 0x010
2618 #define XSCALE2_CCOUNT_INT_EN   0x001
2619 #define XSCALE2_COUNT0_INT_EN   0x002
2620 #define XSCALE2_COUNT1_INT_EN   0x004
2621 #define XSCALE2_COUNT2_INT_EN   0x008
2622 #define XSCALE2_COUNT3_INT_EN   0x010
2623 #define XSCALE2_COUNT0_EVT_SHFT 0
2624 #define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT)
2625 #define XSCALE2_COUNT1_EVT_SHFT 8
2626 #define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT)
2627 #define XSCALE2_COUNT2_EVT_SHFT 16
2628 #define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT)
2629 #define XSCALE2_COUNT3_EVT_SHFT 24
2630 #define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT)
2631
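/*
 * Unlike xscale1, where the event selectors, interrupt enables and
 * overflow flags are all packed into the PMNC, xscale2 keeps them in
 * separate cp14 registers: the event select register (c8), the interrupt
 * enable register (c4) and the overflow flag register (c5), accessed by
 * the helpers below. The *_OVERFLOW and *_INT_EN masks above therefore
 * describe bits in those registers rather than PMNC bit positions.
 */
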
2632 static inline u32
2633 xscale2pmu_read_pmnc(void)
2634 {
2635         u32 val;
2636         asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val));
2637         /* bits 1-2 and 4-23 are read-unpredictable */
2638         return val & 0xff000009;
2639 }
2640
2641 static inline void
2642 xscale2pmu_write_pmnc(u32 val)
2643 {
2644         /* bits 4-23 are write-as-0, 24-31 are write ignored */
2645         val &= 0xf;
2646         asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val));
2647 }
2648
2649 static inline u32
2650 xscale2pmu_read_overflow_flags(void)
2651 {
2652         u32 val;
2653         asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val));
2654         return val;
2655 }
2656
2657 static inline void
2658 xscale2pmu_write_overflow_flags(u32 val)
2659 {
2660         asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val));
2661 }
2662
2663 static inline u32
2664 xscale2pmu_read_event_select(void)
2665 {
2666         u32 val;
2667         asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val));
2668         return val;
2669 }
2670
2671 static inline void
2672 xscale2pmu_write_event_select(u32 val)
2673 {
2674         asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val));
2675 }
2676
2677 static inline u32
2678 xscale2pmu_read_int_enable(void)
2679 {
2680         u32 val;
2681         asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val));
2682         return val;
2683 }
2684
2685 static void
2686 xscale2pmu_write_int_enable(u32 val)
2687 {
2688         asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val));
2689 }
2690
2691 static inline int
2692 xscale2_pmnc_counter_has_overflowed(unsigned long of_flags,
2693                                         enum xscale_counters counter)
2694 {
2695         int ret = 0;
2696
2697         switch (counter) {
2698         case XSCALE_CYCLE_COUNTER:
2699                 ret = of_flags & XSCALE2_CCOUNT_OVERFLOW;
2700                 break;
2701         case XSCALE_COUNTER0:
2702                 ret = of_flags & XSCALE2_COUNT0_OVERFLOW;
2703                 break;
2704         case XSCALE_COUNTER1:
2705                 ret = of_flags & XSCALE2_COUNT1_OVERFLOW;
2706                 break;
2707         case XSCALE_COUNTER2:
2708                 ret = of_flags & XSCALE2_COUNT2_OVERFLOW;
2709                 break;
2710         case XSCALE_COUNTER3:
2711                 ret = of_flags & XSCALE2_COUNT3_OVERFLOW;
2712                 break;
2713         default:
2714                 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
2715         }
2716
2717         return ret;
2718 }
2719
2720 static irqreturn_t
2721 xscale2pmu_handle_irq(int irq_num, void *dev)
2722 {
2723         unsigned long pmnc, of_flags;
2724         struct perf_sample_data data;
2725         struct cpu_hw_events *cpuc;
2726         struct pt_regs *regs;
2727         int idx;
2728
2729         /* Disable the PMU. */
2730         pmnc = xscale2pmu_read_pmnc();
2731         xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
2732
2733         /* Check the overflow flag register. */
2734         of_flags = xscale2pmu_read_overflow_flags();
2735         if (!(of_flags & XSCALE2_OVERFLOWED_MASK))
2736                 return IRQ_NONE;
2737
2738         /* Clear the overflow bits. */
2739         xscale2pmu_write_overflow_flags(of_flags);
2740
2741         regs = get_irq_regs();
2742
2743         perf_sample_data_init(&data, 0);
2744
2745         cpuc = &__get_cpu_var(cpu_hw_events);
2746         for (idx = 0; idx <= armpmu->num_events; ++idx) {
2747                 struct perf_event *event = cpuc->events[idx];
2748                 struct hw_perf_event *hwc;
2749
2750                 if (!test_bit(idx, cpuc->active_mask))
2751                         continue;
2752
2753                 if (!xscale2_pmnc_counter_has_overflowed(of_flags, idx))
2754                         continue;
2755
2756                 hwc = &event->hw;
2757                 armpmu_event_update(event, hwc, idx);
2758                 data.period = event->hw.last_period;
2759                 if (!armpmu_event_set_period(event, hwc, idx))
2760                         continue;
2761
2762                 if (perf_event_overflow(event, 0, &data, regs))
2763                         armpmu->disable(hwc, idx);
2764         }
2765
2766         irq_work_run();
2767
2768         /*
2769          * Re-enable the PMU.
2770          */
2771         pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE;
2772         xscale2pmu_write_pmnc(pmnc);
2773
2774         return IRQ_HANDLED;
2775 }
2776
2777 static void
2778 xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
2779 {
2780         unsigned long flags, ien, evtsel;
2781
2782         ien = xscale2pmu_read_int_enable();
2783         evtsel = xscale2pmu_read_event_select();
2784
2785         switch (idx) {
2786         case XSCALE_CYCLE_COUNTER:
2787                 ien |= XSCALE2_CCOUNT_INT_EN;
2788                 break;
2789         case XSCALE_COUNTER0:
2790                 ien |= XSCALE2_COUNT0_INT_EN;
2791                 evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
2792                 evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT;
2793                 break;
2794         case XSCALE_COUNTER1:
2795                 ien |= XSCALE2_COUNT1_INT_EN;
2796                 evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
2797                 evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT;
2798                 break;
2799         case XSCALE_COUNTER2:
2800                 ien |= XSCALE2_COUNT2_INT_EN;
2801                 evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
2802                 evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT;
2803                 break;
2804         case XSCALE_COUNTER3:
2805                 ien |= XSCALE2_COUNT3_INT_EN;
2806                 evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
2807                 evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT;
2808                 break;
2809         default:
2810                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2811                 return;
2812         }
2813
2814         spin_lock_irqsave(&pmu_lock, flags);
2815         xscale2pmu_write_event_select(evtsel);
2816         xscale2pmu_write_int_enable(ien);
2817         spin_unlock_irqrestore(&pmu_lock, flags);
2818 }
2819
2820 static void
2821 xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
2822 {
2823         unsigned long flags, ien, evtsel;
2824
2825         ien = xscale2pmu_read_int_enable();
2826         evtsel = xscale2pmu_read_event_select();
2827
2828         switch (idx) {
2829         case XSCALE_CYCLE_COUNTER:
2830                 ien &= ~XSCALE2_CCOUNT_INT_EN;
2831                 break;
2832         case XSCALE_COUNTER0:
2833                 ien &= ~XSCALE2_COUNT0_INT_EN;
2834                 evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
2835                 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT;
2836                 break;
2837         case XSCALE_COUNTER1:
2838                 ien &= ~XSCALE2_COUNT1_INT_EN;
2839                 evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
2840                 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT;
2841                 break;
2842         case XSCALE_COUNTER2:
2843                 ien &= ~XSCALE2_COUNT2_INT_EN;
2844                 evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
2845                 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT;
2846                 break;
2847         case XSCALE_COUNTER3:
2848                 ien &= ~XSCALE2_COUNT3_INT_EN;
2849                 evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
2850                 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT;
2851                 break;
2852         default:
2853                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2854                 return;
2855         }
2856
2857         spin_lock_irqsave(&pmu_lock, flags);
2858         xscale2pmu_write_event_select(evtsel);
2859         xscale2pmu_write_int_enable(ien);
2860         spin_unlock_irqrestore(&pmu_lock, flags);
2861 }
2862
2863 static int
2864 xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc,
2865                         struct hw_perf_event *event)
2866 {
2867         int idx = xscale1pmu_get_event_idx(cpuc, event);
2868         if (idx >= 0)
2869                 goto out;
2870
2871         if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask))
2872                 idx = XSCALE_COUNTER3;
2873         else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask))
2874                 idx = XSCALE_COUNTER2;
2875 out:
2876         return idx;
2877 }
2878
2879 static void
2880 xscale2pmu_start(void)
2881 {
2882         unsigned long flags, val;
2883
2884         spin_lock_irqsave(&pmu_lock, flags);
2885         val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
2886         val |= XSCALE_PMU_ENABLE;
2887         xscale2pmu_write_pmnc(val);
2888         spin_unlock_irqrestore(&pmu_lock, flags);
2889 }
2890
2891 static void
2892 xscale2pmu_stop(void)
2893 {
2894         unsigned long flags, val;
2895
2896         spin_lock_irqsave(&pmu_lock, flags);
2897         val = xscale2pmu_read_pmnc();
2898         val &= ~XSCALE_PMU_ENABLE;
2899         xscale2pmu_write_pmnc(val);
2900         spin_unlock_irqrestore(&pmu_lock, flags);
2901 }
2902
2903 static inline u32
2904 xscale2pmu_read_counter(int counter)
2905 {
2906         u32 val = 0;
2907
2908         switch (counter) {
2909         case XSCALE_CYCLE_COUNTER:
2910                 asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val));
2911                 break;
2912         case XSCALE_COUNTER0:
2913                 asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val));
2914                 break;
2915         case XSCALE_COUNTER1:
2916                 asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val));
2917                 break;
2918         case XSCALE_COUNTER2:
2919                 asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val));
2920                 break;
2921         case XSCALE_COUNTER3:
2922                 asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val));
2923                 break;
2924         }
2925
2926         return val;
2927 }
2928
2929 static inline void
2930 xscale2pmu_write_counter(int counter, u32 val)
2931 {
2932         switch (counter) {
2933         case XSCALE_CYCLE_COUNTER:
2934                 asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val));
2935                 break;
2936         case XSCALE_COUNTER0:
2937                 asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val));
2938                 break;
2939         case XSCALE_COUNTER1:
2940                 asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val));
2941                 break;
2942         case XSCALE_COUNTER2:
2943                 asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val));
2944                 break;
2945         case XSCALE_COUNTER3:
2946                 asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val));
2947                 break;
2948         }
2949 }
2950
2951 static const struct arm_pmu xscale2pmu = {
2952         .id             = ARM_PERF_PMU_ID_XSCALE2,
2953         .handle_irq     = xscale2pmu_handle_irq,
2954         .enable         = xscale2pmu_enable_event,
2955         .disable        = xscale2pmu_disable_event,
2956         .event_map      = xscalepmu_event_map,
2957         .raw_event      = xscalepmu_raw_event,
2958         .read_counter   = xscale2pmu_read_counter,
2959         .write_counter  = xscale2pmu_write_counter,
2960         .get_event_idx  = xscale2pmu_get_event_idx,
2961         .start          = xscale2pmu_start,
2962         .stop           = xscale2pmu_stop,
2963         .num_events     = 5,
2964         .max_period     = (1LLU << 32) - 1,
2965 };
2966
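     /*
      * Boot-time probe: decode the implementer and part number fields of
      * the CPUID register to select the matching PMU driver and cache map,
      * then register the PMU with the perf core.
      */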
2967 static int __init
2968 init_hw_perf_events(void)
2969 {
2970         unsigned long cpuid = read_cpuid_id();
2971         unsigned long implementor = (cpuid & 0xFF000000) >> 24;
2972         unsigned long part_number = (cpuid & 0xFFF0);
2973
2974         /* ARM Ltd CPUs. */
2975         if (0x41 == implementor) {
2976                 switch (part_number) {
2977                 case 0xB360:    /* ARM1136 */
2978                 case 0xB560:    /* ARM1156 */
2979                 case 0xB760:    /* ARM1176 */
2980                         armpmu = &armv6pmu;
2981                         memcpy(armpmu_perf_cache_map, armv6_perf_cache_map,
2982                                         sizeof(armv6_perf_cache_map));
2983                         break;
2984                 case 0xB020:    /* ARM11mpcore */
2985                         armpmu = &armv6mpcore_pmu;
2986                         memcpy(armpmu_perf_cache_map,
2987                                armv6mpcore_perf_cache_map,
2988                                sizeof(armv6mpcore_perf_cache_map));
2989                         break;
2990                 case 0xC080:    /* Cortex-A8 */
2991                         armv7pmu.id = ARM_PERF_PMU_ID_CA8;
2992                         memcpy(armpmu_perf_cache_map, armv7_a8_perf_cache_map,
2993                                 sizeof(armv7_a8_perf_cache_map));
2994                         armv7pmu.event_map = armv7_a8_pmu_event_map;
2995                         armpmu = &armv7pmu;
2996
2997                         /* Reset PMNC and read the number of CNTx
2998                            counters supported. */
2999                         armv7pmu.num_events = armv7_reset_read_pmnc();
3000                         break;
3001                 case 0xC090:    /* Cortex-A9 */
3002                         armv7pmu.id = ARM_PERF_PMU_ID_CA9;
3003                         memcpy(armpmu_perf_cache_map, armv7_a9_perf_cache_map,
3004                                 sizeof(armv7_a9_perf_cache_map));
3005                         armv7pmu.event_map = armv7_a9_pmu_event_map;
3006                         armpmu = &armv7pmu;
3007
3008                         /* Reset PMNC and read the number of CNTx
3009                            counters supported. */
3010                         armv7pmu.num_events = armv7_reset_read_pmnc();
3011                         break;
3012                 }
3013         /* Intel CPUs [xscale]. */
3014         } else if (0x69 == implementor) {
3015                 part_number = (cpuid >> 13) & 0x7;
3016                 switch (part_number) {
3017                 case 1:
3018                         armpmu = &xscale1pmu;
3019                         memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
3020                                         sizeof(xscale_perf_cache_map));
3021                         break;
3022                 case 2:
3023                         armpmu = &xscale2pmu;
3024                         memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
3025                                         sizeof(xscale_perf_cache_map));
3026                         break;
3027                 }
3028         }
3029
3030         if (armpmu) {
3031                 pr_info("enabled with %s PMU driver, %d counters available\n",
3032                                 arm_pmu_names[armpmu->id], armpmu->num_events);
3033         } else {
3034                 pr_info("no hardware support available\n");
3035         }
3036
3037         perf_pmu_register(&pmu);
3038
3039         return 0;
3040 }
3041 arch_initcall(init_hw_perf_events);
3042
3043 /*
3044  * Callchain handling code.
3045  */
3046
3047 /*
3048  * The registers we're interested in are at the end of the variable
3049  * length saved register structure. The fp points at the end of this
3050  * structure so the address of this struct is:
3051  * (struct frame_tail *)(xxx->fp)-1
3052  *
3053  * This code has been adapted from the ARM OProfile support.
3054  */
3055 struct frame_tail {
3056         struct frame_tail   *fp;
3057         unsigned long       sp;
3058         unsigned long       lr;
3059 } __attribute__((packed));
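     /*
      * Worked example of the layout described above: if a user frame
      * pointer holds the value fp, the saved {fp, sp, lr} words occupy the
      * three words immediately below it, so the first tail to read is
      * (struct frame_tail *)fp - 1; each buftail.fp fetched from user space
      * then points just past the next (higher-addressed) frame's tail.
      */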
3060
3061 /*
3062  * Get the return address for a single stackframe and return a pointer to the
3063  * next frame tail.
3064  */
3065 static struct frame_tail *
3066 user_backtrace(struct frame_tail *tail,
3067                struct perf_callchain_entry *entry)
3068 {
3069         struct frame_tail buftail;
3070
3071         /* Also check accessibility of one struct frame_tail beyond */
3072         if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
3073                 return NULL;
3074         if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail)))
3075                 return NULL;
3076
3077         perf_callchain_store(entry, buftail.lr);
3078
3079         /*
3080          * Frame pointers should strictly progress back up the stack
3081          * (towards higher addresses).
3082          */
3083         if (tail >= buftail.fp)
3084                 return NULL;
3085
3086         return buftail.fp - 1;
3087 }
3088
3089 void
3090 perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
3091 {
3092         struct frame_tail *tail;
3093
3095         tail = (struct frame_tail *)regs->ARM_fp - 1;
3096
3097         while (tail && !((unsigned long)tail & 0x3))
3098                 tail = user_backtrace(tail, entry);
3099 }
3100
3101 /*
3102  * Gets called by walk_stackframe() for every stackframe. This will be called
3103  * whilst unwinding the stackframe and is like a subroutine return so we use
3104  * the PC.
3105  */
3106 static int
3107 callchain_trace(struct stackframe *fr,
3108                 void *data)
3109 {
3110         struct perf_callchain_entry *entry = data;
3111         perf_callchain_store(entry, fr->pc);
3112         return 0;
3113 }
3114
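     /*
      * Kernel-side callchain: seed a stackframe from the trapped registers
      * and let walk_stackframe() invoke callchain_trace() for every frame.
      */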
3115 void
3116 perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
3117 {
3118         struct stackframe fr;
3119
3120         fr.fp = regs->ARM_fp;
3121         fr.sp = regs->ARM_sp;
3122         fr.lr = regs->ARM_lr;
3123         fr.pc = regs->ARM_pc;
3124         walk_stackframe(&fr, callchain_trace, entry);
3125 }
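
For reference, a minimal user-space sketch (not part of this file) of how an
event serviced by this back-end is typically requested through the
perf_event_open(2) syscall. The chosen event (CPU cycles), the placeholder
workload, and the error handling are illustrative only.

/* Build on the target with: gcc -o cycles cycles.c */
#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Thin wrapper: glibc does not provide one for perf_event_open(). */
static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
                            int cpu, int group_fd, unsigned long flags)
{
        return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
        struct perf_event_attr attr;
        uint64_t count;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.type = PERF_TYPE_HARDWARE;
        attr.size = sizeof(attr);
        attr.config = PERF_COUNT_HW_CPU_CYCLES; /* mapped by armpmu->event_map() */
        attr.disabled = 1;

        fd = perf_event_open(&attr, 0, -1, -1, 0); /* this task, any CPU */
        if (fd < 0) {
                perror("perf_event_open");
                return 1;
        }

        ioctl(fd, PERF_EVENT_IOC_RESET, 0);
        ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
        /* ... the workload being measured would run here ... */
        ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

        if (read(fd, &count, sizeof(count)) == sizeof(count))
                printf("cycles: %llu\n", (unsigned long long)count);

        close(fd);
        return 0;
}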