ARM: perf: consolidate common PMU behaviour
[pandora-kernel.git] / arch/arm/kernel/perf_event.c
1 #undef DEBUG
2
3 /*
4  * ARM performance counter support.
5  *
6  * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
7  *
8  * ARMv7 support: Jean Pihet <jpihet@mvista.com>
9  * 2010 (c) MontaVista Software, LLC.
10  *
11  * This code is based on the sparc64 perf event code, which is in turn based
12  * on the x86 code. Callchain code is based on the ARM OProfile backtrace
13  * code.
14  */
15 #define pr_fmt(fmt) "hw perfevents: " fmt
16
17 #include <linux/interrupt.h>
18 #include <linux/kernel.h>
19 #include <linux/module.h>
20 #include <linux/perf_event.h>
21 #include <linux/platform_device.h>
22 #include <linux/spinlock.h>
23 #include <linux/uaccess.h>
24
25 #include <asm/cputype.h>
26 #include <asm/irq.h>
27 #include <asm/irq_regs.h>
28 #include <asm/pmu.h>
29 #include <asm/stacktrace.h>
30
31 static struct platform_device *pmu_device;
32
33 /*
34  * Hardware lock to serialize accesses to PMU registers. Needed for the
35  * read/modify/write sequences.
36  */
37 DEFINE_SPINLOCK(pmu_lock);
38
39 /*
40  * ARMv6 supports a maximum of 3 events, starting from index 1. If we add
41  * another platform that supports more, we need to increase this to be the
42  * largest of all platforms.
43  *
44  * ARMv7 supports up to 32 events:
45  *  cycle counter CCNT + 31 event counters CNT0..30.
46  *  Cortex-A8 has 1+4 counters, Cortex-A9 has 1+6 counters.
47  */
48 #define ARMPMU_MAX_HWEVENTS             33
49
50 /* The events for a given CPU. */
51 struct cpu_hw_events {
52         /*
53          * The events that are active on the CPU for the given index. Index 0
54          * is reserved.
55          */
56         struct perf_event       *events[ARMPMU_MAX_HWEVENTS];
57
58         /*
59          * A 1 bit for an index indicates that the counter is being used for
60          * an event. A 0 means that the counter can be used.
61          */
62         unsigned long           used_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
63
64         /*
65          * A 1 bit for an index indicates that the counter is actively being
66          * used.
67          */
68         unsigned long           active_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
69 };
70 DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
71
72 /* PMU names. */
73 static const char *arm_pmu_names[] = {
74         [ARM_PERF_PMU_ID_XSCALE1] = "xscale1",
75         [ARM_PERF_PMU_ID_XSCALE2] = "xscale2",
76         [ARM_PERF_PMU_ID_V6]      = "v6",
77         [ARM_PERF_PMU_ID_V6MP]    = "v6mpcore",
78         [ARM_PERF_PMU_ID_CA8]     = "ARMv7 Cortex-A8",
79         [ARM_PERF_PMU_ID_CA9]     = "ARMv7 Cortex-A9",
80 };
81
82 struct arm_pmu {
83         enum arm_perf_pmu_ids id;
84         irqreturn_t     (*handle_irq)(int irq_num, void *dev);
85         void            (*enable)(struct hw_perf_event *evt, int idx);
86         void            (*disable)(struct hw_perf_event *evt, int idx);
87         int             (*get_event_idx)(struct cpu_hw_events *cpuc,
88                                          struct hw_perf_event *hwc);
89         u32             (*read_counter)(int idx);
90         void            (*write_counter)(int idx, u32 val);
91         void            (*start)(void);
92         void            (*stop)(void);
93         const unsigned  (*cache_map)[PERF_COUNT_HW_CACHE_MAX]
94                                     [PERF_COUNT_HW_CACHE_OP_MAX]
95                                     [PERF_COUNT_HW_CACHE_RESULT_MAX];
96         const unsigned  (*event_map)[PERF_COUNT_HW_MAX];
97         u32             raw_event_mask;
98         int             num_events;
99         u64             max_period;
100 };
101
102 /* Set at runtime when we know what CPU type we are. */
103 static const struct arm_pmu *armpmu;
104
105 enum arm_perf_pmu_ids
106 armpmu_get_pmu_id(void)
107 {
108         int id = -ENODEV;
109
110         if (armpmu != NULL)
111                 id = armpmu->id;
112
113         return id;
114 }
115 EXPORT_SYMBOL_GPL(armpmu_get_pmu_id);
116
117 int
118 armpmu_get_max_events(void)
119 {
120         int max_events = 0;
121
122         if (armpmu != NULL)
123                 max_events = armpmu->num_events;
124
125         return max_events;
126 }
127 EXPORT_SYMBOL_GPL(armpmu_get_max_events);
128
129 int perf_num_counters(void)
130 {
131         return armpmu_get_max_events();
132 }
133 EXPORT_SYMBOL_GPL(perf_num_counters);
134
135 #define HW_OP_UNSUPPORTED               0xFFFF
136
137 #define C(_x) \
138         PERF_COUNT_HW_CACHE_##_x
139
140 #define CACHE_OP_UNSUPPORTED            0xFFFF
141
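    /*
     * Generic cache events encode the cache type, operation and result in
     * the low three bytes of attr.config; unpack them and look up the
     * CPU-specific counter number in the PMU's cache_map.
     */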
142 static int
143 armpmu_map_cache_event(u64 config)
144 {
145         unsigned int cache_type, cache_op, cache_result, ret;
146
147         cache_type = (config >>  0) & 0xff;
148         if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
149                 return -EINVAL;
150
151         cache_op = (config >>  8) & 0xff;
152         if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
153                 return -EINVAL;
154
155         cache_result = (config >> 16) & 0xff;
156         if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
157                 return -EINVAL;
158
159         ret = (int)(*armpmu->cache_map)[cache_type][cache_op][cache_result];
160
161         if (ret == CACHE_OP_UNSUPPORTED)
162                 return -ENOENT;
163
164         return ret;
165 }
166
167 static int
168 armpmu_map_event(u64 config)
169 {
170         int mapping = (*armpmu->event_map)[config];
171         return mapping == HW_OP_UNSUPPORTED ? -EOPNOTSUPP : mapping;
172 }
173
174 static int
175 armpmu_map_raw_event(u64 config)
176 {
177         return (int)(config & armpmu->raw_event_mask);
178 }
179
180 static int
181 armpmu_event_set_period(struct perf_event *event,
182                         struct hw_perf_event *hwc,
183                         int idx)
184 {
185         s64 left = local64_read(&hwc->period_left);
186         s64 period = hwc->sample_period;
187         int ret = 0;
188
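            /*
             * period_left counts down towards the next overflow. If we have
             * fallen at least a full period behind (e.g. after the event was
             * throttled), start a fresh period rather than trying to catch up.
             */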
189         if (unlikely(left <= -period)) {
190                 left = period;
191                 local64_set(&hwc->period_left, left);
192                 hwc->last_period = period;
193                 ret = 1;
194         }
195
196         if (unlikely(left <= 0)) {
197                 left += period;
198                 local64_set(&hwc->period_left, left);
199                 hwc->last_period = period;
200                 ret = 1;
201         }
202
203         if (left > (s64)armpmu->max_period)
204                 left = armpmu->max_period;
205
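            /*
             * Program the counter with -left so that it overflows (and raises
             * an interrupt) after 'left' more events.
             */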
206         local64_set(&hwc->prev_count, (u64)-left);
207
208         armpmu->write_counter(idx, (u64)(-left) & 0xffffffff);
209
210         perf_event_update_userpage(event);
211
212         return ret;
213 }
214
215 static u64
216 armpmu_event_update(struct perf_event *event,
217                     struct hw_perf_event *hwc,
218                     int idx)
219 {
220         int shift = 64 - 32;
221         s64 prev_raw_count, new_raw_count;
222         u64 delta;
223
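            /*
             * Fold the hardware counter into prev_count atomically; retry if
             * an interrupt handler updated prev_count underneath us.
             */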
224 again:
225         prev_raw_count = local64_read(&hwc->prev_count);
226         new_raw_count = armpmu->read_counter(idx);
227
228         if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
229                              new_raw_count) != prev_raw_count)
230                 goto again;
231
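            /*
             * The counters are only 32 bits wide; shifting up by (64 - 32) and
             * back down discards the high bits so the delta is correct even if
             * the counter wrapped between reads.
             */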
232         delta = (new_raw_count << shift) - (prev_raw_count << shift);
233         delta >>= shift;
234
235         local64_add(delta, &event->count);
236         local64_sub(delta, &hwc->period_left);
237
238         return new_raw_count;
239 }
240
241 static void
242 armpmu_read(struct perf_event *event)
243 {
244         struct hw_perf_event *hwc = &event->hw;
245
246         /* Don't read disabled counters! */
247         if (hwc->idx < 0)
248                 return;
249
250         armpmu_event_update(event, hwc, hwc->idx);
251 }
252
253 static void
254 armpmu_stop(struct perf_event *event, int flags)
255 {
256         struct hw_perf_event *hwc = &event->hw;
257
258         if (!armpmu)
259                 return;
260
261         /*
262          * ARM pmu always has to update the counter, so ignore
263          * PERF_EF_UPDATE, see comments in armpmu_start().
264          */
265         if (!(hwc->state & PERF_HES_STOPPED)) {
266                 armpmu->disable(hwc, hwc->idx);
267                 barrier(); /* why? */
268                 armpmu_event_update(event, hwc, hwc->idx);
269                 hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
270         }
271 }
272
273 static void
274 armpmu_start(struct perf_event *event, int flags)
275 {
276         struct hw_perf_event *hwc = &event->hw;
277
278         if (!armpmu)
279                 return;
280
281         /*
282          * ARM pmu always has to reprogram the period, so ignore
283          * PERF_EF_RELOAD, see the comment below.
284          */
285         if (flags & PERF_EF_RELOAD)
286                 WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
287
288         hwc->state = 0;
289         /*
290          * Set the period again. Some counters can't be stopped, so when we
291          * were stopped we simply disabled the IRQ source and the counter
292          * may have been left counting. If we don't do this step then we may
293          * get an interrupt too soon or *way* too late if the overflow has
294          * happened since disabling.
295          */
296         armpmu_event_set_period(event, hwc, hwc->idx);
297         armpmu->enable(hwc, hwc->idx);
298 }
299
300 static void
301 armpmu_del(struct perf_event *event, int flags)
302 {
303         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
304         struct hw_perf_event *hwc = &event->hw;
305         int idx = hwc->idx;
306
307         WARN_ON(idx < 0);
308
309         clear_bit(idx, cpuc->active_mask);
310         armpmu_stop(event, PERF_EF_UPDATE);
311         cpuc->events[idx] = NULL;
312         clear_bit(idx, cpuc->used_mask);
313
314         perf_event_update_userpage(event);
315 }
316
317 static int
318 armpmu_add(struct perf_event *event, int flags)
319 {
320         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
321         struct hw_perf_event *hwc = &event->hw;
322         int idx;
323         int err = 0;
324
325         perf_pmu_disable(event->pmu);
326
327         /* If we don't have space for the counter then finish early. */
328         idx = armpmu->get_event_idx(cpuc, hwc);
329         if (idx < 0) {
330                 err = idx;
331                 goto out;
332         }
333
334         /*
335          * If there is an event in the counter we are going to use then make
336          * sure it is disabled.
337          */
338         event->hw.idx = idx;
339         armpmu->disable(hwc, idx);
340         cpuc->events[idx] = event;
341         set_bit(idx, cpuc->active_mask);
342
343         hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
344         if (flags & PERF_EF_START)
345                 armpmu_start(event, PERF_EF_RELOAD);
346
347         /* Propagate our changes to the userspace mapping. */
348         perf_event_update_userpage(event);
349
350 out:
351         perf_pmu_enable(event->pmu);
352         return err;
353 }
354
355 static struct pmu pmu;
356
357 static int
358 validate_event(struct cpu_hw_events *cpuc,
359                struct perf_event *event)
360 {
361         struct hw_perf_event fake_event = event->hw;
362
363         if (event->pmu != &pmu || event->state <= PERF_EVENT_STATE_OFF)
364                 return 1;
365
366         return armpmu->get_event_idx(cpuc, &fake_event) >= 0;
367 }
368
369 static int
370 validate_group(struct perf_event *event)
371 {
372         struct perf_event *sibling, *leader = event->group_leader;
373         struct cpu_hw_events fake_pmu;
374
375         memset(&fake_pmu, 0, sizeof(fake_pmu));
376
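            /*
             * Simulate scheduling the leader and all of its siblings onto an
             * empty PMU; if any of them cannot get a counter, the group can
             * never be scheduled together and must be rejected.
             */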
377         if (!validate_event(&fake_pmu, leader))
378                 return -ENOSPC;
379
380         list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
381                 if (!validate_event(&fake_pmu, sibling))
382                         return -ENOSPC;
383         }
384
385         if (!validate_event(&fake_pmu, event))
386                 return -ENOSPC;
387
388         return 0;
389 }
390
391 static int
392 armpmu_reserve_hardware(void)
393 {
394         int i, err = -ENODEV, irq;
395
396         pmu_device = reserve_pmu(ARM_PMU_DEVICE_CPU);
397         if (IS_ERR(pmu_device)) {
398                 pr_warning("unable to reserve pmu\n");
399                 return PTR_ERR(pmu_device);
400         }
401
402         init_pmu(ARM_PMU_DEVICE_CPU);
403
404         if (pmu_device->num_resources < 1) {
405                 pr_err("no irqs for PMUs defined\n");
406                 return -ENODEV;
407         }
408
409         for (i = 0; i < pmu_device->num_resources; ++i) {
410                 irq = platform_get_irq(pmu_device, i);
411                 if (irq < 0)
412                         continue;
413
414                 err = request_irq(irq, armpmu->handle_irq,
415                                   IRQF_DISABLED | IRQF_NOBALANCING,
416                                   "armpmu", NULL);
417                 if (err) {
418                         pr_warning("unable to request IRQ%d for ARM perf "
419                                 "counters\n", irq);
420                         break;
421                 }
422         }
423
424         if (err) {
425                 for (i = i - 1; i >= 0; --i) {
426                         irq = platform_get_irq(pmu_device, i);
427                         if (irq >= 0)
428                                 free_irq(irq, NULL);
429                 }
430                 release_pmu(pmu_device);
431                 pmu_device = NULL;
432         }
433
434         return err;
435 }
436
437 static void
438 armpmu_release_hardware(void)
439 {
440         int i, irq;
441
442         for (i = pmu_device->num_resources - 1; i >= 0; --i) {
443                 irq = platform_get_irq(pmu_device, i);
444                 if (irq >= 0)
445                         free_irq(irq, NULL);
446         }
447         armpmu->stop();
448
449         release_pmu(pmu_device);
450         pmu_device = NULL;
451 }
452
453 static atomic_t active_events = ATOMIC_INIT(0);
454 static DEFINE_MUTEX(pmu_reserve_mutex);
455
456 static void
457 hw_perf_event_destroy(struct perf_event *event)
458 {
459         if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) {
460                 armpmu_release_hardware();
461                 mutex_unlock(&pmu_reserve_mutex);
462         }
463 }
464
465 static int
466 __hw_perf_event_init(struct perf_event *event)
467 {
468         struct hw_perf_event *hwc = &event->hw;
469         int mapping, err;
470
471         /* Decode the generic type into an ARM event identifier. */
472         if (PERF_TYPE_HARDWARE == event->attr.type) {
473                 mapping = armpmu_map_event(event->attr.config);
474         } else if (PERF_TYPE_HW_CACHE == event->attr.type) {
475                 mapping = armpmu_map_cache_event(event->attr.config);
476         } else if (PERF_TYPE_RAW == event->attr.type) {
477                 mapping = armpmu_map_raw_event(event->attr.config);
478         } else {
479                 pr_debug("event type %x not supported\n", event->attr.type);
480                 return -EOPNOTSUPP;
481         }
482
483         if (mapping < 0) {
484                 pr_debug("event %x:%llx not supported\n", event->attr.type,
485                          event->attr.config);
486                 return mapping;
487         }
488
489         /*
490          * Check whether we need to exclude the counter from certain modes.
491          * The ARM performance counters are on all of the time so if someone
492          * has asked us for some excludes then we have to fail.
493          */
494         if (event->attr.exclude_kernel || event->attr.exclude_user ||
495             event->attr.exclude_hv || event->attr.exclude_idle) {
496                 pr_debug("ARM performance counters do not support "
497                          "mode exclusion\n");
498                 return -EPERM;
499         }
500
501         /*
502          * We don't assign an index until we actually place the event onto
503          * hardware. Use -1 to signify that we haven't decided where to put it
504          * yet. For SMP systems, each core has its own PMU so we can't do any
505          * clever allocation or constraints checking at this point.
506          */
507         hwc->idx = -1;
508
509         /*
510          * Store the event encoding into the config_base field. config and
511          * event_base are unused as the only 2 things we need to know are
512          * the event mapping and the counter to use. The counter to use is
513          * also the index, and the config_base is the event type.
514          */
515         hwc->config_base            = (unsigned long)mapping;
516         hwc->config                 = 0;
517         hwc->event_base             = 0;
518
519         if (!hwc->sample_period) {
520                 hwc->sample_period  = armpmu->max_period;
521                 hwc->last_period    = hwc->sample_period;
522                 local64_set(&hwc->period_left, hwc->sample_period);
523         }
524
525         err = 0;
526         if (event->group_leader != event) {
527                 err = validate_group(event);
528                 if (err)
529                         return -EINVAL;
530         }
531
532         return err;
533 }
534
535 static int armpmu_event_init(struct perf_event *event)
536 {
537         int err = 0;
538
539         switch (event->attr.type) {
540         case PERF_TYPE_RAW:
541         case PERF_TYPE_HARDWARE:
542         case PERF_TYPE_HW_CACHE:
543                 break;
544
545         default:
546                 return -ENOENT;
547         }
548
549         if (!armpmu)
550                 return -ENODEV;
551
552         event->destroy = hw_perf_event_destroy;
553
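            /*
             * Fast path: if other events already hold the PMU hardware, just
             * take another reference. Otherwise take the mutex and reserve the
             * hardware (PMU device and IRQs) for the first active event.
             */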
554         if (!atomic_inc_not_zero(&active_events)) {
555                 if (atomic_read(&active_events) > armpmu->num_events) {
556                         atomic_dec(&active_events);
557                         return -ENOSPC;
558                 }
559
560                 mutex_lock(&pmu_reserve_mutex);
561                 if (atomic_read(&active_events) == 0) {
562                         err = armpmu_reserve_hardware();
563                 }
564
565                 if (!err)
566                         atomic_inc(&active_events);
567                 mutex_unlock(&pmu_reserve_mutex);
568         }
569
570         if (err)
571                 return err;
572
573         err = __hw_perf_event_init(event);
574         if (err)
575                 hw_perf_event_destroy(event);
576
577         return err;
578 }
579
580 static void armpmu_enable(struct pmu *pmu)
581 {
582         /* Enable all of the perf events on hardware. */
583         int idx;
584         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
585
586         if (!armpmu)
587                 return;
588
589         for (idx = 0; idx <= armpmu->num_events; ++idx) {
590                 struct perf_event *event = cpuc->events[idx];
591
592                 if (!event)
593                         continue;
594
595                 armpmu->enable(&event->hw, idx);
596         }
597
598         armpmu->start();
599 }
600
601 static void armpmu_disable(struct pmu *pmu)
602 {
603         if (armpmu)
604                 armpmu->stop();
605 }
606
607 static struct pmu pmu = {
608         .pmu_enable     = armpmu_enable,
609         .pmu_disable    = armpmu_disable,
610         .event_init     = armpmu_event_init,
611         .add            = armpmu_add,
612         .del            = armpmu_del,
613         .start          = armpmu_start,
614         .stop           = armpmu_stop,
615         .read           = armpmu_read,
616 };
617
618 /*
619  * ARMv6 Performance counter handling code.
620  *
621  * ARMv6 has 2 configurable performance counters and a single cycle counter.
622  * They all share a single reset bit but can be written to zero so we can use
623  * that for a reset.
624  *
625  * The counters can't be individually enabled or disabled so when we remove
626  * one event and replace it with another we could get spurious counts from the
627  * wrong event. However, we can take advantage of the fact that the
628  * performance counters can export events to the event bus, and the event bus
629  * itself can be monitored. This requires that we *don't* export the events to
630  * the event bus. The procedure for disabling a configurable counter is:
631  *      - change the counter to count the ETMEXTOUT[0] signal (0x20). This
632  *        effectively stops the counter from counting.
633  *      - disable the counter's interrupt generation (each counter has its
634  *        own interrupt enable bit).
635  * Once stopped, the counter value can be written as 0 to reset.
636  *
637  * To enable a counter:
638  *      - enable the counter's interrupt generation.
639  *      - set the new event type.
640  *
641  * Note: the dedicated cycle counter only counts cycles and can't be
642  * enabled/disabled independently of the others. When we want to disable the
643  * cycle counter, we have to just disable the interrupt reporting and start
644  * ignoring that counter. When re-enabling, we have to reset the value and
645  * enable the interrupt.
646  */
647
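    /*
     * Note: ARMV6_PERFCTR_NOP (0x20) below is the ETMEXTOUT[0] event used to
     * park a configurable counter when disabling it, as described above.
     */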
648 enum armv6_perf_types {
649         ARMV6_PERFCTR_ICACHE_MISS           = 0x0,
650         ARMV6_PERFCTR_IBUF_STALL            = 0x1,
651         ARMV6_PERFCTR_DDEP_STALL            = 0x2,
652         ARMV6_PERFCTR_ITLB_MISS             = 0x3,
653         ARMV6_PERFCTR_DTLB_MISS             = 0x4,
654         ARMV6_PERFCTR_BR_EXEC               = 0x5,
655         ARMV6_PERFCTR_BR_MISPREDICT         = 0x6,
656         ARMV6_PERFCTR_INSTR_EXEC            = 0x7,
657         ARMV6_PERFCTR_DCACHE_HIT            = 0x9,
658         ARMV6_PERFCTR_DCACHE_ACCESS         = 0xA,
659         ARMV6_PERFCTR_DCACHE_MISS           = 0xB,
660         ARMV6_PERFCTR_DCACHE_WBACK          = 0xC,
661         ARMV6_PERFCTR_SW_PC_CHANGE          = 0xD,
662         ARMV6_PERFCTR_MAIN_TLB_MISS         = 0xF,
663         ARMV6_PERFCTR_EXPL_D_ACCESS         = 0x10,
664         ARMV6_PERFCTR_LSU_FULL_STALL        = 0x11,
665         ARMV6_PERFCTR_WBUF_DRAINED          = 0x12,
666         ARMV6_PERFCTR_CPU_CYCLES            = 0xFF,
667         ARMV6_PERFCTR_NOP                   = 0x20,
668 };
669
670 enum armv6_counters {
671         ARMV6_CYCLE_COUNTER = 1,
672         ARMV6_COUNTER0,
673         ARMV6_COUNTER1,
674 };
675
676 /*
677  * The hardware events that we support. We do support cache operations but
678  * we have Harvard caches and no way to combine instruction and data
679  * accesses/misses in hardware.
680  */
681 static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
682         [PERF_COUNT_HW_CPU_CYCLES]          = ARMV6_PERFCTR_CPU_CYCLES,
683         [PERF_COUNT_HW_INSTRUCTIONS]        = ARMV6_PERFCTR_INSTR_EXEC,
684         [PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
685         [PERF_COUNT_HW_CACHE_MISSES]        = HW_OP_UNSUPPORTED,
686         [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
687         [PERF_COUNT_HW_BRANCH_MISSES]       = ARMV6_PERFCTR_BR_MISPREDICT,
688         [PERF_COUNT_HW_BUS_CYCLES]          = HW_OP_UNSUPPORTED,
689 };
690
691 static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
692                                           [PERF_COUNT_HW_CACHE_OP_MAX]
693                                           [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
694         [C(L1D)] = {
695                 /*
696                  * The performance counters don't differentiate between read
697                  * and write accesses/misses so this isn't strictly correct,
698                  * but it's the best we can do. Writes and reads get
699                  * combined.
700                  */
701                 [C(OP_READ)] = {
702                         [C(RESULT_ACCESS)]      = ARMV6_PERFCTR_DCACHE_ACCESS,
703                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_DCACHE_MISS,
704                 },
705                 [C(OP_WRITE)] = {
706                         [C(RESULT_ACCESS)]      = ARMV6_PERFCTR_DCACHE_ACCESS,
707                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_DCACHE_MISS,
708                 },
709                 [C(OP_PREFETCH)] = {
710                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
711                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
712                 },
713         },
714         [C(L1I)] = {
715                 [C(OP_READ)] = {
716                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
717                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_ICACHE_MISS,
718                 },
719                 [C(OP_WRITE)] = {
720                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
721                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_ICACHE_MISS,
722                 },
723                 [C(OP_PREFETCH)] = {
724                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
725                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
726                 },
727         },
728         [C(LL)] = {
729                 [C(OP_READ)] = {
730                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
731                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
732                 },
733                 [C(OP_WRITE)] = {
734                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
735                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
736                 },
737                 [C(OP_PREFETCH)] = {
738                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
739                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
740                 },
741         },
742         [C(DTLB)] = {
743                 /*
744                  * The ARM performance counters can count micro DTLB misses,
745                  * micro ITLB misses and main TLB misses. There isn't an event
746                  * for TLB misses, so use the micro misses here and if users
747                  * want the main TLB misses they can use a raw counter.
748                  */
749                 [C(OP_READ)] = {
750                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
751                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_DTLB_MISS,
752                 },
753                 [C(OP_WRITE)] = {
754                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
755                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_DTLB_MISS,
756                 },
757                 [C(OP_PREFETCH)] = {
758                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
759                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
760                 },
761         },
762         [C(ITLB)] = {
763                 [C(OP_READ)] = {
764                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
765                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_ITLB_MISS,
766                 },
767                 [C(OP_WRITE)] = {
768                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
769                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_ITLB_MISS,
770                 },
771                 [C(OP_PREFETCH)] = {
772                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
773                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
774                 },
775         },
776         [C(BPU)] = {
777                 [C(OP_READ)] = {
778                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
779                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
780                 },
781                 [C(OP_WRITE)] = {
782                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
783                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
784                 },
785                 [C(OP_PREFETCH)] = {
786                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
787                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
788                 },
789         },
790 };
791
792 enum armv6mpcore_perf_types {
793         ARMV6MPCORE_PERFCTR_ICACHE_MISS     = 0x0,
794         ARMV6MPCORE_PERFCTR_IBUF_STALL      = 0x1,
795         ARMV6MPCORE_PERFCTR_DDEP_STALL      = 0x2,
796         ARMV6MPCORE_PERFCTR_ITLB_MISS       = 0x3,
797         ARMV6MPCORE_PERFCTR_DTLB_MISS       = 0x4,
798         ARMV6MPCORE_PERFCTR_BR_EXEC         = 0x5,
799         ARMV6MPCORE_PERFCTR_BR_NOTPREDICT   = 0x6,
800         ARMV6MPCORE_PERFCTR_BR_MISPREDICT   = 0x7,
801         ARMV6MPCORE_PERFCTR_INSTR_EXEC      = 0x8,
802         ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA,
803         ARMV6MPCORE_PERFCTR_DCACHE_RDMISS   = 0xB,
804         ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC,
805         ARMV6MPCORE_PERFCTR_DCACHE_WRMISS   = 0xD,
806         ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE,
807         ARMV6MPCORE_PERFCTR_SW_PC_CHANGE    = 0xF,
808         ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS   = 0x10,
809         ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11,
810         ARMV6MPCORE_PERFCTR_LSU_FULL_STALL  = 0x12,
811         ARMV6MPCORE_PERFCTR_WBUF_DRAINED    = 0x13,
812         ARMV6MPCORE_PERFCTR_CPU_CYCLES      = 0xFF,
813 };
814
815 /*
816  * The hardware events that we support. We do support cache operations but
817  * we have Harvard caches and no way to combine instruction and data
818  * accesses/misses in hardware.
819  */
820 static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
821         [PERF_COUNT_HW_CPU_CYCLES]          = ARMV6MPCORE_PERFCTR_CPU_CYCLES,
822         [PERF_COUNT_HW_INSTRUCTIONS]        = ARMV6MPCORE_PERFCTR_INSTR_EXEC,
823         [PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
824         [PERF_COUNT_HW_CACHE_MISSES]        = HW_OP_UNSUPPORTED,
825         [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC,
826         [PERF_COUNT_HW_BRANCH_MISSES]       = ARMV6MPCORE_PERFCTR_BR_MISPREDICT,
827         [PERF_COUNT_HW_BUS_CYCLES]          = HW_OP_UNSUPPORTED,
828 };
829
830 static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
831                                         [PERF_COUNT_HW_CACHE_OP_MAX]
832                                         [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
833         [C(L1D)] = {
834                 [C(OP_READ)] = {
835                         [C(RESULT_ACCESS)]  =
836                                 ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
837                         [C(RESULT_MISS)]    =
838                                 ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
839                 },
840                 [C(OP_WRITE)] = {
841                         [C(RESULT_ACCESS)]  =
842                                 ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
843                         [C(RESULT_MISS)]    =
844                                 ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
845                 },
846                 [C(OP_PREFETCH)] = {
847                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
848                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
849                 },
850         },
851         [C(L1I)] = {
852                 [C(OP_READ)] = {
853                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
854                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
855                 },
856                 [C(OP_WRITE)] = {
857                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
858                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
859                 },
860                 [C(OP_PREFETCH)] = {
861                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
862                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
863                 },
864         },
865         [C(LL)] = {
866                 [C(OP_READ)] = {
867                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
868                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
869                 },
870                 [C(OP_WRITE)] = {
871                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
872                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
873                 },
874                 [C(OP_PREFETCH)] = {
875                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
876                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
877                 },
878         },
879         [C(DTLB)] = {
880                 /*
881                  * The ARM performance counters can count micro DTLB misses,
882                  * micro ITLB misses and main TLB misses. There isn't an event
883                  * for TLB misses, so use the micro misses here and if users
884                  * want the main TLB misses they can use a raw counter.
885                  */
886                 [C(OP_READ)] = {
887                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
888                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_DTLB_MISS,
889                 },
890                 [C(OP_WRITE)] = {
891                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
892                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_DTLB_MISS,
893                 },
894                 [C(OP_PREFETCH)] = {
895                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
896                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
897                 },
898         },
899         [C(ITLB)] = {
900                 [C(OP_READ)] = {
901                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
902                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ITLB_MISS,
903                 },
904                 [C(OP_WRITE)] = {
905                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
906                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ITLB_MISS,
907                 },
908                 [C(OP_PREFETCH)] = {
909                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
910                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
911                 },
912         },
913         [C(BPU)] = {
914                 [C(OP_READ)] = {
915                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
916                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
917                 },
918                 [C(OP_WRITE)] = {
919                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
920                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
921                 },
922                 [C(OP_PREFETCH)] = {
923                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
924                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
925                 },
926         },
927 };
928
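    /*
     * The ARMv6 performance monitor control register lives in CP15 c15, c12;
     * these helpers read and write it as a whole.
     */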
929 static inline unsigned long
930 armv6_pmcr_read(void)
931 {
932         u32 val;
933         asm volatile("mrc   p15, 0, %0, c15, c12, 0" : "=r"(val));
934         return val;
935 }
936
937 static inline void
938 armv6_pmcr_write(unsigned long val)
939 {
940         asm volatile("mcr   p15, 0, %0, c15, c12, 0" : : "r"(val));
941 }
942
943 #define ARMV6_PMCR_ENABLE               (1 << 0)
944 #define ARMV6_PMCR_CTR01_RESET          (1 << 1)
945 #define ARMV6_PMCR_CCOUNT_RESET         (1 << 2)
946 #define ARMV6_PMCR_CCOUNT_DIV           (1 << 3)
947 #define ARMV6_PMCR_COUNT0_IEN           (1 << 4)
948 #define ARMV6_PMCR_COUNT1_IEN           (1 << 5)
949 #define ARMV6_PMCR_CCOUNT_IEN           (1 << 6)
950 #define ARMV6_PMCR_COUNT0_OVERFLOW      (1 << 8)
951 #define ARMV6_PMCR_COUNT1_OVERFLOW      (1 << 9)
952 #define ARMV6_PMCR_CCOUNT_OVERFLOW      (1 << 10)
953 #define ARMV6_PMCR_EVT_COUNT0_SHIFT     20
954 #define ARMV6_PMCR_EVT_COUNT0_MASK      (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
955 #define ARMV6_PMCR_EVT_COUNT1_SHIFT     12
956 #define ARMV6_PMCR_EVT_COUNT1_MASK      (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)
957
958 #define ARMV6_PMCR_OVERFLOWED_MASK \
959         (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \
960          ARMV6_PMCR_CCOUNT_OVERFLOW)
961
962 static inline int
963 armv6_pmcr_has_overflowed(unsigned long pmcr)
964 {
965         return (pmcr & ARMV6_PMCR_OVERFLOWED_MASK);
966 }
967
968 static inline int
969 armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
970                                   enum armv6_counters counter)
971 {
972         int ret = 0;
973
974         if (ARMV6_CYCLE_COUNTER == counter)
975                 ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
976         else if (ARMV6_COUNTER0 == counter)
977                 ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
978         else if (ARMV6_COUNTER1 == counter)
979                 ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
980         else
981                 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
982
983         return ret;
984 }
985
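    /*
     * The cycle counter and the two event counters are read and written via
     * separate CP15 opcodes (c15, c12, 1..3), selected by the counter index.
     */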
986 static inline u32
987 armv6pmu_read_counter(int counter)
988 {
989         unsigned long value = 0;
990
991         if (ARMV6_CYCLE_COUNTER == counter)
992                 asm volatile("mrc   p15, 0, %0, c15, c12, 1" : "=r"(value));
993         else if (ARMV6_COUNTER0 == counter)
994                 asm volatile("mrc   p15, 0, %0, c15, c12, 2" : "=r"(value));
995         else if (ARMV6_COUNTER1 == counter)
996                 asm volatile("mrc   p15, 0, %0, c15, c12, 3" : "=r"(value));
997         else
998                 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
999
1000         return value;
1001 }
1002
1003 static inline void
1004 armv6pmu_write_counter(int counter,
1005                        u32 value)
1006 {
1007         if (ARMV6_CYCLE_COUNTER == counter)
1008                 asm volatile("mcr   p15, 0, %0, c15, c12, 1" : : "r"(value));
1009         else if (ARMV6_COUNTER0 == counter)
1010                 asm volatile("mcr   p15, 0, %0, c15, c12, 2" : : "r"(value));
1011         else if (ARMV6_COUNTER1 == counter)
1012                 asm volatile("mcr   p15, 0, %0, c15, c12, 3" : : "r"(value));
1013         else
1014                 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
1015 }
1016
1017 void
1018 armv6pmu_enable_event(struct hw_perf_event *hwc,
1019                       int idx)
1020 {
1021         unsigned long val, mask, evt, flags;
1022
1023         if (ARMV6_CYCLE_COUNTER == idx) {
1024                 mask    = 0;
1025                 evt     = ARMV6_PMCR_CCOUNT_IEN;
1026         } else if (ARMV6_COUNTER0 == idx) {
1027                 mask    = ARMV6_PMCR_EVT_COUNT0_MASK;
1028                 evt     = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
1029                           ARMV6_PMCR_COUNT0_IEN;
1030         } else if (ARMV6_COUNTER1 == idx) {
1031                 mask    = ARMV6_PMCR_EVT_COUNT1_MASK;
1032                 evt     = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
1033                           ARMV6_PMCR_COUNT1_IEN;
1034         } else {
1035                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
1036                 return;
1037         }
1038
1039         /*
1040          * Mask out the current event and set the counter to count the event
1041          * that we're interested in.
1042          */
1043         spin_lock_irqsave(&pmu_lock, flags);
1044         val = armv6_pmcr_read();
1045         val &= ~mask;
1046         val |= evt;
1047         armv6_pmcr_write(val);
1048         spin_unlock_irqrestore(&pmu_lock, flags);
1049 }
1050
1051 static irqreturn_t
1052 armv6pmu_handle_irq(int irq_num,
1053                     void *dev)
1054 {
1055         unsigned long pmcr = armv6_pmcr_read();
1056         struct perf_sample_data data;
1057         struct cpu_hw_events *cpuc;
1058         struct pt_regs *regs;
1059         int idx;
1060
1061         if (!armv6_pmcr_has_overflowed(pmcr))
1062                 return IRQ_NONE;
1063
1064         regs = get_irq_regs();
1065
1066         /*
1067          * The interrupts are cleared by writing the overflow flags back to
1068          * the control register. All of the other bits don't have any effect
1069          * if they are rewritten, so write the whole value back.
1070          */
1071         armv6_pmcr_write(pmcr);
1072
1073         perf_sample_data_init(&data, 0);
1074
1075         cpuc = &__get_cpu_var(cpu_hw_events);
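             /*
              * Counter indices start at 1 (index 0 is reserved), so iterate up
              * to and including num_events.
              */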
1076         for (idx = 0; idx <= armpmu->num_events; ++idx) {
1077                 struct perf_event *event = cpuc->events[idx];
1078                 struct hw_perf_event *hwc;
1079
1080                 if (!test_bit(idx, cpuc->active_mask))
1081                         continue;
1082
1083                 /*
1084                  * We have a single interrupt for all counters. Check that
1085                  * each counter has overflowed before we process it.
1086                  */
1087                 if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
1088                         continue;
1089
1090                 hwc = &event->hw;
1091                 armpmu_event_update(event, hwc, idx);
1092                 data.period = event->hw.last_period;
1093                 if (!armpmu_event_set_period(event, hwc, idx))
1094                         continue;
1095
1096                 if (perf_event_overflow(event, 0, &data, regs))
1097                         armpmu->disable(hwc, idx);
1098         }
1099
1100         /*
1101          * Handle the pending perf events.
1102          *
1103          * Note: this call *must* be run with interrupts disabled. For
1104          * platforms that can have the PMU interrupts raised as an NMI, this
1105          * will not work.
1106          */
1107         irq_work_run();
1108
1109         return IRQ_HANDLED;
1110 }
1111
1112 static void
1113 armv6pmu_start(void)
1114 {
1115         unsigned long flags, val;
1116
1117         spin_lock_irqsave(&pmu_lock, flags);
1118         val = armv6_pmcr_read();
1119         val |= ARMV6_PMCR_ENABLE;
1120         armv6_pmcr_write(val);
1121         spin_unlock_irqrestore(&pmu_lock, flags);
1122 }
1123
1124 void
1125 armv6pmu_stop(void)
1126 {
1127         unsigned long flags, val;
1128
1129         spin_lock_irqsave(&pmu_lock, flags);
1130         val = armv6_pmcr_read();
1131         val &= ~ARMV6_PMCR_ENABLE;
1132         armv6_pmcr_write(val);
1133         spin_unlock_irqrestore(&pmu_lock, flags);
1134 }
1135
1136 static int
1137 armv6pmu_get_event_idx(struct cpu_hw_events *cpuc,
1138                        struct hw_perf_event *event)
1139 {
1140         /* Always place a cycle-counting event into the cycle counter. */
1141         if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) {
1142                 if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask))
1143                         return -EAGAIN;
1144
1145                 return ARMV6_CYCLE_COUNTER;
1146         } else {
1147                 /*
1148                  * For anything other than a cycle counter, try to use
1149                  * counter0 and counter1.
1150                  */
1151                 if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) {
1152                         return ARMV6_COUNTER1;
1153                 }
1154
1155                 if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) {
1156                         return ARMV6_COUNTER0;
1157                 }
1158
1159                 /* The counters are all in use. */
1160                 return -EAGAIN;
1161         }
1162 }
1163
1164 static void
1165 armv6pmu_disable_event(struct hw_perf_event *hwc,
1166                        int idx)
1167 {
1168         unsigned long val, mask, evt, flags;
1169
1170         if (ARMV6_CYCLE_COUNTER == idx) {
1171                 mask    = ARMV6_PMCR_CCOUNT_IEN;
1172                 evt     = 0;
1173         } else if (ARMV6_COUNTER0 == idx) {
1174                 mask    = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK;
1175                 evt     = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
1176         } else if (ARMV6_COUNTER1 == idx) {
1177                 mask    = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK;
1178                 evt     = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT;
1179         } else {
1180                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
1181                 return;
1182         }
1183
1184         /*
1185          * Mask out the current event and set the counter to count the number
1186          * of ETM bus signal assertion cycles. The external reporting should
1187          * be disabled and so this should never increment.
1188          */
1189         spin_lock_irqsave(&pmu_lock, flags);
1190         val = armv6_pmcr_read();
1191         val &= ~mask;
1192         val |= evt;
1193         armv6_pmcr_write(val);
1194         spin_unlock_irqrestore(&pmu_lock, flags);
1195 }
1196
1197 static void
1198 armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc,
1199                               int idx)
1200 {
1201         unsigned long val, mask, flags, evt = 0;
1202
1203         if (ARMV6_CYCLE_COUNTER == idx) {
1204                 mask    = ARMV6_PMCR_CCOUNT_IEN;
1205         } else if (ARMV6_COUNTER0 == idx) {
1206                 mask    = ARMV6_PMCR_COUNT0_IEN;
1207         } else if (ARMV6_COUNTER1 == idx) {
1208                 mask    = ARMV6_PMCR_COUNT1_IEN;
1209         } else {
1210                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
1211                 return;
1212         }
1213
1214         /*
1215          * Unlike UP ARMv6, we don't have a way of stopping the counters. We
1216          * simply disable the interrupt reporting.
1217          */
1218         spin_lock_irqsave(&pmu_lock, flags);
1219         val = armv6_pmcr_read();
1220         val &= ~mask;
1221         val |= evt;
1222         armv6_pmcr_write(val);
1223         spin_unlock_irqrestore(&pmu_lock, flags);
1224 }
1225
1226 static const struct arm_pmu armv6pmu = {
1227         .id                     = ARM_PERF_PMU_ID_V6,
1228         .handle_irq             = armv6pmu_handle_irq,
1229         .enable                 = armv6pmu_enable_event,
1230         .disable                = armv6pmu_disable_event,
1231         .read_counter           = armv6pmu_read_counter,
1232         .write_counter          = armv6pmu_write_counter,
1233         .get_event_idx          = armv6pmu_get_event_idx,
1234         .start                  = armv6pmu_start,
1235         .stop                   = armv6pmu_stop,
1236         .cache_map              = &armv6_perf_cache_map,
1237         .event_map              = &armv6_perf_map,
1238         .raw_event_mask         = 0xFF,
1239         .num_events             = 3,
1240         .max_period             = (1LLU << 32) - 1,
1241 };
1242
1243 /*
1244  * ARMv6mpcore is almost identical to single core ARMv6 with the exception
1245  * that some of the events have different enumerations and that there is no
1246  * *hack* to stop the programmable counters. To stop the counters we simply
1247  * disable the interrupt reporting and update the event. When unthrottling we
1248  * reset the period and enable the interrupt reporting.
1249  */
1250 static const struct arm_pmu armv6mpcore_pmu = {
1251         .id                     = ARM_PERF_PMU_ID_V6MP,
1252         .handle_irq             = armv6pmu_handle_irq,
1253         .enable                 = armv6pmu_enable_event,
1254         .disable                = armv6mpcore_pmu_disable_event,
1255         .read_counter           = armv6pmu_read_counter,
1256         .write_counter          = armv6pmu_write_counter,
1257         .get_event_idx          = armv6pmu_get_event_idx,
1258         .start                  = armv6pmu_start,
1259         .stop                   = armv6pmu_stop,
1260         .cache_map              = &armv6mpcore_perf_cache_map,
1261         .event_map              = &armv6mpcore_perf_map,
1262         .raw_event_mask         = 0xFF,
1263         .num_events             = 3,
1264         .max_period             = (1LLU << 32) - 1,
1265 };
1266
1267 /*
1268  * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code.
1269  *
1270  * Copied from ARMv6 code, with the low level code inspired
1271  *  by the ARMv7 Oprofile code.
1272  *
1273  * Cortex-A8 has up to 4 configurable performance counters and
1274  *  a single cycle counter.
1275  * Cortex-A9 has up to 31 configurable performance counters and
1276  *  a single cycle counter.
1277  *
1278  * All counters can be enabled/disabled and IRQ masked separately. The cycle
1279  *  counter is reset on its own; the event counters are reset as a group.
1280  */
1281
1282 /* Common ARMv7 event types */
1283 enum armv7_perf_types {
1284         ARMV7_PERFCTR_PMNC_SW_INCR              = 0x00,
1285         ARMV7_PERFCTR_IFETCH_MISS               = 0x01,
1286         ARMV7_PERFCTR_ITLB_MISS                 = 0x02,
1287         ARMV7_PERFCTR_DCACHE_REFILL             = 0x03,
1288         ARMV7_PERFCTR_DCACHE_ACCESS             = 0x04,
1289         ARMV7_PERFCTR_DTLB_REFILL               = 0x05,
1290         ARMV7_PERFCTR_DREAD                     = 0x06,
1291         ARMV7_PERFCTR_DWRITE                    = 0x07,
1292
1293         ARMV7_PERFCTR_EXC_TAKEN                 = 0x09,
1294         ARMV7_PERFCTR_EXC_EXECUTED              = 0x0A,
1295         ARMV7_PERFCTR_CID_WRITE                 = 0x0B,
1296         /* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS.
1297          * It counts:
1298          *  - all branch instructions,
1299          *  - instructions that explicitly write the PC,
1300          *  - exception generating instructions.
1301          */
1302         ARMV7_PERFCTR_PC_WRITE                  = 0x0C,
1303         ARMV7_PERFCTR_PC_IMM_BRANCH             = 0x0D,
1304         ARMV7_PERFCTR_UNALIGNED_ACCESS          = 0x0F,
1305         ARMV7_PERFCTR_PC_BRANCH_MIS_PRED        = 0x10,
1306         ARMV7_PERFCTR_CLOCK_CYCLES              = 0x11,
1307
1308         ARMV7_PERFCTR_PC_BRANCH_MIS_USED        = 0x12,
1309
1310         ARMV7_PERFCTR_CPU_CYCLES                = 0xFF
1311 };
1312
1313 /* ARMv7 Cortex-A8 specific event types */
1314 enum armv7_a8_perf_types {
1315         ARMV7_PERFCTR_INSTR_EXECUTED            = 0x08,
1316
1317         ARMV7_PERFCTR_PC_PROC_RETURN            = 0x0E,
1318
1319         ARMV7_PERFCTR_WRITE_BUFFER_FULL         = 0x40,
1320         ARMV7_PERFCTR_L2_STORE_MERGED           = 0x41,
1321         ARMV7_PERFCTR_L2_STORE_BUFF             = 0x42,
1322         ARMV7_PERFCTR_L2_ACCESS                 = 0x43,
1323         ARMV7_PERFCTR_L2_CACH_MISS              = 0x44,
1324         ARMV7_PERFCTR_AXI_READ_CYCLES           = 0x45,
1325         ARMV7_PERFCTR_AXI_WRITE_CYCLES          = 0x46,
1326         ARMV7_PERFCTR_MEMORY_REPLAY             = 0x47,
1327         ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY   = 0x48,
1328         ARMV7_PERFCTR_L1_DATA_MISS              = 0x49,
1329         ARMV7_PERFCTR_L1_INST_MISS              = 0x4A,
1330         ARMV7_PERFCTR_L1_DATA_COLORING          = 0x4B,
1331         ARMV7_PERFCTR_L1_NEON_DATA              = 0x4C,
1332         ARMV7_PERFCTR_L1_NEON_CACH_DATA         = 0x4D,
1333         ARMV7_PERFCTR_L2_NEON                   = 0x4E,
1334         ARMV7_PERFCTR_L2_NEON_HIT               = 0x4F,
1335         ARMV7_PERFCTR_L1_INST                   = 0x50,
1336         ARMV7_PERFCTR_PC_RETURN_MIS_PRED        = 0x51,
1337         ARMV7_PERFCTR_PC_BRANCH_FAILED          = 0x52,
1338         ARMV7_PERFCTR_PC_BRANCH_TAKEN           = 0x53,
1339         ARMV7_PERFCTR_PC_BRANCH_EXECUTED        = 0x54,
1340         ARMV7_PERFCTR_OP_EXECUTED               = 0x55,
1341         ARMV7_PERFCTR_CYCLES_INST_STALL         = 0x56,
1342         ARMV7_PERFCTR_CYCLES_INST               = 0x57,
1343         ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL    = 0x58,
1344         ARMV7_PERFCTR_CYCLES_NEON_INST_STALL    = 0x59,
1345         ARMV7_PERFCTR_NEON_CYCLES               = 0x5A,
1346
1347         ARMV7_PERFCTR_PMU0_EVENTS               = 0x70,
1348         ARMV7_PERFCTR_PMU1_EVENTS               = 0x71,
1349         ARMV7_PERFCTR_PMU_EVENTS                = 0x72,
1350 };
1351
1352 /* ARMv7 Cortex-A9 specific event types */
1353 enum armv7_a9_perf_types {
1354         ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC     = 0x40,
1355         ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC     = 0x41,
1356         ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC       = 0x42,
1357
1358         ARMV7_PERFCTR_COHERENT_LINE_MISS        = 0x50,
1359         ARMV7_PERFCTR_COHERENT_LINE_HIT         = 0x51,
1360
1361         ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES   = 0x60,
1362         ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES   = 0x61,
1363         ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES = 0x62,
1364         ARMV7_PERFCTR_STREX_EXECUTED_PASSED     = 0x63,
1365         ARMV7_PERFCTR_STREX_EXECUTED_FAILED     = 0x64,
1366         ARMV7_PERFCTR_DATA_EVICTION             = 0x65,
1367         ARMV7_PERFCTR_ISSUE_STAGE_NO_INST       = 0x66,
1368         ARMV7_PERFCTR_ISSUE_STAGE_EMPTY         = 0x67,
1369         ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE  = 0x68,
1370
1371         ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS = 0x6E,
1372
1373         ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST   = 0x70,
1374         ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST = 0x71,
1375         ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST  = 0x72,
1376         ARMV7_PERFCTR_FP_EXECUTED_INST          = 0x73,
1377         ARMV7_PERFCTR_NEON_EXECUTED_INST        = 0x74,
1378
1379         ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES = 0x80,
1380         ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES  = 0x81,
1381         ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES        = 0x82,
1382         ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES        = 0x83,
1383         ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES  = 0x84,
1384         ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES  = 0x85,
1385         ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES      = 0x86,
1386
1387         ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES  = 0x8A,
1388         ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES = 0x8B,
1389
1390         ARMV7_PERFCTR_ISB_INST                  = 0x90,
1391         ARMV7_PERFCTR_DSB_INST                  = 0x91,
1392         ARMV7_PERFCTR_DMB_INST                  = 0x92,
1393         ARMV7_PERFCTR_EXT_INTERRUPTS            = 0x93,
1394
1395         ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED     = 0xA0,
1396         ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED       = 0xA1,
1397         ARMV7_PERFCTR_PLE_FIFO_FLUSH            = 0xA2,
1398         ARMV7_PERFCTR_PLE_RQST_COMPLETED        = 0xA3,
1399         ARMV7_PERFCTR_PLE_FIFO_OVERFLOW         = 0xA4,
1400         ARMV7_PERFCTR_PLE_RQST_PROG             = 0xA5
1401 };
1402
1403 /*
1404  * Cortex-A8 HW events mapping
1405  *
1406  * The hardware events that we support. We do support cache operations but
1407  * we have Harvard caches and no way to combine instruction and data
1408  * accesses/misses in hardware.
1409  */
1410 static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = {
1411         [PERF_COUNT_HW_CPU_CYCLES]          = ARMV7_PERFCTR_CPU_CYCLES,
1412         [PERF_COUNT_HW_INSTRUCTIONS]        = ARMV7_PERFCTR_INSTR_EXECUTED,
1413         [PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
1414         [PERF_COUNT_HW_CACHE_MISSES]        = HW_OP_UNSUPPORTED,
1415         [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
1416         [PERF_COUNT_HW_BRANCH_MISSES]       = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1417         [PERF_COUNT_HW_BUS_CYCLES]          = ARMV7_PERFCTR_CLOCK_CYCLES,
1418 };
1419
1420 static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
1421                                           [PERF_COUNT_HW_CACHE_OP_MAX]
1422                                           [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
1423         [C(L1D)] = {
1424                 /*
1425                  * The performance counters don't differentiate between read
1426                  * and write accesses/misses so this isn't strictly correct,
1427                  * but it's the best we can do. Writes and reads get
1428                  * combined.
1429                  */
1430                 [C(OP_READ)] = {
1431                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_DCACHE_ACCESS,
1432                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DCACHE_REFILL,
1433                 },
1434                 [C(OP_WRITE)] = {
1435                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_DCACHE_ACCESS,
1436                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DCACHE_REFILL,
1437                 },
1438                 [C(OP_PREFETCH)] = {
1439                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1440                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1441                 },
1442         },
1443         [C(L1I)] = {
1444                 [C(OP_READ)] = {
1445                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_L1_INST,
1446                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_L1_INST_MISS,
1447                 },
1448                 [C(OP_WRITE)] = {
1449                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_L1_INST,
1450                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_L1_INST_MISS,
1451                 },
1452                 [C(OP_PREFETCH)] = {
1453                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1454                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1455                 },
1456         },
1457         [C(LL)] = {
1458                 [C(OP_READ)] = {
1459                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_L2_ACCESS,
1460                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_L2_CACH_MISS,
1461                 },
1462                 [C(OP_WRITE)] = {
1463                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_L2_ACCESS,
1464                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_L2_CACH_MISS,
1465                 },
1466                 [C(OP_PREFETCH)] = {
1467                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1468                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1469                 },
1470         },
1471         [C(DTLB)] = {
1472                 /*
1473                  * Only ITLB misses and DTLB refills are supported.
1474                  * If users want the DTLB refill misses, a raw counter
1475                  * must be used.
1476                  */
1477                 [C(OP_READ)] = {
1478                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1479                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DTLB_REFILL,
1480                 },
1481                 [C(OP_WRITE)] = {
1482                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1483                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DTLB_REFILL,
1484                 },
1485                 [C(OP_PREFETCH)] = {
1486                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1487                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1488                 },
1489         },
1490         [C(ITLB)] = {
1491                 [C(OP_READ)] = {
1492                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1493                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_ITLB_MISS,
1494                 },
1495                 [C(OP_WRITE)] = {
1496                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1497                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_ITLB_MISS,
1498                 },
1499                 [C(OP_PREFETCH)] = {
1500                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1501                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1502                 },
1503         },
1504         [C(BPU)] = {
1505                 [C(OP_READ)] = {
1506                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_PC_WRITE,
1507                         [C(RESULT_MISS)]
1508                                         = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1509                 },
1510                 [C(OP_WRITE)] = {
1511                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_PC_WRITE,
1512                         [C(RESULT_MISS)]
1513                                         = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1514                 },
1515                 [C(OP_PREFETCH)] = {
1516                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1517                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1518                 },
1519         },
1520 };
1521
1522 /*
1523  * Cortex-A9 HW events mapping
1524  */
1525 static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = {
1526         [PERF_COUNT_HW_CPU_CYCLES]          = ARMV7_PERFCTR_CPU_CYCLES,
1527         [PERF_COUNT_HW_INSTRUCTIONS]        =
1528                                         ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE,
1529         [PERF_COUNT_HW_CACHE_REFERENCES]    = ARMV7_PERFCTR_COHERENT_LINE_HIT,
1530         [PERF_COUNT_HW_CACHE_MISSES]        = ARMV7_PERFCTR_COHERENT_LINE_MISS,
1531         [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
1532         [PERF_COUNT_HW_BRANCH_MISSES]       = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1533         [PERF_COUNT_HW_BUS_CYCLES]          = ARMV7_PERFCTR_CLOCK_CYCLES,
1534 };
1535
1536 static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
1537                                           [PERF_COUNT_HW_CACHE_OP_MAX]
1538                                           [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
1539         [C(L1D)] = {
1540                 /*
1541                  * The performance counters don't differentiate between read
1542                  * and write accesses/misses so this isn't strictly correct,
1543                  * but it's the best we can do. Writes and reads get
1544                  * combined.
1545                  */
1546                 [C(OP_READ)] = {
1547                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_DCACHE_ACCESS,
1548                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DCACHE_REFILL,
1549                 },
1550                 [C(OP_WRITE)] = {
1551                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_DCACHE_ACCESS,
1552                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DCACHE_REFILL,
1553                 },
1554                 [C(OP_PREFETCH)] = {
1555                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1556                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1557                 },
1558         },
1559         [C(L1I)] = {
1560                 [C(OP_READ)] = {
1561                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1562                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_IFETCH_MISS,
1563                 },
1564                 [C(OP_WRITE)] = {
1565                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1566                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_IFETCH_MISS,
1567                 },
1568                 [C(OP_PREFETCH)] = {
1569                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1570                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1571                 },
1572         },
1573         [C(LL)] = {
1574                 [C(OP_READ)] = {
1575                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1576                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1577                 },
1578                 [C(OP_WRITE)] = {
1579                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1580                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1581                 },
1582                 [C(OP_PREFETCH)] = {
1583                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1584                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1585                 },
1586         },
1587         [C(DTLB)] = {
1588                 /*
1589                  * Only ITLB misses and DTLB refills are supported.
1590                  * If users want the DTLB refill misses, a raw counter
1591                  * must be used.
1592                  */
1593                 [C(OP_READ)] = {
1594                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1595                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DTLB_REFILL,
1596                 },
1597                 [C(OP_WRITE)] = {
1598                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1599                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DTLB_REFILL,
1600                 },
1601                 [C(OP_PREFETCH)] = {
1602                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1603                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1604                 },
1605         },
1606         [C(ITLB)] = {
1607                 [C(OP_READ)] = {
1608                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1609                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_ITLB_MISS,
1610                 },
1611                 [C(OP_WRITE)] = {
1612                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1613                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_ITLB_MISS,
1614                 },
1615                 [C(OP_PREFETCH)] = {
1616                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1617                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1618                 },
1619         },
1620         [C(BPU)] = {
1621                 [C(OP_READ)] = {
1622                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_PC_WRITE,
1623                         [C(RESULT_MISS)]
1624                                         = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1625                 },
1626                 [C(OP_WRITE)] = {
1627                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_PC_WRITE,
1628                         [C(RESULT_MISS)]
1629                                         = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1630                 },
1631                 [C(OP_PREFETCH)] = {
1632                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1633                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1634                 },
1635         },
1636 };
1637
1638 /*
1639  * Perf Events counters
1640  */
1641 enum armv7_counters {
1642         ARMV7_CYCLE_COUNTER             = 1,    /* Cycle counter */
1643         ARMV7_COUNTER0                  = 2,    /* First event counter */
1644 };
1645
1646 /*
1647  * The cycle counter is ARMV7_CYCLE_COUNTER.
1648  * The first event counter is ARMV7_COUNTER0.
1649  * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1).
1650  */
1651 #define ARMV7_COUNTER_LAST      (ARMV7_COUNTER0 + armpmu->num_events - 1)
1652
1653 /*
1654  * ARMv7 low level PMNC access
1655  */
1656
1657 /*
1658  * Per-CPU PMNC: config reg
1659  */
1660 #define ARMV7_PMNC_E            (1 << 0) /* Enable all counters */
1661 #define ARMV7_PMNC_P            (1 << 1) /* Reset all counters */
1662 #define ARMV7_PMNC_C            (1 << 2) /* Cycle counter reset */
1663 #define ARMV7_PMNC_D            (1 << 3) /* CCNT counts every 64th cpu cycle */
1664 #define ARMV7_PMNC_X            (1 << 4) /* Export to ETM */
1665 #define ARMV7_PMNC_DP           (1 << 5) /* Disable CCNT if non-invasive debug*/
1666 #define ARMV7_PMNC_N_SHIFT      11       /* Number of counters supported */
1667 #define ARMV7_PMNC_N_MASK       0x1f
1668 #define ARMV7_PMNC_MASK         0x3f     /* Mask for writable bits */
1669
1670 /*
1671  * Available counters
1672  */
1673 #define ARMV7_CNT0              0       /* First event counter */
1674 #define ARMV7_CCNT              31      /* Cycle counter */
1675
1676 /* Perf Event to low level counters mapping */
1677 #define ARMV7_EVENT_CNT_TO_CNTx (ARMV7_COUNTER0 - ARMV7_CNT0)
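     /*
      * For example, event counter index ARMV7_COUNTER0 (2) selects hardware
      * counter CNT0 (0), index 3 selects CNT1, and so on. The cycle counter
      * (ARMV7_CYCLE_COUNTER) is handled separately and always uses CCNT.
      */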
1678
1679 /*
1680  * CNTENS: counters enable reg
1681  */
1682 #define ARMV7_CNTENS_P(idx)     (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1683 #define ARMV7_CNTENS_C          (1 << ARMV7_CCNT)
1684
1685 /*
1686  * CNTENC: counters disable reg
1687  */
1688 #define ARMV7_CNTENC_P(idx)     (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1689 #define ARMV7_CNTENC_C          (1 << ARMV7_CCNT)
1690
1691 /*
1692  * INTENS: counters overflow interrupt enable reg
1693  */
1694 #define ARMV7_INTENS_P(idx)     (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1695 #define ARMV7_INTENS_C          (1 << ARMV7_CCNT)
1696
1697 /*
1698  * INTENC: counters overflow interrupt disable reg
1699  */
1700 #define ARMV7_INTENC_P(idx)     (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1701 #define ARMV7_INTENC_C          (1 << ARMV7_CCNT)
1702
1703 /*
1704  * EVTSEL: Event selection reg
1705  */
1706 #define ARMV7_EVTSEL_MASK       0xff            /* Mask for writable bits */
1707
1708 /*
1709  * SELECT: Counter selection reg
1710  */
1711 #define ARMV7_SELECT_MASK       0x1f            /* Mask for writable bits */
1712
1713 /*
1714  * FLAG: counters overflow flag status reg
1715  */
1716 #define ARMV7_FLAG_P(idx)       (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1717 #define ARMV7_FLAG_C            (1 << ARMV7_CCNT)
1718 #define ARMV7_FLAG_MASK         0xffffffff      /* Mask for writable bits */
1719 #define ARMV7_OVERFLOWED_MASK   ARMV7_FLAG_MASK
1720
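     /*
      * PMNC (the ARMv7 PMCR register) lives at CP15 c9, c12, 0. The two
      * helpers below read and write it directly; writes are masked to the
      * architecturally writable bits.
      */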
1721 static inline unsigned long armv7_pmnc_read(void)
1722 {
1723         u32 val;
1724         asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
1725         return val;
1726 }
1727
1728 static inline void armv7_pmnc_write(unsigned long val)
1729 {
1730         val &= ARMV7_PMNC_MASK;
1731         asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val));
1732 }
1733
1734 static inline int armv7_pmnc_has_overflowed(unsigned long pmnc)
1735 {
1736         return pmnc & ARMV7_OVERFLOWED_MASK;
1737 }
1738
1739 static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc,
1740                                         enum armv7_counters counter)
1741 {
1742         int ret = 0;
1743
1744         if (counter == ARMV7_CYCLE_COUNTER)
1745                 ret = pmnc & ARMV7_FLAG_C;
1746         else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST))
1747                 ret = pmnc & ARMV7_FLAG_P(counter);
1748         else
1749                 pr_err("CPU%u checking wrong counter %d overflow status\n",
1750                         smp_processor_id(), counter);
1751
1752         return ret;
1753 }
1754
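     /*
      * Select an event counter via the counter selection register (PMSELR,
      * CP15 c9, c12, 5) so that subsequent accesses to the event counter and
      * event type registers target it. Returns the index on success, or -1
      * for an out-of-range index.
      */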
1755 static inline int armv7_pmnc_select_counter(unsigned int idx)
1756 {
1757         u32 val;
1758
1759         if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) {
1760                 pr_err("CPU%u selecting wrong PMNC counter"
1761                         " %d\n", smp_processor_id(), idx);
1762                 return -1;
1763         }
1764
1765         val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK;
1766         asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val));
1767
1768         return idx;
1769 }
1770
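     /*
      * Read a counter: the cycle count comes from PMCCNTR (CP15 c9, c13, 0),
      * event counts from PMXEVCNTR (CP15 c9, c13, 2) after selecting the
      * counter. The write path below mirrors this.
      */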
1771 static inline u32 armv7pmu_read_counter(int idx)
1772 {
1773         unsigned long value = 0;
1774
1775         if (idx == ARMV7_CYCLE_COUNTER)
1776                 asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
1777         else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
1778                 if (armv7_pmnc_select_counter(idx) == idx)
1779                         asm volatile("mrc p15, 0, %0, c9, c13, 2"
1780                                      : "=r" (value));
1781         } else
1782                 pr_err("CPU%u reading wrong counter %d\n",
1783                         smp_processor_id(), idx);
1784
1785         return value;
1786 }
1787
1788 static inline void armv7pmu_write_counter(int idx, u32 value)
1789 {
1790         if (idx == ARMV7_CYCLE_COUNTER)
1791                 asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
1792         else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
1793                 if (armv7_pmnc_select_counter(idx) == idx)
1794                         asm volatile("mcr p15, 0, %0, c9, c13, 2"
1795                                      : : "r" (value));
1796         } else
1797                 pr_err("CPU%u writing wrong counter %d\n",
1798                         smp_processor_id(), idx);
1799 }
1800
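     /*
      * Program the event type for the selected counter via PMXEVTYPER
      * (CP15 c9, c13, 1), masking off the non-writable bits.
      */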
1801 static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val)
1802 {
1803         if (armv7_pmnc_select_counter(idx) == idx) {
1804                 val &= ARMV7_EVTSEL_MASK;
1805                 asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
1806         }
1807 }
1808
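     /*
      * The four helpers below set or clear a single counter's bit in the
      * counter enable set/clear registers (PMCNTENSET/PMCNTENCLR, CP15
      * c9, c12, 1/2) and in the overflow interrupt enable set/clear
      * registers (PMINTENSET/PMINTENCLR, CP15 c9, c14, 1/2).
      */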
1809 static inline u32 armv7_pmnc_enable_counter(unsigned int idx)
1810 {
1811         u32 val;
1812
1813         if ((idx != ARMV7_CYCLE_COUNTER) &&
1814             ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1815                 pr_err("CPU%u enabling wrong PMNC counter"
1816                         " %d\n", smp_processor_id(), idx);
1817                 return -1;
1818         }
1819
1820         if (idx == ARMV7_CYCLE_COUNTER)
1821                 val = ARMV7_CNTENS_C;
1822         else
1823                 val = ARMV7_CNTENS_P(idx);
1824
1825         asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));
1826
1827         return idx;
1828 }
1829
1830 static inline u32 armv7_pmnc_disable_counter(unsigned int idx)
1831 {
1832         u32 val;
1833
1834
1835         if ((idx != ARMV7_CYCLE_COUNTER) &&
1836             ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1837                 pr_err("CPU%u disabling wrong PMNC counter"
1838                         " %d\n", smp_processor_id(), idx);
1839                 return -1;
1840         }
1841
1842         if (idx == ARMV7_CYCLE_COUNTER)
1843                 val = ARMV7_CNTENC_C;
1844         else
1845                 val = ARMV7_CNTENC_P(idx);
1846
1847         asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val));
1848
1849         return idx;
1850 }
1851
1852 static inline u32 armv7_pmnc_enable_intens(unsigned int idx)
1853 {
1854         u32 val;
1855
1856         if ((idx != ARMV7_CYCLE_COUNTER) &&
1857             ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1858                 pr_err("CPU%u enabling wrong PMNC counter"
1859                         " interrupt enable %d\n", smp_processor_id(), idx);
1860                 return -1;
1861         }
1862
1863         if (idx == ARMV7_CYCLE_COUNTER)
1864                 val = ARMV7_INTENS_C;
1865         else
1866                 val = ARMV7_INTENS_P(idx);
1867
1868         asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val));
1869
1870         return idx;
1871 }
1872
1873 static inline u32 armv7_pmnc_disable_intens(unsigned int idx)
1874 {
1875         u32 val;
1876
1877         if ((idx != ARMV7_CYCLE_COUNTER) &&
1878             ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1879                 pr_err("CPU%u disabling wrong PMNC counter"
1880                         " interrupt enable %d\n", smp_processor_id(), idx);
1881                 return -1;
1882         }
1883
1884         if (idx == ARMV7_CYCLE_COUNTER)
1885                 val = ARMV7_INTENC_C;
1886         else
1887                 val = ARMV7_INTENC_P(idx);
1888
1889         asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val));
1890
1891         return idx;
1892 }
1893
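     /*
      * Read the overflow flag status register (PMOVSR, CP15 c9, c12, 3) and
      * write the value back to clear the flags that were set.
      */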
1894 static inline u32 armv7_pmnc_getreset_flags(void)
1895 {
1896         u32 val;
1897
1898         /* Read */
1899         asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
1900
1901         /* Write to clear flags */
1902         val &= ARMV7_FLAG_MASK;
1903         asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val));
1904
1905         return val;
1906 }
1907
1908 #ifdef DEBUG
1909 static void armv7_pmnc_dump_regs(void)
1910 {
1911         u32 val;
1912         unsigned int cnt;
1913
1914         printk(KERN_INFO "PMNC registers dump:\n");
1915
1916         asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
1917         printk(KERN_INFO "PMNC  =0x%08x\n", val);
1918
1919         asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val));
1920         printk(KERN_INFO "CNTENS=0x%08x\n", val);
1921
1922         asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val));
1923         printk(KERN_INFO "INTENS=0x%08x\n", val);
1924
1925         asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
1926         printk(KERN_INFO "FLAGS =0x%08x\n", val);
1927
1928         asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val));
1929         printk(KERN_INFO "SELECT=0x%08x\n", val);
1930
1931         asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
1932         printk(KERN_INFO "CCNT  =0x%08x\n", val);
1933
1934         for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) {
1935                 armv7_pmnc_select_counter(cnt);
1936                 asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
1937                 printk(KERN_INFO "CNT[%d] count =0x%08x\n",
1938                         cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
1939                 asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val));
1940                 printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n",
1941                         cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
1942         }
1943 }
1944 #endif
1945
1946 void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx)
1947 {
1948         unsigned long flags;
1949
1950         /*
1951          * Enable counter and interrupt, and set the counter to count
1952          * the event that we're interested in.
1953          */
1954         spin_lock_irqsave(&pmu_lock, flags);
1955
1956         /*
1957          * Disable counter
1958          */
1959         armv7_pmnc_disable_counter(idx);
1960
1961         /*
1962          * Set event (if destined for PMNx counters)
1963          * We don't need to set the event if it's a cycle count
1964          */
1965         if (idx != ARMV7_CYCLE_COUNTER)
1966                 armv7_pmnc_write_evtsel(idx, hwc->config_base);
1967
1968         /*
1969          * Enable interrupt for this counter
1970          */
1971         armv7_pmnc_enable_intens(idx);
1972
1973         /*
1974          * Enable counter
1975          */
1976         armv7_pmnc_enable_counter(idx);
1977
1978         spin_unlock_irqrestore(&pmu_lock, flags);
1979 }
1980
1981 static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx)
1982 {
1983         unsigned long flags;
1984
1985         /*
1986          * Disable counter and interrupt
1987          */
1988         spin_lock_irqsave(&pmu_lock, flags);
1989
1990         /*
1991          * Disable counter
1992          */
1993         armv7_pmnc_disable_counter(idx);
1994
1995         /*
1996          * Disable interrupt for this counter
1997          */
1998         armv7_pmnc_disable_intens(idx);
1999
2000         spin_unlock_irqrestore(&pmu_lock, flags);
2001 }
2002
2003 static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
2004 {
2005         unsigned long pmnc;
2006         struct perf_sample_data data;
2007         struct cpu_hw_events *cpuc;
2008         struct pt_regs *regs;
2009         int idx;
2010
2011         /*
2012          * Get and reset the IRQ flags
2013          */
2014         pmnc = armv7_pmnc_getreset_flags();
2015
2016         /*
2017          * Did an overflow occur?
2018          */
2019         if (!armv7_pmnc_has_overflowed(pmnc))
2020                 return IRQ_NONE;
2021
2022         /*
2023          * Handle the counter(s) overflow(s)
2024          */
2025         regs = get_irq_regs();
2026
2027         perf_sample_data_init(&data, 0);
2028
2029         cpuc = &__get_cpu_var(cpu_hw_events);
2030         for (idx = 0; idx <= armpmu->num_events; ++idx) {
2031                 struct perf_event *event = cpuc->events[idx];
2032                 struct hw_perf_event *hwc;
2033
2034                 if (!test_bit(idx, cpuc->active_mask))
2035                         continue;
2036
2037                 /*
2038                  * We have a single interrupt for all counters. Check that
2039                  * each counter has overflowed before we process it.
2040                  */
2041                 if (!armv7_pmnc_counter_has_overflowed(pmnc, idx))
2042                         continue;
2043
2044                 hwc = &event->hw;
2045                 armpmu_event_update(event, hwc, idx);
2046                 data.period = event->hw.last_period;
2047                 if (!armpmu_event_set_period(event, hwc, idx))
2048                         continue;
2049
2050                 if (perf_event_overflow(event, 0, &data, regs))
2051                         armpmu->disable(hwc, idx);
2052         }
2053
2054         /*
2055          * Handle the pending perf events.
2056          *
2057          * Note: this call *must* be run with interrupts disabled. For
2058          * platforms that can have the PMU interrupts raised as an NMI, this
2059          * will not work.
2060          */
2061         irq_work_run();
2062
2063         return IRQ_HANDLED;
2064 }
2065
2066 static void armv7pmu_start(void)
2067 {
2068         unsigned long flags;
2069
2070         spin_lock_irqsave(&pmu_lock, flags);
2071         /* Enable all counters */
2072         armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
2073         spin_unlock_irqrestore(&pmu_lock, flags);
2074 }
2075
2076 static void armv7pmu_stop(void)
2077 {
2078         unsigned long flags;
2079
2080         spin_lock_irqsave(&pmu_lock, flags);
2081         /* Disable all counters */
2082         armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
2083         spin_unlock_irqrestore(&pmu_lock, flags);
2084 }
2085
2086 static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc,
2087                                   struct hw_perf_event *event)
2088 {
2089         int idx;
2090
2091         /* Always place a cycle count event into the cycle counter. */
2092         if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) {
2093                 if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask))
2094                         return -EAGAIN;
2095
2096                 return ARMV7_CYCLE_COUNTER;
2097         } else {
2098                 /*
2099                  * For anything other than a cycle count event, try to use
2100                  * the event counters.
2101                  */
2102                 for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) {
2103                         if (!test_and_set_bit(idx, cpuc->used_mask))
2104                                 return idx;
2105                 }
2106
2107                 /* The counters are all in use. */
2108                 return -EAGAIN;
2109         }
2110 }
2111
2112 static struct arm_pmu armv7pmu = {
2113         .handle_irq             = armv7pmu_handle_irq,
2114         .enable                 = armv7pmu_enable_event,
2115         .disable                = armv7pmu_disable_event,
2116         .read_counter           = armv7pmu_read_counter,
2117         .write_counter          = armv7pmu_write_counter,
2118         .get_event_idx          = armv7pmu_get_event_idx,
2119         .start                  = armv7pmu_start,
2120         .stop                   = armv7pmu_stop,
2121         .raw_event_mask         = 0xFF,
2122         .max_period             = (1LLU << 32) - 1,
2123 };
2124
2125 static u32 __init armv7_reset_read_pmnc(void)
2126 {
2127         u32 nb_cnt;
2128
2129         /* Initialize & Reset PMNC: C and P bits */
2130         armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);
2131
2132         /* Read the number of CNTx counters supported from PMNC */
2133         nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK;
2134
2135         /* Add the CPU cycles counter and return */
2136         return nb_cnt + 1;
2137 }
2138
2139 /*
2140  * ARMv5 [xscale] Performance counter handling code.
2141  *
2142  * Based on xscale OProfile code.
2143  *
2144  * There are two variants of the xscale PMU that we support:
2145  *      - xscale1pmu: 2 event counters and a cycle counter
2146  *      - xscale2pmu: 4 event counters and a cycle counter
2147  * The two variants share event definitions, but have different
2148  * PMU structures.
2149  */
2150
2151 enum xscale_perf_types {
2152         XSCALE_PERFCTR_ICACHE_MISS              = 0x00,
2153         XSCALE_PERFCTR_ICACHE_NO_DELIVER        = 0x01,
2154         XSCALE_PERFCTR_DATA_STALL               = 0x02,
2155         XSCALE_PERFCTR_ITLB_MISS                = 0x03,
2156         XSCALE_PERFCTR_DTLB_MISS                = 0x04,
2157         XSCALE_PERFCTR_BRANCH                   = 0x05,
2158         XSCALE_PERFCTR_BRANCH_MISS              = 0x06,
2159         XSCALE_PERFCTR_INSTRUCTION              = 0x07,
2160         XSCALE_PERFCTR_DCACHE_FULL_STALL        = 0x08,
2161         XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09,
2162         XSCALE_PERFCTR_DCACHE_ACCESS            = 0x0A,
2163         XSCALE_PERFCTR_DCACHE_MISS              = 0x0B,
2164         XSCALE_PERFCTR_DCACHE_WRITE_BACK        = 0x0C,
2165         XSCALE_PERFCTR_PC_CHANGED               = 0x0D,
2166         XSCALE_PERFCTR_BCU_REQUEST              = 0x10,
2167         XSCALE_PERFCTR_BCU_FULL                 = 0x11,
2168         XSCALE_PERFCTR_BCU_DRAIN                = 0x12,
2169         XSCALE_PERFCTR_BCU_ECC_NO_ELOG          = 0x14,
2170         XSCALE_PERFCTR_BCU_1_BIT_ERR            = 0x15,
2171         XSCALE_PERFCTR_RMW                      = 0x16,
2172         /* XSCALE_PERFCTR_CCNT is not hardware defined */
2173         XSCALE_PERFCTR_CCNT                     = 0xFE,
2174         XSCALE_PERFCTR_UNUSED                   = 0xFF,
2175 };
2176
2177 enum xscale_counters {
2178         XSCALE_CYCLE_COUNTER    = 1,
2179         XSCALE_COUNTER0,
2180         XSCALE_COUNTER1,
2181         XSCALE_COUNTER2,
2182         XSCALE_COUNTER3,
2183 };
2184
2185 static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = {
2186         [PERF_COUNT_HW_CPU_CYCLES]          = XSCALE_PERFCTR_CCNT,
2187         [PERF_COUNT_HW_INSTRUCTIONS]        = XSCALE_PERFCTR_INSTRUCTION,
2188         [PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
2189         [PERF_COUNT_HW_CACHE_MISSES]        = HW_OP_UNSUPPORTED,
2190         [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH,
2191         [PERF_COUNT_HW_BRANCH_MISSES]       = XSCALE_PERFCTR_BRANCH_MISS,
2192         [PERF_COUNT_HW_BUS_CYCLES]          = HW_OP_UNSUPPORTED,
2193 };
2194
2195 static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
2196                                            [PERF_COUNT_HW_CACHE_OP_MAX]
2197                                            [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
2198         [C(L1D)] = {
2199                 [C(OP_READ)] = {
2200                         [C(RESULT_ACCESS)]      = XSCALE_PERFCTR_DCACHE_ACCESS,
2201                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_DCACHE_MISS,
2202                 },
2203                 [C(OP_WRITE)] = {
2204                         [C(RESULT_ACCESS)]      = XSCALE_PERFCTR_DCACHE_ACCESS,
2205                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_DCACHE_MISS,
2206                 },
2207                 [C(OP_PREFETCH)] = {
2208                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2209                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2210                 },
2211         },
2212         [C(L1I)] = {
2213                 [C(OP_READ)] = {
2214                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2215                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_ICACHE_MISS,
2216                 },
2217                 [C(OP_WRITE)] = {
2218                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2219                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_ICACHE_MISS,
2220                 },
2221                 [C(OP_PREFETCH)] = {
2222                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2223                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2224                 },
2225         },
2226         [C(LL)] = {
2227                 [C(OP_READ)] = {
2228                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2229                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2230                 },
2231                 [C(OP_WRITE)] = {
2232                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2233                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2234                 },
2235                 [C(OP_PREFETCH)] = {
2236                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2237                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2238                 },
2239         },
2240         [C(DTLB)] = {
2241                 [C(OP_READ)] = {
2242                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2243                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_DTLB_MISS,
2244                 },
2245                 [C(OP_WRITE)] = {
2246                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2247                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_DTLB_MISS,
2248                 },
2249                 [C(OP_PREFETCH)] = {
2250                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2251                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2252                 },
2253         },
2254         [C(ITLB)] = {
2255                 [C(OP_READ)] = {
2256                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2257                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_ITLB_MISS,
2258                 },
2259                 [C(OP_WRITE)] = {
2260                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2261                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_ITLB_MISS,
2262                 },
2263                 [C(OP_PREFETCH)] = {
2264                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2265                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2266                 },
2267         },
2268         [C(BPU)] = {
2269                 [C(OP_READ)] = {
2270                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2271                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2272                 },
2273                 [C(OP_WRITE)] = {
2274                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2275                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2276                 },
2277                 [C(OP_PREFETCH)] = {
2278                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2279                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2280                 },
2281         },
2282 };
2283
2284 #define XSCALE_PMU_ENABLE       0x001
2285 #define XSCALE_PMN_RESET        0x002
2286 #define XSCALE_CCNT_RESET       0x004
2287 #define XSCALE_PMU_RESET        (XSCALE_CCNT_RESET | XSCALE_PMN_RESET)
2288 #define XSCALE_PMU_CNT64        0x008
2289
2290 #define XSCALE1_OVERFLOWED_MASK 0x700
2291 #define XSCALE1_CCOUNT_OVERFLOW 0x400
2292 #define XSCALE1_COUNT0_OVERFLOW 0x100
2293 #define XSCALE1_COUNT1_OVERFLOW 0x200
2294 #define XSCALE1_CCOUNT_INT_EN   0x040
2295 #define XSCALE1_COUNT0_INT_EN   0x010
2296 #define XSCALE1_COUNT1_INT_EN   0x020
2297 #define XSCALE1_COUNT0_EVT_SHFT 12
2298 #define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT)
2299 #define XSCALE1_COUNT1_EVT_SHFT 20
2300 #define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT)
2301
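     /*
      * On xscale1 the whole PMU is controlled through a single PMNC register
      * at CP14 c0, c0, 0: it holds the enable/reset bits, the per-counter
      * interrupt enables, the overflow flags and both event select fields
      * (see the XSCALE1_* definitions above).
      */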
2302 static inline u32
2303 xscale1pmu_read_pmnc(void)
2304 {
2305         u32 val;
2306         asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val));
2307         return val;
2308 }
2309
2310 static inline void
2311 xscale1pmu_write_pmnc(u32 val)
2312 {
2313         /* upper 4 bits and bits 7, 11 are write-as-0 */
2314         val &= 0xffff77f;
2315         asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val));
2316 }
2317
2318 static inline int
2319 xscale1_pmnc_counter_has_overflowed(unsigned long pmnc,
2320                                         enum xscale_counters counter)
2321 {
2322         int ret = 0;
2323
2324         switch (counter) {
2325         case XSCALE_CYCLE_COUNTER:
2326                 ret = pmnc & XSCALE1_CCOUNT_OVERFLOW;
2327                 break;
2328         case XSCALE_COUNTER0:
2329                 ret = pmnc & XSCALE1_COUNT0_OVERFLOW;
2330                 break;
2331         case XSCALE_COUNTER1:
2332                 ret = pmnc & XSCALE1_COUNT1_OVERFLOW;
2333                 break;
2334         default:
2335                 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
2336         }
2337
2338         return ret;
2339 }
2340
2341 static irqreturn_t
2342 xscale1pmu_handle_irq(int irq_num, void *dev)
2343 {
2344         unsigned long pmnc;
2345         struct perf_sample_data data;
2346         struct cpu_hw_events *cpuc;
2347         struct pt_regs *regs;
2348         int idx;
2349
2350         /*
2351          * NOTE: there's an A stepping erratum whereby if an overflow
2352          *       bit is already set and another overflow occurs, the
2353          *       previously set overflow bit gets cleared. There's no
2354          *       workaround. Fixed in B stepping or later.
2355          */
2356         pmnc = xscale1pmu_read_pmnc();
2357
2358         /*
2359          * Write the value back to clear the overflow flags. Overflow
2360          * flags remain in pmnc for use below. We also disable the PMU
2361          * while we process the interrupt.
2362          */
2363         xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
2364
2365         if (!(pmnc & XSCALE1_OVERFLOWED_MASK))
2366                 return IRQ_NONE;
2367
2368         regs = get_irq_regs();
2369
2370         perf_sample_data_init(&data, 0);
2371
2372         cpuc = &__get_cpu_var(cpu_hw_events);
2373         for (idx = 0; idx <= armpmu->num_events; ++idx) {
2374                 struct perf_event *event = cpuc->events[idx];
2375                 struct hw_perf_event *hwc;
2376
2377                 if (!test_bit(idx, cpuc->active_mask))
2378                         continue;
2379
2380                 if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
2381                         continue;
2382
2383                 hwc = &event->hw;
2384                 armpmu_event_update(event, hwc, idx);
2385                 data.period = event->hw.last_period;
2386                 if (!armpmu_event_set_period(event, hwc, idx))
2387                         continue;
2388
2389                 if (perf_event_overflow(event, 0, &data, regs))
2390                         armpmu->disable(hwc, idx);
2391         }
2392
2393         irq_work_run();
2394
2395         /*
2396          * Re-enable the PMU.
2397          */
2398         pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE;
2399         xscale1pmu_write_pmnc(pmnc);
2400
2401         return IRQ_HANDLED;
2402 }
2403
2404 static void
2405 xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx)
2406 {
2407         unsigned long val, mask, evt, flags;
2408
2409         switch (idx) {
2410         case XSCALE_CYCLE_COUNTER:
2411                 mask = 0;
2412                 evt = XSCALE1_CCOUNT_INT_EN;
2413                 break;
2414         case XSCALE_COUNTER0:
2415                 mask = XSCALE1_COUNT0_EVT_MASK;
2416                 evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) |
2417                         XSCALE1_COUNT0_INT_EN;
2418                 break;
2419         case XSCALE_COUNTER1:
2420                 mask = XSCALE1_COUNT1_EVT_MASK;
2421                 evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) |
2422                         XSCALE1_COUNT1_INT_EN;
2423                 break;
2424         default:
2425                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2426                 return;
2427         }
2428
2429         spin_lock_irqsave(&pmu_lock, flags);
2430         val = xscale1pmu_read_pmnc();
2431         val &= ~mask;
2432         val |= evt;
2433         xscale1pmu_write_pmnc(val);
2434         spin_unlock_irqrestore(&pmu_lock, flags);
2435 }
2436
2437 static void
2438 xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx)
2439 {
2440         unsigned long val, mask, evt, flags;
2441
2442         switch (idx) {
2443         case XSCALE_CYCLE_COUNTER:
2444                 mask = XSCALE1_CCOUNT_INT_EN;
2445                 evt = 0;
2446                 break;
2447         case XSCALE_COUNTER0:
2448                 mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK;
2449                 evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT;
2450                 break;
2451         case XSCALE_COUNTER1:
2452                 mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK;
2453                 evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT;
2454                 break;
2455         default:
2456                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2457                 return;
2458         }
2459
2460         spin_lock_irqsave(&pmu_lock, flags);
2461         val = xscale1pmu_read_pmnc();
2462         val &= ~mask;
2463         val |= evt;
2464         xscale1pmu_write_pmnc(val);
2465         spin_unlock_irqrestore(&pmu_lock, flags);
2466 }
2467
2468 static int
2469 xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc,
2470                         struct hw_perf_event *event)
2471 {
2472         if (XSCALE_PERFCTR_CCNT == event->config_base) {
2473                 if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask))
2474                         return -EAGAIN;
2475
2476                 return XSCALE_CYCLE_COUNTER;
2477         } else {
2478                 if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) {
2479                         return XSCALE_COUNTER1;
2480                 }
2481
2482                 if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) {
2483                         return XSCALE_COUNTER0;
2484                 }
2485
2486                 return -EAGAIN;
2487         }
2488 }
2489
2490 static void
2491 xscale1pmu_start(void)
2492 {
2493         unsigned long flags, val;
2494
2495         spin_lock_irqsave(&pmu_lock, flags);
2496         val = xscale1pmu_read_pmnc();
2497         val |= XSCALE_PMU_ENABLE;
2498         xscale1pmu_write_pmnc(val);
2499         spin_unlock_irqrestore(&pmu_lock, flags);
2500 }
2501
2502 static void
2503 xscale1pmu_stop(void)
2504 {
2505         unsigned long flags, val;
2506
2507         spin_lock_irqsave(&pmu_lock, flags);
2508         val = xscale1pmu_read_pmnc();
2509         val &= ~XSCALE_PMU_ENABLE;
2510         xscale1pmu_write_pmnc(val);
2511         spin_unlock_irqrestore(&pmu_lock, flags);
2512 }
2513
2514 static inline u32
2515 xscale1pmu_read_counter(int counter)
2516 {
2517         u32 val = 0;
2518
2519         switch (counter) {
2520         case XSCALE_CYCLE_COUNTER:
2521                 asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val));
2522                 break;
2523         case XSCALE_COUNTER0:
2524                 asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val));
2525                 break;
2526         case XSCALE_COUNTER1:
2527                 asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val));
2528                 break;
2529         }
2530
2531         return val;
2532 }
2533
2534 static inline void
2535 xscale1pmu_write_counter(int counter, u32 val)
2536 {
2537         switch (counter) {
2538         case XSCALE_CYCLE_COUNTER:
2539                 asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val));
2540                 break;
2541         case XSCALE_COUNTER0:
2542                 asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val));
2543                 break;
2544         case XSCALE_COUNTER1:
2545                 asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val));
2546                 break;
2547         }
2548 }
2549
2550 static const struct arm_pmu xscale1pmu = {
2551         .id             = ARM_PERF_PMU_ID_XSCALE1,
2552         .handle_irq     = xscale1pmu_handle_irq,
2553         .enable         = xscale1pmu_enable_event,
2554         .disable        = xscale1pmu_disable_event,
2555         .read_counter   = xscale1pmu_read_counter,
2556         .write_counter  = xscale1pmu_write_counter,
2557         .get_event_idx  = xscale1pmu_get_event_idx,
2558         .start          = xscale1pmu_start,
2559         .stop           = xscale1pmu_stop,
2560         .cache_map      = &xscale_perf_cache_map,
2561         .event_map      = &xscale_perf_map,
2562         .raw_event_mask = 0xFF,
2563         .num_events     = 3,
2564         .max_period     = (1LLU << 32) - 1,
2565 };
2566
2567 #define XSCALE2_OVERFLOWED_MASK 0x01f
2568 #define XSCALE2_CCOUNT_OVERFLOW 0x001
2569 #define XSCALE2_COUNT0_OVERFLOW 0x002
2570 #define XSCALE2_COUNT1_OVERFLOW 0x004
2571 #define XSCALE2_COUNT2_OVERFLOW 0x008
2572 #define XSCALE2_COUNT3_OVERFLOW 0x010
2573 #define XSCALE2_CCOUNT_INT_EN   0x001
2574 #define XSCALE2_COUNT0_INT_EN   0x002
2575 #define XSCALE2_COUNT1_INT_EN   0x004
2576 #define XSCALE2_COUNT2_INT_EN   0x008
2577 #define XSCALE2_COUNT3_INT_EN   0x010
2578 #define XSCALE2_COUNT0_EVT_SHFT 0
2579 #define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT)
2580 #define XSCALE2_COUNT1_EVT_SHFT 8
2581 #define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT)
2582 #define XSCALE2_COUNT2_EVT_SHFT 16
2583 #define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT)
2584 #define XSCALE2_COUNT3_EVT_SHFT 24
2585 #define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT)
2586
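     /*
      * xscale2 spreads the PMU state across several CP14 registers: the
      * control register (c0, c1), the interrupt enable register (c4, c1),
      * the overflow flag register (c5, c1) and the event select register
      * (c8, c1), each with its own accessor below.
      */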
2587 static inline u32
2588 xscale2pmu_read_pmnc(void)
2589 {
2590         u32 val;
2591         asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val));
2592         /* bits 1-2 and 4-23 are read-unpredictable */
2593         return val & 0xff000009;
2594 }
2595
2596 static inline void
2597 xscale2pmu_write_pmnc(u32 val)
2598 {
2599         /* bits 4-23 are write-as-0, 24-31 are write ignored */
2600         val &= 0xf;
2601         asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val));
2602 }
2603
2604 static inline u32
2605 xscale2pmu_read_overflow_flags(void)
2606 {
2607         u32 val;
2608         asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val));
2609         return val;
2610 }
2611
2612 static inline void
2613 xscale2pmu_write_overflow_flags(u32 val)
2614 {
2615         asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val));
2616 }
2617
2618 static inline u32
2619 xscale2pmu_read_event_select(void)
2620 {
2621         u32 val;
2622         asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val));
2623         return val;
2624 }
2625
2626 static inline void
2627 xscale2pmu_write_event_select(u32 val)
2628 {
2629         asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val));
2630 }
2631
2632 static inline u32
2633 xscale2pmu_read_int_enable(void)
2634 {
2635         u32 val;
2636         asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val));
2637         return val;
2638 }
2639
2640 static void
2641 xscale2pmu_write_int_enable(u32 val)
2642 {
2643         asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val));
2644 }
2645
2646 static inline int
2647 xscale2_pmnc_counter_has_overflowed(unsigned long of_flags,
2648                                         enum xscale_counters counter)
2649 {
2650         int ret = 0;
2651
2652         switch (counter) {
2653         case XSCALE_CYCLE_COUNTER:
2654                 ret = of_flags & XSCALE2_CCOUNT_OVERFLOW;
2655                 break;
2656         case XSCALE_COUNTER0:
2657                 ret = of_flags & XSCALE2_COUNT0_OVERFLOW;
2658                 break;
2659         case XSCALE_COUNTER1:
2660                 ret = of_flags & XSCALE2_COUNT1_OVERFLOW;
2661                 break;
2662         case XSCALE_COUNTER2:
2663                 ret = of_flags & XSCALE2_COUNT2_OVERFLOW;
2664                 break;
2665         case XSCALE_COUNTER3:
2666                 ret = of_flags & XSCALE2_COUNT3_OVERFLOW;
2667                 break;
2668         default:
2669                 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
2670         }
2671
2672         return ret;
2673 }
2674
2675 static irqreturn_t
2676 xscale2pmu_handle_irq(int irq_num, void *dev)
2677 {
2678         unsigned long pmnc, of_flags;
2679         struct perf_sample_data data;
2680         struct cpu_hw_events *cpuc;
2681         struct pt_regs *regs;
2682         int idx;
2683
2684         /* Disable the PMU. */
2685         pmnc = xscale2pmu_read_pmnc();
2686         xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
2687
2688         /* Check the overflow flag register. */
2689         of_flags = xscale2pmu_read_overflow_flags();
2690         if (!(of_flags & XSCALE2_OVERFLOWED_MASK))
2691                 return IRQ_NONE;
2692
2693         /* Clear the overflow bits. */
2694         xscale2pmu_write_overflow_flags(of_flags);
2695
2696         regs = get_irq_regs();
2697
2698         perf_sample_data_init(&data, 0);
2699
2700         cpuc = &__get_cpu_var(cpu_hw_events);
2701         for (idx = 0; idx <= armpmu->num_events; ++idx) {
2702                 struct perf_event *event = cpuc->events[idx];
2703                 struct hw_perf_event *hwc;
2704
2705                 if (!test_bit(idx, cpuc->active_mask))
2706                         continue;
2707
2708                 if (!xscale2_pmnc_counter_has_overflowed(of_flags, idx))
2709                         continue;
2710
2711                 hwc = &event->hw;
2712                 armpmu_event_update(event, hwc, idx);
2713                 data.period = event->hw.last_period;
2714                 if (!armpmu_event_set_period(event, hwc, idx))
2715                         continue;
2716
2717                 if (perf_event_overflow(event, 0, &data, regs))
2718                         armpmu->disable(hwc, idx);
2719         }
2720
2721         irq_work_run();
2722
2723         /*
2724          * Re-enable the PMU.
2725          */
2726         pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE;
2727         xscale2pmu_write_pmnc(pmnc);
2728
2729         return IRQ_HANDLED;
2730 }
2731
2732 static void
2733 xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
2734 {
2735         unsigned long flags, ien, evtsel;
2736
2737         ien = xscale2pmu_read_int_enable();
2738         evtsel = xscale2pmu_read_event_select();
2739
2740         switch (idx) {
2741         case XSCALE_CYCLE_COUNTER:
2742                 ien |= XSCALE2_CCOUNT_INT_EN;
2743                 break;
2744         case XSCALE_COUNTER0:
2745                 ien |= XSCALE2_COUNT0_INT_EN;
2746                 evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
2747                 evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT;
2748                 break;
2749         case XSCALE_COUNTER1:
2750                 ien |= XSCALE2_COUNT1_INT_EN;
2751                 evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
2752                 evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT;
2753                 break;
2754         case XSCALE_COUNTER2:
2755                 ien |= XSCALE2_COUNT2_INT_EN;
2756                 evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
2757                 evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT;
2758                 break;
2759         case XSCALE_COUNTER3:
2760                 ien |= XSCALE2_COUNT3_INT_EN;
2761                 evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
2762                 evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT;
2763                 break;
2764         default:
2765                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2766                 return;
2767         }
2768
2769         spin_lock_irqsave(&pmu_lock, flags);
2770         xscale2pmu_write_event_select(evtsel);
2771         xscale2pmu_write_int_enable(ien);
2772         spin_unlock_irqrestore(&pmu_lock, flags);
2773 }
2774
2775 static void
2776 xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
2777 {
2778         unsigned long flags, ien, evtsel;
2779
2780         ien = xscale2pmu_read_int_enable();
2781         evtsel = xscale2pmu_read_event_select();
2782
2783         switch (idx) {
2784         case XSCALE_CYCLE_COUNTER:
2785                 ien &= ~XSCALE2_CCOUNT_INT_EN;
2786                 break;
2787         case XSCALE_COUNTER0:
2788                 ien &= ~XSCALE2_COUNT0_INT_EN;
2789                 evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
2790                 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT;
2791                 break;
2792         case XSCALE_COUNTER1:
2793                 ien &= ~XSCALE2_COUNT1_INT_EN;
2794                 evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
2795                 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT;
2796                 break;
2797         case XSCALE_COUNTER2:
2798                 ien &= ~XSCALE2_COUNT2_INT_EN;
2799                 evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
2800                 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT;
2801                 break;
2802         case XSCALE_COUNTER3:
2803                 ien &= ~XSCALE2_COUNT3_INT_EN;
2804                 evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
2805                 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT;
2806                 break;
2807         default:
2808                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2809                 return;
2810         }
2811
2812         spin_lock_irqsave(&pmu_lock, flags);
2813         xscale2pmu_write_event_select(evtsel);
2814         xscale2pmu_write_int_enable(ien);
2815         spin_unlock_irqrestore(&pmu_lock, flags);
2816 }
2817
2818 static int
2819 xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc,
2820                         struct hw_perf_event *event)
2821 {
2822         int idx = xscale1pmu_get_event_idx(cpuc, event);
2823         if (idx >= 0)
2824                 goto out;
2825
2826         if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask))
2827                 idx = XSCALE_COUNTER3;
2828         else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask))
2829                 idx = XSCALE_COUNTER2;
2830 out:
2831         return idx;
2832 }
2833
2834 static void
2835 xscale2pmu_start(void)
2836 {
2837         unsigned long flags, val;
2838
2839         spin_lock_irqsave(&pmu_lock, flags);
2840         val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
2841         val |= XSCALE_PMU_ENABLE;
2842         xscale2pmu_write_pmnc(val);
2843         spin_unlock_irqrestore(&pmu_lock, flags);
2844 }
2845
2846 static void
2847 xscale2pmu_stop(void)
2848 {
2849         unsigned long flags, val;
2850
2851         spin_lock_irqsave(&pmu_lock, flags);
2852         val = xscale2pmu_read_pmnc();
2853         val &= ~XSCALE_PMU_ENABLE;
2854         xscale2pmu_write_pmnc(val);
2855         spin_unlock_irqrestore(&pmu_lock, flags);
2856 }
2857
2858 static inline u32
2859 xscale2pmu_read_counter(int counter)
2860 {
2861         u32 val = 0;
2862
2863         switch (counter) {
2864         case XSCALE_CYCLE_COUNTER:
2865                 asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val));
2866                 break;
2867         case XSCALE_COUNTER0:
2868                 asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val));
2869                 break;
2870         case XSCALE_COUNTER1:
2871                 asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val));
2872                 break;
2873         case XSCALE_COUNTER2:
2874                 asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val));
2875                 break;
2876         case XSCALE_COUNTER3:
2877                 asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val));
2878                 break;
2879         }
2880
2881         return val;
2882 }
2883
2884 static inline void
2885 xscale2pmu_write_counter(int counter, u32 val)
2886 {
2887         switch (counter) {
2888         case XSCALE_CYCLE_COUNTER:
2889                 asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val));
2890                 break;
2891         case XSCALE_COUNTER0:
2892                 asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val));
2893                 break;
2894         case XSCALE_COUNTER1:
2895                 asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val));
2896                 break;
2897         case XSCALE_COUNTER2:
2898                 asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val));
2899                 break;
2900         case XSCALE_COUNTER3:
2901                 asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val));
2902                 break;
2903         }
2904 }
2905
2906 static const struct arm_pmu xscale2pmu = {
2907         .id             = ARM_PERF_PMU_ID_XSCALE2,
2908         .handle_irq     = xscale2pmu_handle_irq,
2909         .enable         = xscale2pmu_enable_event,
2910         .disable        = xscale2pmu_disable_event,
2911         .read_counter   = xscale2pmu_read_counter,
2912         .write_counter  = xscale2pmu_write_counter,
2913         .get_event_idx  = xscale2pmu_get_event_idx,
2914         .start          = xscale2pmu_start,
2915         .stop           = xscale2pmu_stop,
2916         .cache_map      = &xscale_perf_cache_map,
2917         .event_map      = &xscale_perf_map,
2918         .raw_event_mask = 0xFF,
2919         .num_events     = 5,
2920         .max_period     = (1LLU << 32) - 1,
2921 };
2922
2923 static int __init
2924 init_hw_perf_events(void)
2925 {
2926         unsigned long cpuid = read_cpuid_id();
2927         unsigned long implementor = (cpuid & 0xFF000000) >> 24;
2928         unsigned long part_number = (cpuid & 0xFFF0);
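        /*
         * Worked example (hypothetical MIDR value): 0x410FC082 decodes to
         * implementor 0x41 (ARM Ltd) and, after the 0xFFF0 mask, to
         * part_number 0xC080 (Cortex-A8).
         */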
2929
2930         /* ARM Ltd CPUs. */
2931         if (0x41 == implementor) {
2932                 switch (part_number) {
2933                 case 0xB360:    /* ARM1136 */
2934                 case 0xB560:    /* ARM1156 */
2935                 case 0xB760:    /* ARM1176 */
2936                         armpmu = &armv6pmu;
2937                         break;
2938                 case 0xB020:    /* ARM11mpcore */
2939                         armpmu = &armv6mpcore_pmu;
2940                         break;
2941                 case 0xC080:    /* Cortex-A8 */
2942                         armv7pmu.id = ARM_PERF_PMU_ID_CA8;
2943                         armv7pmu.cache_map = &armv7_a8_perf_cache_map;
2944                         armv7pmu.event_map = &armv7_a8_perf_map;
2945                         armpmu = &armv7pmu;
2946
2947                         /* Reset PMNC and read the number of CNTx
2948                            counters supported. */
2949                         armv7pmu.num_events = armv7_reset_read_pmnc();
2950                         break;
2951                 case 0xC090:    /* Cortex-A9 */
2952                         armv7pmu.id = ARM_PERF_PMU_ID_CA9;
2953                         armv7pmu.cache_map = &armv7_a9_perf_cache_map;
2954                         armv7pmu.event_map = &armv7_a9_perf_map;
2955                         armpmu = &armv7pmu;
2956
2957                         /* Reset PMNC and read the number of CNTx
2958                            counters supported. */
2959                         armv7pmu.num_events = armv7_reset_read_pmnc();
2960                         break;
2961                 }
2962         /* Intel CPUs [xscale]. */
2963         } else if (0x69 == implementor) {
2964                 part_number = (cpuid >> 13) & 0x7;
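                /*
                 * Bits [15:13] of the XScale CPU ID identify the core
                 * generation, which selects between the two drivers below.
                 */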
2965                 switch (part_number) {
2966                 case 1:
2967                         armpmu = &xscale1pmu;
2968                         break;
2969                 case 2:
2970                         armpmu = &xscale2pmu;
2971                         break;
2972                 }
2973         }
2974
2975         if (armpmu) {
2976                 pr_info("enabled with %s PMU driver, %d counters available\n",
2977                                 arm_pmu_names[armpmu->id], armpmu->num_events);
2978         } else {
2979                 pr_info("no hardware support available\n");
2980         }
2981
2982         perf_pmu_register(&pmu);
2983
2984         return 0;
2985 }
2986 arch_initcall(init_hw_perf_events);
2987
2988 /*
2989  * Callchain handling code.
2990  */
2991
2992 /*
2993  * The registers we're interested in are at the end of the variable
2994  * length saved register structure. The fp points at the end of this
2995  * structure so the address of this struct is:
2996  * (struct frame_tail *)(xxx->fp)-1
2997  *
2998  * This code has been adapted from the ARM OProfile support.
2999  */
3000 struct frame_tail {
3001         struct frame_tail   *fp;
3002         unsigned long       sp;
3003         unsigned long       lr;
3004 } __attribute__((packed));
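/*
 * Illustrative example (hypothetical addresses): if regs->ARM_fp is
 * 0xbeffff20, the frame tail starts at
 * 0xbeffff20 - sizeof(struct frame_tail) = 0xbeffff14, and the lr word at
 * that address is the return address recorded in the callchain.
 */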
3005
3006 /*
3007  * Get the return address for a single stackframe and return a pointer to the
3008  * next frame tail.
3009  */
3010 static struct frame_tail *
3011 user_backtrace(struct frame_tail *tail,
3012                struct perf_callchain_entry *entry)
3013 {
3014         struct frame_tail buftail;
3015
3016         /* Also check accessibility of one struct frame_tail beyond */
3017         if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
3018                 return NULL;
3019         if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail)))
3020                 return NULL;
3021
3022         perf_callchain_store(entry, buftail.lr);
3023
3024         /*
3025          * Frame pointers should strictly progress back up the stack
3026          * (towards higher addresses).
3027          */
3028         if (tail >= buftail.fp)
3029                 return NULL;
3030
3031         return buftail.fp - 1;
3032 }
3033
3034 void
3035 perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
3036 {
3037         struct frame_tail *tail;
3038
3040         tail = (struct frame_tail *)regs->ARM_fp - 1;
3041
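        /*
         * Walk the user stack until we hit a NULL or non-word-aligned
         * frame pointer, or user_backtrace() gives up.
         */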
3042         while (tail && !((unsigned long)tail & 0x3))
3043                 tail = user_backtrace(tail, entry);
3044 }
3045
3046 /*
3047  * Gets called by walk_stackframe() for every stackframe. This will be called
3048  * whilst unwinding the stackframe and is like a subroutine return, so we use
3049  * the PC.
3050  */
3051 static int
3052 callchain_trace(struct stackframe *fr,
3053                 void *data)
3054 {
3055         struct perf_callchain_entry *entry = data;
3056         perf_callchain_store(entry, fr->pc);
3057         return 0;
3058 }
3059
3060 void
3061 perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
3062 {
3063         struct stackframe fr;
3064
3065         fr.fp = regs->ARM_fp;
3066         fr.sp = regs->ARM_sp;
3067         fr.lr = regs->ARM_lr;
3068         fr.pc = regs->ARM_pc;
3069         walk_stackframe(&fr, callchain_trace, entry);
3070 }