Merge branch 'msm-fix' of git://codeaurora.org/quic/kernel/davidb/linux-msm into...
[pandora-kernel.git] arch/mips/kernel/perf_event_mipsxx.c
index e5ad09a..4f2971b 100644 (file)
-#if defined(CONFIG_CPU_MIPS32) || defined(CONFIG_CPU_MIPS64) || \
-    defined(CONFIG_CPU_R10000) || defined(CONFIG_CPU_SB1)
+/*
+ * Linux performance counter support for MIPS.
+ *
+ * Copyright (C) 2010 MIPS Technologies, Inc.
+ * Copyright (C) 2011 Cavium Networks, Inc.
+ * Author: Deng-Cheng Zhu
+ *
+ * This code is based on the implementation for ARM, which is in turn
+ * based on the sparc64 perf event code and the x86 code. Performance
+ * counter access is based on the MIPS Oprofile code. And the callchain
+ * support references the code of MIPS stacktrace.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/cpumask.h>
+#include <linux/interrupt.h>
+#include <linux/smp.h>
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <linux/uaccess.h>
+
+#include <asm/irq.h>
+#include <asm/irq_regs.h>
+#include <asm/stacktrace.h>
+#include <asm/time.h> /* For perf_irq */
+
+#define MIPS_MAX_HWEVENTS 4
+
+struct cpu_hw_events {
+       /* Array of events on this cpu. */
+       struct perf_event       *events[MIPS_MAX_HWEVENTS];
+
+       /*
+        * Set the bit (indexed by the counter number) when the counter
+        * is used for an event.
+        */
+       unsigned long           used_mask[BITS_TO_LONGS(MIPS_MAX_HWEVENTS)];
+
+       /*
+        * Software copy of the control register for each performance counter.
+        * MIPS CPUs vary in their performance counters; they use this field
+        * differently, and some may not use it at all.
+        */
+       unsigned int            saved_ctrl[MIPS_MAX_HWEVENTS];
+};
+DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
+       .saved_ctrl = {0},
+};
+
+/* The description of MIPS performance events. */
+struct mips_perf_event {
+       unsigned int event_id;
+       /*
+        * MIPS performance counters are indexed starting from 0.
+        * CNTR_EVEN indicates that the event may only be counted on
+        * even-numbered counters; CNTR_ODD, only on odd-numbered ones.
+        */
+       unsigned int cntr_mask;
+       #define CNTR_EVEN       0x55555555
+       #define CNTR_ODD        0xaaaaaaaa
+       #define CNTR_ALL        0xffffffff
+#ifdef CONFIG_MIPS_MT_SMP
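+       /*
+        * The counting range of an MT perf event: per-TC (T), per-VPE (V)
+        * or per-processor (P).
+        */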
+       enum {
+               T  = 0,
+               V  = 1,
+               P  = 2,
+       } range;
+#else
+       #define T
+       #define V
+       #define P
+#endif
+};
+
+static struct mips_perf_event raw_event;
+static DEFINE_MUTEX(raw_event_mutex);
+
+#define UNSUPPORTED_PERF_EVENT_ID 0xffffffff
+#define C(x) PERF_COUNT_HW_CACHE_##x
+
+struct mips_pmu {
+       u64             max_period;
+       u64             valid_count;
+       u64             overflow;
+       const char      *name;
+       int             irq;
+       u64             (*read_counter)(unsigned int idx);
+       void            (*write_counter)(unsigned int idx, u64 val);
+       const struct mips_perf_event *(*map_raw_event)(u64 config);
+       const struct mips_perf_event (*general_event_map)[PERF_COUNT_HW_MAX];
+       const struct mips_perf_event (*cache_event_map)
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX];
+       unsigned int    num_counters;
+};
+
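+/* The single, global PMU descriptor; filled in by init_hw_perf_events(). */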
+static struct mips_pmu mipspmu;
 
 #define M_CONFIG1_PC   (1 << 4)
 
-#define M_PERFCTL_EXL                  (1UL      <<  0)
-#define M_PERFCTL_KERNEL               (1UL      <<  1)
-#define M_PERFCTL_SUPERVISOR           (1UL      <<  2)
-#define M_PERFCTL_USER                 (1UL      <<  3)
-#define M_PERFCTL_INTERRUPT_ENABLE     (1UL      <<  4)
+#define M_PERFCTL_EXL                  (1      <<  0)
+#define M_PERFCTL_KERNEL               (1      <<  1)
+#define M_PERFCTL_SUPERVISOR           (1      <<  2)
+#define M_PERFCTL_USER                 (1      <<  3)
+#define M_PERFCTL_INTERRUPT_ENABLE     (1      <<  4)
 #define M_PERFCTL_EVENT(event)         (((event) & 0x3ff)  << 5)
 #define M_PERFCTL_VPEID(vpe)           ((vpe)    << 16)
 #define M_PERFCTL_MT_EN(filter)                ((filter) << 20)
 #define    M_TC_EN_VPE                 M_PERFCTL_MT_EN(1)
 #define    M_TC_EN_TC                  M_PERFCTL_MT_EN(2)
 #define M_PERFCTL_TCID(tcid)           ((tcid)   << 22)
-#define M_PERFCTL_WIDE                 (1UL      << 30)
-#define M_PERFCTL_MORE                 (1UL      << 31)
+#define M_PERFCTL_WIDE                 (1      << 30)
+#define M_PERFCTL_MORE                 (1      << 31)
 
 #define M_PERFCTL_COUNT_EVENT_WHENEVER (M_PERFCTL_EXL |                \
                                        M_PERFCTL_KERNEL |              \
 #endif
 #define M_PERFCTL_EVENT_MASK           0xfe0
 
-#define M_COUNTER_OVERFLOW             (1UL      << 31)
 
 #ifdef CONFIG_MIPS_MT_SMP
 static int cpu_has_mipsmt_pertccounters;
 
+static DEFINE_RWLOCK(pmuint_rwlock);
+
 /*
  * FIXME: For VSMP, vpe_id() is redefined for Perf-events, because
  * cpu_data[cpuid].vpe_id reports 0 for _both_ CPUs.
@@ -49,210 +149,674 @@ static int cpu_has_mipsmt_pertccounters;
 #endif
 
 /* Copied from op_model_mipsxx.c */
-static inline unsigned int vpe_shift(void)
+static unsigned int vpe_shift(void)
 {
        if (num_possible_cpus() > 1)
                return 1;
 
        return 0;
 }
-#else /* !CONFIG_MIPS_MT_SMP */
-#define vpe_id()       0
-
-static inline unsigned int vpe_shift(void)
-{
-       return 0;
-}
-#endif /* CONFIG_MIPS_MT_SMP */
 
-static inline unsigned int
-counters_total_to_per_cpu(unsigned int counters)
+static unsigned int counters_total_to_per_cpu(unsigned int counters)
 {
        return counters >> vpe_shift();
 }
 
-static inline unsigned int
-counters_per_cpu_to_total(unsigned int counters)
+static unsigned int counters_per_cpu_to_total(unsigned int counters)
 {
        return counters << vpe_shift();
 }
 
-#define __define_perf_accessors(r, n, np)                              \
-                                                                       \
-static inline unsigned int r_c0_ ## r ## n(void)                       \
-{                                                                      \
-       unsigned int cpu = vpe_id();                                    \
-                                                                       \
-       switch (cpu) {                                                  \
-       case 0:                                                         \
-               return read_c0_ ## r ## n();                            \
-       case 1:                                                         \
-               return read_c0_ ## r ## np();                           \
-       default:                                                        \
-               BUG();                                                  \
-       }                                                               \
-       return 0;                                                       \
-}                                                                      \
-                                                                       \
-static inline void w_c0_ ## r ## n(unsigned int value)                 \
-{                                                                      \
-       unsigned int cpu = vpe_id();                                    \
-                                                                       \
-       switch (cpu) {                                                  \
-       case 0:                                                         \
-               write_c0_ ## r ## n(value);                             \
-               return;                                                 \
-       case 1:                                                         \
-               write_c0_ ## r ## np(value);                            \
-               return;                                                 \
-       default:                                                        \
-               BUG();                                                  \
-       }                                                               \
-       return;                                                         \
-}                                                                      \
-
-__define_perf_accessors(perfcntr, 0, 2)
-__define_perf_accessors(perfcntr, 1, 3)
-__define_perf_accessors(perfcntr, 2, 0)
-__define_perf_accessors(perfcntr, 3, 1)
-
-__define_perf_accessors(perfctrl, 0, 2)
-__define_perf_accessors(perfctrl, 1, 3)
-__define_perf_accessors(perfctrl, 2, 0)
-__define_perf_accessors(perfctrl, 3, 1)
-
-static inline int __n_counters(void)
-{
-       if (!(read_c0_config1() & M_CONFIG1_PC))
-               return 0;
-       if (!(read_c0_perfctrl0() & M_PERFCTL_MORE))
-               return 1;
-       if (!(read_c0_perfctrl1() & M_PERFCTL_MORE))
-               return 2;
-       if (!(read_c0_perfctrl2() & M_PERFCTL_MORE))
-               return 3;
+#else /* !CONFIG_MIPS_MT_SMP */
+#define vpe_id()       0
 
-       return 4;
+#endif /* CONFIG_MIPS_MT_SMP */
+
+static void resume_local_counters(void);
+static void pause_local_counters(void);
+static irqreturn_t mipsxx_pmu_handle_irq(int, void *);
+static int mipsxx_pmu_handle_shared_irq(void);
+
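+/*
+ * On a dual-VPE core the four performance counters are split between
+ * the VPEs: VPE 1 sees physical counters 2/3 where VPE 0 sees 0/1, so
+ * rotate the logical index by two (modulo four) on VPE 1.
+ */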
+static unsigned int mipsxx_pmu_swizzle_perf_idx(unsigned int idx)
+{
+       if (vpe_id() == 1)
+               idx = (idx + 2) & 3;
+       return idx;
 }
 
-static inline int n_counters(void)
+static u64 mipsxx_pmu_read_counter(unsigned int idx)
 {
-       int counters;
+       idx = mipsxx_pmu_swizzle_perf_idx(idx);
 
-       switch (current_cpu_type()) {
-       case CPU_R10000:
-               counters = 2;
-               break;
+       switch (idx) {
+       case 0:
+               /*
+                * The counters are unsigned; we must cast to truncate
+                * off the high bits.
+                */
+               return (u32)read_c0_perfcntr0();
+       case 1:
+               return (u32)read_c0_perfcntr1();
+       case 2:
+               return (u32)read_c0_perfcntr2();
+       case 3:
+               return (u32)read_c0_perfcntr3();
+       default:
+               WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
+               return 0;
+       }
+}
 
-       case CPU_R12000:
-       case CPU_R14000:
-               counters = 4;
-               break;
+static u64 mipsxx_pmu_read_counter_64(unsigned int idx)
+{
+       idx = mipsxx_pmu_swizzle_perf_idx(idx);
 
+       switch (idx) {
+       case 0:
+               return read_c0_perfcntr0_64();
+       case 1:
+               return read_c0_perfcntr1_64();
+       case 2:
+               return read_c0_perfcntr2_64();
+       case 3:
+               return read_c0_perfcntr3_64();
        default:
-               counters = __n_counters();
+               WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
+               return 0;
        }
-
-       return counters;
 }
 
-static void reset_counters(void *arg)
+static void mipsxx_pmu_write_counter(unsigned int idx, u64 val)
 {
-       int counters = (int)(long)arg;
-       switch (counters) {
-       case 4:
-               w_c0_perfctrl3(0);
-               w_c0_perfcntr3(0);
-       case 3:
-               w_c0_perfctrl2(0);
-               w_c0_perfcntr2(0);
+       idx = mipsxx_pmu_swizzle_perf_idx(idx);
+
+       switch (idx) {
+       case 0:
+               write_c0_perfcntr0(val);
+               return;
+       case 1:
+               write_c0_perfcntr1(val);
+               return;
        case 2:
-               w_c0_perfctrl1(0);
-               w_c0_perfcntr1(0);
+               write_c0_perfcntr2(val);
+               return;
+       case 3:
+               write_c0_perfcntr3(val);
+               return;
+       }
+}
+
+static void mipsxx_pmu_write_counter_64(unsigned int idx, u64 val)
+{
+       idx = mipsxx_pmu_swizzle_perf_idx(idx);
+
+       switch (idx) {
+       case 0:
+               write_c0_perfcntr0_64(val);
+               return;
        case 1:
-               w_c0_perfctrl0(0);
-               w_c0_perfcntr0(0);
+               write_c0_perfcntr1_64(val);
+               return;
+       case 2:
+               write_c0_perfcntr2_64(val);
+               return;
+       case 3:
+               write_c0_perfcntr3_64(val);
+               return;
        }
 }
 
-static inline u64
-mipsxx_pmu_read_counter(unsigned int idx)
+static unsigned int mipsxx_pmu_read_control(unsigned int idx)
 {
+       idx = mipsxx_pmu_swizzle_perf_idx(idx);
+
        switch (idx) {
        case 0:
-               return r_c0_perfcntr0();
+               return read_c0_perfctrl0();
        case 1:
-               return r_c0_perfcntr1();
+               return read_c0_perfctrl1();
        case 2:
-               return r_c0_perfcntr2();
+               return read_c0_perfctrl2();
        case 3:
-               return r_c0_perfcntr3();
+               return read_c0_perfctrl3();
        default:
                WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
                return 0;
        }
 }
 
-static inline void
-mipsxx_pmu_write_counter(unsigned int idx, u64 val)
+static void mipsxx_pmu_write_control(unsigned int idx, unsigned int val)
 {
+       idx = mipsxx_pmu_swizzle_perf_idx(idx);
+
        switch (idx) {
        case 0:
-               w_c0_perfcntr0(val);
+               write_c0_perfctrl0(val);
                return;
        case 1:
-               w_c0_perfcntr1(val);
+               write_c0_perfctrl1(val);
                return;
        case 2:
-               w_c0_perfcntr2(val);
+               write_c0_perfctrl2(val);
                return;
        case 3:
-               w_c0_perfcntr3(val);
+               write_c0_perfctrl3(val);
                return;
        }
 }
 
-static inline unsigned int
-mipsxx_pmu_read_control(unsigned int idx)
+static int mipsxx_pmu_alloc_counter(struct cpu_hw_events *cpuc,
+                                   struct hw_perf_event *hwc)
+{
+       int i;
+
+       /*
+        * We only need to care about the counter mask; the range has
+        * already been checked.
+        */
+       unsigned long cntr_mask = (hwc->event_base >> 8) & 0xffff;
+
+       for (i = mipspmu.num_counters - 1; i >= 0; i--) {
+               /*
+                * Note that some MIPS perf events can be counted by both
+                * even and odd counters, whereas many others can only be
+                * counted by even _or_ odd counters. This means that when
+                * an event of the former kind occupies the counter that an
+                * event of the latter kind needs, counter allocation for
+                * the latter event fails. If the two could be swapped
+                * dynamically, both would be satisfied; we leave that
+                * issue alone for now.
+                */
+               if (test_bit(i, &cntr_mask) &&
+                       !test_and_set_bit(i, cpuc->used_mask))
+                       return i;
+       }
+
+       return -EAGAIN;
+}
+
+static void mipsxx_pmu_enable_event(struct hw_perf_event *evt, int idx)
+{
+       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+       WARN_ON(idx < 0 || idx >= mipspmu.num_counters);
+
+       cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base & 0xff) |
+               (evt->config_base & M_PERFCTL_CONFIG_MASK) |
+               /* Make sure the interrupt is enabled. */
+               M_PERFCTL_INTERRUPT_ENABLE;
+       /*
+        * We do not actually let the counter run. Leave it until start().
+        */
+}
+
+static void mipsxx_pmu_disable_event(int idx)
+{
+       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+       unsigned long flags;
+
+       WARN_ON(idx < 0 || idx >= mipspmu.num_counters);
+
+       local_irq_save(flags);
+       cpuc->saved_ctrl[idx] = mipsxx_pmu_read_control(idx) &
+               ~M_PERFCTL_COUNT_EVENT_WHENEVER;
+       mipsxx_pmu_write_control(idx, cpuc->saved_ctrl[idx]);
+       local_irq_restore(flags);
+}
+
+static int mipspmu_event_set_period(struct perf_event *event,
+                                   struct hw_perf_event *hwc,
+                                   int idx)
+{
+       u64 left = local64_read(&hwc->period_left);
+       u64 period = hwc->sample_period;
+       int ret = 0;
+
+       if (unlikely((left + period) & (1ULL << 63))) {
+               /* left underflowed by more than period. */
+               left = period;
+               local64_set(&hwc->period_left, left);
+               hwc->last_period = period;
+               ret = 1;
+       } else if (unlikely((left + period) <= period)) {
+               /* left underflowed by less than period. */
+               left += period;
+               local64_set(&hwc->period_left, left);
+               hwc->last_period = period;
+               ret = 1;
+       }
+
+       if (left > mipspmu.max_period) {
+               left = mipspmu.max_period;
+               local64_set(&hwc->period_left, left);
+       }
+
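+       /*
+        * Start the counter at (overflow - left) so that it overflows,
+        * and raises the interrupt, after another 'left' events.
+        */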
+       local64_set(&hwc->prev_count, mipspmu.overflow - left);
+
+       mipspmu.write_counter(idx, mipspmu.overflow - left);
+
+       perf_event_update_userpage(event);
+
+       return ret;
+}
+
+static void mipspmu_event_update(struct perf_event *event,
+                                struct hw_perf_event *hwc,
+                                int idx)
+{
+       u64 prev_raw_count, new_raw_count;
+       u64 delta;
+
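+       /*
+        * Update prev_count atomically: if an interrupt updates it between
+        * the read and the cmpxchg, retry with the new value.
+        */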
+again:
+       prev_raw_count = local64_read(&hwc->prev_count);
+       new_raw_count = mipspmu.read_counter(idx);
+
+       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+                               new_raw_count) != prev_raw_count)
+               goto again;
+
+       delta = new_raw_count - prev_raw_count;
+
+       local64_add(delta, &event->count);
+       local64_sub(delta, &hwc->period_left);
+}
+
+static void mipspmu_start(struct perf_event *event, int flags)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       if (flags & PERF_EF_RELOAD)
+               WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+       hwc->state = 0;
+
+       /* Set the period for the event. */
+       mipspmu_event_set_period(event, hwc, hwc->idx);
+
+       /* Enable the event. */
+       mipsxx_pmu_enable_event(hwc, hwc->idx);
+}
+
+static void mipspmu_stop(struct perf_event *event, int flags)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       if (!(hwc->state & PERF_HES_STOPPED)) {
+               /* We are working on a local event. */
+               mipsxx_pmu_disable_event(hwc->idx);
+               barrier();
+               mipspmu_event_update(event, hwc, hwc->idx);
+               hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+       }
+}
+
+static int mipspmu_add(struct perf_event *event, int flags)
+{
+       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+       struct hw_perf_event *hwc = &event->hw;
+       int idx;
+       int err = 0;
+
+       perf_pmu_disable(event->pmu);
+
+       /* Look for a free counter for this event. */
+       idx = mipsxx_pmu_alloc_counter(cpuc, hwc);
+       if (idx < 0) {
+               err = idx;
+               goto out;
+       }
+
+       /*
+        * If there is an event in the counter we are going to use, then
+        * make sure it is disabled.
+        */
+       event->hw.idx = idx;
+       mipsxx_pmu_disable_event(idx);
+       cpuc->events[idx] = event;
+
+       hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+       if (flags & PERF_EF_START)
+               mipspmu_start(event, PERF_EF_RELOAD);
+
+       /* Propagate our changes to the userspace mapping. */
+       perf_event_update_userpage(event);
+
+out:
+       perf_pmu_enable(event->pmu);
+       return err;
+}
+
+static void mipspmu_del(struct perf_event *event, int flags)
+{
+       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+       struct hw_perf_event *hwc = &event->hw;
+       int idx = hwc->idx;
+
+       WARN_ON(idx < 0 || idx >= mipspmu.num_counters);
+
+       mipspmu_stop(event, PERF_EF_UPDATE);
+       cpuc->events[idx] = NULL;
+       clear_bit(idx, cpuc->used_mask);
+
+       perf_event_update_userpage(event);
+}
+
+static void mipspmu_read(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       /* Don't read disabled counters! */
+       if (hwc->idx < 0)
+               return;
+
+       mipspmu_event_update(event, hwc, hwc->idx);
+}
+
+static void mipspmu_enable(struct pmu *pmu)
+{
+#ifdef CONFIG_MIPS_MT_SMP
+       write_unlock(&pmuint_rwlock);
+#endif
+       resume_local_counters();
+}
+
+/*
+ * MIPS performance counters can be per-TC. The control registers cannot
+ * be directly accessed across CPUs. Hence if we want to do global
+ * control, we need cross-CPU calls. on_each_cpu() can help us, but we
+ * cannot make sure this function is called with interrupts enabled. So
+ * here we pause the local counters and then grab a rwlock, leaving the
+ * counters on other CPUs alone. If a counter interrupt is raised while
+ * we own the write lock, we simply pause the local counters on that CPU
+ * and spin in the handler. Also, we know we won't be switched to another
+ * CPU after pausing the local counters and before grabbing the lock.
+ */
+static void mipspmu_disable(struct pmu *pmu)
+{
+       pause_local_counters();
+#ifdef CONFIG_MIPS_MT_SMP
+       write_lock(&pmuint_rwlock);
+#endif
+}
+
+static atomic_t active_events = ATOMIC_INIT(0);
+static DEFINE_MUTEX(pmu_reserve_mutex);
+static int (*save_perf_irq)(void);
+
+static int mipspmu_get_irq(void)
+{
+       int err;
+
+       if (mipspmu.irq >= 0) {
+               /* Request our own irq handler. */
+               err = request_irq(mipspmu.irq, mipsxx_pmu_handle_irq,
+                       IRQF_PERCPU | IRQF_NOBALANCING,
+                       "mips_perf_pmu", NULL);
+               if (err) {
+                       pr_warning("Unable to request IRQ%d for MIPS "
+                          "performance counters!\n", mipspmu.irq);
+               }
+       } else if (cp0_perfcount_irq < 0) {
+               /*
+                * We are sharing the irq number with the timer interrupt.
+                */
+               save_perf_irq = perf_irq;
+               perf_irq = mipsxx_pmu_handle_shared_irq;
+               err = 0;
+       } else {
+               pr_warning("The platform hasn't properly defined its "
+                       "interrupt controller.\n");
+               err = -ENOENT;
+       }
+
+       return err;
+}
+
+static void mipspmu_free_irq(void)
+{
+       if (mipspmu.irq >= 0)
+               free_irq(mipspmu.irq, NULL);
+       else if (cp0_perfcount_irq < 0)
+               perf_irq = save_perf_irq;
+}
+
+/*
+ * mipsxx/rm9000/loongson2 have different performance counters; they have
+ * specific low-level init routines.
+ */
+static void reset_counters(void *arg);
+static int __hw_perf_event_init(struct perf_event *event);
+
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+       if (atomic_dec_and_mutex_lock(&active_events,
+                               &pmu_reserve_mutex)) {
+               /*
+                * We must not call the destroy function with interrupts
+                * disabled.
+                */
+               on_each_cpu(reset_counters,
+                       (void *)(long)mipspmu.num_counters, 1);
+               mipspmu_free_irq();
+               mutex_unlock(&pmu_reserve_mutex);
+       }
+}
+
+static int mipspmu_event_init(struct perf_event *event)
+{
+       int err = 0;
+
+       switch (event->attr.type) {
+       case PERF_TYPE_RAW:
+       case PERF_TYPE_HARDWARE:
+       case PERF_TYPE_HW_CACHE:
+               break;
+
+       default:
+               return -ENOENT;
+       }
+
+       if (event->cpu >= nr_cpumask_bits ||
+           (event->cpu >= 0 && !cpu_online(event->cpu)))
+               return -ENODEV;
+
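+       /* The first active event sets up the PMU interrupt. */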
+       if (!atomic_inc_not_zero(&active_events)) {
+               if (atomic_read(&active_events) > MIPS_MAX_HWEVENTS) {
+                       atomic_dec(&active_events);
+                       return -ENOSPC;
+               }
+
+               mutex_lock(&pmu_reserve_mutex);
+               if (atomic_read(&active_events) == 0)
+                       err = mipspmu_get_irq();
+
+               if (!err)
+                       atomic_inc(&active_events);
+               mutex_unlock(&pmu_reserve_mutex);
+       }
+
+       if (err)
+               return err;
+
+       err = __hw_perf_event_init(event);
+       if (err)
+               hw_perf_event_destroy(event);
+
+       return err;
+}
+
+static struct pmu pmu = {
+       .pmu_enable     = mipspmu_enable,
+       .pmu_disable    = mipspmu_disable,
+       .event_init     = mipspmu_event_init,
+       .add            = mipspmu_add,
+       .del            = mipspmu_del,
+       .start          = mipspmu_start,
+       .stop           = mipspmu_stop,
+       .read           = mipspmu_read,
+};
+
+static unsigned int mipspmu_perf_event_encode(const struct mips_perf_event *pev)
+{
+/*
+ * Top 8 bits for range, next 16 bits for cntr_mask, lowest 8 bits for
+ * event_id.
+ */
+#ifdef CONFIG_MIPS_MT_SMP
+       return ((unsigned int)pev->range << 24) |
+               (pev->cntr_mask & 0xffff00) |
+               (pev->event_id & 0xff);
+#else
+       return (pev->cntr_mask & 0xffff00) |
+               (pev->event_id & 0xff);
+#endif
+}
+
+static const struct mips_perf_event *mipspmu_map_general_event(int idx)
+{
+       const struct mips_perf_event *pev;
+
+       pev = ((*mipspmu.general_event_map)[idx].event_id ==
+               UNSUPPORTED_PERF_EVENT_ID ? ERR_PTR(-EOPNOTSUPP) :
+               &(*mipspmu.general_event_map)[idx]);
+
+       return pev;
+}
+
+static const struct mips_perf_event *mipspmu_map_cache_event(u64 config)
+{
+       unsigned int cache_type, cache_op, cache_result;
+       const struct mips_perf_event *pev;
+
+       cache_type = (config >> 0) & 0xff;
+       if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
+               return ERR_PTR(-EINVAL);
+
+       cache_op = (config >> 8) & 0xff;
+       if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
+               return ERR_PTR(-EINVAL);
+
+       cache_result = (config >> 16) & 0xff;
+       if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+               return ERR_PTR(-EINVAL);
+
+       pev = &((*mipspmu.cache_event_map)
+                                       [cache_type]
+                                       [cache_op]
+                                       [cache_result]);
+
+       if (pev->event_id == UNSUPPORTED_PERF_EVENT_ID)
+               return ERR_PTR(-EOPNOTSUPP);
+
+       return pev;
+}
+
+static int validate_event(struct cpu_hw_events *cpuc,
+              struct perf_event *event)
+{
+       struct hw_perf_event fake_hwc = event->hw;
+
+       /* Allow mixed event groups, so return 1 to pass validation. */
+       if (event->pmu != &pmu || event->state <= PERF_EVENT_STATE_OFF)
+               return 1;
+
+       return mipsxx_pmu_alloc_counter(cpuc, &fake_hwc) >= 0;
+}
+
+static int validate_group(struct perf_event *event)
+{
+       struct perf_event *sibling, *leader = event->group_leader;
+       struct cpu_hw_events fake_cpuc;
+
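+       /* Dry-run counter allocation for the whole group on scratch state. */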
+       memset(&fake_cpuc, 0, sizeof(fake_cpuc));
+
+       if (!validate_event(&fake_cpuc, leader))
+               return -ENOSPC;
+
+       list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
+               if (!validate_event(&fake_cpuc, sibling))
+                       return -ENOSPC;
+       }
+
+       if (!validate_event(&fake_cpuc, event))
+               return -ENOSPC;
+
+       return 0;
+}
+
+/* This is needed by specific irq handlers in perf_event_*.c */
+static void handle_associated_event(struct cpu_hw_events *cpuc,
+                                   int idx, struct perf_sample_data *data,
+                                   struct pt_regs *regs)
+{
+       struct perf_event *event = cpuc->events[idx];
+       struct hw_perf_event *hwc = &event->hw;
+
+       mipspmu_event_update(event, hwc, idx);
+       data->period = event->hw.last_period;
+       if (!mipspmu_event_set_period(event, hwc, idx))
+               return;
+
+       if (perf_event_overflow(event, data, regs))
+               mipsxx_pmu_disable_event(idx);
+}
+
+
+static int __n_counters(void)
+{
+       if (!(read_c0_config1() & M_CONFIG1_PC))
+               return 0;
+       if (!(read_c0_perfctrl0() & M_PERFCTL_MORE))
+               return 1;
+       if (!(read_c0_perfctrl1() & M_PERFCTL_MORE))
+               return 2;
+       if (!(read_c0_perfctrl2() & M_PERFCTL_MORE))
+               return 3;
+
+       return 4;
+}
+
+static int n_counters(void)
 {
-       switch (idx) {
-       case 0:
-               return r_c0_perfctrl0();
-       case 1:
-               return r_c0_perfctrl1();
-       case 2:
-               return r_c0_perfctrl2();
-       case 3:
-               return r_c0_perfctrl3();
+       int counters;
+
+       switch (current_cpu_type()) {
+       case CPU_R10000:
+               counters = 2;
+               break;
+
+       case CPU_R12000:
+       case CPU_R14000:
+               counters = 4;
+               break;
+
        default:
-               WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
-               return 0;
+               counters = __n_counters();
        }
+
+       return counters;
 }
 
-static inline void
-mipsxx_pmu_write_control(unsigned int idx, unsigned int val)
+static void reset_counters(void *arg)
 {
-       switch (idx) {
-       case 0:
-               w_c0_perfctrl0(val);
-               return;
-       case 1:
-               w_c0_perfctrl1(val);
-               return;
-       case 2:
-               w_c0_perfctrl2(val);
-               return;
+       int counters = (int)(long)arg;
+       switch (counters) {
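+       /* Fall through: resetting counter N also resets all below it. */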
+       case 4:
+               mipsxx_pmu_write_control(3, 0);
+               mipspmu.write_counter(3, 0);
        case 3:
-               w_c0_perfctrl3(val);
-               return;
+               mipsxx_pmu_write_control(2, 0);
+               mipspmu.write_counter(2, 0);
+       case 2:
+               mipsxx_pmu_write_control(1, 0);
+               mipspmu.write_counter(1, 0);
+       case 1:
+               mipsxx_pmu_write_control(0, 0);
+               mipspmu.write_counter(0, 0);
        }
 }
 
-#ifdef CONFIG_MIPS_MT_SMP
-static DEFINE_RWLOCK(pmuint_rwlock);
-#endif
-
 /* 24K/34K/1004K cores can share the same event map. */
 static const struct mips_perf_event mipsxxcore_event_map
                                [PERF_COUNT_HW_MAX] = {
@@ -277,6 +841,16 @@ static const struct mips_perf_event mipsxx74Kcore_event_map
        [PERF_COUNT_HW_BUS_CYCLES] = { UNSUPPORTED_PERF_EVENT_ID },
 };
 
+static const struct mips_perf_event octeon_event_map[PERF_COUNT_HW_MAX] = {
+       [PERF_COUNT_HW_CPU_CYCLES] = { 0x01, CNTR_ALL },
+       [PERF_COUNT_HW_INSTRUCTIONS] = { 0x03, CNTR_ALL },
+       [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x2b, CNTR_ALL },
+       [PERF_COUNT_HW_CACHE_MISSES] = { 0x2e, CNTR_ALL  },
+       [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x08, CNTR_ALL },
+       [PERF_COUNT_HW_BRANCH_MISSES] = { 0x09, CNTR_ALL },
+       [PERF_COUNT_HW_BUS_CYCLES] = { 0x25, CNTR_ALL },
+};
+
 /* 24K/34K/1004K cores can share the same cache event map. */
 static const struct mips_perf_event mipsxxcore_cache_map
                                [PERF_COUNT_HW_CACHE_MAX]
@@ -510,10 +1084,105 @@ static const struct mips_perf_event mipsxx74Kcore_cache_map
 },
 };
 
+
+static const struct mips_perf_event octeon_cache_map
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+[C(L1D)] = {
+       [C(OP_READ)] = {
+               [C(RESULT_ACCESS)]      = { 0x2b, CNTR_ALL },
+               [C(RESULT_MISS)]        = { 0x2e, CNTR_ALL },
+       },
+       [C(OP_WRITE)] = {
+               [C(RESULT_ACCESS)]      = { 0x30, CNTR_ALL },
+               [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
+       },
+       [C(OP_PREFETCH)] = {
+               [C(RESULT_ACCESS)]      = { UNSUPPORTED_PERF_EVENT_ID },
+               [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
+       },
+},
+[C(L1I)] = {
+       [C(OP_READ)] = {
+               [C(RESULT_ACCESS)]      = { 0x18, CNTR_ALL },
+               [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
+       },
+       [C(OP_WRITE)] = {
+               [C(RESULT_ACCESS)]      = { UNSUPPORTED_PERF_EVENT_ID },
+               [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
+       },
+       [C(OP_PREFETCH)] = {
+               [C(RESULT_ACCESS)]      = { 0x19, CNTR_ALL },
+               [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
+       },
+},
+[C(LL)] = {
+       [C(OP_READ)] = {
+               [C(RESULT_ACCESS)]      = { UNSUPPORTED_PERF_EVENT_ID },
+               [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
+       },
+       [C(OP_WRITE)] = {
+               [C(RESULT_ACCESS)]      = { UNSUPPORTED_PERF_EVENT_ID },
+               [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
+       },
+       [C(OP_PREFETCH)] = {
+               [C(RESULT_ACCESS)]      = { UNSUPPORTED_PERF_EVENT_ID },
+               [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
+       },
+},
+[C(DTLB)] = {
+       /*
+        * Only general DTLB misses are counted; use the same event for
+        * read and write.
+        */
+       [C(OP_READ)] = {
+               [C(RESULT_ACCESS)]      = { UNSUPPORTED_PERF_EVENT_ID },
+               [C(RESULT_MISS)]        = { 0x35, CNTR_ALL },
+       },
+       [C(OP_WRITE)] = {
+               [C(RESULT_ACCESS)]      = { UNSUPPORTED_PERF_EVENT_ID },
+               [C(RESULT_MISS)]        = { 0x35, CNTR_ALL },
+       },
+       [C(OP_PREFETCH)] = {
+               [C(RESULT_ACCESS)]      = { UNSUPPORTED_PERF_EVENT_ID },
+               [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
+       },
+},
+[C(ITLB)] = {
+       [C(OP_READ)] = {
+               [C(RESULT_ACCESS)]      = { UNSUPPORTED_PERF_EVENT_ID },
+               [C(RESULT_MISS)]        = { 0x37, CNTR_ALL },
+       },
+       [C(OP_WRITE)] = {
+               [C(RESULT_ACCESS)]      = { UNSUPPORTED_PERF_EVENT_ID },
+               [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
+       },
+       [C(OP_PREFETCH)] = {
+               [C(RESULT_ACCESS)]      = { UNSUPPORTED_PERF_EVENT_ID },
+               [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
+       },
+},
+[C(BPU)] = {
+       /* Using the same code for *HW_BRANCH* */
+       [C(OP_READ)] = {
+               [C(RESULT_ACCESS)]      = { UNSUPPORTED_PERF_EVENT_ID },
+               [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
+       },
+       [C(OP_WRITE)] = {
+               [C(RESULT_ACCESS)]      = { UNSUPPORTED_PERF_EVENT_ID },
+               [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
+       },
+       [C(OP_PREFETCH)] = {
+               [C(RESULT_ACCESS)]      = { UNSUPPORTED_PERF_EVENT_ID },
+               [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
+       },
+},
+};
+
 #ifdef CONFIG_MIPS_MT_SMP
-static void
-check_and_calc_range(struct perf_event *event,
-                       const struct mips_perf_event *pev)
+static void check_and_calc_range(struct perf_event *event,
+                                const struct mips_perf_event *pev)
 {
        struct hw_perf_event *hwc = &event->hw;
 
@@ -536,9 +1205,8 @@ check_and_calc_range(struct perf_event *event,
                hwc->config_base |= M_TC_EN_ALL;
 }
 #else
-static void
-check_and_calc_range(struct perf_event *event,
-                       const struct mips_perf_event *pev)
+static void check_and_calc_range(struct perf_event *event,
+                                const struct mips_perf_event *pev)
 {
 }
 #endif
@@ -560,7 +1228,7 @@ static int __hw_perf_event_init(struct perf_event *event)
        } else if (PERF_TYPE_RAW == event->attr.type) {
                /* We are working on the global raw event. */
                mutex_lock(&raw_event_mutex);
-               pev = mipspmu->map_raw_event(event->attr.config);
+               pev = mipspmu.map_raw_event(event->attr.config);
        } else {
                /* The event type is not (yet) supported. */
                return -EOPNOTSUPP;
@@ -605,7 +1273,7 @@ static int __hw_perf_event_init(struct perf_event *event)
        hwc->config = 0;
 
        if (!hwc->sample_period) {
-               hwc->sample_period  = MAX_PERIOD;
+               hwc->sample_period  = mipspmu.max_period;
                hwc->last_period    = hwc->sample_period;
                local64_set(&hwc->period_left, hwc->sample_period);
        }
@@ -618,70 +1286,47 @@ static int __hw_perf_event_init(struct perf_event *event)
        }
 
        event->destroy = hw_perf_event_destroy;
-
        return err;
 }
 
 static void pause_local_counters(void)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-       int counters = mipspmu->num_counters;
+       int ctr = mipspmu.num_counters;
        unsigned long flags;
 
        local_irq_save(flags);
-       switch (counters) {
-       case 4:
-               cpuc->saved_ctrl[3] = r_c0_perfctrl3();
-               w_c0_perfctrl3(cpuc->saved_ctrl[3] &
-                       ~M_PERFCTL_COUNT_EVENT_WHENEVER);
-       case 3:
-               cpuc->saved_ctrl[2] = r_c0_perfctrl2();
-               w_c0_perfctrl2(cpuc->saved_ctrl[2] &
-                       ~M_PERFCTL_COUNT_EVENT_WHENEVER);
-       case 2:
-               cpuc->saved_ctrl[1] = r_c0_perfctrl1();
-               w_c0_perfctrl1(cpuc->saved_ctrl[1] &
-                       ~M_PERFCTL_COUNT_EVENT_WHENEVER);
-       case 1:
-               cpuc->saved_ctrl[0] = r_c0_perfctrl0();
-               w_c0_perfctrl0(cpuc->saved_ctrl[0] &
-                       ~M_PERFCTL_COUNT_EVENT_WHENEVER);
-       }
+       do {
+               ctr--;
+               cpuc->saved_ctrl[ctr] = mipsxx_pmu_read_control(ctr);
+               mipsxx_pmu_write_control(ctr, cpuc->saved_ctrl[ctr] &
+                                        ~M_PERFCTL_COUNT_EVENT_WHENEVER);
+       } while (ctr > 0);
        local_irq_restore(flags);
 }
 
 static void resume_local_counters(void)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-       int counters = mipspmu->num_counters;
-       unsigned long flags;
+       int ctr = mipspmu.num_counters;
 
-       local_irq_save(flags);
-       switch (counters) {
-       case 4:
-               w_c0_perfctrl3(cpuc->saved_ctrl[3]);
-       case 3:
-               w_c0_perfctrl2(cpuc->saved_ctrl[2]);
-       case 2:
-               w_c0_perfctrl1(cpuc->saved_ctrl[1]);
-       case 1:
-               w_c0_perfctrl0(cpuc->saved_ctrl[0]);
-       }
-       local_irq_restore(flags);
+       do {
+               ctr--;
+               mipsxx_pmu_write_control(ctr, cpuc->saved_ctrl[ctr]);
+       } while (ctr > 0);
 }
 
 static int mipsxx_pmu_handle_shared_irq(void)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct perf_sample_data data;
-       unsigned int counters = mipspmu->num_counters;
-       unsigned int counter;
+       unsigned int counters = mipspmu.num_counters;
+       u64 counter;
        int handled = IRQ_NONE;
        struct pt_regs *regs;
 
        if (cpu_has_mips_r2 && !(read_c0_cause() & (1 << 26)))
                return handled;
-
        /*
         * First we pause the local counters, so that when we are locked
         * here, the counters are all paused. When it gets locked due to
@@ -702,13 +1347,9 @@ static int mipsxx_pmu_handle_shared_irq(void)
 #define HANDLE_COUNTER(n)                                              \
        case n + 1:                                                     \
                if (test_bit(n, cpuc->used_mask)) {                     \
-                       counter = r_c0_perfcntr ## n();                 \
-                       if (counter & M_COUNTER_OVERFLOW) {             \
-                               w_c0_perfcntr ## n(counter &            \
-                                               VALID_COUNT);           \
-                               if (test_and_change_bit(n, cpuc->msbs)) \
-                                       handle_associated_event(cpuc,   \
-                                               n, &data, regs);        \
+                       counter = mipspmu.read_counter(n);              \
+                       if (counter & mipspmu.overflow) {               \
+                               handle_associated_event(cpuc, n, &data, regs); \
                                handled = IRQ_HANDLED;                  \
                        }                                               \
                }
@@ -733,104 +1374,11 @@ static int mipsxx_pmu_handle_shared_irq(void)
        return handled;
 }
 
-static irqreturn_t
-mipsxx_pmu_handle_irq(int irq, void *dev)
+static irqreturn_t mipsxx_pmu_handle_irq(int irq, void *dev)
 {
        return mipsxx_pmu_handle_shared_irq();
 }
 
-static void mipsxx_pmu_start(void)
-{
-#ifdef CONFIG_MIPS_MT_SMP
-       write_unlock(&pmuint_rwlock);
-#endif
-       resume_local_counters();
-}
-
-/*
- * MIPS performance counters can be per-TC. The control registers can
- * not be directly accessed across CPUs. Hence if we want to do global
- * control, we need cross CPU calls. on_each_cpu() can help us, but we
- * can not make sure this function is called with interrupts enabled. So
- * here we pause local counters and then grab a rwlock and leave the
- * counters on other CPUs alone. If any counter interrupt raises while
- * we own the write lock, simply pause local counters on that CPU and
- * spin in the handler. Also we know we won't be switched to another
- * CPU after pausing local counters and before grabbing the lock.
- */
-static void mipsxx_pmu_stop(void)
-{
-       pause_local_counters();
-#ifdef CONFIG_MIPS_MT_SMP
-       write_lock(&pmuint_rwlock);
-#endif
-}
-
-static int
-mipsxx_pmu_alloc_counter(struct cpu_hw_events *cpuc,
-                       struct hw_perf_event *hwc)
-{
-       int i;
-
-       /*
-        * We only need to care the counter mask. The range has been
-        * checked definitely.
-        */
-       unsigned long cntr_mask = (hwc->event_base >> 8) & 0xffff;
-
-       for (i = mipspmu->num_counters - 1; i >= 0; i--) {
-               /*
-                * Note that some MIPS perf events can be counted by both
-                * even and odd counters, wheresas many other are only by
-                * even _or_ odd counters. This introduces an issue that
-                * when the former kind of event takes the counter the
-                * latter kind of event wants to use, then the "counter
-                * allocation" for the latter event will fail. In fact if
-                * they can be dynamically swapped, they both feel happy.
-                * But here we leave this issue alone for now.
-                */
-               if (test_bit(i, &cntr_mask) &&
-                       !test_and_set_bit(i, cpuc->used_mask))
-                       return i;
-       }
-
-       return -EAGAIN;
-}
-
-static void
-mipsxx_pmu_enable_event(struct hw_perf_event *evt, int idx)
-{
-       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-       unsigned long flags;
-
-       WARN_ON(idx < 0 || idx >= mipspmu->num_counters);
-
-       local_irq_save(flags);
-       cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base & 0xff) |
-               (evt->config_base & M_PERFCTL_CONFIG_MASK) |
-               /* Make sure interrupt enabled. */
-               M_PERFCTL_INTERRUPT_ENABLE;
-       /*
-        * We do not actually let the counter run. Leave it until start().
-        */
-       local_irq_restore(flags);
-}
-
-static void
-mipsxx_pmu_disable_event(int idx)
-{
-       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-       unsigned long flags;
-
-       WARN_ON(idx < 0 || idx >= mipspmu->num_counters);
-
-       local_irq_save(flags);
-       cpuc->saved_ctrl[idx] = mipsxx_pmu_read_control(idx) &
-               ~M_PERFCTL_COUNT_EVENT_WHENEVER;
-       mipsxx_pmu_write_control(idx, cpuc->saved_ctrl[idx]);
-       local_irq_restore(flags);
-}
-
 /* 24K */
 #define IS_UNSUPPORTED_24K_EVENT(r, b)                                 \
        ((b) == 12 || (r) == 151 || (r) == 152 || (b) == 26 ||          \
@@ -892,8 +1440,7 @@ mipsxx_pmu_disable_event(int idx)
  * then 128 needs to be added to 15 as the input for the event config,
  * i.e., 143 (0x8F) to be used.
  */
-static const struct mips_perf_event *
-mipsxx_pmu_map_raw_event(u64 config)
+static const struct mips_perf_event *mipsxx_pmu_map_raw_event(u64 config)
 {
        unsigned int raw_id = config & 0xff;
        unsigned int base_id = raw_id & 0x7f;
@@ -970,40 +1517,44 @@ mipsxx_pmu_map_raw_event(u64 config)
        return &raw_event;
 }
 
-static struct mips_pmu mipsxxcore_pmu = {
-       .handle_irq = mipsxx_pmu_handle_irq,
-       .handle_shared_irq = mipsxx_pmu_handle_shared_irq,
-       .start = mipsxx_pmu_start,
-       .stop = mipsxx_pmu_stop,
-       .alloc_counter = mipsxx_pmu_alloc_counter,
-       .read_counter = mipsxx_pmu_read_counter,
-       .write_counter = mipsxx_pmu_write_counter,
-       .enable_event = mipsxx_pmu_enable_event,
-       .disable_event = mipsxx_pmu_disable_event,
-       .map_raw_event = mipsxx_pmu_map_raw_event,
-       .general_event_map = &mipsxxcore_event_map,
-       .cache_event_map = &mipsxxcore_cache_map,
-};
+static const struct mips_perf_event *octeon_pmu_map_raw_event(u64 config)
+{
+       unsigned int raw_id = config & 0xff;
+       unsigned int base_id = raw_id & 0x7f;
 
-static struct mips_pmu mipsxx74Kcore_pmu = {
-       .handle_irq = mipsxx_pmu_handle_irq,
-       .handle_shared_irq = mipsxx_pmu_handle_shared_irq,
-       .start = mipsxx_pmu_start,
-       .stop = mipsxx_pmu_stop,
-       .alloc_counter = mipsxx_pmu_alloc_counter,
-       .read_counter = mipsxx_pmu_read_counter,
-       .write_counter = mipsxx_pmu_write_counter,
-       .enable_event = mipsxx_pmu_enable_event,
-       .disable_event = mipsxx_pmu_disable_event,
-       .map_raw_event = mipsxx_pmu_map_raw_event,
-       .general_event_map = &mipsxx74Kcore_event_map,
-       .cache_event_map = &mipsxx74Kcore_cache_map,
-};
+       raw_event.cntr_mask = CNTR_ALL;
+       raw_event.event_id = base_id;
+
+       if (current_cpu_type() == CPU_CAVIUM_OCTEON2) {
+               if (base_id > 0x42)
+                       return ERR_PTR(-EOPNOTSUPP);
+       } else {
+               if (base_id > 0x3a)
+                       return ERR_PTR(-EOPNOTSUPP);
+       }
+
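+       /* These event numbers are not usable as raw events on Octeon. */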
+       switch (base_id) {
+       case 0x00:
+       case 0x0f:
+       case 0x1e:
+       case 0x1f:
+       case 0x2f:
+       case 0x34:
+       case 0x3b ... 0x3f:
+               return ERR_PTR(-EOPNOTSUPP);
+       default:
+               break;
+       }
+
+       return &raw_event;
+}
 
 static int __init
 init_hw_perf_events(void)
 {
        int counters, irq;
+       int counter_bits;
 
        pr_info("Performance counters: ");
 
@@ -1035,32 +1586,36 @@ init_hw_perf_events(void)
        }
 #endif
 
-       on_each_cpu(reset_counters, (void *)(long)counters, 1);
+       mipspmu.map_raw_event = mipsxx_pmu_map_raw_event;
 
        switch (current_cpu_type()) {
        case CPU_24K:
-               mipsxxcore_pmu.name = "mips/24K";
-               mipsxxcore_pmu.num_counters = counters;
-               mipsxxcore_pmu.irq = irq;
-               mipspmu = &mipsxxcore_pmu;
+               mipspmu.name = "mips/24K";
+               mipspmu.general_event_map = &mipsxxcore_event_map;
+               mipspmu.cache_event_map = &mipsxxcore_cache_map;
                break;
        case CPU_34K:
-               mipsxxcore_pmu.name = "mips/34K";
-               mipsxxcore_pmu.num_counters = counters;
-               mipsxxcore_pmu.irq = irq;
-               mipspmu = &mipsxxcore_pmu;
+               mipspmu.name = "mips/34K";
+               mipspmu.general_event_map = &mipsxxcore_event_map;
+               mipspmu.cache_event_map = &mipsxxcore_cache_map;
                break;
        case CPU_74K:
-               mipsxx74Kcore_pmu.name = "mips/74K";
-               mipsxx74Kcore_pmu.num_counters = counters;
-               mipsxx74Kcore_pmu.irq = irq;
-               mipspmu = &mipsxx74Kcore_pmu;
+               mipspmu.name = "mips/74K";
+               mipspmu.general_event_map = &mipsxx74Kcore_event_map;
+               mipspmu.cache_event_map = &mipsxx74Kcore_cache_map;
                break;
        case CPU_1004K:
-               mipsxxcore_pmu.name = "mips/1004K";
-               mipsxxcore_pmu.num_counters = counters;
-               mipsxxcore_pmu.irq = irq;
-               mipspmu = &mipsxxcore_pmu;
+               mipspmu.name = "mips/1004K";
+               mipspmu.general_event_map = &mipsxxcore_event_map;
+               mipspmu.cache_event_map = &mipsxxcore_cache_map;
+               break;
+       case CPU_CAVIUM_OCTEON:
+       case CPU_CAVIUM_OCTEON_PLUS:
+       case CPU_CAVIUM_OCTEON2:
+               mipspmu.name = "octeon";
+               mipspmu.general_event_map = &octeon_event_map;
+               mipspmu.cache_event_map = &octeon_cache_map;
+               mipspmu.map_raw_event = octeon_pmu_map_raw_event;
                break;
        default:
                pr_cont("Either hardware does not support performance "
@@ -1068,15 +1623,33 @@ init_hw_perf_events(void)
                return -ENODEV;
        }
 
-       if (mipspmu)
-               pr_cont("%s PMU enabled, %d counters available to each "
-                       "CPU, irq %d%s\n", mipspmu->name, counters, irq,
-                       irq < 0 ? " (share with timer interrupt)" : "");
+       mipspmu.num_counters = counters;
+       mipspmu.irq = irq;
+
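+       /*
+        * The W bit in PerfCtl0 indicates 64-bit wide counters; choose
+        * the counter accessors and overflow/period limits accordingly.
+        */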
+       if (read_c0_perfctrl0() & M_PERFCTL_WIDE) {
+               mipspmu.max_period = (1ULL << 63) - 1;
+               mipspmu.valid_count = (1ULL << 63) - 1;
+               mipspmu.overflow = 1ULL << 63;
+               mipspmu.read_counter = mipsxx_pmu_read_counter_64;
+               mipspmu.write_counter = mipsxx_pmu_write_counter_64;
+               counter_bits = 64;
+       } else {
+               mipspmu.max_period = (1ULL << 31) - 1;
+               mipspmu.valid_count = (1ULL << 31) - 1;
+               mipspmu.overflow = 1ULL << 31;
+               mipspmu.read_counter = mipsxx_pmu_read_counter;
+               mipspmu.write_counter = mipsxx_pmu_write_counter;
+               counter_bits = 32;
+       }
+
+       on_each_cpu(reset_counters, (void *)(long)counters, 1);
+
+       pr_cont("%s PMU enabled, %d %d-bit counters available to each "
+               "CPU, irq %d%s\n", mipspmu.name, counters, counter_bits, irq,
+               irq < 0 ? " (share with timer interrupt)" : "");
 
        perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
 
        return 0;
 }
 early_initcall(init_hw_perf_events);
-
-#endif /* defined(CONFIG_CPU_MIPS32)... */