#ifdef CONFIG_CPU_SUP_AMD

static __initconst const u64 amd_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses          */
		[ C(RESULT_MISS)   ] = 0x0041, /* Data Cache Misses            */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system   */
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts    */
		[ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled   */
	},
 },
 [ C(L1I) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches    */
		[ C(RESULT_MISS)   ] = 0x0081, /* Instruction cache misses     */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load  */
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(LL) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC  */
		[ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses :IC+DC       */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback            */
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses          */
		[ C(RESULT_MISS)   ] = 0x0746, /* L1_DTLB_AND_L2_DTLB_MISS.ALL */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches          */
		[ C(RESULT_MISS)   ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr.        */
		[ C(RESULT_MISS)   ] = 0x00c3, /* Retired Mispredicted BI      */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};
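/*
 * Example: perf encodes a generic cache event as
 *
 *	config = (cache_id) | (op_id << 8) | (result_id << 16)
 *
 * so an L1D read miss, i.e. C(L1D)/C(OP_READ)/C(RESULT_MISS), resolves
 * through the table above to the raw event select 0x0041 (Data Cache
 * Misses). A 0 entry means the combination is not supported by this
 * PMU; a -1 entry marks a combination that makes no sense (e.g.
 * "writes" to the branch predictor).
 */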
/*
 * AMD Performance Monitor K7 and later.
 */
static const u64 amd_perfmon_event_map[] =
{
  [PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0080,
  [PERF_COUNT_HW_CACHE_MISSES]		= 0x0081,
  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c2,
  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c3,
};

static u64 amd_pmu_event_map(int hw_event)
{
	return amd_perfmon_event_map[hw_event];
}
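/*
 * Example: amd_pmu_event_map(PERF_COUNT_HW_CPU_CYCLES) yields 0x0076
 * (CPU Clocks not Halted), which the generic x86 code then programs
 * into one of the MSR_K7_EVNTSEL* registers.
 */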
static int amd_pmu_hw_config(struct perf_event *event)
{
	int ret = x86_pmu_hw_config(event);

	if (ret)
		return ret;

	if (event->attr.type != PERF_TYPE_RAW)
		return 0;

	event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;

	return 0;
}
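/*
 * Note: AMD64_RAW_EVENT_MASK keeps the fields a user may control in a
 * raw config, i.e. the event select (including the AMD64 extended event
 * bits) plus the unit-mask, edge, invert and counter-mask fields.
 * Privilege filtering (USR/OS) stays under control of the generic code,
 * driven by event->attr.exclude_*.
 */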
/*
 * AMD64 events are detected based on their event codes.
 */
static inline int amd_is_nb_event(struct hw_perf_event *hwc)
{
	return (hwc->config & 0xe0) == 0xe0;
}
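/*
 * Example: the test matches any event code with the top three bits of
 * the low byte set, i.e. 0xe0 and up: DRAM Accesses (0xe0) qualifies
 * since (0xe0 & 0xe0) == 0xe0, whereas CPU Clocks not Halted (0x76)
 * does not, since (0x76 & 0xe0) == 0x60.
 */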
static inline int amd_has_nb(struct cpu_hw_events *cpuc)
{
	struct amd_nb *nb = cpuc->amd_nb;

	return nb && nb->nb_id != -1;
}
static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
				      struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct amd_nb *nb = cpuc->amd_nb;
	int i;

	/*
	 * only care about NB events
	 */
	if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
		return;

	/*
	 * need to scan whole list because event may not have
	 * been assigned during scheduling
	 *
	 * no race condition possible because event can only
	 * be removed on one CPU at a time AND PMU is disabled
	 * when we come here
	 */
	for (i = 0; i < x86_pmu.num_counters; i++) {
		if (nb->owners[i] == event) {
			cmpxchg(nb->owners+i, event, NULL);
			break;
		}
	}
}
/*
 * AMD64 NorthBridge events need special treatment because
 * counter access needs to be synchronized across all cores
 * of a package. Refer to BKDG section 3.12.
 *
 * NB events are events measuring L3 cache and HyperTransport
 * traffic. They are identified by an event code >= 0xe0.
 * They measure events on the NorthBridge, which is shared
 * by all cores on a package. NB events are counted on a
 * shared set of counters. When an NB event is programmed
 * in a counter, the data actually comes from a shared
 * counter. Thus, access to those counters needs to be
 * synchronized.
 *
 * We implement the synchronization such that no two cores
 * can be measuring NB events using the same counters. Thus,
 * we maintain a per-NB allocation table. The available slot
 * is propagated using the event_constraint structure.
 *
 * We provide only one choice for each NB event based on
 * the fact that only NB events have restrictions. Consequently,
 * if a counter is available, there is a guarantee the NB event
 * will be assigned to it. If no slot is available, an empty
 * constraint is returned and scheduling will eventually fail
 * for this event.
 *
 * Note that all cores attached to the same NB compete for the
 * same counters to host NB events; this is why we use atomic
 * ops. Some multi-chip CPUs may have more than one NB.
 *
 * Given that resources are allocated (cmpxchg), they must be
 * eventually freed for others to use. This is accomplished by
 * calling amd_put_event_constraints().
 *
 * Non-NB events are not impacted by this restriction.
 */
static struct event_constraint *
amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct amd_nb *nb = cpuc->amd_nb;
	struct perf_event *old = NULL;
	int max = x86_pmu.num_counters;
	int i, j, k = -1;

	/*
	 * if not NB event or no NB, then no constraints
	 */
	if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
		return &unconstrained;

	/*
	 * detect if already present, if so reuse
	 *
	 * cannot merge with actual allocation
	 * because of possible holes
	 *
	 * event can already be present yet not assigned (in hwc->idx)
	 * because of successive calls to x86_schedule_events() from
	 * hw_perf_group_sched_in() without hw_perf_enable()
	 */
	for (i = 0; i < max; i++) {
		/*
		 * keep track of first free slot
		 */
		if (k == -1 && !nb->owners[i])
			k = i;

		/* already present, reuse */
		if (nb->owners[i] == event)
			goto done;
	}
	/*
	 * not present, so grab a new slot
	 * starting either at:
	 */
	if (hwc->idx != -1) {
		/* previous assignment */
		i = hwc->idx;
	} else if (k != -1) {
		/* start from free slot found */
		i = k;
	} else {
		/*
		 * event not found, no slot found in
		 * first pass, try again from the
		 * beginning
		 */
		i = 0;
	}
	j = i;
	do {
		old = cmpxchg(nb->owners+i, NULL, event);
		if (!old)
			break;
		if (++i == max)
			i = 0;
	} while (i != j);
done:
	if (!old)
		return &nb->event_constraints[i];

	return &emptyconstraint;
}
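/*
 * Worked example: assume 4 counters and that core 0 of this NB already
 * owns slot 0 for an NB event. When core 1 schedules a new NB event
 * (hwc->idx == -1), the first pass records k == 1 as the first free
 * slot, cmpxchg(nb->owners+1, NULL, event) succeeds, and the returned
 * constraint allows exactly counter 1, so the two cores can never
 * program the same shared counter.
 */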
static struct amd_nb *amd_alloc_nb(int cpu)
{
	struct amd_nb *nb;
	int i;

	nb = kmalloc_node(sizeof(struct amd_nb), GFP_KERNEL | __GFP_ZERO,
			  cpu_to_node(cpu));
	if (!nb)
		return NULL;

	nb->nb_id = -1;

	/*
	 * initialize all possible NB constraints
	 */
	for (i = 0; i < x86_pmu.num_counters; i++) {
		__set_bit(i, nb->event_constraints[i].idxmsk);
		nb->event_constraints[i].weight = 1;
	}
	return nb;
}
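/*
 * Each constraint i built above has only bit i set in idxmsk and
 * weight 1: slot i in the per-NB owners[] table corresponds to exactly
 * one hardware counter, which is what lets amd_get_event_constraints()
 * hand out non-overlapping counters across cores.
 */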
static int amd_pmu_cpu_prepare(int cpu)
{
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);

	WARN_ON_ONCE(cpuc->amd_nb);

	if (boot_cpu_data.x86_max_cores < 2)
		return NOTIFY_OK;

	cpuc->amd_nb = amd_alloc_nb(cpu);
	if (!cpuc->amd_nb)
		return NOTIFY_BAD;

	return NOTIFY_OK;
}
static void amd_pmu_cpu_starting(int cpu)
{
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
	struct amd_nb *nb;
	int i, nb_id;

	if (boot_cpu_data.x86_max_cores < 2)
		return;

	nb_id = amd_get_nb_id(cpu);
	WARN_ON_ONCE(nb_id == BAD_APICID);

	for_each_online_cpu(i) {
		nb = per_cpu(cpu_hw_events, i).amd_nb;
		if (WARN_ON_ONCE(!nb))
			continue;

		if (nb->nb_id == nb_id) {
			/*
			 * another core on this NB already owns the
			 * shared struct amd_nb: drop ours, use theirs
			 */
			kfree(cpuc->amd_nb);
			cpuc->amd_nb = nb;
			break;
		}
	}

	cpuc->amd_nb->nb_id = nb_id;
	cpuc->amd_nb->refcnt++;
}
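/*
 * Example: on a two-core package, the core that boots first keeps the
 * amd_nb allocated in amd_pmu_cpu_prepare() and stamps it with its
 * nb_id; the second core finds that structure in the loop above, frees
 * its own copy, shares the first one, and refcnt ends up at 2.
 */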
static void amd_pmu_cpu_dead(int cpu)
{
	struct cpu_hw_events *cpuhw;

	if (boot_cpu_data.x86_max_cores < 2)
		return;

	cpuhw = &per_cpu(cpu_hw_events, cpu);

	if (cpuhw->amd_nb) {
		struct amd_nb *nb = cpuhw->amd_nb;

		if (nb->nb_id == -1 || --nb->refcnt == 0)
			kfree(nb);

		cpuhw->amd_nb = NULL;
	}
}
static __initconst const struct x86_pmu amd_pmu = {
	.name			= "AMD",
	.handle_irq		= x86_pmu_handle_irq,
	.disable_all		= x86_pmu_disable_all,
	.enable_all		= x86_pmu_enable_all,
	.enable			= x86_pmu_enable_event,
	.disable		= x86_pmu_disable_event,
	.hw_config		= amd_pmu_hw_config,
	.schedule_events	= x86_schedule_events,
	.eventsel		= MSR_K7_EVNTSEL0,
	.perfctr		= MSR_K7_PERFCTR0,
	.event_map		= amd_pmu_event_map,
	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
	.num_counters		= 4,
	.cntval_bits		= 48,
	.cntval_mask		= (1ULL << 48) - 1,
	.apic			= 1,
	/* use highest bit to detect overflow */
	.max_period		= (1ULL << 47) - 1,
	.get_event_constraints	= amd_get_event_constraints,
	.put_event_constraints	= amd_put_event_constraints,

	.cpu_prepare		= amd_pmu_cpu_prepare,
	.cpu_starting		= amd_pmu_cpu_starting,
	.cpu_dead		= amd_pmu_cpu_dead,
};
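/*
 * Note: the counters are 48 bits wide (cntval_bits/cntval_mask), but
 * max_period is capped one bit lower, at 2^47 - 1, so the highest
 * counter bit is never part of a programmed period and can be used to
 * detect overflow, as the comment above says.
 */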
static __init int amd_pmu_init(void)
{
	/* Performance-monitoring supported from K7 and later: */
	if (boot_cpu_data.x86 < 6)
		return -ENODEV;

	x86_pmu = amd_pmu;

	/* Events are common for all AMDs */
	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
	       sizeof(hw_cache_event_ids));

	return 0;
}
#else /* CONFIG_CPU_SUP_AMD */

static int amd_pmu_init(void)