GNU Linux-libre 4.14.290-gnu1
arch/x86/events/amd/uncore.c
/*
 * Copyright (C) 2013 Advanced Micro Devices, Inc.
 *
 * Author: Jacob Shin <jacob.shin@amd.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>

#include <asm/cpufeature.h>
#include <asm/perf_event.h>
#include <asm/msr.h>
#include <asm/smp.h>

#define NUM_COUNTERS_NB         4
#define NUM_COUNTERS_L2         4
#define NUM_COUNTERS_L3         6
#define MAX_COUNTERS            6

#define RDPMC_BASE_NB           6
#define RDPMC_BASE_LLC          10

#define COUNTER_SHIFT           16

#undef pr_fmt
#define pr_fmt(fmt)     "amd_uncore: " fmt

static int num_counters_llc;
static int num_counters_nb;
static bool l3_mask;

static HLIST_HEAD(uncore_unused_list);

struct amd_uncore {
        int id;
        int refcnt;
        int cpu;
        int num_counters;
        int rdpmc_base;
        u32 msr_base;
        cpumask_t *active_mask;
        struct pmu *pmu;
        struct perf_event *events[MAX_COUNTERS];
        struct hlist_node node;
};

static struct amd_uncore * __percpu *amd_uncore_nb;
static struct amd_uncore * __percpu *amd_uncore_llc;

static struct pmu amd_nb_pmu;
static struct pmu amd_llc_pmu;

static cpumask_t amd_nb_active_mask;
static cpumask_t amd_llc_active_mask;

static bool is_nb_event(struct perf_event *event)
{
        return event->pmu->type == amd_nb_pmu.type;
}

static bool is_llc_event(struct perf_event *event)
{
        return event->pmu->type == amd_llc_pmu.type;
}

static struct amd_uncore *event_to_amd_uncore(struct perf_event *event)
{
        if (is_nb_event(event) && amd_uncore_nb)
                return *per_cpu_ptr(amd_uncore_nb, event->cpu);
        else if (is_llc_event(event) && amd_uncore_llc)
                return *per_cpu_ptr(amd_uncore_llc, event->cpu);

        return NULL;
}

static void amd_uncore_read(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        u64 prev, new;
        s64 delta;

        /*
         * since we do not enable counter overflow interrupts,
         * we do not have to worry about prev_count changing on us
         */

        prev = local64_read(&hwc->prev_count);
        rdpmcl(hwc->event_base_rdpmc, new);
        local64_set(&hwc->prev_count, new);
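        /*
         * The hardware counters are (64 - COUNTER_SHIFT) = 48 bits wide.
         * Shifting both values up by COUNTER_SHIFT and the difference back
         * down sign-extends the raw values, so the delta stays correct even
         * if the counter has wrapped since the last read.
         */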
        delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
        delta >>= COUNTER_SHIFT;
        local64_add(delta, &event->count);
}

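/*
 * Program the saved count (on PERF_EF_RELOAD) and set the enable bit in the
 * counter's control MSR.
 */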
static void amd_uncore_start(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;

        if (flags & PERF_EF_RELOAD)
                wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));

        hwc->state = 0;
        wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
        perf_event_update_userpage(event);
}

static void amd_uncore_stop(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;

        wrmsrl(hwc->config_base, hwc->config);
        hwc->state |= PERF_HES_STOPPED;

        if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
                amd_uncore_read(event);
                hwc->state |= PERF_HES_UPTODATE;
        }
}

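/*
 * Claim a hardware counter for the event: reuse the slot if the event is
 * already assigned, otherwise grab the first free slot with cmpxchg(), then
 * derive the control/counter MSR addresses and the RDPMC index from the
 * slot number.
 */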
static int amd_uncore_add(struct perf_event *event, int flags)
{
        int i;
        struct amd_uncore *uncore = event_to_amd_uncore(event);
        struct hw_perf_event *hwc = &event->hw;

        /* are we already assigned? */
        if (hwc->idx != -1 && uncore->events[hwc->idx] == event)
                goto out;

        for (i = 0; i < uncore->num_counters; i++) {
                if (uncore->events[i] == event) {
                        hwc->idx = i;
                        goto out;
                }
        }

        /* if not, take the first available counter */
        hwc->idx = -1;
        for (i = 0; i < uncore->num_counters; i++) {
                if (cmpxchg(&uncore->events[i], NULL, event) == NULL) {
                        hwc->idx = i;
                        break;
                }
        }

out:
        if (hwc->idx == -1)
                return -EBUSY;

        hwc->config_base = uncore->msr_base + (2 * hwc->idx);
        hwc->event_base = uncore->msr_base + 1 + (2 * hwc->idx);
        hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx;
        hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

        if (flags & PERF_EF_START)
                amd_uncore_start(event, PERF_EF_RELOAD);

        return 0;
}

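/*
 * Stop the event and release its counter slot so that another event can
 * claim it.
 */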
static void amd_uncore_del(struct perf_event *event, int flags)
{
        int i;
        struct amd_uncore *uncore = event_to_amd_uncore(event);
        struct hw_perf_event *hwc = &event->hw;

        amd_uncore_stop(event, PERF_EF_UPDATE);

        for (i = 0; i < uncore->num_counters; i++) {
                if (cmpxchg(&uncore->events[i], event, NULL) == event)
                        break;
        }

        hwc->idx = -1;
}

static int amd_uncore_event_init(struct perf_event *event)
{
        struct amd_uncore *uncore;
        struct hw_perf_event *hwc = &event->hw;

        if (event->attr.type != event->pmu->type)
                return -ENOENT;

        /*
         * NB and Last level cache counters (MSRs) are shared across all cores
         * that share the same NB / Last level cache. On family 16h and below,
         * interrupts can be directed to a single target core; however, event
         * counts generated by processes running on other cores cannot be
         * masked out. So we do not support sampling and per-thread events
         * (CAP_NO_INTERRUPT), and we do not enable counter overflow
         * interrupts.
         */

        /* NB and Last level cache counters do not have usr/os/guest/host bits */
        if (event->attr.exclude_user || event->attr.exclude_kernel ||
            event->attr.exclude_host || event->attr.exclude_guest)
                return -EINVAL;

        hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
        hwc->idx = -1;

        if (event->cpu < 0)
                return -EINVAL;

        /*
         * SliceMask and ThreadMask need to be set for certain L3 events in
         * Family 17h. For other events, the two fields do not affect the count.
         */
        if (l3_mask && is_llc_event(event)) {
                int thread = 2 * (cpu_data(event->cpu).cpu_core_id % 4);

                if (smp_num_siblings > 1)
                        thread += cpu_data(event->cpu).apicid & 1;

                hwc->config |= (1ULL << (AMD64_L3_THREAD_SHIFT + thread) &
                                AMD64_L3_THREAD_MASK) | AMD64_L3_SLICE_MASK;
        }

        uncore = event_to_amd_uncore(event);
        if (!uncore)
                return -ENODEV;

        /*
         * Since requests can come in on any of the shared cores, remap the
         * event to a single common CPU.
         */
        event->cpu = uncore->cpu;

        return 0;
}

static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
                                            struct device_attribute *attr,
                                            char *buf)
{
        cpumask_t *active_mask;
        struct pmu *pmu = dev_get_drvdata(dev);

        if (pmu->type == amd_nb_pmu.type)
                active_mask = &amd_nb_active_mask;
        else if (pmu->type == amd_llc_pmu.type)
                active_mask = &amd_llc_active_mask;
        else
                return 0;

        return cpumap_print_to_pagebuf(true, buf, active_mask);
}
static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL);

static struct attribute *amd_uncore_attrs[] = {
        &dev_attr_cpumask.attr,
        NULL,
};

static struct attribute_group amd_uncore_attr_group = {
        .attrs = amd_uncore_attrs,
};

/*
 * Similar to PMU_FORMAT_ATTR but allowing for format_attr to be assigned based
 * on family
 */
#define AMD_FORMAT_ATTR(_dev, _name, _format)                                \
static ssize_t                                                               \
_dev##_show##_name(struct device *dev,                                       \
                struct device_attribute *attr,                               \
                char *page)                                                  \
{                                                                            \
        BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);                          \
        return sprintf(page, _format "\n");                                  \
}                                                                            \
static struct device_attribute format_attr_##_dev##_name = __ATTR_RO(_dev);

/* Used for each uncore counter type */
#define AMD_ATTRIBUTE(_name)                                                 \
static struct attribute *amd_uncore_format_attr_##_name[] = {                \
        &format_attr_event_##_name.attr,                                     \
        &format_attr_umask.attr,                                             \
        NULL,                                                                \
};                                                                           \
static struct attribute_group amd_uncore_format_group_##_name = {            \
        .name = "format",                                                    \
        .attrs = amd_uncore_format_attr_##_name,                             \
};                                                                           \
static const struct attribute_group *amd_uncore_attr_groups_##_name[] = {    \
        &amd_uncore_attr_group,                                              \
        &amd_uncore_format_group_##_name,                                    \
        NULL,                                                                \
};

AMD_FORMAT_ATTR(event, , "config:0-7,32-35");
AMD_FORMAT_ATTR(umask, , "config:8-15");
AMD_FORMAT_ATTR(event, _df, "config:0-7,32-35,59-60");
AMD_FORMAT_ATTR(event, _l3, "config:0-7");
AMD_ATTRIBUTE(df);
AMD_ATTRIBUTE(l3);

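/*
 * The PMUs below show up under /sys/bus/event_source/devices/ using the
 * family-dependent names chosen in amd_uncore_init() (amd_nb/amd_l2, or
 * amd_df/amd_l3 on family 17h) and can be driven with raw config values
 * through perf, for example (event/umask values here are placeholders,
 * not real event encodings):
 *
 *      perf stat -a -e amd_df/event=0xNN,umask=0xNN/ -- sleep 1
 */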
static struct pmu amd_nb_pmu = {
        .task_ctx_nr    = perf_invalid_context,
        .event_init     = amd_uncore_event_init,
        .add            = amd_uncore_add,
        .del            = amd_uncore_del,
        .start          = amd_uncore_start,
        .stop           = amd_uncore_stop,
        .read           = amd_uncore_read,
        .capabilities   = PERF_PMU_CAP_NO_INTERRUPT,
};

static struct pmu amd_llc_pmu = {
        .task_ctx_nr    = perf_invalid_context,
        .event_init     = amd_uncore_event_init,
        .add            = amd_uncore_add,
        .del            = amd_uncore_del,
        .start          = amd_uncore_start,
        .stop           = amd_uncore_stop,
        .read           = amd_uncore_read,
        .capabilities   = PERF_PMU_CAP_NO_INTERRUPT,
};

static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
{
        return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL,
                        cpu_to_node(cpu));
}

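/*
 * CPU hotplug "prepare" step: allocate a per-CPU amd_uncore structure for
 * each counter type this system supports. The structure may later be merged
 * with a sibling's in amd_uncore_cpu_starting().
 */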
static int amd_uncore_cpu_up_prepare(unsigned int cpu)
{
        struct amd_uncore *uncore_nb = NULL, *uncore_llc;

        if (amd_uncore_nb) {
                uncore_nb = amd_uncore_alloc(cpu);
                if (!uncore_nb)
                        goto fail;
                uncore_nb->cpu = cpu;
                uncore_nb->num_counters = num_counters_nb;
                uncore_nb->rdpmc_base = RDPMC_BASE_NB;
                uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
                uncore_nb->active_mask = &amd_nb_active_mask;
                uncore_nb->pmu = &amd_nb_pmu;
                uncore_nb->id = -1;
                *per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
        }

        if (amd_uncore_llc) {
                uncore_llc = amd_uncore_alloc(cpu);
                if (!uncore_llc)
                        goto fail;
                uncore_llc->cpu = cpu;
                uncore_llc->num_counters = num_counters_llc;
                uncore_llc->rdpmc_base = RDPMC_BASE_LLC;
                uncore_llc->msr_base = MSR_F16H_L2I_PERF_CTL;
                uncore_llc->active_mask = &amd_llc_active_mask;
                uncore_llc->pmu = &amd_llc_pmu;
                uncore_llc->id = -1;
                *per_cpu_ptr(amd_uncore_llc, cpu) = uncore_llc;
        }

        return 0;

fail:
        if (amd_uncore_nb)
                *per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
        kfree(uncore_nb);
        return -ENOMEM;
}

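/*
 * If another online CPU already owns an uncore structure with the same id,
 * share it instead: the duplicate allocated for this CPU is queued on
 * uncore_unused_list to be freed later, and the shared owner's reference
 * count is bumped.
 */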
static struct amd_uncore *
amd_uncore_find_online_sibling(struct amd_uncore *this,
                               struct amd_uncore * __percpu *uncores)
{
        unsigned int cpu;
        struct amd_uncore *that;

        for_each_online_cpu(cpu) {
                that = *per_cpu_ptr(uncores, cpu);

                if (!that)
                        continue;

                if (this == that)
                        continue;

                if (this->id == that->id) {
                        hlist_add_head(&this->node, &uncore_unused_list);
                        this = that;
                        break;
                }
        }

        this->refcnt++;
        return this;
}

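/*
 * CPU hotplug "starting" step, run on the CPU itself: read the node id from
 * CPUID leaf 0x8000001e (ECX[7:0]) for the NB/DF PMU and the cached LLC id
 * for the L2/L3 PMU, then fold this CPU into an already-online sibling's
 * structure if one exists.
 */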
static int amd_uncore_cpu_starting(unsigned int cpu)
{
        unsigned int eax, ebx, ecx, edx;
        struct amd_uncore *uncore;

        if (amd_uncore_nb) {
                uncore = *per_cpu_ptr(amd_uncore_nb, cpu);
                cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
                uncore->id = ecx & 0xff;

                uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_nb);
                *per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
        }

        if (amd_uncore_llc) {
                uncore = *per_cpu_ptr(amd_uncore_llc, cpu);
                uncore->id = per_cpu(cpu_llc_id, cpu);

                uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_llc);
                *per_cpu_ptr(amd_uncore_llc, cpu) = uncore;
        }

        return 0;
}

static void uncore_clean_online(void)
{
        struct amd_uncore *uncore;
        struct hlist_node *n;

        hlist_for_each_entry_safe(uncore, n, &uncore_unused_list, node) {
                hlist_del(&uncore->node);
                kfree(uncore);
        }
}

static void uncore_online(unsigned int cpu,
                          struct amd_uncore * __percpu *uncores)
{
        struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);

        uncore_clean_online();

        if (cpu == uncore->cpu)
                cpumask_set_cpu(cpu, uncore->active_mask);
}

static int amd_uncore_cpu_online(unsigned int cpu)
{
        if (amd_uncore_nb)
                uncore_online(cpu, amd_uncore_nb);

        if (amd_uncore_llc)
                uncore_online(cpu, amd_uncore_llc);

        return 0;
}

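/*
 * If the CPU going offline currently owns a shared uncore structure, hand
 * ownership (and any active perf contexts) over to another online CPU that
 * shares the same NB/LLC.
 */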
static void uncore_down_prepare(unsigned int cpu,
                                struct amd_uncore * __percpu *uncores)
{
        unsigned int i;
        struct amd_uncore *this = *per_cpu_ptr(uncores, cpu);

        if (this->cpu != cpu)
                return;

        /* this cpu is going down, migrate to a shared sibling if possible */
        for_each_online_cpu(i) {
                struct amd_uncore *that = *per_cpu_ptr(uncores, i);

                if (cpu == i)
                        continue;

                if (this == that) {
                        perf_pmu_migrate_context(this->pmu, cpu, i);
                        cpumask_clear_cpu(cpu, that->active_mask);
                        cpumask_set_cpu(i, that->active_mask);
                        that->cpu = i;
                        break;
                }
        }
}

static int amd_uncore_cpu_down_prepare(unsigned int cpu)
{
        if (amd_uncore_nb)
                uncore_down_prepare(cpu, amd_uncore_nb);

        if (amd_uncore_llc)
                uncore_down_prepare(cpu, amd_uncore_llc);

        return 0;
}

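/*
 * Drop the dead CPU's reference on the shared structure and free it once
 * the last sharer is gone.
 */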
static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores)
{
        struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);

        if (cpu == uncore->cpu)
                cpumask_clear_cpu(cpu, uncore->active_mask);

        if (!--uncore->refcnt)
                kfree(uncore);
        *per_cpu_ptr(uncores, cpu) = NULL;
}

static int amd_uncore_cpu_dead(unsigned int cpu)
{
        if (amd_uncore_nb)
                uncore_dead(cpu, amd_uncore_nb);

        if (amd_uncore_llc)
                uncore_dead(cpu, amd_uncore_llc);

        return 0;
}

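/*
 * Detect the supported uncore counters (TOPOEXT plus the PERFCTR_NB and
 * PERFCTR_LLC feature bits), pick family-specific PMU names and event
 * formats, register the PMUs and install the CPU hotplug callbacks above.
 */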
static int __init amd_uncore_init(void)
{
        int ret = -ENODEV;

        if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
                return -ENODEV;

        if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
                return -ENODEV;

        if (boot_cpu_data.x86 == 0x17) {
                /*
                 * On family 17h, the Northbridge counters are repurposed as
                 * Data Fabric counters, and L3 counters are supported as
                 * well. The PMUs are exported based on family as either
                 * L2 or L3 and NB or DF.
                 */
                num_counters_nb           = NUM_COUNTERS_NB;
                num_counters_llc          = NUM_COUNTERS_L3;
                amd_nb_pmu.name           = "amd_df";
                amd_llc_pmu.name          = "amd_l3";
                format_attr_event_df.show = &event_show_df;
                format_attr_event_l3.show = &event_show_l3;
                l3_mask                   = true;
        } else {
                num_counters_nb           = NUM_COUNTERS_NB;
                num_counters_llc          = NUM_COUNTERS_L2;
                amd_nb_pmu.name           = "amd_nb";
                amd_llc_pmu.name          = "amd_l2";
                format_attr_event_df      = format_attr_event;
                format_attr_event_l3      = format_attr_event;
                l3_mask                   = false;
        }

        amd_nb_pmu.attr_groups  = amd_uncore_attr_groups_df;
        amd_llc_pmu.attr_groups = amd_uncore_attr_groups_l3;

        if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
                amd_uncore_nb = alloc_percpu(struct amd_uncore *);
                if (!amd_uncore_nb) {
                        ret = -ENOMEM;
                        goto fail_nb;
                }
                ret = perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1);
                if (ret)
                        goto fail_nb;

                pr_info("AMD NB counters detected\n");
                ret = 0;
        }

        if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) {
                amd_uncore_llc = alloc_percpu(struct amd_uncore *);
                if (!amd_uncore_llc) {
                        ret = -ENOMEM;
                        goto fail_llc;
                }
                ret = perf_pmu_register(&amd_llc_pmu, amd_llc_pmu.name, -1);
                if (ret)
                        goto fail_llc;

                pr_info("AMD LLC counters detected\n");
                ret = 0;
        }

        /*
         * Install callbacks. Core will call them for each online cpu.
         */
        if (cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP,
                              "perf/x86/amd/uncore:prepare",
                              amd_uncore_cpu_up_prepare, amd_uncore_cpu_dead))
                goto fail_llc;

        if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
                              "perf/x86/amd/uncore:starting",
                              amd_uncore_cpu_starting, NULL))
                goto fail_prep;
        if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
                              "perf/x86/amd/uncore:online",
                              amd_uncore_cpu_online,
                              amd_uncore_cpu_down_prepare))
                goto fail_start;
        return 0;

fail_start:
        cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING);
fail_prep:
        cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP);
fail_llc:
        if (boot_cpu_has(X86_FEATURE_PERFCTR_NB))
                perf_pmu_unregister(&amd_nb_pmu);
        if (amd_uncore_llc)
                free_percpu(amd_uncore_llc);
fail_nb:
        if (amd_uncore_nb)
                free_percpu(amd_uncore_nb);

        return ret;
}
device_initcall(amd_uncore_init);