GNU Linux-libre 4.19.286-gnu1
[releases.git] / tools / power / x86 / turbostat / turbostat.c
1 /*
2  * turbostat -- show CPU frequency and C-state residency
3  * on modern Intel and AMD processors.
4  *
5  * Copyright (c) 2013 Intel Corporation.
6  * Len Brown <len.brown@intel.com>
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21
22 #define _GNU_SOURCE
23 #include MSRHEADER
24 #include INTEL_FAMILY_HEADER
25 #include <stdarg.h>
26 #include <stdio.h>
27 #include <err.h>
28 #include <unistd.h>
29 #include <sys/types.h>
30 #include <sys/wait.h>
31 #include <sys/stat.h>
32 #include <sys/select.h>
33 #include <sys/resource.h>
34 #include <fcntl.h>
35 #include <signal.h>
36 #include <sys/time.h>
37 #include <stdlib.h>
38 #include <getopt.h>
39 #include <dirent.h>
40 #include <string.h>
41 #include <ctype.h>
42 #include <sched.h>
43 #include <time.h>
44 #include <cpuid.h>
45 #include <linux/capability.h>
46 #include <errno.h>
47
48 char *proc_stat = "/proc/stat";
49 FILE *outf;
50 int *fd_percpu;
51 struct timeval interval_tv = {5, 0};
52 struct timespec interval_ts = {5, 0};
53 struct timespec one_msec = {0, 1000000};
54 unsigned int num_iterations;
55 unsigned int debug;
56 unsigned int quiet;
57 unsigned int shown;
58 unsigned int sums_need_wide_columns;
59 unsigned int rapl_joules;
60 unsigned int summary_only;
61 unsigned int list_header_only;
62 unsigned int dump_only;
63 unsigned int do_snb_cstates;
64 unsigned int do_knl_cstates;
65 unsigned int do_slm_cstates;
66 unsigned int do_cnl_cstates;
67 unsigned int use_c1_residency_msr;
68 unsigned int has_aperf;
69 unsigned int has_epb;
70 unsigned int do_irtl_snb;
71 unsigned int do_irtl_hsw;
72 unsigned int units = 1000000;   /* MHz etc */
73 unsigned int genuine_intel;
74 unsigned int authentic_amd;
75 unsigned int max_level, max_extended_level;
76 unsigned int has_invariant_tsc;
77 unsigned int do_nhm_platform_info;
78 unsigned int no_MSR_MISC_PWR_MGMT;
79 unsigned int aperf_mperf_multiplier = 1;
80 double bclk;
81 double base_hz;
82 unsigned int has_base_hz;
83 double tsc_tweak = 1.0;
84 unsigned int show_pkg_only;
85 unsigned int show_core_only;
86 char *output_buffer, *outp;
87 unsigned int do_rapl;
88 unsigned int do_dts;
89 unsigned int do_ptm;
90 unsigned long long  gfx_cur_rc6_ms;
91 unsigned long long cpuidle_cur_cpu_lpi_us;
92 unsigned long long cpuidle_cur_sys_lpi_us;
93 unsigned int gfx_cur_mhz;
94 unsigned int tcc_activation_temp;
95 unsigned int tcc_activation_temp_override;
96 double rapl_power_units, rapl_time_units;
97 double rapl_dram_energy_units, rapl_energy_units;
98 double rapl_joule_counter_range;
99 unsigned int do_core_perf_limit_reasons;
100 unsigned int has_automatic_cstate_conversion;
101 unsigned int do_gfx_perf_limit_reasons;
102 unsigned int do_ring_perf_limit_reasons;
103 unsigned int crystal_hz;
104 unsigned long long tsc_hz;
105 int base_cpu;
106 double discover_bclk(unsigned int family, unsigned int model);
107 unsigned int has_hwp;   /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
108                         /* IA32_HWP_REQUEST, IA32_HWP_STATUS */
109 unsigned int has_hwp_notify;            /* IA32_HWP_INTERRUPT */
110 unsigned int has_hwp_activity_window;   /* IA32_HWP_REQUEST[bits 41:32] */
111 unsigned int has_hwp_epp;               /* IA32_HWP_REQUEST[bits 31:24] */
112 unsigned int has_hwp_pkg;               /* IA32_HWP_REQUEST_PKG */
113 unsigned int has_misc_feature_control;
114 unsigned int first_counter_read = 1;
115
116 #define RAPL_PKG                (1 << 0)
117                                         /* 0x610 MSR_PKG_POWER_LIMIT */
118                                         /* 0x611 MSR_PKG_ENERGY_STATUS */
119 #define RAPL_PKG_PERF_STATUS    (1 << 1)
120                                         /* 0x613 MSR_PKG_PERF_STATUS */
121 #define RAPL_PKG_POWER_INFO     (1 << 2)
122                                         /* 0x614 MSR_PKG_POWER_INFO */
123
124 #define RAPL_DRAM               (1 << 3)
125                                         /* 0x618 MSR_DRAM_POWER_LIMIT */
126                                         /* 0x619 MSR_DRAM_ENERGY_STATUS */
127 #define RAPL_DRAM_PERF_STATUS   (1 << 4)
128                                         /* 0x61b MSR_DRAM_PERF_STATUS */
129 #define RAPL_DRAM_POWER_INFO    (1 << 5)
130                                         /* 0x61c MSR_DRAM_POWER_INFO */
131
132 #define RAPL_CORES_POWER_LIMIT  (1 << 6)
133                                         /* 0x638 MSR_PP0_POWER_LIMIT */
134 #define RAPL_CORE_POLICY        (1 << 7)
135                                         /* 0x63a MSR_PP0_POLICY */
136
137 #define RAPL_GFX                (1 << 8)
138                                         /* 0x640 MSR_PP1_POWER_LIMIT */
139                                         /* 0x641 MSR_PP1_ENERGY_STATUS */
140                                         /* 0x642 MSR_PP1_POLICY */
141
142 #define RAPL_CORES_ENERGY_STATUS        (1 << 9)
143                                         /* 0x639 MSR_PP0_ENERGY_STATUS */
144 #define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT)
145 #define TJMAX_DEFAULT   100
146
147 #define MAX(a, b) ((a) > (b) ? (a) : (b))
148
149 /*
150  * buffer size used by sscanf() for added column names
151  * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters
152  */
153 #define NAME_BYTES 20
154 #define PATH_BYTES 128
155
156 int backwards_count;
157 char *progname;
158
159 #define CPU_SUBSET_MAXCPUS      1024    /* need to use before probe... */
160 cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset;
161 size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size;
162 #define MAX_ADDED_COUNTERS 8
163 #define MAX_ADDED_THREAD_COUNTERS 24
164 #define BITMASK_SIZE 32
165
166 struct thread_data {
167         struct timeval tv_begin;
168         struct timeval tv_end;
169         unsigned long long tsc;
170         unsigned long long aperf;
171         unsigned long long mperf;
172         unsigned long long c1;
173         unsigned long long  irq_count;
174         unsigned int smi_count;
175         unsigned int cpu_id;
176         unsigned int apic_id;
177         unsigned int x2apic_id;
178         unsigned int flags;
179 #define CPU_IS_FIRST_THREAD_IN_CORE     0x2
180 #define CPU_IS_FIRST_CORE_IN_PACKAGE    0x4
181         unsigned long long counter[MAX_ADDED_THREAD_COUNTERS];
182 } *thread_even, *thread_odd;
183
184 struct core_data {
185         unsigned long long c3;
186         unsigned long long c6;
187         unsigned long long c7;
188         unsigned long long mc6_us;      /* duplicate as per-core for now, even though per module */
189         unsigned int core_temp_c;
190         unsigned int core_id;
191         unsigned long long counter[MAX_ADDED_COUNTERS];
192 } *core_even, *core_odd;
193
194 struct pkg_data {
195         unsigned long long pc2;
196         unsigned long long pc3;
197         unsigned long long pc6;
198         unsigned long long pc7;
199         unsigned long long pc8;
200         unsigned long long pc9;
201         unsigned long long pc10;
202         unsigned long long cpu_lpi;
203         unsigned long long sys_lpi;
204         unsigned long long pkg_wtd_core_c0;
205         unsigned long long pkg_any_core_c0;
206         unsigned long long pkg_any_gfxe_c0;
207         unsigned long long pkg_both_core_gfxe_c0;
208         long long gfx_rc6_ms;
209         unsigned int gfx_mhz;
210         unsigned int package_id;
211         unsigned int energy_pkg;        /* MSR_PKG_ENERGY_STATUS */
212         unsigned int energy_dram;       /* MSR_DRAM_ENERGY_STATUS */
213         unsigned int energy_cores;      /* MSR_PP0_ENERGY_STATUS */
214         unsigned int energy_gfx;        /* MSR_PP1_ENERGY_STATUS */
215         unsigned int rapl_pkg_perf_status;      /* MSR_PKG_PERF_STATUS */
216         unsigned int rapl_dram_perf_status;     /* MSR_DRAM_PERF_STATUS */
217         unsigned int pkg_temp_c;
218         unsigned long long counter[MAX_ADDED_COUNTERS];
219 } *package_even, *package_odd;
220
221 #define ODD_COUNTERS thread_odd, core_odd, package_odd
222 #define EVEN_COUNTERS thread_even, core_even, package_even
223
/*
 * Address of the thread_data entry for (pkg_no, node_no, core_no, thread_no)
 * in an array laid out package-major, then node, then core, then thread.
 * The element counts per level come from the global "topo".
 */
#define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no)          \
        ((thread_base) +                                                      \
         (((((pkg_no) * topo.nodes_per_pkg + (node_no)) *                     \
            topo.cores_per_node + (core_no)) *                                \
           topo.threads_per_core) +                                           \
          (thread_no)))
231
/*
 * Address of the core_data entry for (pkg_no, node_no, core_no) in an
 * array laid out package-major, then node, then core.
 * The element counts per level come from the global "topo".
 */
#define GET_CORE(core_base, core_no, node_no, pkg_no)                   \
        ((core_base) +                                                  \
         ((((pkg_no) * topo.nodes_per_pkg + (node_no)) *                \
           topo.cores_per_node) +                                       \
          (core_no)))
237
238
/*
 * Address of the pkg_data entry for package pkg_no.
 * Both arguments are parenthesized so that any expression may be passed.
 */
#define GET_PKG(pkg_base, pkg_no) ((pkg_base) + (pkg_no))
240
241 enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE};
242 enum counter_type {COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC};
243 enum counter_format {FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT};
244
245 struct msr_counter {
246         unsigned int msr_num;
247         char name[NAME_BYTES];
248         char path[PATH_BYTES];
249         unsigned int width;
250         enum counter_type type;
251         enum counter_format format;
252         struct msr_counter *next;
253         unsigned int flags;
254 #define FLAGS_HIDE      (1 << 0)
255 #define FLAGS_SHOW      (1 << 1)
256 #define SYSFS_PERCPU    (1 << 1)
257 };
258
/*
 * Bookkeeping for added counters, one list per scope.
 * tp, cp and pp are the heads of NULL-terminated msr_counter lists.
 */
struct sys_counters {
        /* presumably the number of entries on each list below */
        unsigned int added_thread_counters;
        unsigned int added_core_counters;
        unsigned int added_package_counters;

        struct msr_counter *tp;         /* thread-scope list */
        struct msr_counter *cp;         /* core-scope list */
        struct msr_counter *pp;         /* package-scope list */
} sys;
267
268 struct system_summary {
269         struct thread_data threads;
270         struct core_data cores;
271         struct pkg_data packages;
272 } average;
273
/*
 * Topology identifiers for one CPU; "cpus" points at an array of these.
 */
struct cpu_topology {
        int physical_package_id;
        int logical_cpu_id;
        int physical_node_id;
        /* 0-based count within the package */
        int logical_node_id;
        int physical_core_id;
        int thread_id;
        /* Processing Unit/Thread IDs */
        cpu_set_t *put_ids;
} *cpus;
283
/*
 * System-wide topology counts and limits.
 * nodes_per_pkg, cores_per_node and threads_per_core are the strides used
 * by the GET_THREAD() and GET_CORE() indexing macros.
 */
struct topo_params {
        /* totals */
        int num_packages;
        int num_cpus;
        int num_cores;

        /* highest numbers seen */
        int max_cpu_num;
        int max_node_num;

        /* per-level counts */
        int nodes_per_pkg;
        int cores_per_node;
        int threads_per_core;
} topo;
294
295 struct timeval tv_even, tv_odd, tv_delta;
296
297 int *irq_column_2_cpu;  /* /proc/interrupts column numbers */
298 int *irqs_per_cpu;              /* indexed by cpu_num */
299
300 void setup_all_buffers(void);
301
302 char *sys_lpi_file;
303 char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us";
304 char *sys_lpi_file_debugfs = "/sys/kernel/debug/pmc_core/slp_s0_residency_usec";
305
306 int cpu_is_not_present(int cpu)
307 {
308         return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
309 }
310 /*
311  * run func(thread, core, package) in topology order
312  * skip non-present cpus
313  */
314
315 int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *),
316         struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
317 {
318         int retval, pkg_no, core_no, thread_no, node_no;
319
320         for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
321                 for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
322                         for (node_no = 0; node_no < topo.nodes_per_pkg;
323                              node_no++) {
324                                 for (thread_no = 0; thread_no <
325                                         topo.threads_per_core; ++thread_no) {
326                                         struct thread_data *t;
327                                         struct core_data *c;
328                                         struct pkg_data *p;
329
330                                         t = GET_THREAD(thread_base, thread_no,
331                                                        core_no, node_no,
332                                                        pkg_no);
333
334                                         if (cpu_is_not_present(t->cpu_id))
335                                                 continue;
336
337                                         c = GET_CORE(core_base, core_no,
338                                                      node_no, pkg_no);
339                                         p = GET_PKG(pkg_base, pkg_no);
340
341                                         retval = func(t, c, p);
342                                         if (retval)
343                                                 return retval;
344                                 }
345                         }
346                 }
347         }
348         return 0;
349 }
350
351 int cpu_migrate(int cpu)
352 {
353         CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
354         CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
355         if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
356                 return -1;
357         else
358                 return 0;
359 }
360 int get_msr_fd(int cpu)
361 {
362         char pathname[32];
363         int fd;
364
365         fd = fd_percpu[cpu];
366
367         if (fd)
368                 return fd;
369
370         sprintf(pathname, "/dev/cpu/%d/msr", cpu);
371         fd = open(pathname, O_RDONLY);
372         if (fd < 0)
373                 err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
374
375         fd_percpu[cpu] = fd;
376
377         return fd;
378 }
379
/*
 * Read the 8-byte MSR at "offset" on "cpu" into *msr.
 *
 * Always returns 0; a read that does not deliver exactly sizeof(*msr)
 * bytes terminates the program via err().
 */
int get_msr(int cpu, off_t offset, unsigned long long *msr)
{
        int fd = get_msr_fd(cpu);
        ssize_t nbytes = pread(fd, msr, sizeof(*msr), offset);

        if (nbytes != sizeof(*msr))
                err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset);

        return 0;
}
391
392 /*
393  * This list matches the column headers, except
394  * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time
395  * 2. Core and CPU are moved to the end, we can't have strings that contain them
396  *    matching on them for --show and --hide.
397  */
398 struct msr_counter bic[] = {
399         { 0x0, "usec" },
400         { 0x0, "Time_Of_Day_Seconds" },
401         { 0x0, "Package" },
402         { 0x0, "Node" },
403         { 0x0, "Avg_MHz" },
404         { 0x0, "Busy%" },
405         { 0x0, "Bzy_MHz" },
406         { 0x0, "TSC_MHz" },
407         { 0x0, "IRQ" },
408         { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL},
409         { 0x0, "sysfs" },
410         { 0x0, "CPU%c1" },
411         { 0x0, "CPU%c3" },
412         { 0x0, "CPU%c6" },
413         { 0x0, "CPU%c7" },
414         { 0x0, "ThreadC" },
415         { 0x0, "CoreTmp" },
416         { 0x0, "CoreCnt" },
417         { 0x0, "PkgTmp" },
418         { 0x0, "GFX%rc6" },
419         { 0x0, "GFXMHz" },
420         { 0x0, "Pkg%pc2" },
421         { 0x0, "Pkg%pc3" },
422         { 0x0, "Pkg%pc6" },
423         { 0x0, "Pkg%pc7" },
424         { 0x0, "Pkg%pc8" },
425         { 0x0, "Pkg%pc9" },
426         { 0x0, "Pk%pc10" },
427         { 0x0, "CPU%LPI" },
428         { 0x0, "SYS%LPI" },
429         { 0x0, "PkgWatt" },
430         { 0x0, "CorWatt" },
431         { 0x0, "GFXWatt" },
432         { 0x0, "PkgCnt" },
433         { 0x0, "RAMWatt" },
434         { 0x0, "PKG_%" },
435         { 0x0, "RAM_%" },
436         { 0x0, "Pkg_J" },
437         { 0x0, "Cor_J" },
438         { 0x0, "GFX_J" },
439         { 0x0, "RAM_J" },
440         { 0x0, "Mod%c6" },
441         { 0x0, "Totl%C0" },
442         { 0x0, "Any%C0" },
443         { 0x0, "GFX%C0" },
444         { 0x0, "CPUGFX%" },
445         { 0x0, "Core" },
446         { 0x0, "CPU" },
447         { 0x0, "APIC" },
448         { 0x0, "X2APIC" },
449 };
450
451 #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
452 #define BIC_USEC        (1ULL << 0)
453 #define BIC_TOD         (1ULL << 1)
454 #define BIC_Package     (1ULL << 2)
455 #define BIC_Node        (1ULL << 3)
456 #define BIC_Avg_MHz     (1ULL << 4)
457 #define BIC_Busy        (1ULL << 5)
458 #define BIC_Bzy_MHz     (1ULL << 6)
459 #define BIC_TSC_MHz     (1ULL << 7)
460 #define BIC_IRQ         (1ULL << 8)
461 #define BIC_SMI         (1ULL << 9)
462 #define BIC_sysfs       (1ULL << 10)
463 #define BIC_CPU_c1      (1ULL << 11)
464 #define BIC_CPU_c3      (1ULL << 12)
465 #define BIC_CPU_c6      (1ULL << 13)
466 #define BIC_CPU_c7      (1ULL << 14)
467 #define BIC_ThreadC     (1ULL << 15)
468 #define BIC_CoreTmp     (1ULL << 16)
469 #define BIC_CoreCnt     (1ULL << 17)
470 #define BIC_PkgTmp      (1ULL << 18)
471 #define BIC_GFX_rc6     (1ULL << 19)
472 #define BIC_GFXMHz      (1ULL << 20)
473 #define BIC_Pkgpc2      (1ULL << 21)
474 #define BIC_Pkgpc3      (1ULL << 22)
475 #define BIC_Pkgpc6      (1ULL << 23)
476 #define BIC_Pkgpc7      (1ULL << 24)
477 #define BIC_Pkgpc8      (1ULL << 25)
478 #define BIC_Pkgpc9      (1ULL << 26)
479 #define BIC_Pkgpc10     (1ULL << 27)
480 #define BIC_CPU_LPI     (1ULL << 28)
481 #define BIC_SYS_LPI     (1ULL << 29)
482 #define BIC_PkgWatt     (1ULL << 30)
483 #define BIC_CorWatt     (1ULL << 31)
484 #define BIC_GFXWatt     (1ULL << 32)
485 #define BIC_PkgCnt      (1ULL << 33)
486 #define BIC_RAMWatt     (1ULL << 34)
487 #define BIC_PKG__       (1ULL << 35)
488 #define BIC_RAM__       (1ULL << 36)
489 #define BIC_Pkg_J       (1ULL << 37)
490 #define BIC_Cor_J       (1ULL << 38)
491 #define BIC_GFX_J       (1ULL << 39)
492 #define BIC_RAM_J       (1ULL << 40)
493 #define BIC_Mod_c6      (1ULL << 41)
494 #define BIC_Totl_c0     (1ULL << 42)
495 #define BIC_Any_c0      (1ULL << 43)
496 #define BIC_GFX_c0      (1ULL << 44)
497 #define BIC_CPUGFX      (1ULL << 45)
498 #define BIC_Core        (1ULL << 46)
499 #define BIC_CPU         (1ULL << 47)
500 #define BIC_APIC        (1ULL << 48)
501 #define BIC_X2APIC      (1ULL << 49)
502
503 #define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
504
505 unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
506 unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;
507
508 #define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME)
509 #define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME)
510 #define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT)
511 #define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT)
512
513
514 #define MAX_DEFERRED 16
515 char *deferred_skip_names[MAX_DEFERRED];
516 int deferred_skip_index;
517
518 /*
519  * HIDE_LIST - hide this list of counters, show the rest [default]
520  * SHOW_LIST - show this list of counters, hide the rest
521  */
522 enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST;
523
524 void help(void)
525 {
526         fprintf(outf,
527         "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
528         "\n"
529         "Turbostat forks the specified COMMAND and prints statistics\n"
530         "when COMMAND completes.\n"
531         "If no COMMAND is specified, turbostat wakes every 5-seconds\n"
532         "to print statistics, until interrupted.\n"
533         "  -a, --add    add a counter\n"
534         "                 eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
535         "  -c, --cpu    cpu-set limit output to summary plus cpu-set:\n"
536         "                 {core | package | j,k,l..m,n-p }\n"
537         "  -d, --debug  displays usec, Time_Of_Day_Seconds and more debugging\n"
538         "  -D, --Dump   displays the raw counter values\n"
539         "  -e, --enable [all | column]\n"
540         "               shows all or the specified disabled column\n"
541         "  -H, --hide [column|column,column,...]\n"
542         "               hide the specified column(s)\n"
543         "  -i, --interval sec.subsec\n"
544         "               Override default 5-second measurement interval\n"
545         "  -J, --Joules displays energy in Joules instead of Watts\n"
546         "  -l, --list   list column headers only\n"
547         "  -n, --num_iterations num\n"
548         "               number of the measurement iterations\n"
549         "  -o, --out file\n"
550         "               create or truncate \"file\" for all output\n"
551         "  -q, --quiet  skip decoding system configuration header\n"
552         "  -s, --show [column|column,column,...]\n"
553         "               show only the specified column(s)\n"
554         "  -S, --Summary\n"
555         "               limits output to 1-line system summary per interval\n"
556         "  -T, --TCC temperature\n"
557         "               sets the Thermal Control Circuit temperature in\n"
558         "                 degrees Celsius\n"
559         "  -h, --help   print this help message\n"
560         "  -v, --version        print version information\n"
561         "\n"
562         "For more help, run \"man turbostat\"\n");
563 }
564
565 /*
566  * bic_lookup
567  * for all the strings in comma separate name_list,
568  * set the approprate bit in return value.
569  */
570 unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
571 {
572         int i;
573         unsigned long long retval = 0;
574
575         while (name_list) {
576                 char *comma;
577
578                 comma = strchr(name_list, ',');
579
580                 if (comma)
581                         *comma = '\0';
582
583                 if (!strcmp(name_list, "all"))
584                         return ~0;
585
586                 for (i = 0; i < MAX_BIC; ++i) {
587                         if (!strcmp(name_list, bic[i].name)) {
588                                 retval |= (1ULL << i);
589                                 break;
590                         }
591                 }
592                 if (i == MAX_BIC) {
593                         if (mode == SHOW_LIST) {
594                                 fprintf(stderr, "Invalid counter name: %s\n", name_list);
595                                 exit(-1);
596                         }
597                         deferred_skip_names[deferred_skip_index++] = name_list;
598                         if (debug)
599                                 fprintf(stderr, "deferred \"%s\"\n", name_list);
600                         if (deferred_skip_index >= MAX_DEFERRED) {
601                                 fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n",
602                                         MAX_DEFERRED, name_list);
603                                 help();
604                                 exit(1);
605                         }
606                 }
607
608                 name_list = comma;
609                 if (name_list)
610                         name_list++;
611
612         }
613         return retval;
614 }
615
616
617 void print_header(char *delim)
618 {
619         struct msr_counter *mp;
620         int printed = 0;
621
622         if (DO_BIC(BIC_USEC))
623                 outp += sprintf(outp, "%susec", (printed++ ? delim : ""));
624         if (DO_BIC(BIC_TOD))
625                 outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : ""));
626         if (DO_BIC(BIC_Package))
627                 outp += sprintf(outp, "%sPackage", (printed++ ? delim : ""));
628         if (DO_BIC(BIC_Node))
629                 outp += sprintf(outp, "%sNode", (printed++ ? delim : ""));
630         if (DO_BIC(BIC_Core))
631                 outp += sprintf(outp, "%sCore", (printed++ ? delim : ""));
632         if (DO_BIC(BIC_CPU))
633                 outp += sprintf(outp, "%sCPU", (printed++ ? delim : ""));
634         if (DO_BIC(BIC_APIC))
635                 outp += sprintf(outp, "%sAPIC", (printed++ ? delim : ""));
636         if (DO_BIC(BIC_X2APIC))
637                 outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : ""));
638         if (DO_BIC(BIC_Avg_MHz))
639                 outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : ""));
640         if (DO_BIC(BIC_Busy))
641                 outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : ""));
642         if (DO_BIC(BIC_Bzy_MHz))
643                 outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : ""));
644         if (DO_BIC(BIC_TSC_MHz))
645                 outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : ""));
646
647         if (DO_BIC(BIC_IRQ)) {
648                 if (sums_need_wide_columns)
649                         outp += sprintf(outp, "%s     IRQ", (printed++ ? delim : ""));
650                 else
651                         outp += sprintf(outp, "%sIRQ", (printed++ ? delim : ""));
652         }
653
654         if (DO_BIC(BIC_SMI))
655                 outp += sprintf(outp, "%sSMI", (printed++ ? delim : ""));
656
657         for (mp = sys.tp; mp; mp = mp->next) {
658
659                 if (mp->format == FORMAT_RAW) {
660                         if (mp->width == 64)
661                                 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name);
662                         else
663                                 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name);
664                 } else {
665                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
666                                 outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name);
667                         else
668                                 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name);
669                 }
670         }
671
672         if (DO_BIC(BIC_CPU_c1))
673                 outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : ""));
674         if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates)
675                 outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : ""));
676         if (DO_BIC(BIC_CPU_c6))
677                 outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : ""));
678         if (DO_BIC(BIC_CPU_c7))
679                 outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : ""));
680
681         if (DO_BIC(BIC_Mod_c6))
682                 outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : ""));
683
684         if (DO_BIC(BIC_CoreTmp))
685                 outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : ""));
686
687         for (mp = sys.cp; mp; mp = mp->next) {
688                 if (mp->format == FORMAT_RAW) {
689                         if (mp->width == 64)
690                                 outp += sprintf(outp, "%s%18.18s", delim, mp->name);
691                         else
692                                 outp += sprintf(outp, "%s%10.10s", delim, mp->name);
693                 } else {
694                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
695                                 outp += sprintf(outp, "%s%8s", delim, mp->name);
696                         else
697                                 outp += sprintf(outp, "%s%s", delim, mp->name);
698                 }
699         }
700
701         if (DO_BIC(BIC_PkgTmp))
702                 outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : ""));
703
704         if (DO_BIC(BIC_GFX_rc6))
705                 outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : ""));
706
707         if (DO_BIC(BIC_GFXMHz))
708                 outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : ""));
709
710         if (DO_BIC(BIC_Totl_c0))
711                 outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
712         if (DO_BIC(BIC_Any_c0))
713                 outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : ""));
714         if (DO_BIC(BIC_GFX_c0))
715                 outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : ""));
716         if (DO_BIC(BIC_CPUGFX))
717                 outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : ""));
718
719         if (DO_BIC(BIC_Pkgpc2))
720                 outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : ""));
721         if (DO_BIC(BIC_Pkgpc3))
722                 outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : ""));
723         if (DO_BIC(BIC_Pkgpc6))
724                 outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : ""));
725         if (DO_BIC(BIC_Pkgpc7))
726                 outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : ""));
727         if (DO_BIC(BIC_Pkgpc8))
728                 outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : ""));
729         if (DO_BIC(BIC_Pkgpc9))
730                 outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : ""));
731         if (DO_BIC(BIC_Pkgpc10))
732                 outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : ""));
733         if (DO_BIC(BIC_CPU_LPI))
734                 outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : ""));
735         if (DO_BIC(BIC_SYS_LPI))
736                 outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : ""));
737
738         if (do_rapl && !rapl_joules) {
739                 if (DO_BIC(BIC_PkgWatt))
740                         outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : ""));
741                 if (DO_BIC(BIC_CorWatt))
742                         outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
743                 if (DO_BIC(BIC_GFXWatt))
744                         outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : ""));
745                 if (DO_BIC(BIC_RAMWatt))
746                         outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : ""));
747                 if (DO_BIC(BIC_PKG__))
748                         outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
749                 if (DO_BIC(BIC_RAM__))
750                         outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
751         } else if (do_rapl && rapl_joules) {
752                 if (DO_BIC(BIC_Pkg_J))
753                         outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : ""));
754                 if (DO_BIC(BIC_Cor_J))
755                         outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
756                 if (DO_BIC(BIC_GFX_J))
757                         outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : ""));
758                 if (DO_BIC(BIC_RAM_J))
759                         outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : ""));
760                 if (DO_BIC(BIC_PKG__))
761                         outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
762                 if (DO_BIC(BIC_RAM__))
763                         outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
764         }
765         for (mp = sys.pp; mp; mp = mp->next) {
766                 if (mp->format == FORMAT_RAW) {
767                         if (mp->width == 64)
768                                 outp += sprintf(outp, "%s%18.18s", delim, mp->name);
769                         else
770                                 outp += sprintf(outp, "%s%10.10s", delim, mp->name);
771                 } else {
772                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
773                                 outp += sprintf(outp, "%s%8s", delim, mp->name);
774                         else
775                                 outp += sprintf(outp, "%s%s", delim, mp->name);
776                 }
777         }
778
779         outp += sprintf(outp, "\n");
780 }
781
782 int dump_counters(struct thread_data *t, struct core_data *c,
783         struct pkg_data *p)
784 {
785         int i;
786         struct msr_counter *mp;
787
788         outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);
789
790         if (t) {
791                 outp += sprintf(outp, "CPU: %d flags 0x%x\n",
792                         t->cpu_id, t->flags);
793                 outp += sprintf(outp, "TSC: %016llX\n", t->tsc);
794                 outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
795                 outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
796                 outp += sprintf(outp, "c1: %016llX\n", t->c1);
797
798                 if (DO_BIC(BIC_IRQ))
799                         outp += sprintf(outp, "IRQ: %lld\n", t->irq_count);
800                 if (DO_BIC(BIC_SMI))
801                         outp += sprintf(outp, "SMI: %d\n", t->smi_count);
802
803                 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
804                         outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n",
805                                 i, mp->msr_num, t->counter[i]);
806                 }
807         }
808
809         if (c) {
810                 outp += sprintf(outp, "core: %d\n", c->core_id);
811                 outp += sprintf(outp, "c3: %016llX\n", c->c3);
812                 outp += sprintf(outp, "c6: %016llX\n", c->c6);
813                 outp += sprintf(outp, "c7: %016llX\n", c->c7);
814                 outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
815
816                 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
817                         outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n",
818                                 i, mp->msr_num, c->counter[i]);
819                 }
820                 outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
821         }
822
823         if (p) {
824                 outp += sprintf(outp, "package: %d\n", p->package_id);
825
826                 outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
827                 outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
828                 outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
829                 outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);
830
831                 outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
832                 if (DO_BIC(BIC_Pkgpc3))
833                         outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
834                 if (DO_BIC(BIC_Pkgpc6))
835                         outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
836                 if (DO_BIC(BIC_Pkgpc7))
837                         outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
838                 outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
839                 outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
840                 outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
841                 outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
842                 outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
843                 outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
844                 outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg);
845                 outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores);
846                 outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx);
847                 outp += sprintf(outp, "Joules RAM: %0X\n", p->energy_dram);
848                 outp += sprintf(outp, "Throttle PKG: %0X\n",
849                         p->rapl_pkg_perf_status);
850                 outp += sprintf(outp, "Throttle RAM: %0X\n",
851                         p->rapl_dram_perf_status);
852                 outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
853
854                 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
855                         outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n",
856                                 i, mp->msr_num, p->counter[i]);
857                 }
858         }
859
860         outp += sprintf(outp, "\n");
861
862         return 0;
863 }
864
865 /*
866  * column formatting convention & formats
867  */
868 int format_counters(struct thread_data *t, struct core_data *c,
869         struct pkg_data *p)
870 {
871         double interval_float, tsc;
872         char *fmt8;
873         int i;
874         struct msr_counter *mp;
875         char *delim = "\t";
876         int printed = 0;
877
878          /* if showing only 1st thread in core and this isn't one, bail out */
879         if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
880                 return 0;
881
882          /* if showing only 1st thread in pkg and this isn't one, bail out */
883         if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
884                 return 0;
885
886         /*if not summary line and --cpu is used */
887         if ((t != &average.threads) &&
888                 (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset)))
889                 return 0;
890
891         if (DO_BIC(BIC_USEC)) {
892                 /* on each row, print how many usec each timestamp took to gather */
893                 struct timeval tv;
894
895                 timersub(&t->tv_end, &t->tv_begin, &tv);
896                 outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec);
897         }
898
899         /* Time_Of_Day_Seconds: on each row, print sec.usec last timestamp taken */
900         if (DO_BIC(BIC_TOD))
901                 outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec);
902
903         interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
904
905         tsc = t->tsc * tsc_tweak;
906
907         /* topo columns, print blanks on 1st (average) line */
908         if (t == &average.threads) {
909                 if (DO_BIC(BIC_Package))
910                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
911                 if (DO_BIC(BIC_Node))
912                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
913                 if (DO_BIC(BIC_Core))
914                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
915                 if (DO_BIC(BIC_CPU))
916                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
917                 if (DO_BIC(BIC_APIC))
918                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
919                 if (DO_BIC(BIC_X2APIC))
920                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
921         } else {
922                 if (DO_BIC(BIC_Package)) {
923                         if (p)
924                                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->package_id);
925                         else
926                                 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
927                 }
928                 if (DO_BIC(BIC_Node)) {
929                         if (t)
930                                 outp += sprintf(outp, "%s%d",
931                                                 (printed++ ? delim : ""),
932                                               cpus[t->cpu_id].physical_node_id);
933                         else
934                                 outp += sprintf(outp, "%s-",
935                                                 (printed++ ? delim : ""));
936                 }
937                 if (DO_BIC(BIC_Core)) {
938                         if (c)
939                                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id);
940                         else
941                                 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
942                 }
943                 if (DO_BIC(BIC_CPU))
944                         outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id);
945                 if (DO_BIC(BIC_APIC))
946                         outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->apic_id);
947                 if (DO_BIC(BIC_X2APIC))
948                         outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->x2apic_id);
949         }
950
951         if (DO_BIC(BIC_Avg_MHz))
952                 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
953                         1.0 / units * t->aperf / interval_float);
954
955         if (DO_BIC(BIC_Busy))
956                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf/tsc);
957
958         if (DO_BIC(BIC_Bzy_MHz)) {
959                 if (has_base_hz)
960                         outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf);
961                 else
962                         outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
963                                 tsc / units * t->aperf / t->mperf / interval_float);
964         }
965
966         if (DO_BIC(BIC_TSC_MHz))
967                 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc/units/interval_float);
968
969         /* IRQ */
970         if (DO_BIC(BIC_IRQ)) {
971                 if (sums_need_wide_columns)
972                         outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count);
973                 else
974                         outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count);
975         }
976
977         /* SMI */
978         if (DO_BIC(BIC_SMI))
979                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count);
980
981         /* Added counters */
982         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
983                 if (mp->format == FORMAT_RAW) {
984                         if (mp->width == 32)
985                                 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) t->counter[i]);
986                         else
987                                 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]);
988                 } else if (mp->format == FORMAT_DELTA) {
989                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
990                                 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]);
991                         else
992                                 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]);
993                 } else if (mp->format == FORMAT_PERCENT) {
994                         if (mp->type == COUNTER_USEC)
995                                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), t->counter[i]/interval_float/10000);
996                         else
997                                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i]/tsc);
998                 }
999         }
1000
1001         /* C1 */
1002         if (DO_BIC(BIC_CPU_c1))
1003                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1/tsc);
1004
1005
1006         /* print per-core data only for 1st thread in core */
1007         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1008                 goto done;
1009
1010         if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates)
1011                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3/tsc);
1012         if (DO_BIC(BIC_CPU_c6))
1013                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6/tsc);
1014         if (DO_BIC(BIC_CPU_c7))
1015                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7/tsc);
1016
1017         /* Mod%c6 */
1018         if (DO_BIC(BIC_Mod_c6))
1019                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc);
1020
1021         if (DO_BIC(BIC_CoreTmp))
1022                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c);
1023
1024         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1025                 if (mp->format == FORMAT_RAW) {
1026                         if (mp->width == 32)
1027                                 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) c->counter[i]);
1028                         else
1029                                 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]);
1030                 } else if (mp->format == FORMAT_DELTA) {
1031                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1032                                 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]);
1033                         else
1034                                 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]);
1035                 } else if (mp->format == FORMAT_PERCENT) {
1036                         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i]/tsc);
1037                 }
1038         }
1039
1040         /* print per-package data only for 1st core in package */
1041         if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1042                 goto done;
1043
1044         /* PkgTmp */
1045         if (DO_BIC(BIC_PkgTmp))
1046                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c);
1047
1048         /* GFXrc6 */
1049         if (DO_BIC(BIC_GFX_rc6)) {
1050                 if (p->gfx_rc6_ms == -1) {      /* detect GFX counter reset */
1051                         outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
1052                 } else {
1053                         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
1054                                 p->gfx_rc6_ms / 10.0 / interval_float);
1055                 }
1056         }
1057
1058         /* GFXMHz */
1059         if (DO_BIC(BIC_GFXMHz))
1060                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz);
1061
1062         /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
1063         if (DO_BIC(BIC_Totl_c0))
1064                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0/tsc);
1065         if (DO_BIC(BIC_Any_c0))
1066                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0/tsc);
1067         if (DO_BIC(BIC_GFX_c0))
1068                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0/tsc);
1069         if (DO_BIC(BIC_CPUGFX))
1070                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0/tsc);
1071
1072         if (DO_BIC(BIC_Pkgpc2))
1073                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2/tsc);
1074         if (DO_BIC(BIC_Pkgpc3))
1075                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3/tsc);
1076         if (DO_BIC(BIC_Pkgpc6))
1077                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6/tsc);
1078         if (DO_BIC(BIC_Pkgpc7))
1079                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7/tsc);
1080         if (DO_BIC(BIC_Pkgpc8))
1081                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8/tsc);
1082         if (DO_BIC(BIC_Pkgpc9))
1083                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9/tsc);
1084         if (DO_BIC(BIC_Pkgpc10))
1085                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10/tsc);
1086
1087         if (DO_BIC(BIC_CPU_LPI))
1088                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
1089         if (DO_BIC(BIC_SYS_LPI))
1090                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float);
1091
1092         /*
1093          * If measurement interval exceeds minimum RAPL Joule Counter range,
1094          * indicate that results are suspect by printing "**" in fraction place.
1095          */
1096         if (interval_float < rapl_joule_counter_range)
1097                 fmt8 = "%s%.2f";
1098         else
1099                 fmt8 = "%6.0f**";
1100
1101         if (DO_BIC(BIC_PkgWatt))
1102                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
1103         if (DO_BIC(BIC_CorWatt))
1104                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
1105         if (DO_BIC(BIC_GFXWatt))
1106                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float);
1107         if (DO_BIC(BIC_RAMWatt))
1108                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units / interval_float);
1109         if (DO_BIC(BIC_Pkg_J))
1110                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
1111         if (DO_BIC(BIC_Cor_J))
1112                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units);
1113         if (DO_BIC(BIC_GFX_J))
1114                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units);
1115         if (DO_BIC(BIC_RAM_J))
1116                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units);
1117         if (DO_BIC(BIC_PKG__))
1118                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
1119         if (DO_BIC(BIC_RAM__))
1120                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
1121
1122         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1123                 if (mp->format == FORMAT_RAW) {
1124                         if (mp->width == 32)
1125                                 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) p->counter[i]);
1126                         else
1127                                 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]);
1128                 } else if (mp->format == FORMAT_DELTA) {
1129                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1130                                 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]);
1131                         else
1132                                 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]);
1133                 } else if (mp->format == FORMAT_PERCENT) {
1134                         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i]/tsc);
1135                 }
1136         }
1137
1138 done:
1139         if (*(outp - 1) != '\n')
1140                 outp += sprintf(outp, "\n");
1141
1142         return 0;
1143 }
1144
1145 void flush_output_stdout(void)
1146 {
1147         FILE *filep;
1148
1149         if (outf == stderr)
1150                 filep = stdout;
1151         else
1152                 filep = outf;
1153
1154         fputs(output_buffer, filep);
1155         fflush(filep);
1156
1157         outp = output_buffer;
1158 }
1159 void flush_output_stderr(void)
1160 {
1161         fputs(output_buffer, outf);
1162         fflush(outf);
1163         outp = output_buffer;
1164 }
1165 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1166 {
1167         static int printed;
1168
1169         if (!printed || !summary_only)
1170                 print_header("\t");
1171
1172         format_counters(&average.threads, &average.cores, &average.packages);
1173
1174         printed = 1;
1175
1176         if (summary_only)
1177                 return;
1178
1179         for_all_cpus(format_counters, t, c, p);
1180 }
1181
/*
 * DELTA_WRAP32(new, old) - overwrite old with the distance from old to new
 * for a counter that wraps at 32 bits.
 *
 * When new is greater than old the result is simply new - old; otherwise
 * the counter is taken to have wrapped and 2^32 is added before
 * subtracting.  Both arguments are evaluated more than once, so they must
 * be free of side effects, and old must be an lvalue.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement,
 * including inside an unbraced if/else.
 */
#define DELTA_WRAP32(new, old)                                  \
        do {                                                    \
                if ((new) > (old)) {                            \
                        (old) = (new) - (old);                  \
                } else {                                        \
                        (old) = 0x100000000 + (new) - (old);    \
                }                                               \
        } while (0)
1188
1189 int
1190 delta_package(struct pkg_data *new, struct pkg_data *old)
1191 {
1192         int i;
1193         struct msr_counter *mp;
1194
1195
1196         if (DO_BIC(BIC_Totl_c0))
1197                 old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
1198         if (DO_BIC(BIC_Any_c0))
1199                 old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
1200         if (DO_BIC(BIC_GFX_c0))
1201                 old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
1202         if (DO_BIC(BIC_CPUGFX))
1203                 old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
1204
1205         old->pc2 = new->pc2 - old->pc2;
1206         if (DO_BIC(BIC_Pkgpc3))
1207                 old->pc3 = new->pc3 - old->pc3;
1208         if (DO_BIC(BIC_Pkgpc6))
1209                 old->pc6 = new->pc6 - old->pc6;
1210         if (DO_BIC(BIC_Pkgpc7))
1211                 old->pc7 = new->pc7 - old->pc7;
1212         old->pc8 = new->pc8 - old->pc8;
1213         old->pc9 = new->pc9 - old->pc9;
1214         old->pc10 = new->pc10 - old->pc10;
1215         old->cpu_lpi = new->cpu_lpi - old->cpu_lpi;
1216         old->sys_lpi = new->sys_lpi - old->sys_lpi;
1217         old->pkg_temp_c = new->pkg_temp_c;
1218
1219         /* flag an error when rc6 counter resets/wraps */
1220         if (old->gfx_rc6_ms >  new->gfx_rc6_ms)
1221                 old->gfx_rc6_ms = -1;
1222         else
1223                 old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;
1224
1225         old->gfx_mhz = new->gfx_mhz;
1226
1227         DELTA_WRAP32(new->energy_pkg, old->energy_pkg);
1228         DELTA_WRAP32(new->energy_cores, old->energy_cores);
1229         DELTA_WRAP32(new->energy_gfx, old->energy_gfx);
1230         DELTA_WRAP32(new->energy_dram, old->energy_dram);
1231         DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status);
1232         DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status);
1233
1234         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1235                 if (mp->format == FORMAT_RAW)
1236                         old->counter[i] = new->counter[i];
1237                 else
1238                         old->counter[i] = new->counter[i] - old->counter[i];
1239         }
1240
1241         return 0;
1242 }
1243
1244 void
1245 delta_core(struct core_data *new, struct core_data *old)
1246 {
1247         int i;
1248         struct msr_counter *mp;
1249
1250         old->c3 = new->c3 - old->c3;
1251         old->c6 = new->c6 - old->c6;
1252         old->c7 = new->c7 - old->c7;
1253         old->core_temp_c = new->core_temp_c;
1254         old->mc6_us = new->mc6_us - old->mc6_us;
1255
1256         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1257                 if (mp->format == FORMAT_RAW)
1258                         old->counter[i] = new->counter[i];
1259                 else
1260                         old->counter[i] = new->counter[i] - old->counter[i];
1261         }
1262 }
1263
1264 /*
1265  * old = new - old
1266  */
1267 int
1268 delta_thread(struct thread_data *new, struct thread_data *old,
1269         struct core_data *core_delta)
1270 {
1271         int i;
1272         struct msr_counter *mp;
1273
1274         /* we run cpuid just the 1st time, copy the results */
1275         if (DO_BIC(BIC_APIC))
1276                 new->apic_id = old->apic_id;
1277         if (DO_BIC(BIC_X2APIC))
1278                 new->x2apic_id = old->x2apic_id;
1279
1280         /*
1281          * the timestamps from start of measurement interval are in "old"
1282          * the timestamp from end of measurement interval are in "new"
1283          * over-write old w/ new so we can print end of interval values
1284          */
1285
1286         old->tv_begin = new->tv_begin;
1287         old->tv_end = new->tv_end;
1288
1289         old->tsc = new->tsc - old->tsc;
1290
1291         /* check for TSC < 1 Mcycles over interval */
1292         if (old->tsc < (1000 * 1000))
1293                 errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n"
1294                      "You can disable all c-states by booting with \"idle=poll\"\n"
1295                      "or just the deep ones with \"processor.max_cstate=1\"");
1296
1297         old->c1 = new->c1 - old->c1;
1298
1299         if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) {
1300                 if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
1301                         old->aperf = new->aperf - old->aperf;
1302                         old->mperf = new->mperf - old->mperf;
1303                 } else {
1304                         return -1;
1305                 }
1306         }
1307
1308
1309         if (use_c1_residency_msr) {
1310                 /*
1311                  * Some models have a dedicated C1 residency MSR,
1312                  * which should be more accurate than the derivation below.
1313                  */
1314         } else {
1315                 /*
1316                  * As counter collection is not atomic,
1317                  * it is possible for mperf's non-halted cycles + idle states
1318                  * to exceed TSC's all cycles: show c1 = 0% in that case.
1319                  */
1320                 if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak))
1321                         old->c1 = 0;
1322                 else {
1323                         /* normal case, derive c1 */
1324                         old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3
1325                                 - core_delta->c6 - core_delta->c7;
1326                 }
1327         }
1328
1329         if (old->mperf == 0) {
1330                 if (debug > 1)
1331                         fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id);
1332                 old->mperf = 1; /* divide by 0 protection */
1333         }
1334
1335         if (DO_BIC(BIC_IRQ))
1336                 old->irq_count = new->irq_count - old->irq_count;
1337
1338         if (DO_BIC(BIC_SMI))
1339                 old->smi_count = new->smi_count - old->smi_count;
1340
1341         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1342                 if (mp->format == FORMAT_RAW)
1343                         old->counter[i] = new->counter[i];
1344                 else
1345                         old->counter[i] = new->counter[i] - old->counter[i];
1346         }
1347         return 0;
1348 }
1349
1350 int delta_cpu(struct thread_data *t, struct core_data *c,
1351         struct pkg_data *p, struct thread_data *t2,
1352         struct core_data *c2, struct pkg_data *p2)
1353 {
1354         int retval = 0;
1355
1356         /* calculate core delta only for 1st thread in core */
1357         if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
1358                 delta_core(c, c2);
1359
1360         /* always calculate thread delta */
1361         retval = delta_thread(t, t2, c2);       /* c2 is core delta */
1362         if (retval)
1363                 return retval;
1364
1365         /* calculate package delta only for 1st core in package */
1366         if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
1367                 retval = delta_package(p, p2);
1368
1369         return retval;
1370 }
1371
1372 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1373 {
1374         int i;
1375         struct msr_counter  *mp;
1376
1377         t->tv_begin.tv_sec = 0;
1378         t->tv_begin.tv_usec = 0;
1379         t->tv_end.tv_sec = 0;
1380         t->tv_end.tv_usec = 0;
1381
1382         t->tsc = 0;
1383         t->aperf = 0;
1384         t->mperf = 0;
1385         t->c1 = 0;
1386
1387         t->irq_count = 0;
1388         t->smi_count = 0;
1389
1390         /* tells format_counters to dump all fields from this set */
1391         t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
1392
1393         c->c3 = 0;
1394         c->c6 = 0;
1395         c->c7 = 0;
1396         c->mc6_us = 0;
1397         c->core_temp_c = 0;
1398
1399         p->pkg_wtd_core_c0 = 0;
1400         p->pkg_any_core_c0 = 0;
1401         p->pkg_any_gfxe_c0 = 0;
1402         p->pkg_both_core_gfxe_c0 = 0;
1403
1404         p->pc2 = 0;
1405         if (DO_BIC(BIC_Pkgpc3))
1406                 p->pc3 = 0;
1407         if (DO_BIC(BIC_Pkgpc6))
1408                 p->pc6 = 0;
1409         if (DO_BIC(BIC_Pkgpc7))
1410                 p->pc7 = 0;
1411         p->pc8 = 0;
1412         p->pc9 = 0;
1413         p->pc10 = 0;
1414         p->cpu_lpi = 0;
1415         p->sys_lpi = 0;
1416
1417         p->energy_pkg = 0;
1418         p->energy_dram = 0;
1419         p->energy_cores = 0;
1420         p->energy_gfx = 0;
1421         p->rapl_pkg_perf_status = 0;
1422         p->rapl_dram_perf_status = 0;
1423         p->pkg_temp_c = 0;
1424
1425         p->gfx_rc6_ms = 0;
1426         p->gfx_mhz = 0;
1427         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
1428                 t->counter[i] = 0;
1429
1430         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next)
1431                 c->counter[i] = 0;
1432
1433         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next)
1434                 p->counter[i] = 0;
1435 }
1436 int sum_counters(struct thread_data *t, struct core_data *c,
1437         struct pkg_data *p)
1438 {
1439         int i;
1440         struct msr_counter *mp;
1441
1442         /* copy un-changing apic_id's */
1443         if (DO_BIC(BIC_APIC))
1444                 average.threads.apic_id = t->apic_id;
1445         if (DO_BIC(BIC_X2APIC))
1446                 average.threads.x2apic_id = t->x2apic_id;
1447
1448         /* remember first tv_begin */
1449         if (average.threads.tv_begin.tv_sec == 0)
1450                 average.threads.tv_begin = t->tv_begin;
1451
1452         /* remember last tv_end */
1453         average.threads.tv_end = t->tv_end;
1454
1455         average.threads.tsc += t->tsc;
1456         average.threads.aperf += t->aperf;
1457         average.threads.mperf += t->mperf;
1458         average.threads.c1 += t->c1;
1459
1460         average.threads.irq_count += t->irq_count;
1461         average.threads.smi_count += t->smi_count;
1462
1463         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1464                 if (mp->format == FORMAT_RAW)
1465                         continue;
1466                 average.threads.counter[i] += t->counter[i];
1467         }
1468
1469         /* sum per-core values only for 1st thread in core */
1470         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1471                 return 0;
1472
1473         average.cores.c3 += c->c3;
1474         average.cores.c6 += c->c6;
1475         average.cores.c7 += c->c7;
1476         average.cores.mc6_us += c->mc6_us;
1477
1478         average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
1479
1480         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1481                 if (mp->format == FORMAT_RAW)
1482                         continue;
1483                 average.cores.counter[i] += c->counter[i];
1484         }
1485
1486         /* sum per-pkg values only for 1st core in pkg */
1487         if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1488                 return 0;
1489
1490         if (DO_BIC(BIC_Totl_c0))
1491                 average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
1492         if (DO_BIC(BIC_Any_c0))
1493                 average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
1494         if (DO_BIC(BIC_GFX_c0))
1495                 average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
1496         if (DO_BIC(BIC_CPUGFX))
1497                 average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;
1498
1499         average.packages.pc2 += p->pc2;
1500         if (DO_BIC(BIC_Pkgpc3))
1501                 average.packages.pc3 += p->pc3;
1502         if (DO_BIC(BIC_Pkgpc6))
1503                 average.packages.pc6 += p->pc6;
1504         if (DO_BIC(BIC_Pkgpc7))
1505                 average.packages.pc7 += p->pc7;
1506         average.packages.pc8 += p->pc8;
1507         average.packages.pc9 += p->pc9;
1508         average.packages.pc10 += p->pc10;
1509
1510         average.packages.cpu_lpi = p->cpu_lpi;
1511         average.packages.sys_lpi = p->sys_lpi;
1512
1513         average.packages.energy_pkg += p->energy_pkg;
1514         average.packages.energy_dram += p->energy_dram;
1515         average.packages.energy_cores += p->energy_cores;
1516         average.packages.energy_gfx += p->energy_gfx;
1517
1518         average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
1519         average.packages.gfx_mhz = p->gfx_mhz;
1520
1521         average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
1522
1523         average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
1524         average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
1525
1526         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1527                 if (mp->format == FORMAT_RAW)
1528                         continue;
1529                 average.packages.counter[i] += p->counter[i];
1530         }
1531         return 0;
1532 }
1533 /*
1534  * sum the counters for all cpus in the system
1535  * compute the weighted average
1536  */
1537 void compute_average(struct thread_data *t, struct core_data *c,
1538         struct pkg_data *p)
1539 {
1540         int i;
1541         struct msr_counter *mp;
1542
1543         clear_counters(&average.threads, &average.cores, &average.packages);
1544
1545         for_all_cpus(sum_counters, t, c, p);
1546
1547         average.threads.tsc /= topo.num_cpus;
1548         average.threads.aperf /= topo.num_cpus;
1549         average.threads.mperf /= topo.num_cpus;
1550         average.threads.c1 /= topo.num_cpus;
1551
1552         if (average.threads.irq_count > 9999999)
1553                 sums_need_wide_columns = 1;
1554
1555         average.cores.c3 /= topo.num_cores;
1556         average.cores.c6 /= topo.num_cores;
1557         average.cores.c7 /= topo.num_cores;
1558         average.cores.mc6_us /= topo.num_cores;
1559
1560         if (DO_BIC(BIC_Totl_c0))
1561                 average.packages.pkg_wtd_core_c0 /= topo.num_packages;
1562         if (DO_BIC(BIC_Any_c0))
1563                 average.packages.pkg_any_core_c0 /= topo.num_packages;
1564         if (DO_BIC(BIC_GFX_c0))
1565                 average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
1566         if (DO_BIC(BIC_CPUGFX))
1567                 average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
1568
1569         average.packages.pc2 /= topo.num_packages;
1570         if (DO_BIC(BIC_Pkgpc3))
1571                 average.packages.pc3 /= topo.num_packages;
1572         if (DO_BIC(BIC_Pkgpc6))
1573                 average.packages.pc6 /= topo.num_packages;
1574         if (DO_BIC(BIC_Pkgpc7))
1575                 average.packages.pc7 /= topo.num_packages;
1576
1577         average.packages.pc8 /= topo.num_packages;
1578         average.packages.pc9 /= topo.num_packages;
1579         average.packages.pc10 /= topo.num_packages;
1580
1581         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1582                 if (mp->format == FORMAT_RAW)
1583                         continue;
1584                 if (mp->type == COUNTER_ITEMS) {
1585                         if (average.threads.counter[i] > 9999999)
1586                                 sums_need_wide_columns = 1;
1587                         continue;
1588                 }
1589                 average.threads.counter[i] /= topo.num_cpus;
1590         }
1591         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1592                 if (mp->format == FORMAT_RAW)
1593                         continue;
1594                 if (mp->type == COUNTER_ITEMS) {
1595                         if (average.cores.counter[i] > 9999999)
1596                                 sums_need_wide_columns = 1;
1597                 }
1598                 average.cores.counter[i] /= topo.num_cores;
1599         }
1600         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1601                 if (mp->format == FORMAT_RAW)
1602                         continue;
1603                 if (mp->type == COUNTER_ITEMS) {
1604                         if (average.packages.counter[i] > 9999999)
1605                                 sums_need_wide_columns = 1;
1606                 }
1607                 average.packages.counter[i] /= topo.num_packages;
1608         }
1609 }
1610
/*
 * rdtsc()
 *
 * Execute the x86 RDTSC instruction on the current CPU and return the
 * 64-bit value assembled from EDX (high half) and EAX (low half).
 */
static unsigned long long rdtsc(void)
{
	unsigned int lo, hi;
	unsigned long long tsc;

	__asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));

	tsc = hi;
	tsc <<= 32;
	tsc |= lo;

	return tsc;
}
1619
/*
 * fopen_or_die()
 *
 * fopen() path with the given mode and return the stream.
 * If the open fails, report "<path>: open failed" through err()
 * and exit with status 1.
 */
FILE *fopen_or_die(const char *path, const char *mode)
{
	FILE *stream;

	stream = fopen(path, mode);
	if (stream == NULL)
		err(1, "%s: open failed", path);

	return stream;
}
/*
 * snapshot_sysfs_counter()
 *
 * Open the file at path, parse a single unsigned decimal integer from
 * it and return that value.
 *
 * Exits through err() if the file cannot be opened (see fopen_or_die())
 * or if no integer can be parsed from it.
 */
unsigned long long snapshot_sysfs_counter(char *path)
{
	FILE *fp;
	int retval;
	unsigned long long counter;

	fp = fopen_or_die(path, "r");

	/* the destination is unsigned long long, so the conversion must be %llu */
	retval = fscanf(fp, "%llu", &counter);
	if (retval != 1)
		err(1, "snapshot_sysfs_counter(%s)", path);

	fclose(fp);

	return counter;
}
1652
1653 int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
1654 {
1655         if (mp->msr_num != 0) {
1656                 if (get_msr(cpu, mp->msr_num, counterp))
1657                         return -1;
1658         } else {
1659                 char path[128 + PATH_BYTES];
1660
1661                 if (mp->flags & SYSFS_PERCPU) {
1662                         sprintf(path, "/sys/devices/system/cpu/cpu%d/%s",
1663                                  cpu, mp->path);
1664
1665                         *counterp = snapshot_sysfs_counter(path);
1666                 } else {
1667                         *counterp = snapshot_sysfs_counter(mp->path);
1668                 }
1669         }
1670
1671         return 0;
1672 }
1673
1674 void get_apic_id(struct thread_data *t)
1675 {
1676         unsigned int eax, ebx, ecx, edx;
1677
1678         if (DO_BIC(BIC_APIC)) {
1679                 eax = ebx = ecx = edx = 0;
1680                 __cpuid(1, eax, ebx, ecx, edx);
1681
1682                 t->apic_id = (ebx >> 24) & 0xff;
1683         }
1684
1685         if (!DO_BIC(BIC_X2APIC))
1686                 return;
1687
1688         if (authentic_amd) {
1689                 unsigned int topology_extensions;
1690
1691                 if (max_extended_level < 0x8000001e)
1692                         return;
1693
1694                 eax = ebx = ecx = edx = 0;
1695                 __cpuid(0x80000001, eax, ebx, ecx, edx);
1696                         topology_extensions = ecx & (1 << 22);
1697
1698                 if (topology_extensions == 0)
1699                         return;
1700
1701                 eax = ebx = ecx = edx = 0;
1702                 __cpuid(0x8000001e, eax, ebx, ecx, edx);
1703
1704                 t->x2apic_id = eax;
1705                 return;
1706         }
1707
1708         if (!genuine_intel)
1709                 return;
1710
1711         if (max_level < 0xb)
1712                 return;
1713
1714         ecx = 0;
1715         __cpuid(0xb, eax, ebx, ecx, edx);
1716         t->x2apic_id = edx;
1717
1718         if (debug && (t->apic_id != (t->x2apic_id & 0xff)))
1719                 fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n",
1720                                 t->cpu_id, t->apic_id, t->x2apic_id);
1721 }
1722
1723 /*
1724  * get_counters(...)
1725  * migrate to cpu
1726  * acquire and record local counters for that cpu
1727  */
1728 int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1729 {
1730         int cpu = t->cpu_id;
1731         unsigned long long msr;
1732         int aperf_mperf_retry_count = 0;
1733         struct msr_counter *mp;
1734         int i;
1735
1736         gettimeofday(&t->tv_begin, (struct timezone *)NULL);
1737
1738         if (cpu_migrate(cpu)) {
1739                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
1740                 return -1;
1741         }
1742
1743         if (first_counter_read)
1744                 get_apic_id(t);
1745 retry:
1746         t->tsc = rdtsc();       /* we are running on local CPU of interest */
1747
1748         if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) {
1749                 unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
1750
1751                 /*
1752                  * The TSC, APERF and MPERF must be read together for
1753                  * APERF/MPERF and MPERF/TSC to give accurate results.
1754                  *
1755                  * Unfortunately, APERF and MPERF are read by
1756                  * individual system call, so delays may occur
1757                  * between them.  If the time to read them
1758                  * varies by a large amount, we re-read them.
1759                  */
1760
1761                 /*
1762                  * This initial dummy APERF read has been seen to
1763                  * reduce jitter in the subsequent reads.
1764                  */
1765
1766                 if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
1767                         return -3;
1768
1769                 t->tsc = rdtsc();       /* re-read close to APERF */
1770
1771                 tsc_before = t->tsc;
1772
1773                 if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
1774                         return -3;
1775
1776                 tsc_between = rdtsc();
1777
1778                 if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
1779                         return -4;
1780
1781                 tsc_after = rdtsc();
1782
1783                 aperf_time = tsc_between - tsc_before;
1784                 mperf_time = tsc_after - tsc_between;
1785
1786                 /*
1787                  * If the system call latency to read APERF and MPERF
1788                  * differ by more than 2x, then try again.
1789                  */
1790                 if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
1791                         aperf_mperf_retry_count++;
1792                         if (aperf_mperf_retry_count < 5)
1793                                 goto retry;
1794                         else
1795                                 warnx("cpu%d jitter %lld %lld",
1796                                         cpu, aperf_time, mperf_time);
1797                 }
1798                 aperf_mperf_retry_count = 0;
1799
1800                 t->aperf = t->aperf * aperf_mperf_multiplier;
1801                 t->mperf = t->mperf * aperf_mperf_multiplier;
1802         }
1803
1804         if (DO_BIC(BIC_IRQ))
1805                 t->irq_count = irqs_per_cpu[cpu];
1806         if (DO_BIC(BIC_SMI)) {
1807                 if (get_msr(cpu, MSR_SMI_COUNT, &msr))
1808                         return -5;
1809                 t->smi_count = msr & 0xFFFFFFFF;
1810         }
1811         if (DO_BIC(BIC_CPU_c1) && use_c1_residency_msr) {
1812                 if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
1813                         return -6;
1814         }
1815
1816         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1817                 if (get_mp(cpu, mp, &t->counter[i]))
1818                         return -10;
1819         }
1820
1821         /* collect core counters only for 1st thread in core */
1822         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1823                 goto done;
1824
1825         if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates) {
1826                 if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
1827                         return -6;
1828         }
1829
1830         if (DO_BIC(BIC_CPU_c6) && !do_knl_cstates) {
1831                 if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
1832                         return -7;
1833         } else if (do_knl_cstates) {
1834                 if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
1835                         return -7;
1836         }
1837
1838         if (DO_BIC(BIC_CPU_c7))
1839                 if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
1840                         return -8;
1841
1842         if (DO_BIC(BIC_Mod_c6))
1843                 if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
1844                         return -8;
1845
1846         if (DO_BIC(BIC_CoreTmp)) {
1847                 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
1848                         return -9;
1849                 c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
1850         }
1851
1852         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1853                 if (get_mp(cpu, mp, &c->counter[i]))
1854                         return -10;
1855         }
1856
1857         /* collect package counters only for 1st core in package */
1858         if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1859                 goto done;
1860
1861         if (DO_BIC(BIC_Totl_c0)) {
1862                 if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
1863                         return -10;
1864         }
1865         if (DO_BIC(BIC_Any_c0)) {
1866                 if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
1867                         return -11;
1868         }
1869         if (DO_BIC(BIC_GFX_c0)) {
1870                 if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
1871                         return -12;
1872         }
1873         if (DO_BIC(BIC_CPUGFX)) {
1874                 if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
1875                         return -13;
1876         }
1877         if (DO_BIC(BIC_Pkgpc3))
1878                 if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
1879                         return -9;
1880         if (DO_BIC(BIC_Pkgpc6)) {
1881                 if (do_slm_cstates) {
1882                         if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6))
1883                                 return -10;
1884                 } else {
1885                         if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
1886                                 return -10;
1887                 }
1888         }
1889
1890         if (DO_BIC(BIC_Pkgpc2))
1891                 if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2))
1892                         return -11;
1893         if (DO_BIC(BIC_Pkgpc7))
1894                 if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
1895                         return -12;
1896         if (DO_BIC(BIC_Pkgpc8))
1897                 if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8))
1898                         return -13;
1899         if (DO_BIC(BIC_Pkgpc9))
1900                 if (get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9))
1901                         return -13;
1902         if (DO_BIC(BIC_Pkgpc10))
1903                 if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10))
1904                         return -13;
1905
1906         if (DO_BIC(BIC_CPU_LPI))
1907                 p->cpu_lpi = cpuidle_cur_cpu_lpi_us;
1908         if (DO_BIC(BIC_SYS_LPI))
1909                 p->sys_lpi = cpuidle_cur_sys_lpi_us;
1910
1911         if (do_rapl & RAPL_PKG) {
1912                 if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr))
1913                         return -13;
1914                 p->energy_pkg = msr & 0xFFFFFFFF;
1915         }
1916         if (do_rapl & RAPL_CORES_ENERGY_STATUS) {
1917                 if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr))
1918                         return -14;
1919                 p->energy_cores = msr & 0xFFFFFFFF;
1920         }
1921         if (do_rapl & RAPL_DRAM) {
1922                 if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
1923                         return -15;
1924                 p->energy_dram = msr & 0xFFFFFFFF;
1925         }
1926         if (do_rapl & RAPL_GFX) {
1927                 if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr))
1928                         return -16;
1929                 p->energy_gfx = msr & 0xFFFFFFFF;
1930         }
1931         if (do_rapl & RAPL_PKG_PERF_STATUS) {
1932                 if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr))
1933                         return -16;
1934                 p->rapl_pkg_perf_status = msr & 0xFFFFFFFF;
1935         }
1936         if (do_rapl & RAPL_DRAM_PERF_STATUS) {
1937                 if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr))
1938                         return -16;
1939                 p->rapl_dram_perf_status = msr & 0xFFFFFFFF;
1940         }
1941         if (DO_BIC(BIC_PkgTmp)) {
1942                 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
1943                         return -17;
1944                 p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
1945         }
1946
1947         if (DO_BIC(BIC_GFX_rc6))
1948                 p->gfx_rc6_ms = gfx_cur_rc6_ms;
1949
1950         if (DO_BIC(BIC_GFXMHz))
1951                 p->gfx_mhz = gfx_cur_mhz;
1952
1953         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1954                 if (get_mp(cpu, mp, &p->counter[i]))
1955                         return -10;
1956         }
1957 done:
1958         gettimeofday(&t->tv_end, (struct timezone *)NULL);
1959
1960         return 0;
1961 }
1962
/*
 * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit:
 * If you change the values, note they are used both in comparisons
 * (>= PCL__7) and to index pkg_cstate_limit_strings[].
 */

#define PCLUKN 0 /* Unknown */
#define PCLRSV 1 /* Reserved */
#define PCL__0 2 /* PC0 */
#define PCL__1 3 /* PC1 */
#define PCL__2 4 /* PC2 */
#define PCL__3 5 /* PC3 */
#define PCL__4 6 /* PC4 */
#define PCL__6 7 /* PC6 */
#define PCL_6N 8 /* PC6 No Retention */
#define PCL_6R 9 /* PC6 Retention */
#define PCL__7 10 /* PC7 */
#define PCL_7S 11 /* PC7 Shrink */
#define PCL__8 12 /* PC8 */
#define PCL__9 13 /* PC9 */
#define PCL_10 14 /* PC10 */
#define PCLUNL 15 /* Unlimited */

int pkg_cstate_limit = PCLUKN;

/*
 * Human-readable names, indexed by the PCL* values above: entry 0 is
 * PCLUKN ("unknown") and entry 1 is PCLRSV ("reserved").
 */
char *pkg_cstate_limit_strings[] = { "unknown", "reserved", "pc0", "pc1", "pc2",
	"pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "pc10", "unlimited"};

/* per-platform tables: each has 16 entries holding PCL* values */
int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7};
int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int glm_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1998
1999
2000 static void
2001 calculate_tsc_tweak()
2002 {
2003         tsc_tweak = base_hz / tsc_hz;
2004 }
2005
2006 static void
2007 dump_nhm_platform_info(void)
2008 {
2009         unsigned long long msr;
2010         unsigned int ratio;
2011
2012         get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
2013
2014         fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
2015
2016         ratio = (msr >> 40) & 0xFF;
2017         fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n",
2018                 ratio, bclk, ratio * bclk);
2019
2020         ratio = (msr >> 8) & 0xFF;
2021         fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
2022                 ratio, bclk, ratio * bclk);
2023
2024         get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
2025         fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
2026                 base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
2027
2028         return;
2029 }
2030
2031 static void
2032 dump_hsw_turbo_ratio_limits(void)
2033 {
2034         unsigned long long msr;
2035         unsigned int ratio;
2036
2037         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
2038
2039         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);
2040
2041         ratio = (msr >> 8) & 0xFF;
2042         if (ratio)
2043                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n",
2044                         ratio, bclk, ratio * bclk);
2045
2046         ratio = (msr >> 0) & 0xFF;
2047         if (ratio)
2048                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n",
2049                         ratio, bclk, ratio * bclk);
2050         return;
2051 }
2052
2053 static void
2054 dump_ivt_turbo_ratio_limits(void)
2055 {
2056         unsigned long long msr;
2057         unsigned int ratio;
2058
2059         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
2060
2061         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);
2062
2063         ratio = (msr >> 56) & 0xFF;
2064         if (ratio)
2065                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n",
2066                         ratio, bclk, ratio * bclk);
2067
2068         ratio = (msr >> 48) & 0xFF;
2069         if (ratio)
2070                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n",
2071                         ratio, bclk, ratio * bclk);
2072
2073         ratio = (msr >> 40) & 0xFF;
2074         if (ratio)
2075                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n",
2076                         ratio, bclk, ratio * bclk);
2077
2078         ratio = (msr >> 32) & 0xFF;
2079         if (ratio)
2080                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n",
2081                         ratio, bclk, ratio * bclk);
2082
2083         ratio = (msr >> 24) & 0xFF;
2084         if (ratio)
2085                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n",
2086                         ratio, bclk, ratio * bclk);
2087
2088         ratio = (msr >> 16) & 0xFF;
2089         if (ratio)
2090                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n",
2091                         ratio, bclk, ratio * bclk);
2092
2093         ratio = (msr >> 8) & 0xFF;
2094         if (ratio)
2095                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n",
2096                         ratio, bclk, ratio * bclk);
2097
2098         ratio = (msr >> 0) & 0xFF;
2099         if (ratio)
2100                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n",
2101                         ratio, bclk, ratio * bclk);
2102         return;
2103 }
2104 int has_turbo_ratio_group_limits(int family, int model)
2105 {
2106
2107         if (!genuine_intel)
2108                 return 0;
2109
2110         switch (model) {
2111         case INTEL_FAM6_ATOM_GOLDMONT:
2112         case INTEL_FAM6_SKYLAKE_X:
2113         case INTEL_FAM6_ATOM_GOLDMONT_X:
2114                 return 1;
2115         }
2116         return 0;
2117 }
2118
2119 static void
2120 dump_turbo_ratio_limits(int family, int model)
2121 {
2122         unsigned long long msr, core_counts;
2123         unsigned int ratio, group_size;
2124
2125         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
2126         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
2127
2128         if (has_turbo_ratio_group_limits(family, model)) {
2129                 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts);
2130                 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts);
2131         } else {
2132                 core_counts = 0x0807060504030201;
2133         }
2134
2135         ratio = (msr >> 56) & 0xFF;
2136         group_size = (core_counts >> 56) & 0xFF;
2137         if (ratio)
2138                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2139                         ratio, bclk, ratio * bclk, group_size);
2140
2141         ratio = (msr >> 48) & 0xFF;
2142         group_size = (core_counts >> 48) & 0xFF;
2143         if (ratio)
2144                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2145                         ratio, bclk, ratio * bclk, group_size);
2146
2147         ratio = (msr >> 40) & 0xFF;
2148         group_size = (core_counts >> 40) & 0xFF;
2149         if (ratio)
2150                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2151                         ratio, bclk, ratio * bclk, group_size);
2152
2153         ratio = (msr >> 32) & 0xFF;
2154         group_size = (core_counts >> 32) & 0xFF;
2155         if (ratio)
2156                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2157                         ratio, bclk, ratio * bclk, group_size);
2158
2159         ratio = (msr >> 24) & 0xFF;
2160         group_size = (core_counts >> 24) & 0xFF;
2161         if (ratio)
2162                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2163                         ratio, bclk, ratio * bclk, group_size);
2164
2165         ratio = (msr >> 16) & 0xFF;
2166         group_size = (core_counts >> 16) & 0xFF;
2167         if (ratio)
2168                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2169                         ratio, bclk, ratio * bclk, group_size);
2170
2171         ratio = (msr >> 8) & 0xFF;
2172         group_size = (core_counts >> 8) & 0xFF;
2173         if (ratio)
2174                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2175                         ratio, bclk, ratio * bclk, group_size);
2176
2177         ratio = (msr >> 0) & 0xFF;
2178         group_size = (core_counts >> 0) & 0xFF;
2179         if (ratio)
2180                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2181                         ratio, bclk, ratio * bclk, group_size);
2182         return;
2183 }
2184
2185 static void
2186 dump_atom_turbo_ratio_limits(void)
2187 {
2188         unsigned long long msr;
2189         unsigned int ratio;
2190
2191         get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr);
2192         fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
2193
2194         ratio = (msr >> 0) & 0x3F;
2195         if (ratio)
2196                 fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n",
2197                         ratio, bclk, ratio * bclk);
2198
2199         ratio = (msr >> 8) & 0x3F;
2200         if (ratio)
2201                 fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n",
2202                         ratio, bclk, ratio * bclk);
2203
2204         ratio = (msr >> 16) & 0x3F;
2205         if (ratio)
2206                 fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
2207                         ratio, bclk, ratio * bclk);
2208
2209         get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr);
2210         fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
2211
2212         ratio = (msr >> 24) & 0x3F;
2213         if (ratio)
2214                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n",
2215                         ratio, bclk, ratio * bclk);
2216
2217         ratio = (msr >> 16) & 0x3F;
2218         if (ratio)
2219                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n",
2220                         ratio, bclk, ratio * bclk);
2221
2222         ratio = (msr >> 8) & 0x3F;
2223         if (ratio)
2224                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n",
2225                         ratio, bclk, ratio * bclk);
2226
2227         ratio = (msr >> 0) & 0x3F;
2228         if (ratio)
2229                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n",
2230                         ratio, bclk, ratio * bclk);
2231 }
2232
2233 static void
2234 dump_knl_turbo_ratio_limits(void)
2235 {
2236         const unsigned int buckets_no = 7;
2237
2238         unsigned long long msr;
2239         int delta_cores, delta_ratio;
2240         int i, b_nr;
2241         unsigned int cores[buckets_no];
2242         unsigned int ratio[buckets_no];
2243
2244         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
2245
2246         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n",
2247                 base_cpu, msr);
2248
2249         /**
2250          * Turbo encoding in KNL is as follows:
2251          * [0] -- Reserved
2252          * [7:1] -- Base value of number of active cores of bucket 1.
2253          * [15:8] -- Base value of freq ratio of bucket 1.
2254          * [20:16] -- +ve delta of number of active cores of bucket 2.
2255          * i.e. active cores of bucket 2 =
2256          * active cores of bucket 1 + delta
2257          * [23:21] -- Negative delta of freq ratio of bucket 2.
2258          * i.e. freq ratio of bucket 2 =
2259          * freq ratio of bucket 1 - delta
2260          * [28:24]-- +ve delta of number of active cores of bucket 3.
2261          * [31:29]-- -ve delta of freq ratio of bucket 3.
2262          * [36:32]-- +ve delta of number of active cores of bucket 4.
2263          * [39:37]-- -ve delta of freq ratio of bucket 4.
2264          * [44:40]-- +ve delta of number of active cores of bucket 5.
2265          * [47:45]-- -ve delta of freq ratio of bucket 5.
2266          * [52:48]-- +ve delta of number of active cores of bucket 6.
2267          * [55:53]-- -ve delta of freq ratio of bucket 6.
2268          * [60:56]-- +ve delta of number of active cores of bucket 7.
2269          * [63:61]-- -ve delta of freq ratio of bucket 7.
2270          */
2271
2272         b_nr = 0;
2273         cores[b_nr] = (msr & 0xFF) >> 1;
2274         ratio[b_nr] = (msr >> 8) & 0xFF;
2275
2276         for (i = 16; i < 64; i += 8) {
2277                 delta_cores = (msr >> i) & 0x1F;
2278                 delta_ratio = (msr >> (i + 5)) & 0x7;
2279
2280                 cores[b_nr + 1] = cores[b_nr] + delta_cores;
2281                 ratio[b_nr + 1] = ratio[b_nr] - delta_ratio;
2282                 b_nr++;
2283         }
2284
2285         for (i = buckets_no - 1; i >= 0; i--)
2286                 if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
2287                         fprintf(outf,
2288                                 "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2289                                 ratio[i], bclk, ratio[i] * bclk, cores[i]);
2290 }
2291
2292 static void
2293 dump_nhm_cst_cfg(void)
2294 {
2295         unsigned long long msr;
2296
2297         get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
2298
2299         fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr);
2300
2301         fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)",
2302                 (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
2303                 (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
2304                 (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
2305                 (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
2306                 (msr & (1 << 15)) ? "" : "UN",
2307                 (unsigned int)msr & 0xF,
2308                 pkg_cstate_limit_strings[pkg_cstate_limit]);
2309
2310 #define AUTOMATIC_CSTATE_CONVERSION             (1UL << 16)
2311         if (has_automatic_cstate_conversion) {
2312                 fprintf(outf, ", automatic c-state conversion=%s",
2313                         (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off");
2314         }
2315
2316         fprintf(outf, ")\n");
2317
2318         return;
2319 }
2320
2321 static void
2322 dump_config_tdp(void)
2323 {
2324         unsigned long long msr;
2325
2326         get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr);
2327         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
2328         fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF);
2329
2330         get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr);
2331         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
2332         if (msr) {
2333                 fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
2334                 fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
2335                 fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
2336                 fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF);
2337         }
2338         fprintf(outf, ")\n");
2339
2340         get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr);
2341         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
2342         if (msr) {
2343                 fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
2344                 fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
2345                 fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
2346                 fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF);
2347         }
2348         fprintf(outf, ")\n");
2349
2350         get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr);
2351         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
2352         if ((msr) & 0x3)
2353                 fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
2354         fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
2355         fprintf(outf, ")\n");
2356
2357         get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
2358         fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
2359         fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF);
2360         fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
2361         fprintf(outf, ")\n");
2362 }
2363
2364 unsigned int irtl_time_units[] = {1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };
2365
2366 void print_irtl(void)
2367 {
2368         unsigned long long msr;
2369
2370         get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
2371         fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
2372         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2373                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2374
2375         get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
2376         fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
2377         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2378                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2379
2380         get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
2381         fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
2382         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2383                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2384
2385         if (!do_irtl_hsw)
2386                 return;
2387
2388         get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
2389         fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
2390         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2391                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2392
2393         get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
2394         fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
2395         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2396                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2397
2398         get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
2399         fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
2400         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2401                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2402
2403 }
2404 void free_fd_percpu(void)
2405 {
2406         int i;
2407
2408         for (i = 0; i < topo.max_cpu_num + 1; ++i) {
2409                 if (fd_percpu[i] != 0)
2410                         close(fd_percpu[i]);
2411         }
2412
2413         free(fd_percpu);
2414 }
2415
2416 void free_all_buffers(void)
2417 {
2418         int i;
2419
2420         CPU_FREE(cpu_present_set);
2421         cpu_present_set = NULL;
2422         cpu_present_setsize = 0;
2423
2424         CPU_FREE(cpu_affinity_set);
2425         cpu_affinity_set = NULL;
2426         cpu_affinity_setsize = 0;
2427
2428         free(thread_even);
2429         free(core_even);
2430         free(package_even);
2431
2432         thread_even = NULL;
2433         core_even = NULL;
2434         package_even = NULL;
2435
2436         free(thread_odd);
2437         free(core_odd);
2438         free(package_odd);
2439
2440         thread_odd = NULL;
2441         core_odd = NULL;
2442         package_odd = NULL;
2443
2444         free(output_buffer);
2445         output_buffer = NULL;
2446         outp = NULL;
2447
2448         free_fd_percpu();
2449
2450         free(irq_column_2_cpu);
2451         free(irqs_per_cpu);
2452
2453         for (i = 0; i <= topo.max_cpu_num; ++i) {
2454                 if (cpus[i].put_ids)
2455                         CPU_FREE(cpus[i].put_ids);
2456         }
2457         free(cpus);
2458 }
2459
2460
2461 /*
2462  * Parse a file containing a single int.
2463  */
2464 int parse_int_file(const char *fmt, ...)
2465 {
2466         va_list args;
2467         char path[PATH_MAX];
2468         FILE *filep;
2469         int value;
2470
2471         va_start(args, fmt);
2472         vsnprintf(path, sizeof(path), fmt, args);
2473         va_end(args);
2474         filep = fopen_or_die(path, "r");
2475         if (fscanf(filep, "%d", &value) != 1)
2476                 err(1, "%s: failed to parse number from file", path);
2477         fclose(filep);
2478         return value;
2479 }
2480
/*
 * cpu_is_first_core_in_package(cpu)
 *
 * Return 1 when the first number listed in this cpu's core_siblings_list
 * is the cpu itself, i.e. the given CPU is the 1st core in its package;
 * return 0 otherwise.
 */
int cpu_is_first_core_in_package(int cpu)
{
	int first_sibling;

	first_sibling = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);

	return first_sibling == cpu;
}
2489
/*
 * get_physical_package_id(cpu)
 *
 * Return the integer stored in the cpu's sysfs topology/physical_package_id.
 */
int get_physical_package_id(int cpu)
{
	int package_id;

	package_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);

	return package_id;
}
2494
/*
 * get_core_id(cpu)
 *
 * Return the integer stored in the cpu's sysfs topology/core_id.
 */
int get_core_id(int cpu)
{
	int core_id;

	core_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);

	return core_id;
}
2499
2500 void set_node_data(void)
2501 {
2502         int pkg, node, lnode, cpu, cpux;
2503         int cpu_count;
2504
2505         /* initialize logical_node_id */
2506         for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu)
2507                 cpus[cpu].logical_node_id = -1;
2508
2509         cpu_count = 0;
2510         for (pkg = 0; pkg < topo.num_packages; pkg++) {
2511                 lnode = 0;
2512                 for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) {
2513                         if (cpus[cpu].physical_package_id != pkg)
2514                                 continue;
2515                         /* find a cpu with an unset logical_node_id */
2516                         if (cpus[cpu].logical_node_id != -1)
2517                                 continue;
2518                         cpus[cpu].logical_node_id = lnode;
2519                         node = cpus[cpu].physical_node_id;
2520                         cpu_count++;
2521                         /*
2522                          * find all matching cpus on this pkg and set
2523                          * the logical_node_id
2524                          */
2525                         for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) {
2526                                 if ((cpus[cpux].physical_package_id == pkg) &&
2527                                    (cpus[cpux].physical_node_id == node)) {
2528                                         cpus[cpux].logical_node_id = lnode;
2529                                         cpu_count++;
2530                                 }
2531                         }
2532                         lnode++;
2533                         if (lnode > topo.nodes_per_pkg)
2534                                 topo.nodes_per_pkg = lnode;
2535                 }
2536                 if (cpu_count >= topo.max_cpu_num)
2537                         break;
2538         }
2539 }
2540
2541 int get_physical_node_id(struct cpu_topology *thiscpu)
2542 {
2543         char path[80];
2544         FILE *filep;
2545         int i;
2546         int cpu = thiscpu->logical_cpu_id;
2547
2548         for (i = 0; i <= topo.max_cpu_num; i++) {
2549                 sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist",
2550                         cpu, i);
2551                 filep = fopen(path, "r");
2552                 if (!filep)
2553                         continue;
2554                 fclose(filep);
2555                 return i;
2556         }
2557         return -1;
2558 }
2559
2560 int get_thread_siblings(struct cpu_topology *thiscpu)
2561 {
2562         char path[80], character;
2563         FILE *filep;
2564         unsigned long map;
2565         int so, shift, sib_core;
2566         int cpu = thiscpu->logical_cpu_id;
2567         int offset = topo.max_cpu_num + 1;
2568         size_t size;
2569         int thread_id = 0;
2570
2571         thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1));
2572         if (thiscpu->thread_id < 0)
2573                 thiscpu->thread_id = thread_id++;
2574         if (!thiscpu->put_ids)
2575                 return -1;
2576
2577         size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
2578         CPU_ZERO_S(size, thiscpu->put_ids);
2579
2580         sprintf(path,
2581                 "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
2582         filep = fopen_or_die(path, "r");
2583         do {
2584                 offset -= BITMASK_SIZE;
2585                 fscanf(filep, "%lx%c", &map, &character);
2586                 for (shift = 0; shift < BITMASK_SIZE; shift++) {
2587                         if ((map >> shift) & 0x1) {
2588                                 so = shift + offset;
2589                                 sib_core = get_core_id(so);
2590                                 if (sib_core == thiscpu->physical_core_id) {
2591                                         CPU_SET_S(so, size, thiscpu->put_ids);
2592                                         if ((so != cpu) &&
2593                                             (cpus[so].thread_id < 0))
2594                                                 cpus[so].thread_id =
2595                                                                     thread_id++;
2596                                 }
2597                         }
2598                 }
2599         } while (!strncmp(&character, ",", 1));
2600         fclose(filep);
2601
2602         return CPU_COUNT_S(size, thiscpu->put_ids);
2603 }
2604
2605 /*
2606  * run func(thread, core, package) in topology order
2607  * skip non-present cpus
2608  */
2609
2610 int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *,
2611         struct pkg_data *, struct thread_data *, struct core_data *,
2612         struct pkg_data *), struct thread_data *thread_base,
2613         struct core_data *core_base, struct pkg_data *pkg_base,
2614         struct thread_data *thread_base2, struct core_data *core_base2,
2615         struct pkg_data *pkg_base2)
2616 {
2617         int retval, pkg_no, node_no, core_no, thread_no;
2618
2619         for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
2620                 for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) {
2621                         for (core_no = 0; core_no < topo.cores_per_node;
2622                              ++core_no) {
2623                                 for (thread_no = 0; thread_no <
2624                                         topo.threads_per_core; ++thread_no) {
2625                                         struct thread_data *t, *t2;
2626                                         struct core_data *c, *c2;
2627                                         struct pkg_data *p, *p2;
2628
2629                                         t = GET_THREAD(thread_base, thread_no,
2630                                                        core_no, node_no,
2631                                                        pkg_no);
2632
2633                                         if (cpu_is_not_present(t->cpu_id))
2634                                                 continue;
2635
2636                                         t2 = GET_THREAD(thread_base2, thread_no,
2637                                                         core_no, node_no,
2638                                                         pkg_no);
2639
2640                                         c = GET_CORE(core_base, core_no,
2641                                                      node_no, pkg_no);
2642                                         c2 = GET_CORE(core_base2, core_no,
2643                                                       node_no,
2644                                                       pkg_no);
2645
2646                                         p = GET_PKG(pkg_base, pkg_no);
2647                                         p2 = GET_PKG(pkg_base2, pkg_no);
2648
2649                                         retval = func(t, c, p, t2, c2, p2);
2650                                         if (retval)
2651                                                 return retval;
2652                                 }
2653                         }
2654                 }
2655         }
2656         return 0;
2657 }
2658
2659 /*
2660  * run func(cpu) on every cpu in /proc/stat
2661  * return max_cpu number
2662  */
2663 int for_all_proc_cpus(int (func)(int))
2664 {
2665         FILE *fp;
2666         int cpu_num;
2667         int retval;
2668
2669         fp = fopen_or_die(proc_stat, "r");
2670
2671         retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
2672         if (retval != 0)
2673                 err(1, "%s: failed to parse format", proc_stat);
2674
2675         while (1) {
2676                 retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
2677                 if (retval != 1)
2678                         break;
2679
2680                 retval = func(cpu_num);
2681                 if (retval) {
2682                         fclose(fp);
2683                         return(retval);
2684                 }
2685         }
2686         fclose(fp);
2687         return 0;
2688 }
2689
2690 void re_initialize(void)
2691 {
2692         free_all_buffers();
2693         setup_all_buffers();
2694         printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
2695 }
2696
2697 void set_max_cpu_num(void)
2698 {
2699         FILE *filep;
2700         unsigned long dummy;
2701
2702         topo.max_cpu_num = 0;
2703         filep = fopen_or_die(
2704                         "/sys/devices/system/cpu/cpu0/topology/thread_siblings",
2705                         "r");
2706         while (fscanf(filep, "%lx,", &dummy) == 1)
2707                 topo.max_cpu_num += BITMASK_SIZE;
2708         fclose(filep);
2709         topo.max_cpu_num--; /* 0 based */
2710 }
2711
2712 /*
2713  * count_cpus()
2714  * remember the last one seen, it will be the max
2715  */
2716 int count_cpus(int cpu)
2717 {
2718         topo.num_cpus++;
2719         return 0;
2720 }
2721 int mark_cpu_present(int cpu)
2722 {
2723         CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
2724         return 0;
2725 }
2726
2727 int init_thread_id(int cpu)
2728 {
2729         cpus[cpu].thread_id = -1;
2730         return 0;
2731 }
2732
2733 /*
2734  * snapshot_proc_interrupts()
2735  *
2736  * read and record summary of /proc/interrupts
2737  *
2738  * return 1 if config change requires a restart, else return 0
2739  */
2740 int snapshot_proc_interrupts(void)
2741 {
2742         static FILE *fp;
2743         int column, retval;
2744
2745         if (fp == NULL)
2746                 fp = fopen_or_die("/proc/interrupts", "r");
2747         else
2748                 rewind(fp);
2749
2750         /* read 1st line of /proc/interrupts to get cpu* name for each column */
2751         for (column = 0; column < topo.num_cpus; ++column) {
2752                 int cpu_number;
2753
2754                 retval = fscanf(fp, " CPU%d", &cpu_number);
2755                 if (retval != 1)
2756                         break;
2757
2758                 if (cpu_number > topo.max_cpu_num) {
2759                         warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num);
2760                         return 1;
2761                 }
2762
2763                 irq_column_2_cpu[column] = cpu_number;
2764                 irqs_per_cpu[cpu_number] = 0;
2765         }
2766
2767         /* read /proc/interrupt count lines and sum up irqs per cpu */
2768         while (1) {
2769                 int column;
2770                 char buf[64];
2771
2772                 retval = fscanf(fp, " %s:", buf);       /* flush irq# "N:" */
2773                 if (retval != 1)
2774                         break;
2775
2776                 /* read the count per cpu */
2777                 for (column = 0; column < topo.num_cpus; ++column) {
2778
2779                         int cpu_number, irq_count;
2780
2781                         retval = fscanf(fp, " %d", &irq_count);
2782                         if (retval != 1)
2783                                 break;
2784
2785                         cpu_number = irq_column_2_cpu[column];
2786                         irqs_per_cpu[cpu_number] += irq_count;
2787
2788                 }
2789
2790                 while (getc(fp) != '\n')
2791                         ;       /* flush interrupt description */
2792
2793         }
2794         return 0;
2795 }
2796 /*
2797  * snapshot_gfx_rc6_ms()
2798  *
2799  * record snapshot of
2800  * /sys/class/drm/card0/power/rc6_residency_ms
2801  *
2802  * return 1 if config change requires a restart, else return 0
2803  */
2804 int snapshot_gfx_rc6_ms(void)
2805 {
2806         FILE *fp;
2807         int retval;
2808
2809         fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");
2810
2811         retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms);
2812         if (retval != 1)
2813                 err(1, "GFX rc6");
2814
2815         fclose(fp);
2816
2817         return 0;
2818 }
2819 /*
2820  * snapshot_gfx_mhz()
2821  *
2822  * record snapshot of
2823  * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
2824  *
2825  * return 1 if config change requires a restart, else return 0
2826  */
2827 int snapshot_gfx_mhz(void)
2828 {
2829         static FILE *fp;
2830         int retval;
2831
2832         if (fp == NULL)
2833                 fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
2834         else {
2835                 rewind(fp);
2836                 fflush(fp);
2837         }
2838
2839         retval = fscanf(fp, "%d", &gfx_cur_mhz);
2840         if (retval != 1)
2841                 err(1, "GFX MHz");
2842
2843         return 0;
2844 }
2845
2846 /*
2847  * snapshot_cpu_lpi()
2848  *
2849  * record snapshot of
2850  * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
2851  */
2852 int snapshot_cpu_lpi_us(void)
2853 {
2854         FILE *fp;
2855         int retval;
2856
2857         fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r");
2858
2859         retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us);
2860         if (retval != 1)
2861                 err(1, "CPU LPI");
2862
2863         fclose(fp);
2864
2865         return 0;
2866 }
2867 /*
2868  * snapshot_sys_lpi()
2869  *
2870  * record snapshot of sys_lpi_file
2871  */
2872 int snapshot_sys_lpi_us(void)
2873 {
2874         FILE *fp;
2875         int retval;
2876
2877         fp = fopen_or_die(sys_lpi_file, "r");
2878
2879         retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us);
2880         if (retval != 1)
2881                 err(1, "SYS LPI");
2882
2883         fclose(fp);
2884
2885         return 0;
2886 }
2887 /*
2888  * snapshot /proc and /sys files
2889  *
2890  * return 1 if configuration restart needed, else return 0
2891  */
2892 int snapshot_proc_sysfs_files(void)
2893 {
2894         if (DO_BIC(BIC_IRQ))
2895                 if (snapshot_proc_interrupts())
2896                         return 1;
2897
2898         if (DO_BIC(BIC_GFX_rc6))
2899                 snapshot_gfx_rc6_ms();
2900
2901         if (DO_BIC(BIC_GFXMHz))
2902                 snapshot_gfx_mhz();
2903
2904         if (DO_BIC(BIC_CPU_LPI))
2905                 snapshot_cpu_lpi_us();
2906
2907         if (DO_BIC(BIC_SYS_LPI))
2908                 snapshot_sys_lpi_us();
2909
2910         return 0;
2911 }
2912
2913 int exit_requested;
2914
2915 static void signal_handler (int signal)
2916 {
2917         switch (signal) {
2918         case SIGINT:
2919                 exit_requested = 1;
2920                 if (debug)
2921                         fprintf(stderr, " SIGINT\n");
2922                 break;
2923         case SIGUSR1:
2924                 if (debug > 1)
2925                         fprintf(stderr, "SIGUSR1\n");
2926                 break;
2927         }
2928         /* make sure this manually-invoked interval is at least 1ms long */
2929         nanosleep(&one_msec, NULL);
2930 }
2931
2932 void setup_signal_handler(void)
2933 {
2934         struct sigaction sa;
2935
2936         memset(&sa, 0, sizeof(sa));
2937
2938         sa.sa_handler = &signal_handler;
2939
2940         if (sigaction(SIGINT, &sa, NULL) < 0)
2941                 err(1, "sigaction SIGINT");
2942         if (sigaction(SIGUSR1, &sa, NULL) < 0)
2943                 err(1, "sigaction SIGUSR1");
2944 }
2945
2946 void do_sleep(void)
2947 {
2948         struct timeval select_timeout;
2949         fd_set readfds;
2950         int retval;
2951
2952         FD_ZERO(&readfds);
2953         FD_SET(0, &readfds);
2954
2955         if (!isatty(fileno(stdin))) {
2956                 nanosleep(&interval_ts, NULL);
2957                 return;
2958         }
2959
2960         select_timeout = interval_tv;
2961         retval = select(1, &readfds, NULL, NULL, &select_timeout);
2962
2963         if (retval == 1) {
2964                 switch (getc(stdin)) {
2965                 case 'q':
2966                         exit_requested = 1;
2967                         break;
2968                 }
2969                 /* make sure this manually-invoked interval is at least 1ms long */
2970                 nanosleep(&one_msec, NULL);
2971         }
2972 }
2973
2974
2975 void turbostat_loop()
2976 {
2977         int retval;
2978         int restarted = 0;
2979         int done_iters = 0;
2980
2981         setup_signal_handler();
2982
2983 restart:
2984         restarted++;
2985
2986         snapshot_proc_sysfs_files();
2987         retval = for_all_cpus(get_counters, EVEN_COUNTERS);
2988         first_counter_read = 0;
2989         if (retval < -1) {
2990                 exit(retval);
2991         } else if (retval == -1) {
2992                 if (restarted > 1) {
2993                         exit(retval);
2994                 }
2995                 re_initialize();
2996                 goto restart;
2997         }
2998         restarted = 0;
2999         done_iters = 0;
3000         gettimeofday(&tv_even, (struct timezone *)NULL);
3001
3002         while (1) {
3003                 if (for_all_proc_cpus(cpu_is_not_present)) {
3004                         re_initialize();
3005                         goto restart;
3006                 }
3007                 do_sleep();
3008                 if (snapshot_proc_sysfs_files())
3009                         goto restart;
3010                 retval = for_all_cpus(get_counters, ODD_COUNTERS);
3011                 if (retval < -1) {
3012                         exit(retval);
3013                 } else if (retval == -1) {
3014                         re_initialize();
3015                         goto restart;
3016                 }
3017                 gettimeofday(&tv_odd, (struct timezone *)NULL);
3018                 timersub(&tv_odd, &tv_even, &tv_delta);
3019                 if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) {
3020                         re_initialize();
3021                         goto restart;
3022                 }
3023                 compute_average(EVEN_COUNTERS);
3024                 format_all_counters(EVEN_COUNTERS);
3025                 flush_output_stdout();
3026                 if (exit_requested)
3027                         break;
3028                 if (num_iterations && ++done_iters >= num_iterations)
3029                         break;
3030                 do_sleep();
3031                 if (snapshot_proc_sysfs_files())
3032                         goto restart;
3033                 retval = for_all_cpus(get_counters, EVEN_COUNTERS);
3034                 if (retval < -1) {
3035                         exit(retval);
3036                 } else if (retval == -1) {
3037                         re_initialize();
3038                         goto restart;
3039                 }
3040                 gettimeofday(&tv_even, (struct timezone *)NULL);
3041                 timersub(&tv_even, &tv_odd, &tv_delta);
3042                 if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) {
3043                         re_initialize();
3044                         goto restart;
3045                 }
3046                 compute_average(ODD_COUNTERS);
3047                 format_all_counters(ODD_COUNTERS);
3048                 flush_output_stdout();
3049                 if (exit_requested)
3050                         break;
3051                 if (num_iterations && ++done_iters >= num_iterations)
3052                         break;
3053         }
3054 }
3055
3056 void check_dev_msr()
3057 {
3058         struct stat sb;
3059         char pathname[32];
3060
3061         sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
3062         if (stat(pathname, &sb))
3063                 if (system("/sbin/modprobe msr > /dev/null 2>&1"))
3064                         err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
3065 }
3066
3067 void check_permissions()
3068 {
3069         struct __user_cap_header_struct cap_header_data;
3070         cap_user_header_t cap_header = &cap_header_data;
3071         struct __user_cap_data_struct cap_data_data;
3072         cap_user_data_t cap_data = &cap_data_data;
3073         extern int capget(cap_user_header_t hdrp, cap_user_data_t datap);
3074         int do_exit = 0;
3075         char pathname[32];
3076
3077         /* check for CAP_SYS_RAWIO */
3078         cap_header->pid = getpid();
3079         cap_header->version = _LINUX_CAPABILITY_VERSION;
3080         if (capget(cap_header, cap_data) < 0)
3081                 err(-6, "capget(2) failed");
3082
3083         if ((cap_data->effective & (1 << CAP_SYS_RAWIO)) == 0) {
3084                 do_exit++;
3085                 warnx("capget(CAP_SYS_RAWIO) failed,"
3086                         " try \"# setcap cap_sys_rawio=ep %s\"", progname);
3087         }
3088
3089         /* test file permissions */
3090         sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
3091         if (euidaccess(pathname, R_OK)) {
3092                 do_exit++;
3093                 warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
3094         }
3095
3096         /* if all else fails, thell them to be root */
3097         if (do_exit)
3098                 if (getuid() != 0)
3099                         warnx("... or simply run as root");
3100
3101         if (do_exit)
3102                 exit(-6);
3103 }
3104
3105 /*
3106  * NHM adds support for additional MSRs:
3107  *
3108  * MSR_SMI_COUNT                   0x00000034
3109  *
3110  * MSR_PLATFORM_INFO               0x000000ce
3111  * MSR_PKG_CST_CONFIG_CONTROL     0x000000e2
3112  *
3113  * MSR_MISC_PWR_MGMT               0x000001aa
3114  *
3115  * MSR_PKG_C3_RESIDENCY            0x000003f8
3116  * MSR_PKG_C6_RESIDENCY            0x000003f9
3117  * MSR_CORE_C3_RESIDENCY           0x000003fc
3118  * MSR_CORE_C6_RESIDENCY           0x000003fd
3119  *
3120  * Side effect:
3121  * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL
3122  * sets has_misc_feature_control
3123  */
3124 int probe_nhm_msrs(unsigned int family, unsigned int model)
3125 {
3126         unsigned long long msr;
3127         unsigned int base_ratio;
3128         int *pkg_cstate_limits;
3129
3130         if (!genuine_intel)
3131                 return 0;
3132
3133         if (family != 6)
3134                 return 0;
3135
3136         bclk = discover_bclk(family, model);
3137
3138         switch (model) {
3139         case INTEL_FAM6_NEHALEM_EP:     /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
3140         case INTEL_FAM6_NEHALEM:        /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
3141         case 0x1F:      /* Core i7 and i5 Processor - Nehalem */
3142         case INTEL_FAM6_WESTMERE:       /* Westmere Client - Clarkdale, Arrandale */
3143         case INTEL_FAM6_WESTMERE_EP:    /* Westmere EP - Gulftown */
3144         case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
3145         case INTEL_FAM6_WESTMERE_EX:    /* Westmere-EX Xeon - Eagleton */
3146                 pkg_cstate_limits = nhm_pkg_cstate_limits;
3147                 break;
3148         case INTEL_FAM6_SANDYBRIDGE:    /* SNB */
3149         case INTEL_FAM6_SANDYBRIDGE_X:  /* SNB Xeon */
3150         case INTEL_FAM6_IVYBRIDGE:      /* IVB */
3151         case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
3152                 pkg_cstate_limits = snb_pkg_cstate_limits;
3153                 has_misc_feature_control = 1;
3154                 break;
3155         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
3156         case INTEL_FAM6_HASWELL_X:      /* HSX */
3157         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
3158         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
3159         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
3160         case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
3161         case INTEL_FAM6_BROADWELL_X:    /* BDX */
3162         case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
3163         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
3164         case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
3165         case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
3166         case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
3167         case INTEL_FAM6_CANNONLAKE_MOBILE:      /* CNL */
3168                 pkg_cstate_limits = hsw_pkg_cstate_limits;
3169                 has_misc_feature_control = 1;
3170                 break;
3171         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
3172                 pkg_cstate_limits = skx_pkg_cstate_limits;
3173                 has_misc_feature_control = 1;
3174                 break;
3175         case INTEL_FAM6_ATOM_SILVERMONT:        /* BYT */
3176                 no_MSR_MISC_PWR_MGMT = 1;
3177         case INTEL_FAM6_ATOM_SILVERMONT_X:      /* AVN */
3178                 pkg_cstate_limits = slv_pkg_cstate_limits;
3179                 break;
3180         case INTEL_FAM6_ATOM_AIRMONT:   /* AMT */
3181                 pkg_cstate_limits = amt_pkg_cstate_limits;
3182                 no_MSR_MISC_PWR_MGMT = 1;
3183                 break;
3184         case INTEL_FAM6_XEON_PHI_KNL:   /* PHI */
3185         case INTEL_FAM6_XEON_PHI_KNM:
3186                 pkg_cstate_limits = phi_pkg_cstate_limits;
3187                 break;
3188         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
3189         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
3190         case INTEL_FAM6_ATOM_GOLDMONT_X:        /* DNV */
3191                 pkg_cstate_limits = glm_pkg_cstate_limits;
3192                 break;
3193         default:
3194                 return 0;
3195         }
3196         get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
3197         pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
3198
3199         get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
3200         base_ratio = (msr >> 8) & 0xFF;
3201
3202         base_hz = base_ratio * bclk * 1000000;
3203         has_base_hz = 1;
3204         return 1;
3205 }
3206 /*
3207  * SLV client has support for unique MSRs:
3208  *
3209  * MSR_CC6_DEMOTION_POLICY_CONFIG
3210  * MSR_MC6_DEMOTION_POLICY_CONFIG
3211  */
3212
3213 int has_slv_msrs(unsigned int family, unsigned int model)
3214 {
3215         if (!genuine_intel)
3216                 return 0;
3217
3218         switch (model) {
3219         case INTEL_FAM6_ATOM_SILVERMONT:
3220         case INTEL_FAM6_ATOM_SILVERMONT_MID:
3221         case INTEL_FAM6_ATOM_AIRMONT_MID:
3222                 return 1;
3223         }
3224         return 0;
3225 }
3226 int is_dnv(unsigned int family, unsigned int model)
3227 {
3228
3229         if (!genuine_intel)
3230                 return 0;
3231
3232         switch (model) {
3233         case INTEL_FAM6_ATOM_GOLDMONT_X:
3234                 return 1;
3235         }
3236         return 0;
3237 }
3238 int is_bdx(unsigned int family, unsigned int model)
3239 {
3240
3241         if (!genuine_intel)
3242                 return 0;
3243
3244         switch (model) {
3245         case INTEL_FAM6_BROADWELL_X:
3246         case INTEL_FAM6_BROADWELL_XEON_D:
3247                 return 1;
3248         }
3249         return 0;
3250 }
3251 int is_skx(unsigned int family, unsigned int model)
3252 {
3253
3254         if (!genuine_intel)
3255                 return 0;
3256
3257         switch (model) {
3258         case INTEL_FAM6_SKYLAKE_X:
3259                 return 1;
3260         }
3261         return 0;
3262 }
3263
3264 int has_turbo_ratio_limit(unsigned int family, unsigned int model)
3265 {
3266         if (has_slv_msrs(family, model))
3267                 return 0;
3268
3269         switch (model) {
3270         /* Nehalem compatible, but do not include turbo-ratio limit support */
3271         case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
3272         case INTEL_FAM6_WESTMERE_EX:    /* Westmere-EX Xeon - Eagleton */
3273         case INTEL_FAM6_XEON_PHI_KNL:   /* PHI - Knights Landing (different MSR definition) */
3274         case INTEL_FAM6_XEON_PHI_KNM:
3275                 return 0;
3276         default:
3277                 return 1;
3278         }
3279 }
/*
 * has_atom_turbo_ratio_limit()
 * Returns 1 exactly when has_slv_msrs() reports true for this
 * family/model, 0 otherwise.
 */
int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model)
{
	return has_slv_msrs(family, model) ? 1 : 0;
}
3287 int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
3288 {
3289         if (!genuine_intel)
3290                 return 0;
3291
3292         if (family != 6)
3293                 return 0;
3294
3295         switch (model) {
3296         case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
3297         case INTEL_FAM6_HASWELL_X:      /* HSW Xeon */
3298                 return 1;
3299         default:
3300                 return 0;
3301         }
3302 }
3303 int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
3304 {
3305         if (!genuine_intel)
3306                 return 0;
3307
3308         if (family != 6)
3309                 return 0;
3310
3311         switch (model) {
3312         case INTEL_FAM6_HASWELL_X:      /* HSW Xeon */
3313                 return 1;
3314         default:
3315                 return 0;
3316         }
3317 }
3318
3319 int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
3320 {
3321         if (!genuine_intel)
3322                 return 0;
3323
3324         if (family != 6)
3325                 return 0;
3326
3327         switch (model) {
3328         case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
3329         case INTEL_FAM6_XEON_PHI_KNM:
3330                 return 1;
3331         default:
3332                 return 0;
3333         }
3334 }
3335 int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model)
3336 {
3337         if (!genuine_intel)
3338                 return 0;
3339
3340         if (family != 6)
3341                 return 0;
3342
3343         switch (model) {
3344         case INTEL_FAM6_ATOM_GOLDMONT:
3345         case INTEL_FAM6_SKYLAKE_X:
3346                 return 1;
3347         default:
3348                 return 0;
3349         }
3350 }
3351 int has_config_tdp(unsigned int family, unsigned int model)
3352 {
3353         if (!genuine_intel)
3354                 return 0;
3355
3356         if (family != 6)
3357                 return 0;
3358
3359         switch (model) {
3360         case INTEL_FAM6_IVYBRIDGE:      /* IVB */
3361         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
3362         case INTEL_FAM6_HASWELL_X:      /* HSX */
3363         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
3364         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
3365         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
3366         case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
3367         case INTEL_FAM6_BROADWELL_X:    /* BDX */
3368         case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
3369         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
3370         case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
3371         case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
3372         case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
3373         case INTEL_FAM6_CANNONLAKE_MOBILE:      /* CNL */
3374         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
3375
3376         case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
3377         case INTEL_FAM6_XEON_PHI_KNM:
3378                 return 1;
3379         default:
3380                 return 0;
3381         }
3382 }
3383
3384 static void
3385 dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
3386 {
3387         if (!do_nhm_platform_info)
3388                 return;
3389
3390         dump_nhm_platform_info();
3391
3392         if (has_hsw_turbo_ratio_limit(family, model))
3393                 dump_hsw_turbo_ratio_limits();
3394
3395         if (has_ivt_turbo_ratio_limit(family, model))
3396                 dump_ivt_turbo_ratio_limits();
3397
3398         if (has_turbo_ratio_limit(family, model))
3399                 dump_turbo_ratio_limits(family, model);
3400
3401         if (has_atom_turbo_ratio_limit(family, model))
3402                 dump_atom_turbo_ratio_limits();
3403
3404         if (has_knl_turbo_ratio_limit(family, model))
3405                 dump_knl_turbo_ratio_limits();
3406
3407         if (has_config_tdp(family, model))
3408                 dump_config_tdp();
3409
3410         dump_nhm_cst_cfg();
3411 }
3412
3413 static void
3414 dump_sysfs_cstate_config(void)
3415 {
3416         char path[64];
3417         char name_buf[16];
3418         char desc[64];
3419         FILE *input;
3420         int state;
3421         char *sp;
3422
3423         if (!DO_BIC(BIC_sysfs))
3424                 return;
3425
3426         for (state = 0; state < 10; ++state) {
3427
3428                 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
3429                         base_cpu, state);
3430                 input = fopen(path, "r");
3431                 if (input == NULL)
3432                         continue;
3433                 fgets(name_buf, sizeof(name_buf), input);
3434
3435                  /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
3436                 sp = strchr(name_buf, '-');
3437                 if (!sp)
3438                         sp = strchrnul(name_buf, '\n');
3439                 *sp = '\0';
3440
3441                 fclose(input);
3442
3443                 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc",
3444                         base_cpu, state);
3445                 input = fopen(path, "r");
3446                 if (input == NULL)
3447                         continue;
3448                 fgets(desc, sizeof(desc), input);
3449
3450                 fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc);
3451                 fclose(input);
3452         }
3453 }
3454 static void
3455 dump_sysfs_pstate_config(void)
3456 {
3457         char path[64];
3458         char driver_buf[64];
3459         char governor_buf[64];
3460         FILE *input;
3461         int turbo;
3462
3463         sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver",
3464                         base_cpu);
3465         input = fopen(path, "r");
3466         if (input == NULL) {
3467                 fprintf(stderr, "NSFOD %s\n", path);
3468                 return;
3469         }
3470         fgets(driver_buf, sizeof(driver_buf), input);
3471         fclose(input);
3472
3473         sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor",
3474                         base_cpu);
3475         input = fopen(path, "r");
3476         if (input == NULL) {
3477                 fprintf(stderr, "NSFOD %s\n", path);
3478                 return;
3479         }
3480         fgets(governor_buf, sizeof(governor_buf), input);
3481         fclose(input);
3482
3483         fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf);
3484         fprintf(outf, "cpu%d: cpufreq governor: %s", base_cpu, governor_buf);
3485
3486         sprintf(path, "/sys/devices/system/cpu/cpufreq/boost");
3487         input = fopen(path, "r");
3488         if (input != NULL) {
3489                 fscanf(input, "%d", &turbo);
3490                 fprintf(outf, "cpufreq boost: %d\n", turbo);
3491                 fclose(input);
3492         }
3493
3494         sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo");
3495         input = fopen(path, "r");
3496         if (input != NULL) {
3497                 fscanf(input, "%d", &turbo);
3498                 fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo);
3499                 fclose(input);
3500         }
3501 }
3502
3503
3504 /*
3505  * print_epb()
3506  * Decode the ENERGY_PERF_BIAS MSR
3507  */
3508 int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3509 {
3510         unsigned long long msr;
3511         char *epb_string;
3512         int cpu;
3513
3514         if (!has_epb)
3515                 return 0;
3516
3517         cpu = t->cpu_id;
3518
3519         /* EPB is per-package */
3520         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3521                 return 0;
3522
3523         if (cpu_migrate(cpu)) {
3524                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3525                 return -1;
3526         }
3527
3528         if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr))
3529                 return 0;
3530
3531         switch (msr & 0xF) {
3532         case ENERGY_PERF_BIAS_PERFORMANCE:
3533                 epb_string = "performance";
3534                 break;
3535         case ENERGY_PERF_BIAS_NORMAL:
3536                 epb_string = "balanced";
3537                 break;
3538         case ENERGY_PERF_BIAS_POWERSAVE:
3539                 epb_string = "powersave";
3540                 break;
3541         default:
3542                 epb_string = "custom";
3543                 break;
3544         }
3545         fprintf(outf, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string);
3546
3547         return 0;
3548 }
3549 /*
3550  * print_hwp()
3551  * Decode the MSR_HWP_CAPABILITIES
3552  */
3553 int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3554 {
3555         unsigned long long msr;
3556         int cpu;
3557
3558         if (!has_hwp)
3559                 return 0;
3560
3561         cpu = t->cpu_id;
3562
3563         /* MSR_HWP_CAPABILITIES is per-package */
3564         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3565                 return 0;
3566
3567         if (cpu_migrate(cpu)) {
3568                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3569                 return -1;
3570         }
3571
3572         if (get_msr(cpu, MSR_PM_ENABLE, &msr))
3573                 return 0;
3574
3575         fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n",
3576                 cpu, msr, (msr & (1 << 0)) ? "" : "No-");
3577
3578         /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
3579         if ((msr & (1 << 0)) == 0)
3580                 return 0;
3581
3582         if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr))
3583                 return 0;
3584
3585         fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
3586                         "(high %d guar %d eff %d low %d)\n",
3587                         cpu, msr,
3588                         (unsigned int)HWP_HIGHEST_PERF(msr),
3589                         (unsigned int)HWP_GUARANTEED_PERF(msr),
3590                         (unsigned int)HWP_MOSTEFFICIENT_PERF(msr),
3591                         (unsigned int)HWP_LOWEST_PERF(msr));
3592
3593         if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
3594                 return 0;
3595
3596         fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
3597                         "(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n",
3598                         cpu, msr,
3599                         (unsigned int)(((msr) >> 0) & 0xff),
3600                         (unsigned int)(((msr) >> 8) & 0xff),
3601                         (unsigned int)(((msr) >> 16) & 0xff),
3602                         (unsigned int)(((msr) >> 24) & 0xff),
3603                         (unsigned int)(((msr) >> 32) & 0xff3),
3604                         (unsigned int)(((msr) >> 42) & 0x1));
3605
3606         if (has_hwp_pkg) {
3607                 if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
3608                         return 0;
3609
3610                 fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
3611                         "(min %d max %d des %d epp 0x%x window 0x%x)\n",
3612                         cpu, msr,
3613                         (unsigned int)(((msr) >> 0) & 0xff),
3614                         (unsigned int)(((msr) >> 8) & 0xff),
3615                         (unsigned int)(((msr) >> 16) & 0xff),
3616                         (unsigned int)(((msr) >> 24) & 0xff),
3617                         (unsigned int)(((msr) >> 32) & 0xff3));
3618         }
3619         if (has_hwp_notify) {
3620                 if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
3621                         return 0;
3622
3623                 fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
3624                         "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
3625                         cpu, msr,
3626                         ((msr) & 0x1) ? "EN" : "Dis",
3627                         ((msr) & 0x2) ? "EN" : "Dis");
3628         }
3629         if (get_msr(cpu, MSR_HWP_STATUS, &msr))
3630                 return 0;
3631
3632         fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
3633                         "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
3634                         cpu, msr,
3635                         ((msr) & 0x1) ? "" : "No-",
3636                         ((msr) & 0x2) ? "" : "No-");
3637
3638         return 0;
3639 }
3640
3641 /*
3642  * print_perf_limit()
3643  */
3644 int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3645 {
3646         unsigned long long msr;
3647         int cpu;
3648
3649         cpu = t->cpu_id;
3650
3651         /* per-package */
3652         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3653                 return 0;
3654
3655         if (cpu_migrate(cpu)) {
3656                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3657                 return -1;
3658         }
3659
3660         if (do_core_perf_limit_reasons) {
3661                 get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
3662                 fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
3663                 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
3664                         (msr & 1 << 15) ? "bit15, " : "",
3665                         (msr & 1 << 14) ? "bit14, " : "",
3666                         (msr & 1 << 13) ? "Transitions, " : "",
3667                         (msr & 1 << 12) ? "MultiCoreTurbo, " : "",
3668                         (msr & 1 << 11) ? "PkgPwrL2, " : "",
3669                         (msr & 1 << 10) ? "PkgPwrL1, " : "",
3670                         (msr & 1 << 9) ? "CorePwr, " : "",
3671                         (msr & 1 << 8) ? "Amps, " : "",
3672                         (msr & 1 << 6) ? "VR-Therm, " : "",
3673                         (msr & 1 << 5) ? "Auto-HWP, " : "",
3674                         (msr & 1 << 4) ? "Graphics, " : "",
3675                         (msr & 1 << 2) ? "bit2, " : "",
3676                         (msr & 1 << 1) ? "ThermStatus, " : "",
3677                         (msr & 1 << 0) ? "PROCHOT, " : "");
3678                 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
3679                         (msr & 1 << 31) ? "bit31, " : "",
3680                         (msr & 1 << 30) ? "bit30, " : "",
3681                         (msr & 1 << 29) ? "Transitions, " : "",
3682                         (msr & 1 << 28) ? "MultiCoreTurbo, " : "",
3683                         (msr & 1 << 27) ? "PkgPwrL2, " : "",
3684                         (msr & 1 << 26) ? "PkgPwrL1, " : "",
3685                         (msr & 1 << 25) ? "CorePwr, " : "",
3686                         (msr & 1 << 24) ? "Amps, " : "",
3687                         (msr & 1 << 22) ? "VR-Therm, " : "",
3688                         (msr & 1 << 21) ? "Auto-HWP, " : "",
3689                         (msr & 1 << 20) ? "Graphics, " : "",
3690                         (msr & 1 << 18) ? "bit18, " : "",
3691                         (msr & 1 << 17) ? "ThermStatus, " : "",
3692                         (msr & 1 << 16) ? "PROCHOT, " : "");
3693
3694         }
3695         if (do_gfx_perf_limit_reasons) {
3696                 get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
3697                 fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
3698                 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
3699                         (msr & 1 << 0) ? "PROCHOT, " : "",
3700                         (msr & 1 << 1) ? "ThermStatus, " : "",
3701                         (msr & 1 << 4) ? "Graphics, " : "",
3702                         (msr & 1 << 6) ? "VR-Therm, " : "",
3703                         (msr & 1 << 8) ? "Amps, " : "",
3704                         (msr & 1 << 9) ? "GFXPwr, " : "",
3705                         (msr & 1 << 10) ? "PkgPwrL1, " : "",
3706                         (msr & 1 << 11) ? "PkgPwrL2, " : "");
3707                 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n",
3708                         (msr & 1 << 16) ? "PROCHOT, " : "",
3709                         (msr & 1 << 17) ? "ThermStatus, " : "",
3710                         (msr & 1 << 20) ? "Graphics, " : "",
3711                         (msr & 1 << 22) ? "VR-Therm, " : "",
3712                         (msr & 1 << 24) ? "Amps, " : "",
3713                         (msr & 1 << 25) ? "GFXPwr, " : "",
3714                         (msr & 1 << 26) ? "PkgPwrL1, " : "",
3715                         (msr & 1 << 27) ? "PkgPwrL2, " : "");
3716         }
3717         if (do_ring_perf_limit_reasons) {
3718                 get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
3719                 fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
3720                 fprintf(outf, " (Active: %s%s%s%s%s%s)",
3721                         (msr & 1 << 0) ? "PROCHOT, " : "",
3722                         (msr & 1 << 1) ? "ThermStatus, " : "",
3723                         (msr & 1 << 6) ? "VR-Therm, " : "",
3724                         (msr & 1 << 8) ? "Amps, " : "",
3725                         (msr & 1 << 10) ? "PkgPwrL1, " : "",
3726                         (msr & 1 << 11) ? "PkgPwrL2, " : "");
3727                 fprintf(outf, " (Logged: %s%s%s%s%s%s)\n",
3728                         (msr & 1 << 16) ? "PROCHOT, " : "",
3729                         (msr & 1 << 17) ? "ThermStatus, " : "",
3730                         (msr & 1 << 22) ? "VR-Therm, " : "",
3731                         (msr & 1 << 24) ? "Amps, " : "",
3732                         (msr & 1 << 26) ? "PkgPwrL1, " : "",
3733                         (msr & 1 << 27) ? "PkgPwrL2, " : "");
3734         }
3735         return 0;
3736 }
3737
3738 #define RAPL_POWER_GRANULARITY  0x7FFF  /* 15 bit power granularity */
3739 #define RAPL_TIME_GRANULARITY   0x3F /* 6 bit time granularity */
3740
3741 double get_tdp(unsigned int model)
3742 {
3743         unsigned long long msr;
3744
3745         if (do_rapl & RAPL_PKG_POWER_INFO)
3746                 if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
3747                         return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
3748
3749         switch (model) {
3750         case INTEL_FAM6_ATOM_SILVERMONT:
3751         case INTEL_FAM6_ATOM_SILVERMONT_X:
3752                 return 30.0;
3753         default:
3754                 return 135.0;
3755         }
3756 }
3757
3758 /*
3759  * rapl_dram_energy_units_probe()
3760  * Energy units are either hard-coded, or come from RAPL Energy Unit MSR.
3761  */
3762 static double
3763 rapl_dram_energy_units_probe(int  model, double rapl_energy_units)
3764 {
3765         /* only called for genuine_intel, family 6 */
3766
3767         switch (model) {
3768         case INTEL_FAM6_HASWELL_X:      /* HSX */
3769         case INTEL_FAM6_BROADWELL_X:    /* BDX */
3770         case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
3771         case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
3772         case INTEL_FAM6_XEON_PHI_KNM:
3773                 return (rapl_dram_energy_units = 15.3 / 1000000);
3774         default:
3775                 return (rapl_energy_units);
3776         }
3777 }
3778
3779
3780 /*
3781  * rapl_probe()
3782  *
3783  * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
3784  */
3785 void rapl_probe(unsigned int family, unsigned int model)
3786 {
3787         unsigned long long msr;
3788         unsigned int time_unit;
3789         double tdp;
3790
3791         if (!genuine_intel)
3792                 return;
3793
3794         if (family != 6)
3795                 return;
3796
3797         switch (model) {
3798         case INTEL_FAM6_SANDYBRIDGE:
3799         case INTEL_FAM6_IVYBRIDGE:
3800         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
3801         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
3802         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
3803         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
3804         case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
3805                 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
3806                 if (rapl_joules) {
3807                         BIC_PRESENT(BIC_Pkg_J);
3808                         BIC_PRESENT(BIC_Cor_J);
3809                         BIC_PRESENT(BIC_GFX_J);
3810                 } else {
3811                         BIC_PRESENT(BIC_PkgWatt);
3812                         BIC_PRESENT(BIC_CorWatt);
3813                         BIC_PRESENT(BIC_GFXWatt);
3814                 }
3815                 break;
3816         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
3817         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
3818                 do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
3819                 if (rapl_joules)
3820                         BIC_PRESENT(BIC_Pkg_J);
3821                 else
3822                         BIC_PRESENT(BIC_PkgWatt);
3823                 break;
3824         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
3825         case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
3826         case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
3827         case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
3828         case INTEL_FAM6_CANNONLAKE_MOBILE:      /* CNL */
3829                 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
3830                 BIC_PRESENT(BIC_PKG__);
3831                 BIC_PRESENT(BIC_RAM__);
3832                 if (rapl_joules) {
3833                         BIC_PRESENT(BIC_Pkg_J);
3834                         BIC_PRESENT(BIC_Cor_J);
3835                         BIC_PRESENT(BIC_RAM_J);
3836                         BIC_PRESENT(BIC_GFX_J);
3837                 } else {
3838                         BIC_PRESENT(BIC_PkgWatt);
3839                         BIC_PRESENT(BIC_CorWatt);
3840                         BIC_PRESENT(BIC_RAMWatt);
3841                         BIC_PRESENT(BIC_GFXWatt);
3842                 }
3843                 break;
3844         case INTEL_FAM6_HASWELL_X:      /* HSX */
3845         case INTEL_FAM6_BROADWELL_X:    /* BDX */
3846         case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
3847         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
3848         case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
3849         case INTEL_FAM6_XEON_PHI_KNM:
3850                 do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
3851                 BIC_PRESENT(BIC_PKG__);
3852                 BIC_PRESENT(BIC_RAM__);
3853                 if (rapl_joules) {
3854                         BIC_PRESENT(BIC_Pkg_J);
3855                         BIC_PRESENT(BIC_RAM_J);
3856                 } else {
3857                         BIC_PRESENT(BIC_PkgWatt);
3858                         BIC_PRESENT(BIC_RAMWatt);
3859                 }
3860                 break;
3861         case INTEL_FAM6_SANDYBRIDGE_X:
3862         case INTEL_FAM6_IVYBRIDGE_X:
3863                 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
3864                 BIC_PRESENT(BIC_PKG__);
3865                 BIC_PRESENT(BIC_RAM__);
3866                 if (rapl_joules) {
3867                         BIC_PRESENT(BIC_Pkg_J);
3868                         BIC_PRESENT(BIC_Cor_J);
3869                         BIC_PRESENT(BIC_RAM_J);
3870                 } else {
3871                         BIC_PRESENT(BIC_PkgWatt);
3872                         BIC_PRESENT(BIC_CorWatt);
3873                         BIC_PRESENT(BIC_RAMWatt);
3874                 }
3875                 break;
3876         case INTEL_FAM6_ATOM_SILVERMONT:        /* BYT */
3877         case INTEL_FAM6_ATOM_SILVERMONT_X:      /* AVN */
3878                 do_rapl = RAPL_PKG | RAPL_CORES;
3879                 if (rapl_joules) {
3880                         BIC_PRESENT(BIC_Pkg_J);
3881                         BIC_PRESENT(BIC_Cor_J);
3882                 } else {
3883                         BIC_PRESENT(BIC_PkgWatt);
3884                         BIC_PRESENT(BIC_CorWatt);
3885                 }
3886                 break;
3887         case INTEL_FAM6_ATOM_GOLDMONT_X:        /* DNV */
3888                 do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
3889                 BIC_PRESENT(BIC_PKG__);
3890                 BIC_PRESENT(BIC_RAM__);
3891                 if (rapl_joules) {
3892                         BIC_PRESENT(BIC_Pkg_J);
3893                         BIC_PRESENT(BIC_Cor_J);
3894                         BIC_PRESENT(BIC_RAM_J);
3895                 } else {
3896                         BIC_PRESENT(BIC_PkgWatt);
3897                         BIC_PRESENT(BIC_CorWatt);
3898                         BIC_PRESENT(BIC_RAMWatt);
3899                 }
3900                 break;
3901         default:
3902                 return;
3903         }
3904
3905         /* units on package 0, verify later other packages match */
3906         if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
3907                 return;
3908
3909         rapl_power_units = 1.0 / (1 << (msr & 0xF));
3910         if (model == INTEL_FAM6_ATOM_SILVERMONT)
3911                 rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
3912         else
3913                 rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
3914
3915         rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units);
3916
3917         time_unit = msr >> 16 & 0xF;
3918         if (time_unit == 0)
3919                 time_unit = 0xA;
3920
3921         rapl_time_units = 1.0 / (1 << (time_unit));
3922
3923         tdp = get_tdp(model);
3924
3925         rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
3926         if (!quiet)
3927                 fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
3928
3929         return;
3930 }
3931
3932 void perf_limit_reasons_probe(unsigned int family, unsigned int model)
3933 {
3934         if (!genuine_intel)
3935                 return;
3936
3937         if (family != 6)
3938                 return;
3939
3940         switch (model) {
3941         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
3942         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
3943         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
3944                 do_gfx_perf_limit_reasons = 1;
3945         case INTEL_FAM6_HASWELL_X:      /* HSX */
3946                 do_core_perf_limit_reasons = 1;
3947                 do_ring_perf_limit_reasons = 1;
3948         default:
3949                 return;
3950         }
3951 }
3952
3953 void automatic_cstate_conversion_probe(unsigned int family, unsigned int model)
3954 {
3955         if (is_skx(family, model) || is_bdx(family, model))
3956                 has_automatic_cstate_conversion = 1;
3957 }
3958
3959 int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3960 {
3961         unsigned long long msr;
3962         unsigned int dts, dts2;
3963         int cpu;
3964
3965         if (!(do_dts || do_ptm))
3966                 return 0;
3967
3968         cpu = t->cpu_id;
3969
3970         /* DTS is per-core, no need to print for each thread */
3971         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
3972                 return 0;
3973
3974         if (cpu_migrate(cpu)) {
3975                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3976                 return -1;
3977         }
3978
3979         if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
3980                 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
3981                         return 0;
3982
3983                 dts = (msr >> 16) & 0x7F;
3984                 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
3985                         cpu, msr, tcc_activation_temp - dts);
3986
3987                 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
3988                         return 0;
3989
3990                 dts = (msr >> 16) & 0x7F;
3991                 dts2 = (msr >> 8) & 0x7F;
3992                 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
3993                         cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
3994         }
3995
3996
3997         if (do_dts && debug) {
3998                 unsigned int resolution;
3999
4000                 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
4001                         return 0;
4002
4003                 dts = (msr >> 16) & 0x7F;
4004                 resolution = (msr >> 27) & 0xF;
4005                 fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
4006                         cpu, msr, tcc_activation_temp - dts, resolution);
4007
4008                 if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
4009                         return 0;
4010
4011                 dts = (msr >> 16) & 0x7F;
4012                 dts2 = (msr >> 8) & 0x7F;
4013                 fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
4014                         cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
4015         }
4016
4017         return 0;
4018 }
4019
4020 void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
4021 {
4022         fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
4023                 cpu, label,
4024                 ((msr >> 15) & 1) ? "EN" : "DIS",
4025                 ((msr >> 0) & 0x7FFF) * rapl_power_units,
4026                 (1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
4027                 (((msr >> 16) & 1) ? "EN" : "DIS"));
4028
4029         return;
4030 }
4031
4032 int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4033 {
4034         unsigned long long msr;
4035         int cpu;
4036
4037         if (!do_rapl)
4038                 return 0;
4039
4040         /* RAPL counters are per package, so print only for 1st thread/package */
4041         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
4042                 return 0;
4043
4044         cpu = t->cpu_id;
4045         if (cpu_migrate(cpu)) {
4046                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
4047                 return -1;
4048         }
4049
4050         if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
4051                 return -1;
4052
4053         fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr,
4054                 rapl_power_units, rapl_energy_units, rapl_time_units);
4055
4056         if (do_rapl & RAPL_PKG_POWER_INFO) {
4057
4058                 if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
4059                         return -5;
4060
4061
4062                 fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
4063                         cpu, msr,
4064                         ((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4065                         ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4066                         ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4067                         ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
4068
4069         }
4070         if (do_rapl & RAPL_PKG) {
4071
4072                 if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
4073                         return -9;
4074
4075                 fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
4076                         cpu, msr, (msr >> 63) & 1 ? "" : "UN");
4077
4078                 print_power_limit_msr(cpu, msr, "PKG Limit #1");
4079                 fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
4080                         cpu,
4081                         ((msr >> 47) & 1) ? "EN" : "DIS",
4082                         ((msr >> 32) & 0x7FFF) * rapl_power_units,
4083                         (1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
4084                         ((msr >> 48) & 1) ? "EN" : "DIS");
4085         }
4086
4087         if (do_rapl & RAPL_DRAM_POWER_INFO) {
4088                 if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
4089                         return -6;
4090
4091                 fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
4092                         cpu, msr,
4093                         ((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4094                         ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4095                         ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4096                         ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
4097         }
4098         if (do_rapl & RAPL_DRAM) {
4099                 if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
4100                         return -9;
4101                 fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
4102                                 cpu, msr, (msr >> 31) & 1 ? "" : "UN");
4103
4104                 print_power_limit_msr(cpu, msr, "DRAM Limit");
4105         }
4106         if (do_rapl & RAPL_CORE_POLICY) {
4107                 if (get_msr(cpu, MSR_PP0_POLICY, &msr))
4108                         return -7;
4109
4110                 fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
4111         }
4112         if (do_rapl & RAPL_CORES_POWER_LIMIT) {
4113                 if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
4114                         return -9;
4115                 fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
4116                                 cpu, msr, (msr >> 31) & 1 ? "" : "UN");
4117                 print_power_limit_msr(cpu, msr, "Cores Limit");
4118         }
4119         if (do_rapl & RAPL_GFX) {
4120                 if (get_msr(cpu, MSR_PP1_POLICY, &msr))
4121                         return -8;
4122
4123                 fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
4124
4125                 if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
4126                         return -9;
4127                 fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
4128                                 cpu, msr, (msr >> 31) & 1 ? "" : "UN");
4129                 print_power_limit_msr(cpu, msr, "GFX Limit");
4130         }
4131         return 0;
4132 }
4133
4134 /*
4135  * SNB adds support for additional MSRs:
4136  *
4137  * MSR_PKG_C7_RESIDENCY            0x000003fa
4138  * MSR_CORE_C7_RESIDENCY           0x000003fe
4139  * MSR_PKG_C2_RESIDENCY            0x0000060d
4140  */
4141
4142 int has_snb_msrs(unsigned int family, unsigned int model)
4143 {
4144         if (!genuine_intel)
4145                 return 0;
4146
4147         switch (model) {
4148         case INTEL_FAM6_SANDYBRIDGE:
4149         case INTEL_FAM6_SANDYBRIDGE_X:
4150         case INTEL_FAM6_IVYBRIDGE:      /* IVB */
4151         case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
4152         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
4153         case INTEL_FAM6_HASWELL_X:      /* HSW */
4154         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
4155         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
4156         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
4157         case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
4158         case INTEL_FAM6_BROADWELL_X:    /* BDX */
4159         case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
4160         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
4161         case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
4162         case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
4163         case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
4164         case INTEL_FAM6_CANNONLAKE_MOBILE:      /* CNL */
4165         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
4166         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
4167         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
4168         case INTEL_FAM6_ATOM_GOLDMONT_X:        /* DNV */
4169                 return 1;
4170         }
4171         return 0;
4172 }
4173
4174 /*
4175  * HSW adds support for additional MSRs:
4176  *
4177  * MSR_PKG_C8_RESIDENCY         0x00000630
4178  * MSR_PKG_C9_RESIDENCY         0x00000631
4179  * MSR_PKG_C10_RESIDENCY        0x00000632
4180  *
4181  * MSR_PKGC8_IRTL               0x00000633
4182  * MSR_PKGC9_IRTL               0x00000634
4183  * MSR_PKGC10_IRTL              0x00000635
4184  *
4185  */
4186 int has_hsw_msrs(unsigned int family, unsigned int model)
4187 {
4188         if (!genuine_intel)
4189                 return 0;
4190
4191         switch (model) {
4192         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
4193         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
4194         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
4195         case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
4196         case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
4197         case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
4198         case INTEL_FAM6_CANNONLAKE_MOBILE:      /* CNL */
4199         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
4200         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
4201                 return 1;
4202         }
4203         return 0;
4204 }
4205
4206 /*
4207  * SKL adds support for additional MSRS:
4208  *
4209  * MSR_PKG_WEIGHTED_CORE_C0_RES    0x00000658
4210  * MSR_PKG_ANY_CORE_C0_RES         0x00000659
4211  * MSR_PKG_ANY_GFXE_C0_RES         0x0000065A
4212  * MSR_PKG_BOTH_CORE_GFXE_C0_RES   0x0000065B
4213  */
4214 int has_skl_msrs(unsigned int family, unsigned int model)
4215 {
4216         if (!genuine_intel)
4217                 return 0;
4218
4219         switch (model) {
4220         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
4221         case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
4222         case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
4223         case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
4224         case INTEL_FAM6_CANNONLAKE_MOBILE:      /* CNL */
4225                 return 1;
4226         }
4227         return 0;
4228 }
4229
4230 int is_slm(unsigned int family, unsigned int model)
4231 {
4232         if (!genuine_intel)
4233                 return 0;
4234         switch (model) {
4235         case INTEL_FAM6_ATOM_SILVERMONT:        /* BYT */
4236         case INTEL_FAM6_ATOM_SILVERMONT_X:      /* AVN */
4237                 return 1;
4238         }
4239         return 0;
4240 }
4241
4242 int is_knl(unsigned int family, unsigned int model)
4243 {
4244         if (!genuine_intel)
4245                 return 0;
4246         switch (model) {
4247         case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
4248         case INTEL_FAM6_XEON_PHI_KNM:
4249                 return 1;
4250         }
4251         return 0;
4252 }
4253
4254 int is_cnl(unsigned int family, unsigned int model)
4255 {
4256         if (!genuine_intel)
4257                 return 0;
4258
4259         switch (model) {
4260         case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */
4261                 return 1;
4262         }
4263
4264         return 0;
4265 }
4266
/*
 * get_aperf_mperf_multiplier()
 * Return 1024 when is_knl() accepts this family/model, 1 otherwise.
 */
unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model)
{
	return is_knl(family, model) ? 1024 : 1;
}
4273
4274 #define SLM_BCLK_FREQS 5
4275 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};
4276
4277 double slm_bclk(void)
4278 {
4279         unsigned long long msr = 3;
4280         unsigned int i;
4281         double freq;
4282
4283         if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
4284                 fprintf(outf, "SLM BCLK: unknown\n");
4285
4286         i = msr & 0xf;
4287         if (i >= SLM_BCLK_FREQS) {
4288                 fprintf(outf, "SLM BCLK[%d] invalid\n", i);
4289                 i = 3;
4290         }
4291         freq = slm_freq_table[i];
4292
4293         if (!quiet)
4294                 fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
4295
4296         return freq;
4297 }
4298
/*
 * discover_bclk()
 *
 * Return the BCLK value for this family/model:
 *   100.00     when has_snb_msrs() or is_knl() accepts it,
 *   slm_bclk() when is_slm() accepts it,
 *   133.33     otherwise.
 */
double discover_bclk(unsigned int family, unsigned int model)
{
	if (has_snb_msrs(family, model) || is_knl(family, model))
		return 100.00;

	if (is_slm(family, model))
		return slm_bclk();

	return 133.33;
}
4308
4309 /*
4310  * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
4311  * the Thermal Control Circuit (TCC) activates.
4312  * This is usually equal to tjMax.
4313  *
4314  * Older processors do not have this MSR, so there we guess,
4315  * but also allow cmdline over-ride with -T.
4316  *
4317  * Several MSR temperature values are in units of degrees-C
4318  * below this value, including the Digital Thermal Sensor (DTS),
4319  * Package Thermal Management Sensor (PTM), and thermal event thresholds.
4320  */
4321 int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4322 {
4323         unsigned long long msr;
4324         unsigned int target_c_local;
4325         int cpu;
4326
4327         /* tcc_activation_temp is used only for dts or ptm */
4328         if (!(do_dts || do_ptm))
4329                 return 0;
4330
4331         /* this is a per-package concept */
4332         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
4333                 return 0;
4334
4335         cpu = t->cpu_id;
4336         if (cpu_migrate(cpu)) {
4337                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
4338                 return -1;
4339         }
4340
4341         if (tcc_activation_temp_override != 0) {
4342                 tcc_activation_temp = tcc_activation_temp_override;
4343                 fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n",
4344                         cpu, tcc_activation_temp);
4345                 return 0;
4346         }
4347
4348         /* Temperature Target MSR is Nehalem and newer only */
4349         if (!do_nhm_platform_info)
4350                 goto guess;
4351
4352         if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
4353                 goto guess;
4354
4355         target_c_local = (msr >> 16) & 0xFF;
4356
4357         if (!quiet)
4358                 fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
4359                         cpu, msr, target_c_local);
4360
4361         if (!target_c_local)
4362                 goto guess;
4363
4364         tcc_activation_temp = target_c_local;
4365
4366         return 0;
4367
4368 guess:
4369         tcc_activation_temp = TJMAX_DEFAULT;
4370         fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n",
4371                 cpu, tcc_activation_temp);
4372
4373         return 0;
4374 }
4375
4376 void decode_feature_control_msr(void)
4377 {
4378         unsigned long long msr;
4379
4380         if (!get_msr(base_cpu, MSR_IA32_FEATURE_CONTROL, &msr))
4381                 fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
4382                         base_cpu, msr,
4383                         msr & FEATURE_CONTROL_LOCKED ? "" : "UN-",
4384                         msr & (1 << 18) ? "SGX" : "");
4385 }
4386
4387 void decode_misc_enable_msr(void)
4388 {
4389         unsigned long long msr;
4390
4391         if (!genuine_intel)
4392                 return;
4393
4394         if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
4395                 fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n",
4396                         base_cpu, msr,
4397                         msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-",
4398                         msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-",
4399                         msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-",
4400                         msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "",
4401                         msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : "");
4402 }
4403
4404 void decode_misc_feature_control(void)
4405 {
4406         unsigned long long msr;
4407
4408         if (!has_misc_feature_control)
4409                 return;
4410
4411         if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr))
4412                 fprintf(outf, "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n",
4413                         base_cpu, msr,
4414                         msr & (0 << 0) ? "No-" : "",
4415                         msr & (1 << 0) ? "No-" : "",
4416                         msr & (2 << 0) ? "No-" : "",
4417                         msr & (3 << 0) ? "No-" : "");
4418 }
4419 /*
4420  * Decode MSR_MISC_PWR_MGMT
4421  *
4422  * Decode the bits according to the Nehalem documentation
4423  * bit[0] seems to continue to have same meaning going forward
4424  * bit[1] less so...
4425  */
4426 void decode_misc_pwr_mgmt_msr(void)
4427 {
4428         unsigned long long msr;
4429
4430         if (!do_nhm_platform_info)
4431                 return;
4432
4433         if (no_MSR_MISC_PWR_MGMT)
4434                 return;
4435
4436         if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
4437                 fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n",
4438                         base_cpu, msr,
4439                         msr & (1 << 0) ? "DIS" : "EN",
4440                         msr & (1 << 1) ? "EN" : "DIS",
4441                         msr & (1 << 8) ? "EN" : "DIS");
4442 }
4443 /*
4444  * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG
4445  *
 * These MSRs are present on Silvermont processors,
4447  * Intel Atom processor E3000 series (Baytrail), and friends.
4448  */
4449 void decode_c6_demotion_policy_msr(void)
4450 {
4451         unsigned long long msr;
4452
4453         if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr))
4454                 fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n",
4455                         base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
4456
4457         if (!get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr))
4458                 fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n",
4459                         base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
4460 }
4461
4462 void process_cpuid()
4463 {
4464         unsigned int eax, ebx, ecx, edx;
4465         unsigned int fms, family, model, stepping, ecx_flags, edx_flags;
4466         unsigned int has_turbo;
4467
4468         eax = ebx = ecx = edx = 0;
4469
4470         __cpuid(0, max_level, ebx, ecx, edx);
4471
4472         if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
4473                 genuine_intel = 1;
4474         else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
4475                 authentic_amd = 1;
4476
4477         if (!quiet)
4478                 fprintf(outf, "CPUID(0): %.4s%.4s%.4s ",
4479                         (char *)&ebx, (char *)&edx, (char *)&ecx);
4480
4481         __cpuid(1, fms, ebx, ecx, edx);
4482         family = (fms >> 8) & 0xf;
4483         model = (fms >> 4) & 0xf;
4484         stepping = fms & 0xf;
4485         if (family == 0xf)
4486                 family += (fms >> 20) & 0xff;
4487         if (family >= 6)
4488                 model += ((fms >> 16) & 0xf) << 4;
4489         ecx_flags = ecx;
4490         edx_flags = edx;
4491
4492         /*
4493          * check max extended function levels of CPUID.
4494          * This is needed to check for invariant TSC.
4495          * This check is valid for both Intel and AMD.
4496          */
4497         ebx = ecx = edx = 0;
4498         __cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
4499
4500         if (!quiet) {
4501                 fprintf(outf, "0x%x CPUID levels; 0x%x xlevels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
4502                         max_level, max_extended_level, family, model, stepping, family, model, stepping);
4503                 fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
4504                         ecx_flags & (1 << 0) ? "SSE3" : "-",
4505                         ecx_flags & (1 << 3) ? "MONITOR" : "-",
4506                         ecx_flags & (1 << 6) ? "SMX" : "-",
4507                         ecx_flags & (1 << 7) ? "EIST" : "-",
4508                         ecx_flags & (1 << 8) ? "TM2" : "-",
4509                         edx_flags & (1 << 4) ? "TSC" : "-",
4510                         edx_flags & (1 << 5) ? "MSR" : "-",
4511                         edx_flags & (1 << 22) ? "ACPI-TM" : "-",
4512                         edx_flags & (1 << 28) ? "HT" : "-",
4513                         edx_flags & (1 << 29) ? "TM" : "-");
4514         }
4515
4516         if (!(edx_flags & (1 << 5)))
4517                 errx(1, "CPUID: no MSR");
4518
4519         if (max_extended_level >= 0x80000007) {
4520
4521                 /*
4522                  * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
4523                  * this check is valid for both Intel and AMD
4524                  */
4525                 __cpuid(0x80000007, eax, ebx, ecx, edx);
4526                 has_invariant_tsc = edx & (1 << 8);
4527         }
4528
4529         /*
4530          * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
4531          * this check is valid for both Intel and AMD
4532          */
4533
4534         __cpuid(0x6, eax, ebx, ecx, edx);
4535         has_aperf = ecx & (1 << 0);
4536         if (has_aperf) {
4537                 BIC_PRESENT(BIC_Avg_MHz);
4538                 BIC_PRESENT(BIC_Busy);
4539                 BIC_PRESENT(BIC_Bzy_MHz);
4540         }
4541         do_dts = eax & (1 << 0);
4542         if (do_dts)
4543                 BIC_PRESENT(BIC_CoreTmp);
4544         has_turbo = eax & (1 << 1);
4545         do_ptm = eax & (1 << 6);
4546         if (do_ptm)
4547                 BIC_PRESENT(BIC_PkgTmp);
4548         has_hwp = eax & (1 << 7);
4549         has_hwp_notify = eax & (1 << 8);
4550         has_hwp_activity_window = eax & (1 << 9);
4551         has_hwp_epp = eax & (1 << 10);
4552         has_hwp_pkg = eax & (1 << 11);
4553         has_epb = ecx & (1 << 3);
4554
4555         if (!quiet)
4556                 fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, "
4557                         "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
4558                         has_aperf ? "" : "No-",
4559                         has_turbo ? "" : "No-",
4560                         do_dts ? "" : "No-",
4561                         do_ptm ? "" : "No-",
4562                         has_hwp ? "" : "No-",
4563                         has_hwp_notify ? "" : "No-",
4564                         has_hwp_activity_window ? "" : "No-",
4565                         has_hwp_epp ? "" : "No-",
4566                         has_hwp_pkg ? "" : "No-",
4567                         has_epb ? "" : "No-");
4568
4569         if (!quiet)
4570                 decode_misc_enable_msr();
4571
4572
4573         if (max_level >= 0x7 && !quiet) {
4574                 int has_sgx;
4575
4576                 ecx = 0;
4577
4578                 __cpuid_count(0x7, 0, eax, ebx, ecx, edx);
4579
4580                 has_sgx = ebx & (1 << 2);
4581                 fprintf(outf, "CPUID(7): %sSGX\n", has_sgx ? "" : "No-");
4582
4583                 if (has_sgx)
4584                         decode_feature_control_msr();
4585         }
4586
4587         if (max_level >= 0x15) {
4588                 unsigned int eax_crystal;
4589                 unsigned int ebx_tsc;
4590
4591                 /*
4592                  * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
4593                  */
4594                 eax_crystal = ebx_tsc = crystal_hz = edx = 0;
4595                 __cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);
4596
4597                 if (ebx_tsc != 0) {
4598
4599                         if (!quiet && (ebx != 0))
4600                                 fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
4601                                         eax_crystal, ebx_tsc, crystal_hz);
4602
4603                         if (crystal_hz == 0)
4604                                 switch(model) {
4605                                 case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
4606                                 case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
4607                                 case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
4608                                 case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
4609                                         crystal_hz = 24000000;  /* 24.0 MHz */
4610                                         break;
4611                                 case INTEL_FAM6_ATOM_GOLDMONT_X:        /* DNV */
4612                                         crystal_hz = 25000000;  /* 25.0 MHz */
4613                                         break;
4614                                 case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
4615                                 case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
4616                                         crystal_hz = 19200000;  /* 19.2 MHz */
4617                                         break;
4618                                 default:
4619                                         crystal_hz = 0;
4620                         }
4621
4622                         if (crystal_hz) {
4623                                 tsc_hz =  (unsigned long long) crystal_hz * ebx_tsc / eax_crystal;
4624                                 if (!quiet)
4625                                         fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
4626                                                 tsc_hz / 1000000, crystal_hz, ebx_tsc,  eax_crystal);
4627                         }
4628                 }
4629         }
4630         if (max_level >= 0x16) {
4631                 unsigned int base_mhz, max_mhz, bus_mhz, edx;
4632
4633                 /*
4634                  * CPUID 16H Base MHz, Max MHz, Bus MHz
4635                  */
4636                 base_mhz = max_mhz = bus_mhz = edx = 0;
4637
4638                 __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
4639                 if (!quiet)
4640                         fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
4641                                 base_mhz, max_mhz, bus_mhz);
4642         }
4643
4644         if (has_aperf)
4645                 aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);
4646
4647         BIC_PRESENT(BIC_IRQ);
4648         BIC_PRESENT(BIC_TSC_MHz);
4649
4650         if (probe_nhm_msrs(family, model)) {
4651                 do_nhm_platform_info = 1;
4652                 BIC_PRESENT(BIC_CPU_c1);
4653                 BIC_PRESENT(BIC_CPU_c3);
4654                 BIC_PRESENT(BIC_CPU_c6);
4655                 BIC_PRESENT(BIC_SMI);
4656         }
4657         do_snb_cstates = has_snb_msrs(family, model);
4658
4659         if (do_snb_cstates)
4660                 BIC_PRESENT(BIC_CPU_c7);
4661
4662         do_irtl_snb = has_snb_msrs(family, model);
4663         if (do_snb_cstates && (pkg_cstate_limit >= PCL__2))
4664                 BIC_PRESENT(BIC_Pkgpc2);
4665         if (pkg_cstate_limit >= PCL__3)
4666                 BIC_PRESENT(BIC_Pkgpc3);
4667         if (pkg_cstate_limit >= PCL__6)
4668                 BIC_PRESENT(BIC_Pkgpc6);
4669         if (do_snb_cstates && (pkg_cstate_limit >= PCL__7))
4670                 BIC_PRESENT(BIC_Pkgpc7);
4671         if (has_slv_msrs(family, model)) {
4672                 BIC_NOT_PRESENT(BIC_Pkgpc2);
4673                 BIC_NOT_PRESENT(BIC_Pkgpc3);
4674                 BIC_PRESENT(BIC_Pkgpc6);
4675                 BIC_NOT_PRESENT(BIC_Pkgpc7);
4676                 BIC_PRESENT(BIC_Mod_c6);
4677                 use_c1_residency_msr = 1;
4678         }
4679         if (is_dnv(family, model)) {
4680                 BIC_PRESENT(BIC_CPU_c1);
4681                 BIC_NOT_PRESENT(BIC_CPU_c3);
4682                 BIC_NOT_PRESENT(BIC_Pkgpc3);
4683                 BIC_NOT_PRESENT(BIC_CPU_c7);
4684                 BIC_NOT_PRESENT(BIC_Pkgpc7);
4685                 use_c1_residency_msr = 1;
4686         }
4687         if (is_skx(family, model)) {
4688                 BIC_NOT_PRESENT(BIC_CPU_c3);
4689                 BIC_NOT_PRESENT(BIC_Pkgpc3);
4690                 BIC_NOT_PRESENT(BIC_CPU_c7);
4691                 BIC_NOT_PRESENT(BIC_Pkgpc7);
4692         }
4693         if (is_bdx(family, model)) {
4694                 BIC_NOT_PRESENT(BIC_CPU_c7);
4695                 BIC_NOT_PRESENT(BIC_Pkgpc7);
4696         }
4697         if (has_hsw_msrs(family, model)) {
4698                 BIC_PRESENT(BIC_Pkgpc8);
4699                 BIC_PRESENT(BIC_Pkgpc9);
4700                 BIC_PRESENT(BIC_Pkgpc10);
4701         }
4702         do_irtl_hsw = has_hsw_msrs(family, model);
4703         if (has_skl_msrs(family, model)) {
4704                 BIC_PRESENT(BIC_Totl_c0);
4705                 BIC_PRESENT(BIC_Any_c0);
4706                 BIC_PRESENT(BIC_GFX_c0);
4707                 BIC_PRESENT(BIC_CPUGFX);
4708         }
4709         do_slm_cstates = is_slm(family, model);
4710         do_knl_cstates  = is_knl(family, model);
4711         do_cnl_cstates = is_cnl(family, model);
4712
4713         if (!quiet)
4714                 decode_misc_pwr_mgmt_msr();
4715
4716         if (!quiet && has_slv_msrs(family, model))
4717                 decode_c6_demotion_policy_msr();
4718
4719         rapl_probe(family, model);
4720         perf_limit_reasons_probe(family, model);
4721         automatic_cstate_conversion_probe(family, model);
4722
4723         if (!quiet)
4724                 dump_cstate_pstate_config_info(family, model);
4725
4726         if (!quiet)
4727                 dump_sysfs_cstate_config();
4728         if (!quiet)
4729                 dump_sysfs_pstate_config();
4730
4731         if (has_skl_msrs(family, model))
4732                 calculate_tsc_tweak();
4733
4734         if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
4735                 BIC_PRESENT(BIC_GFX_rc6);
4736
4737         if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
4738                 BIC_PRESENT(BIC_GFXMHz);
4739
4740         if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
4741                 BIC_PRESENT(BIC_CPU_LPI);
4742         else
4743                 BIC_NOT_PRESENT(BIC_CPU_LPI);
4744
4745         if (!access(sys_lpi_file_sysfs, R_OK)) {
4746                 sys_lpi_file = sys_lpi_file_sysfs;
4747                 BIC_PRESENT(BIC_SYS_LPI);
4748         } else if (!access(sys_lpi_file_debugfs, R_OK)) {
4749                 sys_lpi_file = sys_lpi_file_debugfs;
4750                 BIC_PRESENT(BIC_SYS_LPI);
4751         } else {
4752                 sys_lpi_file_sysfs = NULL;
4753                 BIC_NOT_PRESENT(BIC_SYS_LPI);
4754         }
4755
4756         if (!quiet)
4757                 decode_misc_feature_control();
4758
4759         return;
4760 }
4761
/*
 * dir_filter() - directory-entry filter for /dev/cpu/
 * @dirp: directory entry to test
 *
 * Return: 1 for names that start with a decimal digit, 0 otherwise,
 * ie. filter out ".", "..", "microcode".
 */
int dir_filter(const struct dirent *dirp)
{
	/*
	 * d_name[] is plain char, which may be signed.  isdigit() is only
	 * defined for values representable as unsigned char (or EOF), so
	 * convert before classifying.
	 */
	return isdigit((unsigned char)dirp->d_name[0]) ? 1 : 0;
}
4773
/*
 * open_dev_cpu_msr() - placeholder that performs no work
 * @dummy1: ignored
 *
 * Return: always 0.
 */
int open_dev_cpu_msr(int dummy1)
{
	(void)dummy1;	/* intentionally unused */

	return 0;
}
4778
4779 void topology_probe()
4780 {
4781         int i;
4782         int max_core_id = 0;
4783         int max_package_id = 0;
4784         int max_siblings = 0;
4785
4786         /* Initialize num_cpus, max_cpu_num */
4787         set_max_cpu_num();
4788         topo.num_cpus = 0;
4789         for_all_proc_cpus(count_cpus);
4790         if (!summary_only && topo.num_cpus > 1)
4791                 BIC_PRESENT(BIC_CPU);
4792
4793         if (debug > 1)
4794                 fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
4795
4796         cpus = calloc(1, (topo.max_cpu_num  + 1) * sizeof(struct cpu_topology));
4797         if (cpus == NULL)
4798                 err(1, "calloc cpus");
4799
4800         /*
4801          * Allocate and initialize cpu_present_set
4802          */
4803         cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1));
4804         if (cpu_present_set == NULL)
4805                 err(3, "CPU_ALLOC");
4806         cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
4807         CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
4808         for_all_proc_cpus(mark_cpu_present);
4809
4810         /*
4811          * Validate that all cpus in cpu_subset are also in cpu_present_set
4812          */
4813         for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) {
4814                 if (CPU_ISSET_S(i, cpu_subset_size, cpu_subset))
4815                         if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set))
4816                                 err(1, "cpu%d not present", i);
4817         }
4818
4819         /*
4820          * Allocate and initialize cpu_affinity_set
4821          */
4822         cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
4823         if (cpu_affinity_set == NULL)
4824                 err(3, "CPU_ALLOC");
4825         cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
4826         CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
4827
4828         for_all_proc_cpus(init_thread_id);
4829
4830         /*
4831          * For online cpus
4832          * find max_core_id, max_package_id
4833          */
4834         for (i = 0; i <= topo.max_cpu_num; ++i) {
4835                 int siblings;
4836
4837                 if (cpu_is_not_present(i)) {
4838                         if (debug > 1)
4839                                 fprintf(outf, "cpu%d NOT PRESENT\n", i);
4840                         continue;
4841                 }
4842
4843                 cpus[i].logical_cpu_id = i;
4844
4845                 /* get package information */
4846                 cpus[i].physical_package_id = get_physical_package_id(i);
4847                 if (cpus[i].physical_package_id > max_package_id)
4848                         max_package_id = cpus[i].physical_package_id;
4849
4850                 /* get numa node information */
4851                 cpus[i].physical_node_id = get_physical_node_id(&cpus[i]);
4852                 if (cpus[i].physical_node_id > topo.max_node_num)
4853                         topo.max_node_num = cpus[i].physical_node_id;
4854
4855                 /* get core information */
4856                 cpus[i].physical_core_id = get_core_id(i);
4857                 if (cpus[i].physical_core_id > max_core_id)
4858                         max_core_id = cpus[i].physical_core_id;
4859
4860                 /* get thread information */
4861                 siblings = get_thread_siblings(&cpus[i]);
4862                 if (siblings > max_siblings)
4863                         max_siblings = siblings;
4864                 if (cpus[i].thread_id == 0)
4865                         topo.num_cores++;
4866         }
4867
4868         topo.cores_per_node = max_core_id + 1;
4869         if (debug > 1)
4870                 fprintf(outf, "max_core_id %d, sizing for %d cores per package\n",
4871                         max_core_id, topo.cores_per_node);
4872         if (!summary_only && topo.cores_per_node > 1)
4873                 BIC_PRESENT(BIC_Core);
4874
4875         topo.num_packages = max_package_id + 1;
4876         if (debug > 1)
4877                 fprintf(outf, "max_package_id %d, sizing for %d packages\n",
4878                         max_package_id, topo.num_packages);
4879         if (!summary_only && topo.num_packages > 1)
4880                 BIC_PRESENT(BIC_Package);
4881
4882         set_node_data();
4883         if (debug > 1)
4884                 fprintf(outf, "nodes_per_pkg %d\n", topo.nodes_per_pkg);
4885         if (!summary_only && topo.nodes_per_pkg > 1)
4886                 BIC_PRESENT(BIC_Node);
4887
4888         topo.threads_per_core = max_siblings;
4889         if (debug > 1)
4890                 fprintf(outf, "max_siblings %d\n", max_siblings);
4891
4892         if (debug < 1)
4893                 return;
4894
4895         for (i = 0; i <= topo.max_cpu_num; ++i) {
4896                 fprintf(outf,
4897                         "cpu %d pkg %d node %d lnode %d core %d thread %d\n",
4898                         i, cpus[i].physical_package_id,
4899                         cpus[i].physical_node_id,
4900                         cpus[i].logical_node_id,
4901                         cpus[i].physical_core_id,
4902                         cpus[i].thread_id);
4903         }
4904
4905 }
4906
4907 void
4908 allocate_counters(struct thread_data **t, struct core_data **c,
4909                   struct pkg_data **p)
4910 {
4911         int i;
4912         int num_cores = topo.cores_per_node * topo.nodes_per_pkg *
4913                         topo.num_packages;
4914         int num_threads = topo.threads_per_core * num_cores;
4915
4916         *t = calloc(num_threads, sizeof(struct thread_data));
4917         if (*t == NULL)
4918                 goto error;
4919
4920         for (i = 0; i < num_threads; i++)
4921                 (*t)[i].cpu_id = -1;
4922
4923         *c = calloc(num_cores, sizeof(struct core_data));
4924         if (*c == NULL)
4925                 goto error;
4926
4927         for (i = 0; i < num_cores; i++)
4928                 (*c)[i].core_id = -1;
4929
4930         *p = calloc(topo.num_packages, sizeof(struct pkg_data));
4931         if (*p == NULL)
4932                 goto error;
4933
4934         for (i = 0; i < topo.num_packages; i++)
4935                 (*p)[i].package_id = i;
4936
4937         return;
4938 error:
4939         err(1, "calloc counters");
4940 }
4941 /*
4942  * init_counter()
4943  *
4944  * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
4945  */
4946 void init_counter(struct thread_data *thread_base, struct core_data *core_base,
4947         struct pkg_data *pkg_base, int cpu_id)
4948 {
4949         int pkg_id = cpus[cpu_id].physical_package_id;
4950         int node_id = cpus[cpu_id].logical_node_id;
4951         int core_id = cpus[cpu_id].physical_core_id;
4952         int thread_id = cpus[cpu_id].thread_id;
4953         struct thread_data *t;
4954         struct core_data *c;
4955         struct pkg_data *p;
4956
4957
4958         /* Workaround for systems where physical_node_id==-1
4959          * and logical_node_id==(-1 - topo.num_cpus)
4960          */
4961         if (node_id < 0)
4962                 node_id = 0;
4963
4964         t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id);
4965         c = GET_CORE(core_base, core_id, node_id, pkg_id);
4966         p = GET_PKG(pkg_base, pkg_id);
4967
4968         t->cpu_id = cpu_id;
4969         if (thread_id == 0) {
4970                 t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
4971                 if (cpu_is_first_core_in_package(cpu_id))
4972                         t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
4973         }
4974
4975         c->core_id = core_id;
4976         p->package_id = pkg_id;
4977 }
4978
4979
4980 int initialize_counters(int cpu_id)
4981 {
4982         init_counter(EVEN_COUNTERS, cpu_id);
4983         init_counter(ODD_COUNTERS, cpu_id);
4984         return 0;
4985 }
4986
4987 void allocate_output_buffer()
4988 {
4989         output_buffer = calloc(1, (1 + topo.num_cpus) * 2048);
4990         outp = output_buffer;
4991         if (outp == NULL)
4992                 err(-1, "calloc output buffer");
4993 }
4994 void allocate_fd_percpu(void)
4995 {
4996         fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
4997         if (fd_percpu == NULL)
4998                 err(-1, "calloc fd_percpu");
4999 }
5000 void allocate_irq_buffers(void)
5001 {
5002         irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int));
5003         if (irq_column_2_cpu == NULL)
5004                 err(-1, "calloc %d", topo.num_cpus);
5005
5006         irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
5007         if (irqs_per_cpu == NULL)
5008                 err(-1, "calloc %d", topo.max_cpu_num + 1);
5009 }
5010 void setup_all_buffers(void)
5011 {
5012         topology_probe();
5013         allocate_irq_buffers();
5014         allocate_fd_percpu();
5015         allocate_counters(&thread_even, &core_even, &package_even);
5016         allocate_counters(&thread_odd, &core_odd, &package_odd);
5017         allocate_output_buffer();
5018         for_all_proc_cpus(initialize_counters);
5019 }
5020
5021 void set_base_cpu(void)
5022 {
5023         base_cpu = sched_getcpu();
5024         if (base_cpu < 0)
5025                 err(-ENODEV, "No valid cpus found");
5026
5027         if (debug > 1)
5028                 fprintf(outf, "base_cpu = %d\n", base_cpu);
5029 }
5030
5031 void turbostat_init()
5032 {
5033         setup_all_buffers();
5034         set_base_cpu();
5035         check_dev_msr();
5036         check_permissions();
5037         process_cpuid();
5038
5039
5040         if (!quiet)
5041                 for_all_cpus(print_hwp, ODD_COUNTERS);
5042
5043         if (!quiet)
5044                 for_all_cpus(print_epb, ODD_COUNTERS);
5045
5046         if (!quiet)
5047                 for_all_cpus(print_perf_limit, ODD_COUNTERS);
5048
5049         if (!quiet)
5050                 for_all_cpus(print_rapl, ODD_COUNTERS);
5051
5052         for_all_cpus(set_temperature_target, ODD_COUNTERS);
5053
5054         if (!quiet)
5055                 for_all_cpus(print_thermal, ODD_COUNTERS);
5056
5057         if (!quiet && do_irtl_snb)
5058                 print_irtl();
5059 }
5060
5061 int fork_it(char **argv)
5062 {
5063         pid_t child_pid;
5064         int status;
5065
5066         snapshot_proc_sysfs_files();
5067         status = for_all_cpus(get_counters, EVEN_COUNTERS);
5068         first_counter_read = 0;
5069         if (status)
5070                 exit(status);
5071         /* clear affinity side-effect of get_counters() */
5072         sched_setaffinity(0, cpu_present_setsize, cpu_present_set);
5073         gettimeofday(&tv_even, (struct timezone *)NULL);
5074
5075         child_pid = fork();
5076         if (!child_pid) {
5077                 /* child */
5078                 execvp(argv[0], argv);
5079                 err(errno, "exec %s", argv[0]);
5080         } else {
5081
5082                 /* parent */
5083                 if (child_pid == -1)
5084                         err(1, "fork");
5085
5086                 signal(SIGINT, SIG_IGN);
5087                 signal(SIGQUIT, SIG_IGN);
5088                 if (waitpid(child_pid, &status, 0) == -1)
5089                         err(status, "waitpid");
5090
5091                 if (WIFEXITED(status))
5092                         status = WEXITSTATUS(status);
5093         }
5094         /*
5095          * n.b. fork_it() does not check for errors from for_all_cpus()
5096          * because re-starting is problematic when forking
5097          */
5098         snapshot_proc_sysfs_files();
5099         for_all_cpus(get_counters, ODD_COUNTERS);
5100         gettimeofday(&tv_odd, (struct timezone *)NULL);
5101         timersub(&tv_odd, &tv_even, &tv_delta);
5102         if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS))
5103                 fprintf(outf, "%s: Counter reset detected\n", progname);
5104         else {
5105                 compute_average(EVEN_COUNTERS);
5106                 format_all_counters(EVEN_COUNTERS);
5107         }
5108
5109         fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);
5110
5111         flush_output_stderr();
5112
5113         return status;
5114 }
5115
5116 int get_and_dump_counters(void)
5117 {
5118         int status;
5119
5120         snapshot_proc_sysfs_files();
5121         status = for_all_cpus(get_counters, ODD_COUNTERS);
5122         if (status)
5123                 return status;
5124
5125         status = for_all_cpus(dump_counters, ODD_COUNTERS);
5126         if (status)
5127                 return status;
5128
5129         flush_output_stdout();
5130
5131         return status;
5132 }
5133
5134 void print_version() {
5135         fprintf(outf, "turbostat version 18.07.27"
5136                 " - Len Brown <lenb@kernel.org>\n");
5137 }
5138
5139 int add_counter(unsigned int msr_num, char *path, char *name,
5140         unsigned int width, enum counter_scope scope,
5141         enum counter_type type, enum counter_format format, int flags)
5142 {
5143         struct msr_counter *msrp;
5144
5145         msrp = calloc(1, sizeof(struct msr_counter));
5146         if (msrp == NULL) {
5147                 perror("calloc");
5148                 exit(1);
5149         }
5150
5151         msrp->msr_num = msr_num;
5152         strncpy(msrp->name, name, NAME_BYTES - 1);
5153         if (path)
5154                 strncpy(msrp->path, path, PATH_BYTES - 1);
5155         msrp->width = width;
5156         msrp->type = type;
5157         msrp->format = format;
5158         msrp->flags = flags;
5159
5160         switch (scope) {
5161
5162         case SCOPE_CPU:
5163                 msrp->next = sys.tp;
5164                 sys.tp = msrp;
5165                 sys.added_thread_counters++;
5166                 if (sys.added_thread_counters > MAX_ADDED_THREAD_COUNTERS) {
5167                         fprintf(stderr, "exceeded max %d added thread counters\n",
5168                                 MAX_ADDED_COUNTERS);
5169                         exit(-1);
5170                 }
5171                 break;
5172
5173         case SCOPE_CORE:
5174                 msrp->next = sys.cp;
5175                 sys.cp = msrp;
5176                 sys.added_core_counters++;
5177                 if (sys.added_core_counters > MAX_ADDED_COUNTERS) {
5178                         fprintf(stderr, "exceeded max %d added core counters\n",
5179                                 MAX_ADDED_COUNTERS);
5180                         exit(-1);
5181                 }
5182                 break;
5183
5184         case SCOPE_PACKAGE:
5185                 msrp->next = sys.pp;
5186                 sys.pp = msrp;
5187                 sys.added_package_counters++;
5188                 if (sys.added_package_counters > MAX_ADDED_COUNTERS) {
5189                         fprintf(stderr, "exceeded max %d added package counters\n",
5190                                 MAX_ADDED_COUNTERS);
5191                         exit(-1);
5192                 }
5193                 break;
5194         }
5195
5196         return 0;
5197 }
5198
5199 void parse_add_command(char *add_command)
5200 {
5201         int msr_num = 0;
5202         char *path = NULL;
5203         char name_buffer[NAME_BYTES] = "";
5204         int width = 64;
5205         int fail = 0;
5206         enum counter_scope scope = SCOPE_CPU;
5207         enum counter_type type = COUNTER_CYCLES;
5208         enum counter_format format = FORMAT_DELTA;
5209
5210         while (add_command) {
5211
5212                 if (sscanf(add_command, "msr0x%x", &msr_num) == 1)
5213                         goto next;
5214
5215                 if (sscanf(add_command, "msr%d", &msr_num) == 1)
5216                         goto next;
5217
5218                 if (*add_command == '/') {
5219                         path = add_command;
5220                         goto next;
5221                 }
5222
5223                 if (sscanf(add_command, "u%d", &width) == 1) {
5224                         if ((width == 32) || (width == 64))
5225                                 goto next;
5226                         width = 64;
5227                 }
5228                 if (!strncmp(add_command, "cpu", strlen("cpu"))) {
5229                         scope = SCOPE_CPU;
5230                         goto next;
5231                 }
5232                 if (!strncmp(add_command, "core", strlen("core"))) {
5233                         scope = SCOPE_CORE;
5234                         goto next;
5235                 }
5236                 if (!strncmp(add_command, "package", strlen("package"))) {
5237                         scope = SCOPE_PACKAGE;
5238                         goto next;
5239                 }
5240                 if (!strncmp(add_command, "cycles", strlen("cycles"))) {
5241                         type = COUNTER_CYCLES;
5242                         goto next;
5243                 }
5244                 if (!strncmp(add_command, "seconds", strlen("seconds"))) {
5245                         type = COUNTER_SECONDS;
5246                         goto next;
5247                 }
5248                 if (!strncmp(add_command, "usec", strlen("usec"))) {
5249                         type = COUNTER_USEC;
5250                         goto next;
5251                 }
5252                 if (!strncmp(add_command, "raw", strlen("raw"))) {
5253                         format = FORMAT_RAW;
5254                         goto next;
5255                 }
5256                 if (!strncmp(add_command, "delta", strlen("delta"))) {
5257                         format = FORMAT_DELTA;
5258                         goto next;
5259                 }
5260                 if (!strncmp(add_command, "percent", strlen("percent"))) {
5261                         format = FORMAT_PERCENT;
5262                         goto next;
5263                 }
5264
5265                 if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) {        /* 18 < NAME_BYTES */
5266                         char *eos;
5267
5268                         eos = strchr(name_buffer, ',');
5269                         if (eos)
5270                                 *eos = '\0';
5271                         goto next;
5272                 }
5273
5274 next:
5275                 add_command = strchr(add_command, ',');
5276                 if (add_command) {
5277                         *add_command = '\0';
5278                         add_command++;
5279                 }
5280
5281         }
5282         if ((msr_num == 0) && (path == NULL)) {
5283                 fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter ) required\n");
5284                 fail++;
5285         }
5286
5287         /* generate default column header */
5288         if (*name_buffer == '\0') {
5289                 if (width == 32)
5290                         sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
5291                 else
5292                         sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
5293         }
5294
5295         if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0))
5296                 fail++;
5297
5298         if (fail) {
5299                 help();
5300                 exit(1);
5301         }
5302 }
5303
5304 int is_deferred_skip(char *name)
5305 {
5306         int i;
5307
5308         for (i = 0; i < deferred_skip_index; ++i)
5309                 if (!strcmp(name, deferred_skip_names[i]))
5310                         return 1;
5311         return 0;
5312 }
5313
5314 void probe_sysfs(void)
5315 {
5316         char path[64];
5317         char name_buf[16];
5318         FILE *input;
5319         int state;
5320         char *sp;
5321
5322         if (!DO_BIC(BIC_sysfs))
5323                 return;
5324
5325         for (state = 10; state >= 0; --state) {
5326
5327                 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
5328                         base_cpu, state);
5329                 input = fopen(path, "r");
5330                 if (input == NULL)
5331                         continue;
5332                 fgets(name_buf, sizeof(name_buf), input);
5333
5334                  /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
5335                 sp = strchr(name_buf, '-');
5336                 if (!sp)
5337                         sp = strchrnul(name_buf, '\n');
5338                 *sp = '%';
5339                 *(sp + 1) = '\0';
5340
5341                 fclose(input);
5342
5343                 sprintf(path, "cpuidle/state%d/time", state);
5344
5345                 if (is_deferred_skip(name_buf))
5346                         continue;
5347
5348                 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC,
5349                                 FORMAT_PERCENT, SYSFS_PERCPU);
5350         }
5351
5352         for (state = 10; state >= 0; --state) {
5353
5354                 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
5355                         base_cpu, state);
5356                 input = fopen(path, "r");
5357                 if (input == NULL)
5358                         continue;
5359                 fgets(name_buf, sizeof(name_buf), input);
5360                  /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
5361                 sp = strchr(name_buf, '-');
5362                 if (!sp)
5363                         sp = strchrnul(name_buf, '\n');
5364                 *sp = '\0';
5365                 fclose(input);
5366
5367                 sprintf(path, "cpuidle/state%d/usage", state);
5368
5369                 if (is_deferred_skip(name_buf))
5370                         continue;
5371
5372                 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS,
5373                                 FORMAT_DELTA, SYSFS_PERCPU);
5374         }
5375
5376 }
5377
5378
5379 /*
5380  * parse cpuset with following syntax
5381  * 1,2,4..6,8-10 and set bits in cpu_subset
5382  */
5383 void parse_cpu_command(char *optarg)
5384 {
5385         unsigned int start, end;
5386         char *next;
5387
5388         if (!strcmp(optarg, "core")) {
5389                 if (cpu_subset)
5390                         goto error;
5391                 show_core_only++;
5392                 return;
5393         }
5394         if (!strcmp(optarg, "package")) {
5395                 if (cpu_subset)
5396                         goto error;
5397                 show_pkg_only++;
5398                 return;
5399         }
5400         if (show_core_only || show_pkg_only)
5401                 goto error;
5402
5403         cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS);
5404         if (cpu_subset == NULL)
5405                 err(3, "CPU_ALLOC");
5406         cpu_subset_size = CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS);
5407
5408         CPU_ZERO_S(cpu_subset_size, cpu_subset);
5409
5410         next = optarg;
5411
5412         while (next && *next) {
5413
5414                 if (*next == '-')       /* no negative cpu numbers */
5415                         goto error;
5416
5417                 start = strtoul(next, &next, 10);
5418
5419                 if (start >= CPU_SUBSET_MAXCPUS)
5420                         goto error;
5421                 CPU_SET_S(start, cpu_subset_size, cpu_subset);
5422
5423                 if (*next == '\0')
5424                         break;
5425
5426                 if (*next == ',') {
5427                         next += 1;
5428                         continue;
5429                 }
5430
5431                 if (*next == '-') {
5432                         next += 1;      /* start range */
5433                 } else if (*next == '.') {
5434                         next += 1;
5435                         if (*next == '.')
5436                                 next += 1;      /* start range */
5437                         else
5438                                 goto error;
5439                 }
5440
5441                 end = strtoul(next, &next, 10);
5442                 if (end <= start)
5443                         goto error;
5444
5445                 while (++start <= end) {
5446                         if (start >= CPU_SUBSET_MAXCPUS)
5447                                 goto error;
5448                         CPU_SET_S(start, cpu_subset_size, cpu_subset);
5449                 }
5450
5451                 if (*next == ',')
5452                         next += 1;
5453                 else if (*next != '\0')
5454                         goto error;
5455         }
5456
5457         return;
5458
5459 error:
5460         fprintf(stderr, "\"--cpu %s\" malformed\n", optarg);
5461         help();
5462         exit(-1);
5463 }
5464
5465
5466 void cmdline(int argc, char **argv)
5467 {
5468         int opt;
5469         int option_index = 0;
5470         static struct option long_options[] = {
5471                 {"add",         required_argument,      0, 'a'},
5472                 {"cpu",         required_argument,      0, 'c'},
5473                 {"Dump",        no_argument,            0, 'D'},
5474                 {"debug",       no_argument,            0, 'd'},        /* internal, not documented */
5475                 {"enable",      required_argument,      0, 'e'},
5476                 {"interval",    required_argument,      0, 'i'},
5477                 {"num_iterations",      required_argument,      0, 'n'},
5478                 {"help",        no_argument,            0, 'h'},
5479                 {"hide",        required_argument,      0, 'H'},        // meh, -h taken by --help
5480                 {"Joules",      no_argument,            0, 'J'},
5481                 {"list",        no_argument,            0, 'l'},
5482                 {"out",         required_argument,      0, 'o'},
5483                 {"quiet",       no_argument,            0, 'q'},
5484                 {"show",        required_argument,      0, 's'},
5485                 {"Summary",     no_argument,            0, 'S'},
5486                 {"TCC",         required_argument,      0, 'T'},
5487                 {"version",     no_argument,            0, 'v' },
5488                 {0,             0,                      0,  0 }
5489         };
5490
5491         progname = argv[0];
5492
5493         while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v",
5494                                 long_options, &option_index)) != -1) {
5495                 switch (opt) {
5496                 case 'a':
5497                         parse_add_command(optarg);
5498                         break;
5499                 case 'c':
5500                         parse_cpu_command(optarg);
5501                         break;
5502                 case 'D':
5503                         dump_only++;
5504                         break;
5505                 case 'e':
5506                         /* --enable specified counter */
5507                         bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST);
5508                         break;
5509                 case 'd':
5510                         debug++;
5511                         ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
5512                         break;
5513                 case 'H':
5514                         /*
5515                          * --hide: do not show those specified
5516                          *  multiple invocations simply clear more bits in enabled mask
5517                          */
5518                         bic_enabled &= ~bic_lookup(optarg, HIDE_LIST);
5519                         break;
5520                 case 'h':
5521                 default:
5522                         help();
5523                         exit(1);
5524                 case 'i':
5525                         {
5526                                 double interval = strtod(optarg, NULL);
5527
5528                                 if (interval < 0.001) {
5529                                         fprintf(outf, "interval %f seconds is too small\n",
5530                                                 interval);
5531                                         exit(2);
5532                                 }
5533
5534                                 interval_tv.tv_sec = interval_ts.tv_sec = interval;
5535                                 interval_tv.tv_usec = (interval - interval_tv.tv_sec) * 1000000;
5536                                 interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
5537                         }
5538                         break;
5539                 case 'J':
5540                         rapl_joules++;
5541                         break;
5542                 case 'l':
5543                         ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
5544                         list_header_only++;
5545                         quiet++;
5546                         break;
5547                 case 'o':
5548                         outf = fopen_or_die(optarg, "w");
5549                         break;
5550                 case 'q':
5551                         quiet = 1;
5552                         break;
5553                 case 'n':
5554                         num_iterations = strtod(optarg, NULL);
5555
5556                         if (num_iterations <= 0) {
5557                                 fprintf(outf, "iterations %d should be positive number\n",
5558                                         num_iterations);
5559                                 exit(2);
5560                         }
5561                         break;
5562                 case 's':
5563                         /*
5564                          * --show: show only those specified
5565                          *  The 1st invocation will clear and replace the enabled mask
5566                          *  subsequent invocations can add to it.
5567                          */
5568                         if (shown == 0)
5569                                 bic_enabled = bic_lookup(optarg, SHOW_LIST);
5570                         else
5571                                 bic_enabled |= bic_lookup(optarg, SHOW_LIST);
5572                         shown = 1;
5573                         break;
5574                 case 'S':
5575                         summary_only++;
5576                         break;
5577                 case 'T':
5578                         tcc_activation_temp_override = atoi(optarg);
5579                         break;
5580                 case 'v':
5581                         print_version();
5582                         exit(0);
5583                         break;
5584                 }
5585         }
5586 }
5587
5588 int main(int argc, char **argv)
5589 {
5590         outf = stderr;
5591         cmdline(argc, argv);
5592
5593         if (!quiet)
5594                 print_version();
5595
5596         probe_sysfs();
5597
5598         turbostat_init();
5599
5600         /* dump counters and exit */
5601         if (dump_only)
5602                 return get_and_dump_counters();
5603
5604         /* list header and exit */
5605         if (list_header_only) {
5606                 print_header(",");
5607                 flush_output_stdout();
5608                 return 0;
5609         }
5610
5611         /*
5612          * if any params left, it must be a command to fork
5613          */
5614         if (argc - optind)
5615                 return fork_it(argv + optind);
5616         else
5617                 turbostat_loop();
5618
5619         return 0;
5620 }