GNU Linux-libre 4.19.286-gnu1
[releases.git] / drivers / idle / intel_idle.c
1 /*
2  * intel_idle.c - native hardware idle loop for modern Intel processors
3  *
4  * Copyright (c) 2013, Intel Corporation.
5  * Len Brown <len.brown@intel.com>
6  *
7  * This program is free software; you can redistribute it and/or modify it
8  * under the terms and conditions of the GNU General Public License,
9  * version 2, as published by the Free Software Foundation.
10  *
11  * This program is distributed in the hope it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
14  * more details.
15  *
16  * You should have received a copy of the GNU General Public License along with
17  * this program; if not, write to the Free Software Foundation, Inc.,
18  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
19  */
20
21 /*
22  * intel_idle is a cpuidle driver that loads on specific Intel processors
23  * in lieu of the legacy ACPI processor_idle driver.  The intent is to
24  * make Linux more efficient on these processors, as intel_idle knows
25  * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
26  */
27
28 /*
29  * Design Assumptions
30  *
31  * All CPUs have same idle states as boot CPU
32  *
33  * Chipset BM_STS (bus master status) bit is a NOP
34  *      for preventing entry into deep C-states
35  */
36
37 /*
38  * Known limitations
39  *
40  * The driver currently initializes for_each_online_cpu() upon modprobe.
41  * It is unaware of subsequent processors hot-added to the system.
42  * This means that if you boot with maxcpus=n and later online
43  * processors above n, those processors will use C1 only.
44  *
45  * ACPI has a .suspend hack to turn off deep c-states during suspend
46  * to avoid complications with the lapic timer workaround.
47  * Have not seen issues with suspend, but may need same workaround here.
48  *
49  */
50
51 /*
 * DEBUG is defined here, ahead of the #include lines; per the original
 * note this is what enables the pr_debug() statements.  Comment the
 * define out to silence them.
 */
52 #define DEBUG
53
/*
 * Prepends KBUILD_MODNAME and ": " to a format string by string-literal
 * concatenation.  Presumably consumed by the pr_*() logging macros, which
 * would explain why it sits before the #include lines - confirm in
 * <linux/printk.h>.
 */
54 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
55
56 #include <linux/kernel.h>
57 #include <linux/cpuidle.h>
58 #include <linux/tick.h>
59 #include <trace/events/power.h>
60 #include <linux/sched.h>
61 #include <linux/sched/smt.h>
62 #include <linux/notifier.h>
63 #include <linux/cpu.h>
64 #include <linux/moduleparam.h>
65 #include <asm/cpu_device_id.h>
66 #include <asm/intel-family.h>
67 #include <asm/nospec-branch.h>
68 #include <asm/mwait.h>
69 #include <asm/msr.h>
70
/* Driver version string; nothing in the visible part of this file reads it. */
71 #define INTEL_IDLE_VERSION "0.4.1"
72
/*
 * cpuidle driver object for this module.  Only .name and .owner are set
 * statically here.
 * NOTE(review): the state list is presumably filled in at init time from
 * one of the *_cstates tables - that code is outside the visible section.
 */
73 static struct cpuidle_driver intel_idle_driver = {
74         .name = "intel_idle",
75         .owner = THIS_MODULE,
76 };
77 /*
 * C-state limit, defaulting to CPUIDLE_STATE_MAX - 1.
 * intel_idle.max_cstate=0 disables driver.
 * NOTE(review): the module-parameter hookup and the place where the limit
 * is applied are not shown in this section.
 */
78 static int max_cstate = CPUIDLE_STATE_MAX - 1;
79
/*
 * NOTE(review): presumably caches the MWAIT sub-state information reported
 * by the CPU; it is only declared here and assigned elsewhere - confirm.
 */
80 static unsigned int mwait_substates;
81
/*
 * All-ones mask.  Presumably stored in lapic_timer_reliable_states to mark
 * the LAPIC timer reliable in every C-state - confirm at the assignment
 * site, which is outside the visible section.
 */
82 #define LAPIC_TIMER_ALWAYS_RELIABLE 0xFFFFFFFF
83 /*
 * Reliable LAPIC Timer States: a bitmask in which bit 1 stands for C1,
 * and so on for deeper states.
 */
84 static unsigned int lapic_timer_reliable_states = (1 << 1);      /* Default to only C1 */
85
/*
 * Bundles an idle-state table with a few tuning switches.
 * NOTE(review): presumably one instance exists per supported CPU model;
 * the code that creates and consumes these is outside the visible section,
 * and the notes on the two bool fields are inferred from their names only.
 */
86 struct idle_cpu {
87         struct cpuidle_state *state_table;      /* idle-state table to use */
88
89         /*
90          * Hardware C-state auto-demotion may not always be optimal.
91          * Indicate which enable bits to clear here.
92          */
93         unsigned long auto_demotion_disable_flags;
94         bool byt_auto_demotion_disable_flag;    /* presumably a Bay Trail-specific variant of the above - confirm */
95         bool disable_promotion_to_c1e;          /* presumably requests turning off C1 -> C1E promotion - confirm */
96 };
97
/*
 * NOTE(review): presumably the idle_cpu description chosen for the running
 * CPU; it is assigned outside the visible section.
 */
98 static const struct idle_cpu *icpu;
/* cpuidle_device storage, declared with the __percpu annotation. */
99 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
/*
 * Forward declarations: the state tables below store both entry points in
 * .enter / .enter_s2idle, and intel_idle_ibrs() calls intel_idle(), ahead of
 * the definitions (which are not in this section).
 */
100 static int intel_idle(struct cpuidle_device *dev,
101                         struct cpuidle_driver *drv, int index);
102 static void intel_idle_s2idle(struct cpuidle_device *dev,
103                               struct cpuidle_driver *drv, int index);
/* NOTE(review): presumably points at the *_cstates table selected at probe time - confirm. */
104 static struct cpuidle_state *cpuidle_state_table;
105
106 /*
107  * Set this flag for states where the HW flushes the TLB for us
108  * and so we don't need cross-calls to keep it consistent.
109  * If this flag is set, SW flushes the TLB, so even if the
110  * HW doesn't do the flushing, this flag is safe to use.
 *
 * The value 0x10000 is bit 16 of .flags, below the MWAIT hint that
 * MWAIT2flg() places in bits 31:24.
111  */
112 #define CPUIDLE_FLAG_TLB_FLUSHED        0x10000
113
/*
 * Disable IBRS across idle (when KERNEL_IBRS).
 *
 * Uses bit 17: bit 16 (0x10000) is CPUIDLE_FLAG_TLB_FLUSHED and bits 31:24
 * hold the MWAIT hint, so sharing either would make this flag
 * indistinguishable from them whenever .flags is tested.
 */
#define CPUIDLE_FLAG_IBRS               BIT(17)
119
/*
 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
 * the C-state (top nibble) and sub-state (bottom nibble)
 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
 *
 * We store the hint at the top of our "flags" for each state:
 * MWAIT2flg() masks the hint to 8 bits and moves it to bits 31:24,
 * flg2MWAIT() extracts it again.  Both macros parenthesize their
 * argument so that an expression argument is masked as a whole.
 */
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
#define MWAIT2flg(eax) (((eax) & 0xFF) << 24)
129
130 static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
131                                      struct cpuidle_driver *drv, int index)
132 {
133         bool smt_active = sched_smt_active();
134         u64 spec_ctrl = spec_ctrl_current();
135         int ret;
136
137         if (smt_active)
138                 wrmsrl(MSR_IA32_SPEC_CTRL, 0);
139
140         ret = intel_idle(dev, drv, index);
141
142         if (smt_active)
143                 wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);
144
145         return ret;
146 }
147
148 /*
149  * States are indexed by the cstate number,
150  * which is also the index into the MWAIT hint array.
151  * Thus C0 is a dummy.
 *
 * NOTE(review): the note above does not match the tables in this section -
 * each of them starts directly with its shallowest state at index 0 and
 * none contains a dummy C0 entry.
 *
 * nehalem_cstates - idle-state table with C1, C1E, C3 and C6.
 * Each entry packs its MWAIT hint into the top byte of .flags via
 * MWAIT2flg(); C3 and C6 also set CPUIDLE_FLAG_TLB_FLUSHED.  The entry
 * holding only .enter = NULL terminates the table.
 * NOTE(review): exit_latency / target_residency are presumably in
 * microseconds - not stated here.
152  */
153 static struct cpuidle_state nehalem_cstates[] = {
154         {
155                 .name = "C1",
156                 .desc = "MWAIT 0x00",
157                 .flags = MWAIT2flg(0x00),
158                 .exit_latency = 3,
159                 .target_residency = 6,
160                 .enter = &intel_idle,
161                 .enter_s2idle = intel_idle_s2idle, },
162         {
163                 .name = "C1E",
164                 .desc = "MWAIT 0x01",
165                 .flags = MWAIT2flg(0x01),
166                 .exit_latency = 10,
167                 .target_residency = 20,
168                 .enter = &intel_idle,
169                 .enter_s2idle = intel_idle_s2idle, },
170         {
171                 .name = "C3",
172                 .desc = "MWAIT 0x10",
173                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
174                 .exit_latency = 20,
175                 .target_residency = 80,
176                 .enter = &intel_idle,
177                 .enter_s2idle = intel_idle_s2idle, },
178         {
179                 .name = "C6",
180                 .desc = "MWAIT 0x20",
181                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
182                 .exit_latency = 200,
183                 .target_residency = 800,
184                 .enter = &intel_idle,
185                 .enter_s2idle = intel_idle_s2idle, },
186         {
187                 .enter = NULL }
188 };
189
/*
 * snb_cstates - idle-state table with C1, C1E, C3, C6 and C7.
 *
 * Each entry packs its MWAIT hint into the top byte of .flags via
 * MWAIT2flg(); C3 and deeper also set CPUIDLE_FLAG_TLB_FLUSHED.  The entry
 * holding only .enter = NULL terminates the table.
 * NOTE(review): "snb" presumably means Sandy Bridge, and exit_latency /
 * target_residency are presumably microseconds - neither is shown here.
 */
190 static struct cpuidle_state snb_cstates[] = {
191         {
192                 .name = "C1",
193                 .desc = "MWAIT 0x00",
194                 .flags = MWAIT2flg(0x00),
195                 .exit_latency = 2,
196                 .target_residency = 2,
197                 .enter = &intel_idle,
198                 .enter_s2idle = intel_idle_s2idle, },
199         {
200                 .name = "C1E",
201                 .desc = "MWAIT 0x01",
202                 .flags = MWAIT2flg(0x01),
203                 .exit_latency = 10,
204                 .target_residency = 20,
205                 .enter = &intel_idle,
206                 .enter_s2idle = intel_idle_s2idle, },
207         {
208                 .name = "C3",
209                 .desc = "MWAIT 0x10",
210                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
211                 .exit_latency = 80,
212                 .target_residency = 211,
213                 .enter = &intel_idle,
214                 .enter_s2idle = intel_idle_s2idle, },
215         {
216                 .name = "C6",
217                 .desc = "MWAIT 0x20",
218                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
219                 .exit_latency = 104,
220                 .target_residency = 345,
221                 .enter = &intel_idle,
222                 .enter_s2idle = intel_idle_s2idle, },
223         {
224                 .name = "C7",
225                 .desc = "MWAIT 0x30",
226                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
227                 .exit_latency = 109,
228                 .target_residency = 345,
229                 .enter = &intel_idle,
230                 .enter_s2idle = intel_idle_s2idle, },
231         {
232                 .enter = NULL }
233 };
234
/*
 * byt_cstates - idle-state table with C1, C6N, C6S, C7 and C7S.
 *
 * Each entry packs its MWAIT hint into the top byte of .flags via
 * MWAIT2flg(); everything deeper than C1 also sets
 * CPUIDLE_FLAG_TLB_FLUSHED.  The entry holding only .enter = NULL
 * terminates the table.  Note that C6N lists an exit_latency (300) larger
 * than its target_residency (275).
 * NOTE(review): "byt" presumably means Bay Trail, and exit_latency /
 * target_residency are presumably microseconds - neither is shown here.
 */
235 static struct cpuidle_state byt_cstates[] = {
236         {
237                 .name = "C1",
238                 .desc = "MWAIT 0x00",
239                 .flags = MWAIT2flg(0x00),
240                 .exit_latency = 1,
241                 .target_residency = 1,
242                 .enter = &intel_idle,
243                 .enter_s2idle = intel_idle_s2idle, },
244         {
245                 .name = "C6N",
246                 .desc = "MWAIT 0x58",
247                 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
248                 .exit_latency = 300,
249                 .target_residency = 275,
250                 .enter = &intel_idle,
251                 .enter_s2idle = intel_idle_s2idle, },
252         {
253                 .name = "C6S",
254                 .desc = "MWAIT 0x52",
255                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
256                 .exit_latency = 500,
257                 .target_residency = 560,
258                 .enter = &intel_idle,
259                 .enter_s2idle = intel_idle_s2idle, },
260         {
261                 .name = "C7",
262                 .desc = "MWAIT 0x60",
263                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
264                 .exit_latency = 1200,
265                 .target_residency = 4000,
266                 .enter = &intel_idle,
267                 .enter_s2idle = intel_idle_s2idle, },
268         {
269                 .name = "C7S",
270                 .desc = "MWAIT 0x64",
271                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
272                 .exit_latency = 10000,
273                 .target_residency = 20000,
274                 .enter = &intel_idle,
275                 .enter_s2idle = intel_idle_s2idle, },
276         {
277                 .enter = NULL }
278 };
279
/*
 * cht_cstates - idle-state table with C1, C6N, C6S, C7 and C7S.
 *
 * Same states and MWAIT hints as byt_cstates; the values differ only in the
 * exit_latency of C6N (80 here, 300 there) and C6S (200 here, 500 there).
 * Everything deeper than C1 sets CPUIDLE_FLAG_TLB_FLUSHED, and the entry
 * holding only .enter = NULL terminates the table.
 * NOTE(review): "cht" presumably means Cherry Trail, and exit_latency /
 * target_residency are presumably microseconds - neither is shown here.
 */
280 static struct cpuidle_state cht_cstates[] = {
281         {
282                 .name = "C1",
283                 .desc = "MWAIT 0x00",
284                 .flags = MWAIT2flg(0x00),
285                 .exit_latency = 1,
286                 .target_residency = 1,
287                 .enter = &intel_idle,
288                 .enter_s2idle = intel_idle_s2idle, },
289         {
290                 .name = "C6N",
291                 .desc = "MWAIT 0x58",
292                 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
293                 .exit_latency = 80,
294                 .target_residency = 275,
295                 .enter = &intel_idle,
296                 .enter_s2idle = intel_idle_s2idle, },
297         {
298                 .name = "C6S",
299                 .desc = "MWAIT 0x52",
300                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
301                 .exit_latency = 200,
302                 .target_residency = 560,
303                 .enter = &intel_idle,
304                 .enter_s2idle = intel_idle_s2idle, },
305         {
306                 .name = "C7",
307                 .desc = "MWAIT 0x60",
308                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
309                 .exit_latency = 1200,
310                 .target_residency = 4000,
311                 .enter = &intel_idle,
312                 .enter_s2idle = intel_idle_s2idle, },
313         {
314                 .name = "C7S",
315                 .desc = "MWAIT 0x64",
316                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
317                 .exit_latency = 10000,
318                 .target_residency = 20000,
319                 .enter = &intel_idle,
320                 .enter_s2idle = intel_idle_s2idle, },
321         {
322                 .enter = NULL }
323 };
324
/*
 * ivb_cstates - idle-state table with C1, C1E, C3, C6 and C7.
 *
 * Each entry packs its MWAIT hint into the top byte of .flags via
 * MWAIT2flg(); C3 and deeper also set CPUIDLE_FLAG_TLB_FLUSHED.  The entry
 * holding only .enter = NULL terminates the table.
 * NOTE(review): "ivb" presumably means Ivy Bridge, and exit_latency /
 * target_residency are presumably microseconds - neither is shown here.
 */
325 static struct cpuidle_state ivb_cstates[] = {
326         {
327                 .name = "C1",
328                 .desc = "MWAIT 0x00",
329                 .flags = MWAIT2flg(0x00),
330                 .exit_latency = 1,
331                 .target_residency = 1,
332                 .enter = &intel_idle,
333                 .enter_s2idle = intel_idle_s2idle, },
334         {
335                 .name = "C1E",
336                 .desc = "MWAIT 0x01",
337                 .flags = MWAIT2flg(0x01),
338                 .exit_latency = 10,
339                 .target_residency = 20,
340                 .enter = &intel_idle,
341                 .enter_s2idle = intel_idle_s2idle, },
342         {
343                 .name = "C3",
344                 .desc = "MWAIT 0x10",
345                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
346                 .exit_latency = 59,
347                 .target_residency = 156,
348                 .enter = &intel_idle,
349                 .enter_s2idle = intel_idle_s2idle, },
350         {
351                 .name = "C6",
352                 .desc = "MWAIT 0x20",
353                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
354                 .exit_latency = 80,
355                 .target_residency = 300,
356                 .enter = &intel_idle,
357                 .enter_s2idle = intel_idle_s2idle, },
358         {
359                 .name = "C7",
360                 .desc = "MWAIT 0x30",
361                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
362                 .exit_latency = 87,
363                 .target_residency = 300,
364                 .enter = &intel_idle,
365                 .enter_s2idle = intel_idle_s2idle, },
366         {
367                 .enter = NULL }
368 };
369
/*
 * ivt_cstates - idle-state table with C1, C1E, C3 and C6.
 *
 * Each entry packs its MWAIT hint into the top byte of .flags via
 * MWAIT2flg(); C3 and C6 also set CPUIDLE_FLAG_TLB_FLUSHED.  The entry
 * holding only .enter = NULL terminates the table.  ivt_cstates_4s and
 * ivt_cstates_8s list the same states with larger target_residency values
 * and a slightly larger C6 exit_latency.
 * NOTE(review): "ivt" presumably means Ivy Town (Ivy Bridge server), and
 * exit_latency / target_residency are presumably microseconds - neither
 * is shown here.
 */
370 static struct cpuidle_state ivt_cstates[] = {
371         {
372                 .name = "C1",
373                 .desc = "MWAIT 0x00",
374                 .flags = MWAIT2flg(0x00),
375                 .exit_latency = 1,
376                 .target_residency = 1,
377                 .enter = &intel_idle,
378                 .enter_s2idle = intel_idle_s2idle, },
379         {
380                 .name = "C1E",
381                 .desc = "MWAIT 0x01",
382                 .flags = MWAIT2flg(0x01),
383                 .exit_latency = 10,
384                 .target_residency = 80,
385                 .enter = &intel_idle,
386                 .enter_s2idle = intel_idle_s2idle, },
387         {
388                 .name = "C3",
389                 .desc = "MWAIT 0x10",
390                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
391                 .exit_latency = 59,
392                 .target_residency = 156,
393                 .enter = &intel_idle,
394                 .enter_s2idle = intel_idle_s2idle, },
395         {
396                 .name = "C6",
397                 .desc = "MWAIT 0x20",
398                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
399                 .exit_latency = 82,
400                 .target_residency = 300,
401                 .enter = &intel_idle,
402                 .enter_s2idle = intel_idle_s2idle, },
403         {
404                 .enter = NULL }
405 };
406
/*
 * ivt_cstates_4s - variant of ivt_cstates with C1, C1E, C3 and C6.
 *
 * Same states, hints and flags as ivt_cstates; target_residency is raised
 * for C1E (250), C3 (300) and C6 (400), and the C6 exit_latency is 84.
 * The entry holding only .enter = NULL terminates the table.
 * NOTE(review): the "_4s" suffix presumably refers to 4-socket systems, and
 * exit_latency / target_residency are presumably microseconds - neither
 * is shown here.
 */
407 static struct cpuidle_state ivt_cstates_4s[] = {
408         {
409                 .name = "C1",
410                 .desc = "MWAIT 0x00",
411                 .flags = MWAIT2flg(0x00),
412                 .exit_latency = 1,
413                 .target_residency = 1,
414                 .enter = &intel_idle,
415                 .enter_s2idle = intel_idle_s2idle, },
416         {
417                 .name = "C1E",
418                 .desc = "MWAIT 0x01",
419                 .flags = MWAIT2flg(0x01),
420                 .exit_latency = 10,
421                 .target_residency = 250,
422                 .enter = &intel_idle,
423                 .enter_s2idle = intel_idle_s2idle, },
424         {
425                 .name = "C3",
426                 .desc = "MWAIT 0x10",
427                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
428                 .exit_latency = 59,
429                 .target_residency = 300,
430                 .enter = &intel_idle,
431                 .enter_s2idle = intel_idle_s2idle, },
432         {
433                 .name = "C6",
434                 .desc = "MWAIT 0x20",
435                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
436                 .exit_latency = 84,
437                 .target_residency = 400,
438                 .enter = &intel_idle,
439                 .enter_s2idle = intel_idle_s2idle, },
440         {
441                 .enter = NULL }
442 };
443
/*
 * ivt_cstates_8s - variant of ivt_cstates with C1, C1E, C3 and C6.
 *
 * Same states, hints and flags as ivt_cstates; target_residency is raised
 * for C1E (500), C3 (600) and C6 (700), and the C6 exit_latency is 88.
 * The entry holding only .enter = NULL terminates the table.
 * NOTE(review): the "_8s" suffix presumably refers to 8-socket systems, and
 * exit_latency / target_residency are presumably microseconds - neither
 * is shown here.
 */
444 static struct cpuidle_state ivt_cstates_8s[] = {
445         {
446                 .name = "C1",
447                 .desc = "MWAIT 0x00",
448                 .flags = MWAIT2flg(0x00),
449                 .exit_latency = 1,
450                 .target_residency = 1,
451                 .enter = &intel_idle,
452                 .enter_s2idle = intel_idle_s2idle, },
453         {
454                 .name = "C1E",
455                 .desc = "MWAIT 0x01",
456                 .flags = MWAIT2flg(0x01),
457                 .exit_latency = 10,
458                 .target_residency = 500,
459                 .enter = &intel_idle,
460                 .enter_s2idle = intel_idle_s2idle, },
461         {
462                 .name = "C3",
463                 .desc = "MWAIT 0x10",
464                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
465                 .exit_latency = 59,
466                 .target_residency = 600,
467                 .enter = &intel_idle,
468                 .enter_s2idle = intel_idle_s2idle, },
469         {
470                 .name = "C6",
471                 .desc = "MWAIT 0x20",
472                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
473                 .exit_latency = 88,
474                 .target_residency = 700,
475                 .enter = &intel_idle,
476                 .enter_s2idle = intel_idle_s2idle, },
477         {
478                 .enter = NULL }
479 };
480
/*
 * hsw_cstates - idle-state table with C1, C1E, C3, C6, C7s, C8, C9, C10.
 *
 * Each entry packs its MWAIT hint into the top byte of .flags via
 * MWAIT2flg(); C3 and deeper also set CPUIDLE_FLAG_TLB_FLUSHED.  The entry
 * holding only .enter = NULL terminates the table.  The values match
 * bdw_cstates except for the C3 exit_latency (33 here, 40 there).
 * NOTE(review): "hsw" presumably means Haswell, and exit_latency /
 * target_residency are presumably microseconds - neither is shown here.
 */
481 static struct cpuidle_state hsw_cstates[] = {
482         {
483                 .name = "C1",
484                 .desc = "MWAIT 0x00",
485                 .flags = MWAIT2flg(0x00),
486                 .exit_latency = 2,
487                 .target_residency = 2,
488                 .enter = &intel_idle,
489                 .enter_s2idle = intel_idle_s2idle, },
490         {
491                 .name = "C1E",
492                 .desc = "MWAIT 0x01",
493                 .flags = MWAIT2flg(0x01),
494                 .exit_latency = 10,
495                 .target_residency = 20,
496                 .enter = &intel_idle,
497                 .enter_s2idle = intel_idle_s2idle, },
498         {
499                 .name = "C3",
500                 .desc = "MWAIT 0x10",
501                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
502                 .exit_latency = 33,
503                 .target_residency = 100,
504                 .enter = &intel_idle,
505                 .enter_s2idle = intel_idle_s2idle, },
506         {
507                 .name = "C6",
508                 .desc = "MWAIT 0x20",
509                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
510                 .exit_latency = 133,
511                 .target_residency = 400,
512                 .enter = &intel_idle,
513                 .enter_s2idle = intel_idle_s2idle, },
514         {
515                 .name = "C7s",
516                 .desc = "MWAIT 0x32",
517                 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
518                 .exit_latency = 166,
519                 .target_residency = 500,
520                 .enter = &intel_idle,
521                 .enter_s2idle = intel_idle_s2idle, },
522         {
523                 .name = "C8",
524                 .desc = "MWAIT 0x40",
525                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
526                 .exit_latency = 300,
527                 .target_residency = 900,
528                 .enter = &intel_idle,
529                 .enter_s2idle = intel_idle_s2idle, },
530         {
531                 .name = "C9",
532                 .desc = "MWAIT 0x50",
533                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
534                 .exit_latency = 600,
535                 .target_residency = 1800,
536                 .enter = &intel_idle,
537                 .enter_s2idle = intel_idle_s2idle, },
538         {
539                 .name = "C10",
540                 .desc = "MWAIT 0x60",
541                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
542                 .exit_latency = 2600,
543                 .target_residency = 7700,
544                 .enter = &intel_idle,
545                 .enter_s2idle = intel_idle_s2idle, },
546         {
547                 .enter = NULL }
548 };
/*
 * bdw_cstates - idle-state table with C1, C1E, C3, C6, C7s, C8, C9, C10.
 *
 * Each entry packs its MWAIT hint into the top byte of .flags via
 * MWAIT2flg(); C3 and deeper also set CPUIDLE_FLAG_TLB_FLUSHED.  The entry
 * holding only .enter = NULL terminates the table.  The values match
 * hsw_cstates except for the C3 exit_latency (40 here, 33 there).
 * NOTE(review): "bdw" presumably means Broadwell, and exit_latency /
 * target_residency are presumably microseconds - neither is shown here.
 */
549 static struct cpuidle_state bdw_cstates[] = {
550         {
551                 .name = "C1",
552                 .desc = "MWAIT 0x00",
553                 .flags = MWAIT2flg(0x00),
554                 .exit_latency = 2,
555                 .target_residency = 2,
556                 .enter = &intel_idle,
557                 .enter_s2idle = intel_idle_s2idle, },
558         {
559                 .name = "C1E",
560                 .desc = "MWAIT 0x01",
561                 .flags = MWAIT2flg(0x01),
562                 .exit_latency = 10,
563                 .target_residency = 20,
564                 .enter = &intel_idle,
565                 .enter_s2idle = intel_idle_s2idle, },
566         {
567                 .name = "C3",
568                 .desc = "MWAIT 0x10",
569                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
570                 .exit_latency = 40,
571                 .target_residency = 100,
572                 .enter = &intel_idle,
573                 .enter_s2idle = intel_idle_s2idle, },
574         {
575                 .name = "C6",
576                 .desc = "MWAIT 0x20",
577                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
578                 .exit_latency = 133,
579                 .target_residency = 400,
580                 .enter = &intel_idle,
581                 .enter_s2idle = intel_idle_s2idle, },
582         {
583                 .name = "C7s",
584                 .desc = "MWAIT 0x32",
585                 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
586                 .exit_latency = 166,
587                 .target_residency = 500,
588                 .enter = &intel_idle,
589                 .enter_s2idle = intel_idle_s2idle, },
590         {
591                 .name = "C8",
592                 .desc = "MWAIT 0x40",
593                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
594                 .exit_latency = 300,
595                 .target_residency = 900,
596                 .enter = &intel_idle,
597                 .enter_s2idle = intel_idle_s2idle, },
598         {
599                 .name = "C9",
600                 .desc = "MWAIT 0x50",
601                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
602                 .exit_latency = 600,
603                 .target_residency = 1800,
604                 .enter = &intel_idle,
605                 .enter_s2idle = intel_idle_s2idle, },
606         {
607                 .name = "C10",
608                 .desc = "MWAIT 0x60",
609                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
610                 .exit_latency = 2600,
611                 .target_residency = 7700,
612                 .enter = &intel_idle,
613                 .enter_s2idle = intel_idle_s2idle, },
614         {
615                 .enter = NULL }
616 };
617
/*
 * skl_cstates - idle-state table with C1, C1E, C3, C6, C7s, C8, C9, C10.
 *
 * Each entry packs its MWAIT hint into the top byte of .flags via
 * MWAIT2flg().  C3 and deeper set CPUIDLE_FLAG_TLB_FLUSHED; C6 and deeper
 * additionally set CPUIDLE_FLAG_IBRS.  The entry holding only
 * .enter = NULL terminates the table.
 * NOTE(review): "skl" presumably means Skylake, and exit_latency /
 * target_residency are presumably microseconds - neither is shown here.
 */
618 static struct cpuidle_state skl_cstates[] = {
619         {
620                 .name = "C1",
621                 .desc = "MWAIT 0x00",
622                 .flags = MWAIT2flg(0x00),
623                 .exit_latency = 2,
624                 .target_residency = 2,
625                 .enter = &intel_idle,
626                 .enter_s2idle = intel_idle_s2idle, },
627         {
628                 .name = "C1E",
629                 .desc = "MWAIT 0x01",
630                 .flags = MWAIT2flg(0x01),
631                 .exit_latency = 10,
632                 .target_residency = 20,
633                 .enter = &intel_idle,
634                 .enter_s2idle = intel_idle_s2idle, },
635         {
636                 .name = "C3",
637                 .desc = "MWAIT 0x10",
638                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
639                 .exit_latency = 70,
640                 .target_residency = 100,
641                 .enter = &intel_idle,
642                 .enter_s2idle = intel_idle_s2idle, },
643         {
644                 .name = "C6",
645                 .desc = "MWAIT 0x20",
646                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
647                 .exit_latency = 85,
648                 .target_residency = 200,
649                 .enter = &intel_idle,
650                 .enter_s2idle = intel_idle_s2idle, },
651         {
652                 .name = "C7s",
653                 .desc = "MWAIT 0x33",
654                 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
655                 .exit_latency = 124,
656                 .target_residency = 800,
657                 .enter = &intel_idle,
658                 .enter_s2idle = intel_idle_s2idle, },
659         {
660                 .name = "C8",
661                 .desc = "MWAIT 0x40",
662                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
663                 .exit_latency = 200,
664                 .target_residency = 800,
665                 .enter = &intel_idle,
666                 .enter_s2idle = intel_idle_s2idle, },
667         {
668                 .name = "C9",
669                 .desc = "MWAIT 0x50",
670                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
671                 .exit_latency = 480,
672                 .target_residency = 5000,
673                 .enter = &intel_idle,
674                 .enter_s2idle = intel_idle_s2idle, },
675         {
676                 .name = "C10",
677                 .desc = "MWAIT 0x60",
678                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
679                 .exit_latency = 890,
680                 .target_residency = 5000,
681                 .enter = &intel_idle,
682                 .enter_s2idle = intel_idle_s2idle, },
683         {
684                 .enter = NULL }
685 };
686
/*
 * skx_cstates - idle-state table with C1, C1E and C6.
 *
 * Each entry packs its MWAIT hint into the top byte of .flags via
 * MWAIT2flg(); C6 also sets CPUIDLE_FLAG_TLB_FLUSHED and CPUIDLE_FLAG_IBRS.
 * The entry holding only .enter = NULL terminates the table.
 * NOTE(review): "skx" presumably means Skylake server, and exit_latency /
 * target_residency are presumably microseconds - neither is shown here.
 */
687 static struct cpuidle_state skx_cstates[] = {
688         {
689                 .name = "C1",
690                 .desc = "MWAIT 0x00",
691                 .flags = MWAIT2flg(0x00),
692                 .exit_latency = 2,
693                 .target_residency = 2,
694                 .enter = &intel_idle,
695                 .enter_s2idle = intel_idle_s2idle, },
696         {
697                 .name = "C1E",
698                 .desc = "MWAIT 0x01",
699                 .flags = MWAIT2flg(0x01),
700                 .exit_latency = 10,
701                 .target_residency = 20,
702                 .enter = &intel_idle,
703                 .enter_s2idle = intel_idle_s2idle, },
704         {
705                 .name = "C6",
706                 .desc = "MWAIT 0x20",
707                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
708                 .exit_latency = 133,
709                 .target_residency = 600,
710                 .enter = &intel_idle,
711                 .enter_s2idle = intel_idle_s2idle, },
712         {
713                 .enter = NULL }
714 };
715
/*
 * atom_cstates - idle-state table with C1E, C2, C4 and C6.
 *
 * Each entry packs its MWAIT hint into the top byte of .flags via
 * MWAIT2flg().  The shallowest state here is named "C1E" although its hint
 * is 0x00; only C4 and C6 set CPUIDLE_FLAG_TLB_FLUSHED (C2 does not).
 * The entry holding only .enter = NULL terminates the table.
 * NOTE(review): exit_latency / target_residency are presumably in
 * microseconds - not stated here.
 */
716 static struct cpuidle_state atom_cstates[] = {
717         {
718                 .name = "C1E",
719                 .desc = "MWAIT 0x00",
720                 .flags = MWAIT2flg(0x00),
721                 .exit_latency = 10,
722                 .target_residency = 20,
723                 .enter = &intel_idle,
724                 .enter_s2idle = intel_idle_s2idle, },
725         {
726                 .name = "C2",
727                 .desc = "MWAIT 0x10",
728                 .flags = MWAIT2flg(0x10),
729                 .exit_latency = 20,
730                 .target_residency = 80,
731                 .enter = &intel_idle,
732                 .enter_s2idle = intel_idle_s2idle, },
733         {
734                 .name = "C4",
735                 .desc = "MWAIT 0x30",
736                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
737                 .exit_latency = 100,
738                 .target_residency = 400,
739                 .enter = &intel_idle,
740                 .enter_s2idle = intel_idle_s2idle, },
741         {
742                 .name = "C6",
743                 .desc = "MWAIT 0x52",
744                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
745                 .exit_latency = 140,
746                 .target_residency = 560,
747                 .enter = &intel_idle,
748                 .enter_s2idle = intel_idle_s2idle, },
749         {
750                 .enter = NULL }
751 };
/*
 * tangier_cstates - idle-state table with C1, C4, C6, C7 and C9.
 *
 * Each entry packs its MWAIT hint into the top byte of .flags via
 * MWAIT2flg(); everything deeper than C1 also sets
 * CPUIDLE_FLAG_TLB_FLUSHED.  The entry holding only .enter = NULL
 * terminates the table.
 * NOTE(review): exit_latency / target_residency are presumably in
 * microseconds - not stated here.
 */
752 static struct cpuidle_state tangier_cstates[] = {
753         {
754                 .name = "C1",
755                 .desc = "MWAIT 0x00",
756                 .flags = MWAIT2flg(0x00),
757                 .exit_latency = 1,
758                 .target_residency = 4,
759                 .enter = &intel_idle,
760                 .enter_s2idle = intel_idle_s2idle, },
761         {
762                 .name = "C4",
763                 .desc = "MWAIT 0x30",
764                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
765                 .exit_latency = 100,
766                 .target_residency = 400,
767                 .enter = &intel_idle,
768                 .enter_s2idle = intel_idle_s2idle, },
769         {
770                 .name = "C6",
771                 .desc = "MWAIT 0x52",
772                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
773                 .exit_latency = 140,
774                 .target_residency = 560,
775                 .enter = &intel_idle,
776                 .enter_s2idle = intel_idle_s2idle, },
777         {
778                 .name = "C7",
779                 .desc = "MWAIT 0x60",
780                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
781                 .exit_latency = 1200,
782                 .target_residency = 4000,
783                 .enter = &intel_idle,
784                 .enter_s2idle = intel_idle_s2idle, },
785         {
786                 .name = "C9",
787                 .desc = "MWAIT 0x64",
788                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
789                 .exit_latency = 10000,
790                 .target_residency = 20000,
791                 .enter = &intel_idle,
792                 .enter_s2idle = intel_idle_s2idle, },
793         {
794                 .enter = NULL }
795 };
/*
 * avn_cstates - idle-state table with C1 and C6.
 *
 * Each entry packs its MWAIT hint into the top byte of .flags via
 * MWAIT2flg(); C6 (hint 0x51) also sets CPUIDLE_FLAG_TLB_FLUSHED.  The
 * entry holding only .enter = NULL terminates the table.
 * NOTE(review): "avn" presumably means Avoton, and exit_latency /
 * target_residency are presumably microseconds - neither is shown here.
 */
796 static struct cpuidle_state avn_cstates[] = {
797         {
798                 .name = "C1",
799                 .desc = "MWAIT 0x00",
800                 .flags = MWAIT2flg(0x00),
801                 .exit_latency = 2,
802                 .target_residency = 2,
803                 .enter = &intel_idle,
804                 .enter_s2idle = intel_idle_s2idle, },
805         {
806                 .name = "C6",
807                 .desc = "MWAIT 0x51",
808                 .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
809                 .exit_latency = 15,
810                 .target_residency = 45,
811                 .enter = &intel_idle,
812                 .enter_s2idle = intel_idle_s2idle, },
813         {
814                 .enter = NULL }
815 };
816 static struct cpuidle_state knl_cstates[] = {
817         {
818                 .name = "C1",
819                 .desc = "MWAIT 0x00",
820                 .flags = MWAIT2flg(0x00),
821                 .exit_latency = 1,
822                 .target_residency = 2,
823                 .enter = &intel_idle,
824                 .enter_s2idle = intel_idle_s2idle },
825         {
826                 .name = "C6",
827                 .desc = "MWAIT 0x10",
828                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
829                 .exit_latency = 120,
830                 .target_residency = 500,
831                 .enter = &intel_idle,
832                 .enter_s2idle = intel_idle_s2idle },
833         {
834                 .enter = NULL }
835 };
836
837 static struct cpuidle_state bxt_cstates[] = {
838         {
839                 .name = "C1",
840                 .desc = "MWAIT 0x00",
841                 .flags = MWAIT2flg(0x00),
842                 .exit_latency = 2,
843                 .target_residency = 2,
844                 .enter = &intel_idle,
845                 .enter_s2idle = intel_idle_s2idle, },
846         {
847                 .name = "C1E",
848                 .desc = "MWAIT 0x01",
849                 .flags = MWAIT2flg(0x01),
850                 .exit_latency = 10,
851                 .target_residency = 20,
852                 .enter = &intel_idle,
853                 .enter_s2idle = intel_idle_s2idle, },
854         {
855                 .name = "C6",
856                 .desc = "MWAIT 0x20",
857                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
858                 .exit_latency = 133,
859                 .target_residency = 133,
860                 .enter = &intel_idle,
861                 .enter_s2idle = intel_idle_s2idle, },
862         {
863                 .name = "C7s",
864                 .desc = "MWAIT 0x31",
865                 .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
866                 .exit_latency = 155,
867                 .target_residency = 155,
868                 .enter = &intel_idle,
869                 .enter_s2idle = intel_idle_s2idle, },
870         {
871                 .name = "C8",
872                 .desc = "MWAIT 0x40",
873                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
874                 .exit_latency = 1000,
875                 .target_residency = 1000,
876                 .enter = &intel_idle,
877                 .enter_s2idle = intel_idle_s2idle, },
878         {
879                 .name = "C9",
880                 .desc = "MWAIT 0x50",
881                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
882                 .exit_latency = 2000,
883                 .target_residency = 2000,
884                 .enter = &intel_idle,
885                 .enter_s2idle = intel_idle_s2idle, },
886         {
887                 .name = "C10",
888                 .desc = "MWAIT 0x60",
889                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
890                 .exit_latency = 10000,
891                 .target_residency = 10000,
892                 .enter = &intel_idle,
893                 .enter_s2idle = intel_idle_s2idle, },
894         {
895                 .enter = NULL }
896 };
897
898 static struct cpuidle_state dnv_cstates[] = {
899         {
900                 .name = "C1",
901                 .desc = "MWAIT 0x00",
902                 .flags = MWAIT2flg(0x00),
903                 .exit_latency = 2,
904                 .target_residency = 2,
905                 .enter = &intel_idle,
906                 .enter_s2idle = intel_idle_s2idle, },
907         {
908                 .name = "C1E",
909                 .desc = "MWAIT 0x01",
910                 .flags = MWAIT2flg(0x01),
911                 .exit_latency = 10,
912                 .target_residency = 20,
913                 .enter = &intel_idle,
914                 .enter_s2idle = intel_idle_s2idle, },
915         {
916                 .name = "C6",
917                 .desc = "MWAIT 0x20",
918                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
919                 .exit_latency = 50,
920                 .target_residency = 500,
921                 .enter = &intel_idle,
922                 .enter_s2idle = intel_idle_s2idle, },
923         {
924                 .enter = NULL }
925 };
926
927 /**
928  * intel_idle
929  * @dev: cpuidle_device
930  * @drv: cpuidle driver
931  * @index: index of cpuidle state
932  *
933  * Must be called under local_irq_disable().
934  */
935 static __cpuidle int intel_idle(struct cpuidle_device *dev,
936                                 struct cpuidle_driver *drv, int index)
937 {
938         unsigned long ecx = 1; /* break on interrupt flag */
939         struct cpuidle_state *state = &drv->states[index];
940         unsigned long eax = flg2MWAIT(state->flags);
941         unsigned int cstate;
942         bool uninitialized_var(tick);
943         int cpu = smp_processor_id();
944
945         /*
946          * leave_mm() to avoid costly and often unnecessary wakeups
947          * for flushing the user TLB's associated with the active mm.
948          */
949         if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
950                 leave_mm(cpu);
951
952         if (!static_cpu_has(X86_FEATURE_ARAT)) {
953                 cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) &
954                                 MWAIT_CSTATE_MASK) + 1;
955                 tick = false;
956                 if (!(lapic_timer_reliable_states & (1 << (cstate)))) {
957                         tick = true;
958                         tick_broadcast_enter();
959                 }
960         }
961
962         mwait_idle_with_hints(eax, ecx);
963
964         if (!static_cpu_has(X86_FEATURE_ARAT) && tick)
965                 tick_broadcast_exit();
966
967         return index;
968 }
969
970 /**
971  * intel_idle_s2idle - simplified "enter" callback routine for suspend-to-idle
972  * @dev: cpuidle_device
973  * @drv: cpuidle driver
974  * @index: state index
975  */
976 static void intel_idle_s2idle(struct cpuidle_device *dev,
977                              struct cpuidle_driver *drv, int index)
978 {
979         unsigned long ecx = 1; /* break on interrupt flag */
980         unsigned long eax = flg2MWAIT(drv->states[index].flags);
981
982         mwait_idle_with_hints(eax, ecx);
983 }
984
985 static void __setup_broadcast_timer(bool on)
986 {
987         if (on)
988                 tick_broadcast_enable();
989         else
990                 tick_broadcast_disable();
991 }
992
993 static void auto_demotion_disable(void)
994 {
995         unsigned long long msr_bits;
996
997         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
998         msr_bits &= ~(icpu->auto_demotion_disable_flags);
999         wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
1000 }
1001 static void c1e_promotion_disable(void)
1002 {
1003         unsigned long long msr_bits;
1004
1005         rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
1006         msr_bits &= ~0x2;
1007         wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
1008 }
1009
1010 static const struct idle_cpu idle_cpu_nehalem = {
1011         .state_table = nehalem_cstates,
1012         .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
1013         .disable_promotion_to_c1e = true,
1014 };
1015
1016 static const struct idle_cpu idle_cpu_atom = {
1017         .state_table = atom_cstates,
1018 };
1019
1020 static const struct idle_cpu idle_cpu_tangier = {
1021         .state_table = tangier_cstates,
1022 };
1023
1024 static const struct idle_cpu idle_cpu_lincroft = {
1025         .state_table = atom_cstates,
1026         .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
1027 };
1028
1029 static const struct idle_cpu idle_cpu_snb = {
1030         .state_table = snb_cstates,
1031         .disable_promotion_to_c1e = true,
1032 };
1033
1034 static const struct idle_cpu idle_cpu_byt = {
1035         .state_table = byt_cstates,
1036         .disable_promotion_to_c1e = true,
1037         .byt_auto_demotion_disable_flag = true,
1038 };
1039
1040 static const struct idle_cpu idle_cpu_cht = {
1041         .state_table = cht_cstates,
1042         .disable_promotion_to_c1e = true,
1043         .byt_auto_demotion_disable_flag = true,
1044 };
1045
1046 static const struct idle_cpu idle_cpu_ivb = {
1047         .state_table = ivb_cstates,
1048         .disable_promotion_to_c1e = true,
1049 };
1050
1051 static const struct idle_cpu idle_cpu_ivt = {
1052         .state_table = ivt_cstates,
1053         .disable_promotion_to_c1e = true,
1054 };
1055
1056 static const struct idle_cpu idle_cpu_hsw = {
1057         .state_table = hsw_cstates,
1058         .disable_promotion_to_c1e = true,
1059 };
1060
1061 static const struct idle_cpu idle_cpu_bdw = {
1062         .state_table = bdw_cstates,
1063         .disable_promotion_to_c1e = true,
1064 };
1065
1066 static const struct idle_cpu idle_cpu_skl = {
1067         .state_table = skl_cstates,
1068         .disable_promotion_to_c1e = true,
1069 };
1070
1071 static const struct idle_cpu idle_cpu_skx = {
1072         .state_table = skx_cstates,
1073         .disable_promotion_to_c1e = true,
1074 };
1075
1076 static const struct idle_cpu idle_cpu_avn = {
1077         .state_table = avn_cstates,
1078         .disable_promotion_to_c1e = true,
1079 };
1080
1081 static const struct idle_cpu idle_cpu_knl = {
1082         .state_table = knl_cstates,
1083 };
1084
1085 static const struct idle_cpu idle_cpu_bxt = {
1086         .state_table = bxt_cstates,
1087         .disable_promotion_to_c1e = true,
1088 };
1089
1090 static const struct idle_cpu idle_cpu_dnv = {
1091         .state_table = dnv_cstates,
1092         .disable_promotion_to_c1e = true,
1093 };
1094
1095 #define ICPU(model, cpu) \
1096         { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&cpu }
1097
1098 static const struct x86_cpu_id intel_idle_ids[] __initconst = {
1099         ICPU(INTEL_FAM6_NEHALEM_EP,             idle_cpu_nehalem),
1100         ICPU(INTEL_FAM6_NEHALEM,                idle_cpu_nehalem),
1101         ICPU(INTEL_FAM6_NEHALEM_G,              idle_cpu_nehalem),
1102         ICPU(INTEL_FAM6_WESTMERE,               idle_cpu_nehalem),
1103         ICPU(INTEL_FAM6_WESTMERE_EP,            idle_cpu_nehalem),
1104         ICPU(INTEL_FAM6_NEHALEM_EX,             idle_cpu_nehalem),
1105         ICPU(INTEL_FAM6_ATOM_BONNELL,           idle_cpu_atom),
1106         ICPU(INTEL_FAM6_ATOM_BONNELL_MID,               idle_cpu_lincroft),
1107         ICPU(INTEL_FAM6_WESTMERE_EX,            idle_cpu_nehalem),
1108         ICPU(INTEL_FAM6_SANDYBRIDGE,            idle_cpu_snb),
1109         ICPU(INTEL_FAM6_SANDYBRIDGE_X,          idle_cpu_snb),
1110         ICPU(INTEL_FAM6_ATOM_SALTWELL,          idle_cpu_atom),
1111         ICPU(INTEL_FAM6_ATOM_SILVERMONT,        idle_cpu_byt),
1112         ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID,    idle_cpu_tangier),
1113         ICPU(INTEL_FAM6_ATOM_AIRMONT,           idle_cpu_cht),
1114         ICPU(INTEL_FAM6_IVYBRIDGE,              idle_cpu_ivb),
1115         ICPU(INTEL_FAM6_IVYBRIDGE_X,            idle_cpu_ivt),
1116         ICPU(INTEL_FAM6_HASWELL_CORE,           idle_cpu_hsw),
1117         ICPU(INTEL_FAM6_HASWELL_X,              idle_cpu_hsw),
1118         ICPU(INTEL_FAM6_HASWELL_ULT,            idle_cpu_hsw),
1119         ICPU(INTEL_FAM6_HASWELL_GT3E,           idle_cpu_hsw),
1120         ICPU(INTEL_FAM6_ATOM_SILVERMONT_X,      idle_cpu_avn),
1121         ICPU(INTEL_FAM6_BROADWELL_CORE,         idle_cpu_bdw),
1122         ICPU(INTEL_FAM6_BROADWELL_GT3E,         idle_cpu_bdw),
1123         ICPU(INTEL_FAM6_BROADWELL_X,            idle_cpu_bdw),
1124         ICPU(INTEL_FAM6_BROADWELL_XEON_D,       idle_cpu_bdw),
1125         ICPU(INTEL_FAM6_SKYLAKE_MOBILE,         idle_cpu_skl),
1126         ICPU(INTEL_FAM6_SKYLAKE_DESKTOP,        idle_cpu_skl),
1127         ICPU(INTEL_FAM6_KABYLAKE_MOBILE,        idle_cpu_skl),
1128         ICPU(INTEL_FAM6_KABYLAKE_DESKTOP,       idle_cpu_skl),
1129         ICPU(INTEL_FAM6_SKYLAKE_X,              idle_cpu_skx),
1130         ICPU(INTEL_FAM6_XEON_PHI_KNL,           idle_cpu_knl),
1131         ICPU(INTEL_FAM6_XEON_PHI_KNM,           idle_cpu_knl),
1132         ICPU(INTEL_FAM6_ATOM_GOLDMONT,          idle_cpu_bxt),
1133         ICPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS,     idle_cpu_bxt),
1134         ICPU(INTEL_FAM6_ATOM_GOLDMONT_X,        idle_cpu_dnv),
1135         {}
1136 };
1137
1138 /*
1139  * intel_idle_probe()
1140  */
1141 static int __init intel_idle_probe(void)
1142 {
1143         unsigned int eax, ebx, ecx;
1144         const struct x86_cpu_id *id;
1145
1146         if (max_cstate == 0) {
1147                 pr_debug("disabled\n");
1148                 return -EPERM;
1149         }
1150
1151         id = x86_match_cpu(intel_idle_ids);
1152         if (!id) {
1153                 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
1154                     boot_cpu_data.x86 == 6)
1155                         pr_debug("does not run on family %d model %d\n",
1156                                  boot_cpu_data.x86, boot_cpu_data.x86_model);
1157                 return -ENODEV;
1158         }
1159
1160         if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
1161                 pr_debug("Please enable MWAIT in BIOS SETUP\n");
1162                 return -ENODEV;
1163         }
1164
1165         if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
1166                 return -ENODEV;
1167
1168         cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);
1169
1170         if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
1171             !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
1172             !mwait_substates)
1173                         return -ENODEV;
1174
1175         pr_debug("MWAIT substates: 0x%x\n", mwait_substates);
1176
1177         icpu = (const struct idle_cpu *)id->driver_data;
1178         cpuidle_state_table = icpu->state_table;
1179
1180         pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
1181                  boot_cpu_data.x86_model);
1182
1183         return 0;
1184 }
1185
1186 /*
1187  * intel_idle_cpuidle_devices_uninit()
1188  * Unregisters the cpuidle devices.
1189  */
1190 static void intel_idle_cpuidle_devices_uninit(void)
1191 {
1192         int i;
1193         struct cpuidle_device *dev;
1194
1195         for_each_online_cpu(i) {
1196                 dev = per_cpu_ptr(intel_idle_cpuidle_devices, i);
1197                 cpuidle_unregister_device(dev);
1198         }
1199 }
1200
1201 /*
1202  * ivt_idle_state_table_update(void)
1203  *
1204  * Tune IVT multi-socket targets
1205  * Assumption: num_sockets == (max_package_num + 1)
1206  */
1207 static void ivt_idle_state_table_update(void)
1208 {
1209         /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
1210         int cpu, package_num, num_sockets = 1;
1211
1212         for_each_online_cpu(cpu) {
1213                 package_num = topology_physical_package_id(cpu);
1214                 if (package_num + 1 > num_sockets) {
1215                         num_sockets = package_num + 1;
1216
1217                         if (num_sockets > 4) {
1218                                 cpuidle_state_table = ivt_cstates_8s;
1219                                 return;
1220                         }
1221                 }
1222         }
1223
1224         if (num_sockets > 2)
1225                 cpuidle_state_table = ivt_cstates_4s;
1226
1227         /* else, 1 and 2 socket systems use default ivt_cstates */
1228 }
1229
/*
 * Translate IRTL (Interrupt Response Time Limit) MSR to usec
 *
 * Bits 12:10 of the value select a time unit from irtl_ns_units[] (in ns),
 * bits 9:0 hold the count in that unit.  The product is divided by 1000.
 */

static unsigned int irtl_ns_units[] = {
	1,		/* 32^0 */
	32,		/* 32^1 */
	1024,		/* 32^2 */
	32768,		/* 32^3 */
	1048576,	/* 32^4 */
	33554432,	/* 32^5 */
	0,
	0,
};

static unsigned long long irtl_2_usec(unsigned long long irtl)
{
	unsigned long long unit_ns, count;

	if (irtl == 0)
		return 0;

	unit_ns = irtl_ns_units[(irtl >> 10) & 0x7];
	count = irtl & 0x3FF;

	return div64_u64(count * unit_ns, 1000);
}
1248 /*
1249  * bxt_idle_state_table_update(void)
1250  *
1251  * On BXT, we trust the IRTL to show the definitive maximum latency
1252  * We use the same value for target_residency.
1253  */
1254 static void bxt_idle_state_table_update(void)
1255 {
1256         unsigned long long msr;
1257         unsigned int usec;
1258
1259         rdmsrl(MSR_PKGC6_IRTL, msr);
1260         usec = irtl_2_usec(msr);
1261         if (usec) {
1262                 bxt_cstates[2].exit_latency = usec;
1263                 bxt_cstates[2].target_residency = usec;
1264         }
1265
1266         rdmsrl(MSR_PKGC7_IRTL, msr);
1267         usec = irtl_2_usec(msr);
1268         if (usec) {
1269                 bxt_cstates[3].exit_latency = usec;
1270                 bxt_cstates[3].target_residency = usec;
1271         }
1272
1273         rdmsrl(MSR_PKGC8_IRTL, msr);
1274         usec = irtl_2_usec(msr);
1275         if (usec) {
1276                 bxt_cstates[4].exit_latency = usec;
1277                 bxt_cstates[4].target_residency = usec;
1278         }
1279
1280         rdmsrl(MSR_PKGC9_IRTL, msr);
1281         usec = irtl_2_usec(msr);
1282         if (usec) {
1283                 bxt_cstates[5].exit_latency = usec;
1284                 bxt_cstates[5].target_residency = usec;
1285         }
1286
1287         rdmsrl(MSR_PKGC10_IRTL, msr);
1288         usec = irtl_2_usec(msr);
1289         if (usec) {
1290                 bxt_cstates[6].exit_latency = usec;
1291                 bxt_cstates[6].target_residency = usec;
1292         }
1293
1294 }
1295 /*
1296  * sklh_idle_state_table_update(void)
1297  *
1298  * On SKL-H (model 0x5e) disable C8 and C9 if:
1299  * C10 is enabled and SGX disabled
1300  */
1301 static void sklh_idle_state_table_update(void)
1302 {
1303         unsigned long long msr;
1304         unsigned int eax, ebx, ecx, edx;
1305
1306
1307         /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
1308         if (max_cstate <= 7)
1309                 return;
1310
1311         /* if PC10 not present in CPUID.MWAIT.EDX */
1312         if ((mwait_substates & (0xF << 28)) == 0)
1313                 return;
1314
1315         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1316
1317         /* PC10 is not enabled in PKG C-state limit */
1318         if ((msr & 0xF) != 8)
1319                 return;
1320
1321         ecx = 0;
1322         cpuid(7, &eax, &ebx, &ecx, &edx);
1323
1324         /* if SGX is present */
1325         if (ebx & (1 << 2)) {
1326
1327                 rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
1328
1329                 /* if SGX is enabled */
1330                 if (msr & (1 << 18))
1331                         return;
1332         }
1333
1334         skl_cstates[5].disabled = 1;    /* C8-SKL */
1335         skl_cstates[6].disabled = 1;    /* C9-SKL */
1336 }
1337 /*
1338  * intel_idle_state_table_update()
1339  *
1340  * Update the default state_table for this CPU-id
1341  */
1342
1343 static void intel_idle_state_table_update(void)
1344 {
1345         switch (boot_cpu_data.x86_model) {
1346
1347         case INTEL_FAM6_IVYBRIDGE_X:
1348                 ivt_idle_state_table_update();
1349                 break;
1350         case INTEL_FAM6_ATOM_GOLDMONT:
1351         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
1352                 bxt_idle_state_table_update();
1353                 break;
1354         case INTEL_FAM6_SKYLAKE_DESKTOP:
1355                 sklh_idle_state_table_update();
1356                 break;
1357         }
1358 }
1359
1360 /*
1361  * intel_idle_cpuidle_driver_init()
1362  * allocate, initialize cpuidle_states
1363  */
1364 static void __init intel_idle_cpuidle_driver_init(void)
1365 {
1366         int cstate;
1367         struct cpuidle_driver *drv = &intel_idle_driver;
1368
1369         intel_idle_state_table_update();
1370
1371         cpuidle_poll_state_init(drv);
1372         drv->state_count = 1;
1373
1374         for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
1375                 int num_substates, mwait_hint, mwait_cstate;
1376
1377                 if ((cpuidle_state_table[cstate].enter == NULL) &&
1378                     (cpuidle_state_table[cstate].enter_s2idle == NULL))
1379                         break;
1380
1381                 if (cstate + 1 > max_cstate) {
1382                         pr_info("max_cstate %d reached\n", max_cstate);
1383                         break;
1384                 }
1385
1386                 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
1387                 mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint);
1388
1389                 /* number of sub-states for this state in CPUID.MWAIT */
1390                 num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4))
1391                                         & MWAIT_SUBSTATE_MASK;
1392
1393                 /* if NO sub-states for this state in CPUID, skip it */
1394                 if (num_substates == 0)
1395                         continue;
1396
1397                 /* if state marked as disabled, skip it */
1398                 if (cpuidle_state_table[cstate].disabled != 0) {
1399                         pr_debug("state %s is disabled\n",
1400                                  cpuidle_state_table[cstate].name);
1401                         continue;
1402                 }
1403
1404
1405                 if (((mwait_cstate + 1) > 2) &&
1406                         !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
1407                         mark_tsc_unstable("TSC halts in idle"
1408                                         " states deeper than C2");
1409
1410                 drv->states[drv->state_count] = /* structure copy */
1411                         cpuidle_state_table[cstate];
1412
1413                 if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
1414                     cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) {
1415                         drv->states[drv->state_count].enter = intel_idle_ibrs;
1416                 }
1417
1418                 drv->state_count += 1;
1419         }
1420
1421         if (icpu->byt_auto_demotion_disable_flag) {
1422                 wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
1423                 wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
1424         }
1425 }
1426
1427
1428 /*
1429  * intel_idle_cpu_init()
1430  * allocate, initialize, register cpuidle_devices
1431  * @cpu: cpu/core to initialize
1432  */
1433 static int intel_idle_cpu_init(unsigned int cpu)
1434 {
1435         struct cpuidle_device *dev;
1436
1437         dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1438         dev->cpu = cpu;
1439
1440         if (cpuidle_register_device(dev)) {
1441                 pr_debug("cpuidle_register_device %d failed!\n", cpu);
1442                 return -EIO;
1443         }
1444
1445         if (icpu->auto_demotion_disable_flags)
1446                 auto_demotion_disable();
1447
1448         if (icpu->disable_promotion_to_c1e)
1449                 c1e_promotion_disable();
1450
1451         return 0;
1452 }
1453
1454 static int intel_idle_cpu_online(unsigned int cpu)
1455 {
1456         struct cpuidle_device *dev;
1457
1458         if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE)
1459                 __setup_broadcast_timer(true);
1460
1461         /*
1462          * Some systems can hotplug a cpu at runtime after
1463          * the kernel has booted, we have to initialize the
1464          * driver in this case
1465          */
1466         dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1467         if (!dev->registered)
1468                 return intel_idle_cpu_init(cpu);
1469
1470         return 0;
1471 }
1472
1473 static int __init intel_idle_init(void)
1474 {
1475         int retval;
1476
1477         /* Do not load intel_idle at all for now if idle= is passed */
1478         if (boot_option_idle_override != IDLE_NO_OVERRIDE)
1479                 return -ENODEV;
1480
1481         retval = intel_idle_probe();
1482         if (retval)
1483                 return retval;
1484
1485         intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
1486         if (intel_idle_cpuidle_devices == NULL)
1487                 return -ENOMEM;
1488
1489         intel_idle_cpuidle_driver_init();
1490         retval = cpuidle_register_driver(&intel_idle_driver);
1491         if (retval) {
1492                 struct cpuidle_driver *drv = cpuidle_get_driver();
1493                 printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
1494                        drv ? drv->name : "none");
1495                 goto init_driver_fail;
1496         }
1497
1498         if (boot_cpu_has(X86_FEATURE_ARAT))     /* Always Reliable APIC Timer */
1499                 lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE;
1500
1501         retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
1502                                    intel_idle_cpu_online, NULL);
1503         if (retval < 0)
1504                 goto hp_setup_fail;
1505
1506         pr_debug("lapic_timer_reliable_states 0x%x\n",
1507                  lapic_timer_reliable_states);
1508
1509         return 0;
1510
1511 hp_setup_fail:
1512         intel_idle_cpuidle_devices_uninit();
1513         cpuidle_unregister_driver(&intel_idle_driver);
1514 init_driver_fail:
1515         free_percpu(intel_idle_cpuidle_devices);
1516         return retval;
1517
1518 }
1519 device_initcall(intel_idle_init);
1520
1521 /*
1522  * We are not really modular, but we used to support that.  Meaning we also
1523  * support "intel_idle.max_cstate=..." at boot and also a read-only export of
1524  * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
1525  * is the easiest way (currently) to continue doing that.
1526  */
1527 module_param(max_cstate, int, 0444);