xref: /linux/drivers/idle/intel_idle.c (revision 0ade34c37012ea5c516d9aa4d19a56e9f40a55ed)
1  /*
2   * intel_idle.c - native hardware idle loop for modern Intel processors
3   *
4   * Copyright (c) 2013, Intel Corporation.
5   * Len Brown <len.brown@intel.com>
6   *
7   * This program is free software; you can redistribute it and/or modify it
8   * under the terms and conditions of the GNU General Public License,
9   * version 2, as published by the Free Software Foundation.
10   *
11   * This program is distributed in the hope it will be useful, but WITHOUT
12   * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13   * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
14   * more details.
15   *
16   * You should have received a copy of the GNU General Public License along with
17   * this program; if not, write to the Free Software Foundation, Inc.,
18   * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
19   */
20  
21  /*
22   * intel_idle is a cpuidle driver that loads on specific Intel processors
23   * in lieu of the legacy ACPI processor_idle driver.  The intent is to
24   * make Linux more efficient on these processors, as intel_idle knows
25   * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
26   */
27  
28  /*
29   * Design Assumptions
30   *
31   * All CPUs have the same idle states as the boot CPU
32   *
33   * Chipset BM_STS (bus master status) bit is a NOP
34   *	for preventing entry into deep C-states
35   */
36  
37  /*
38   * Known limitations
39   *
40   * The driver currently initializes for_each_online_cpu() upon modprobe.
41   * It is unaware of subsequent processors hot-added to the system.
42   * This means that if you boot with maxcpus=n and later online
43   * processors above n, those processors will use C1 only.
44   *
45   * ACPI has a .suspend hack to turn off deep c-states during suspend
46   * to avoid complications with the lapic timer workaround.
47   * Have not seen issues with suspend, but may need same workaround here.
48   *
49   */
50  
51  /* un-comment DEBUG to enable pr_debug() statements */
52  #define DEBUG
53  
54  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
55  
56  #include <linux/kernel.h>
57  #include <linux/cpuidle.h>
58  #include <linux/tick.h>
59  #include <trace/events/power.h>
60  #include <linux/sched.h>
61  #include <linux/notifier.h>
62  #include <linux/cpu.h>
63  #include <linux/moduleparam.h>
64  #include <asm/cpu_device_id.h>
65  #include <asm/intel-family.h>
66  #include <asm/mwait.h>
67  #include <asm/msr.h>
68  
69  #define INTEL_IDLE_VERSION "0.4.1"
70  
71  static struct cpuidle_driver intel_idle_driver = {
72  	.name = "intel_idle",
73  	.owner = THIS_MODULE,
74  };
75  /* intel_idle.max_cstate=0 disables driver */
76  static int max_cstate = CPUIDLE_STATE_MAX - 1;
77  
78  static unsigned int mwait_substates;
79  
80  #define LAPIC_TIMER_ALWAYS_RELIABLE 0xFFFFFFFF
81  /* Reliable LAPIC Timer States, bit 1 for C1 etc.  */
82  static unsigned int lapic_timer_reliable_states = (1 << 1);	 /* Default to only C1 */
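/*
 * A set bit n in lapic_timer_reliable_states means the local APIC timer
 * keeps ticking in C-state n, so intel_idle() can skip
 * tick_broadcast_enter() for that state; when the boot CPU has ARAT,
 * init sets this to LAPIC_TIMER_ALWAYS_RELIABLE.
 */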
83  
84  struct idle_cpu {
85  	struct cpuidle_state *state_table;
86  
87  	/*
88  	 * Hardware C-state auto-demotion may not always be optimal.
89  	 * Indicate which enable bits to clear here.
90  	 */
91  	unsigned long auto_demotion_disable_flags;
92  	bool byt_auto_demotion_disable_flag;
93  	bool disable_promotion_to_c1e;
94  };
95  
96  static const struct idle_cpu *icpu;
97  static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
98  static int intel_idle(struct cpuidle_device *dev,
99  			struct cpuidle_driver *drv, int index);
100  static void intel_idle_s2idle(struct cpuidle_device *dev,
101  			      struct cpuidle_driver *drv, int index);
102  static struct cpuidle_state *cpuidle_state_table;
103  
104  /*
105   * Set this flag for states where the HW flushes the TLB for us
106   * and so we don't need cross-calls to keep it consistent.
107   * When this flag is set, SW flushes the TLB (via leave_mm()), so it is
108   * safe to use even if the HW does not actually do the flushing.
109   */
110  #define CPUIDLE_FLAG_TLB_FLUSHED	0x10000
111  
112  /*
113   * MWAIT takes an 8-bit "hint" in EAX "suggesting"
114   * the C-state (top nibble) and sub-state (bottom nibble)
115   * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
116   *
117   * We store the hint at the top of our "flags" for each state.
118   */
119  #define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
120  #define MWAIT2flg(eax) ((eax & 0xFF) << 24)
121  
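/*
 * Illustrative sketch (not part of the driver): MWAIT2flg(0x10) stores the
 * hint 0x10 in bits 31:24 of .flags and flg2MWAIT() recovers it.  Splitting
 * the nibbles the way intel_idle() does below gives C-state 2 ("MWAIT(C2)",
 * top nibble plus one) and sub-state 0.  The helper name is made up for
 * illustration only.
 */
static inline unsigned int example_flags_to_cstate(unsigned int flags)
{
	unsigned int hint = flg2MWAIT(flags);	/* e.g. 0x10 */

	/* top nibble selects the C-state (plus one), bottom nibble the sub-state */
	return ((hint >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) + 1;
}
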
122  /*
123   * States are indexed by the cstate number,
124   * which is also the index into the MWAIT hint array.
125   * Thus C0 is a dummy.
126   */
127  static struct cpuidle_state nehalem_cstates[] = {
128  	{
129  		.name = "C1",
130  		.desc = "MWAIT 0x00",
131  		.flags = MWAIT2flg(0x00),
132  		.exit_latency = 3,
133  		.target_residency = 6,
134  		.enter = &intel_idle,
135  		.enter_s2idle = intel_idle_s2idle, },
136  	{
137  		.name = "C1E",
138  		.desc = "MWAIT 0x01",
139  		.flags = MWAIT2flg(0x01),
140  		.exit_latency = 10,
141  		.target_residency = 20,
142  		.enter = &intel_idle,
143  		.enter_s2idle = intel_idle_s2idle, },
144  	{
145  		.name = "C3",
146  		.desc = "MWAIT 0x10",
147  		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
148  		.exit_latency = 20,
149  		.target_residency = 80,
150  		.enter = &intel_idle,
151  		.enter_s2idle = intel_idle_s2idle, },
152  	{
153  		.name = "C6",
154  		.desc = "MWAIT 0x20",
155  		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
156  		.exit_latency = 200,
157  		.target_residency = 800,
158  		.enter = &intel_idle,
159  		.enter_s2idle = intel_idle_s2idle, },
160  	{
161  		.enter = NULL }
162  };
163  
164  static struct cpuidle_state snb_cstates[] = {
165  	{
166  		.name = "C1",
167  		.desc = "MWAIT 0x00",
168  		.flags = MWAIT2flg(0x00),
169  		.exit_latency = 2,
170  		.target_residency = 2,
171  		.enter = &intel_idle,
172  		.enter_s2idle = intel_idle_s2idle, },
173  	{
174  		.name = "C1E",
175  		.desc = "MWAIT 0x01",
176  		.flags = MWAIT2flg(0x01),
177  		.exit_latency = 10,
178  		.target_residency = 20,
179  		.enter = &intel_idle,
180  		.enter_s2idle = intel_idle_s2idle, },
181  	{
182  		.name = "C3",
183  		.desc = "MWAIT 0x10",
184  		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
185  		.exit_latency = 80,
186  		.target_residency = 211,
187  		.enter = &intel_idle,
188  		.enter_s2idle = intel_idle_s2idle, },
189  	{
190  		.name = "C6",
191  		.desc = "MWAIT 0x20",
192  		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
193  		.exit_latency = 104,
194  		.target_residency = 345,
195  		.enter = &intel_idle,
196  		.enter_s2idle = intel_idle_s2idle, },
197  	{
198  		.name = "C7",
199  		.desc = "MWAIT 0x30",
200  		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
201  		.exit_latency = 109,
202  		.target_residency = 345,
203  		.enter = &intel_idle,
204  		.enter_s2idle = intel_idle_s2idle, },
205  	{
206  		.enter = NULL }
207  };
208  
209  static struct cpuidle_state byt_cstates[] = {
210  	{
211  		.name = "C1",
212  		.desc = "MWAIT 0x00",
213  		.flags = MWAIT2flg(0x00),
214  		.exit_latency = 1,
215  		.target_residency = 1,
216  		.enter = &intel_idle,
217  		.enter_s2idle = intel_idle_s2idle, },
218  	{
219  		.name = "C6N",
220  		.desc = "MWAIT 0x58",
221  		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
222  		.exit_latency = 300,
223  		.target_residency = 275,
224  		.enter = &intel_idle,
225  		.enter_s2idle = intel_idle_s2idle, },
226  	{
227  		.name = "C6S",
228  		.desc = "MWAIT 0x52",
229  		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
230  		.exit_latency = 500,
231  		.target_residency = 560,
232  		.enter = &intel_idle,
233  		.enter_s2idle = intel_idle_s2idle, },
234  	{
235  		.name = "C7",
236  		.desc = "MWAIT 0x60",
237  		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
238  		.exit_latency = 1200,
239  		.target_residency = 4000,
240  		.enter = &intel_idle,
241  		.enter_s2idle = intel_idle_s2idle, },
242  	{
243  		.name = "C7S",
244  		.desc = "MWAIT 0x64",
245  		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
246  		.exit_latency = 10000,
247  		.target_residency = 20000,
248  		.enter = &intel_idle,
249  		.enter_s2idle = intel_idle_s2idle, },
250  	{
251  		.enter = NULL }
252  };
253  
254  static struct cpuidle_state cht_cstates[] = {
255  	{
256  		.name = "C1",
257  		.desc = "MWAIT 0x00",
258  		.flags = MWAIT2flg(0x00),
259  		.exit_latency = 1,
260  		.target_residency = 1,
261  		.enter = &intel_idle,
262  		.enter_s2idle = intel_idle_s2idle, },
263  	{
264  		.name = "C6N",
265  		.desc = "MWAIT 0x58",
266  		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
267  		.exit_latency = 80,
268  		.target_residency = 275,
269  		.enter = &intel_idle,
270  		.enter_s2idle = intel_idle_s2idle, },
271  	{
272  		.name = "C6S",
273  		.desc = "MWAIT 0x52",
274  		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
275  		.exit_latency = 200,
276  		.target_residency = 560,
277  		.enter = &intel_idle,
278  		.enter_s2idle = intel_idle_s2idle, },
279  	{
280  		.name = "C7",
281  		.desc = "MWAIT 0x60",
282  		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
283  		.exit_latency = 1200,
284  		.target_residency = 4000,
285  		.enter = &intel_idle,
286  		.enter_s2idle = intel_idle_s2idle, },
287  	{
288  		.name = "C7S",
289  		.desc = "MWAIT 0x64",
290  		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
291  		.exit_latency = 10000,
292  		.target_residency = 20000,
293  		.enter = &intel_idle,
294  		.enter_s2idle = intel_idle_s2idle, },
295  	{
296  		.enter = NULL }
297  };
298  
299  static struct cpuidle_state ivb_cstates[] = {
300  	{
301  		.name = "C1",
302  		.desc = "MWAIT 0x00",
303  		.flags = MWAIT2flg(0x00),
304  		.exit_latency = 1,
305  		.target_residency = 1,
306  		.enter = &intel_idle,
307  		.enter_s2idle = intel_idle_s2idle, },
308  	{
309  		.name = "C1E",
310  		.desc = "MWAIT 0x01",
311  		.flags = MWAIT2flg(0x01),
312  		.exit_latency = 10,
313  		.target_residency = 20,
314  		.enter = &intel_idle,
315  		.enter_s2idle = intel_idle_s2idle, },
316  	{
317  		.name = "C3",
318  		.desc = "MWAIT 0x10",
319  		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
320  		.exit_latency = 59,
321  		.target_residency = 156,
322  		.enter = &intel_idle,
323  		.enter_s2idle = intel_idle_s2idle, },
324  	{
325  		.name = "C6",
326  		.desc = "MWAIT 0x20",
327  		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
328  		.exit_latency = 80,
329  		.target_residency = 300,
330  		.enter = &intel_idle,
331  		.enter_s2idle = intel_idle_s2idle, },
332  	{
333  		.name = "C7",
334  		.desc = "MWAIT 0x30",
335  		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
336  		.exit_latency = 87,
337  		.target_residency = 300,
338  		.enter = &intel_idle,
339  		.enter_s2idle = intel_idle_s2idle, },
340  	{
341  		.enter = NULL }
342  };
343  
344  static struct cpuidle_state ivt_cstates[] = {
345  	{
346  		.name = "C1",
347  		.desc = "MWAIT 0x00",
348  		.flags = MWAIT2flg(0x00),
349  		.exit_latency = 1,
350  		.target_residency = 1,
351  		.enter = &intel_idle,
352  		.enter_s2idle = intel_idle_s2idle, },
353  	{
354  		.name = "C1E",
355  		.desc = "MWAIT 0x01",
356  		.flags = MWAIT2flg(0x01),
357  		.exit_latency = 10,
358  		.target_residency = 80,
359  		.enter = &intel_idle,
360  		.enter_s2idle = intel_idle_s2idle, },
361  	{
362  		.name = "C3",
363  		.desc = "MWAIT 0x10",
364  		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
365  		.exit_latency = 59,
366  		.target_residency = 156,
367  		.enter = &intel_idle,
368  		.enter_s2idle = intel_idle_s2idle, },
369  	{
370  		.name = "C6",
371  		.desc = "MWAIT 0x20",
372  		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
373  		.exit_latency = 82,
374  		.target_residency = 300,
375  		.enter = &intel_idle,
376  		.enter_s2idle = intel_idle_s2idle, },
377  	{
378  		.enter = NULL }
379  };
380  
381  static struct cpuidle_state ivt_cstates_4s[] = {
382  	{
383  		.name = "C1",
384  		.desc = "MWAIT 0x00",
385  		.flags = MWAIT2flg(0x00),
386  		.exit_latency = 1,
387  		.target_residency = 1,
388  		.enter = &intel_idle,
389  		.enter_s2idle = intel_idle_s2idle, },
390  	{
391  		.name = "C1E",
392  		.desc = "MWAIT 0x01",
393  		.flags = MWAIT2flg(0x01),
394  		.exit_latency = 10,
395  		.target_residency = 250,
396  		.enter = &intel_idle,
397  		.enter_s2idle = intel_idle_s2idle, },
398  	{
399  		.name = "C3",
400  		.desc = "MWAIT 0x10",
401  		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
402  		.exit_latency = 59,
403  		.target_residency = 300,
404  		.enter = &intel_idle,
405  		.enter_s2idle = intel_idle_s2idle, },
406  	{
407  		.name = "C6",
408  		.desc = "MWAIT 0x20",
409  		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
410  		.exit_latency = 84,
411  		.target_residency = 400,
412  		.enter = &intel_idle,
413  		.enter_s2idle = intel_idle_s2idle, },
414  	{
415  		.enter = NULL }
416  };
417  
418  static struct cpuidle_state ivt_cstates_8s[] = {
419  	{
420  		.name = "C1",
421  		.desc = "MWAIT 0x00",
422  		.flags = MWAIT2flg(0x00),
423  		.exit_latency = 1,
424  		.target_residency = 1,
425  		.enter = &intel_idle,
426  		.enter_s2idle = intel_idle_s2idle, },
427  	{
428  		.name = "C1E",
429  		.desc = "MWAIT 0x01",
430  		.flags = MWAIT2flg(0x01),
431  		.exit_latency = 10,
432  		.target_residency = 500,
433  		.enter = &intel_idle,
434  		.enter_s2idle = intel_idle_s2idle, },
435  	{
436  		.name = "C3",
437  		.desc = "MWAIT 0x10",
438  		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
439  		.exit_latency = 59,
440  		.target_residency = 600,
441  		.enter = &intel_idle,
442  		.enter_s2idle = intel_idle_s2idle, },
443  	{
444  		.name = "C6",
445  		.desc = "MWAIT 0x20",
446  		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
447  		.exit_latency = 88,
448  		.target_residency = 700,
449  		.enter = &intel_idle,
450  		.enter_s2idle = intel_idle_s2idle, },
451  	{
452  		.enter = NULL }
453  };
454  
455  static struct cpuidle_state hsw_cstates[] = {
456  	{
457  		.name = "C1",
458  		.desc = "MWAIT 0x00",
459  		.flags = MWAIT2flg(0x00),
460  		.exit_latency = 2,
461  		.target_residency = 2,
462  		.enter = &intel_idle,
463  		.enter_s2idle = intel_idle_s2idle, },
464  	{
465  		.name = "C1E",
466  		.desc = "MWAIT 0x01",
467  		.flags = MWAIT2flg(0x01),
468  		.exit_latency = 10,
469  		.target_residency = 20,
470  		.enter = &intel_idle,
471  		.enter_s2idle = intel_idle_s2idle, },
472  	{
473  		.name = "C3",
474  		.desc = "MWAIT 0x10",
475  		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
476  		.exit_latency = 33,
477  		.target_residency = 100,
478  		.enter = &intel_idle,
479  		.enter_s2idle = intel_idle_s2idle, },
480  	{
481  		.name = "C6",
482  		.desc = "MWAIT 0x20",
483  		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
484  		.exit_latency = 133,
485  		.target_residency = 400,
486  		.enter = &intel_idle,
487  		.enter_s2idle = intel_idle_s2idle, },
488  	{
489  		.name = "C7s",
490  		.desc = "MWAIT 0x32",
491  		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
492  		.exit_latency = 166,
493  		.target_residency = 500,
494  		.enter = &intel_idle,
495  		.enter_s2idle = intel_idle_s2idle, },
496  	{
497  		.name = "C8",
498  		.desc = "MWAIT 0x40",
499  		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
500  		.exit_latency = 300,
501  		.target_residency = 900,
502  		.enter = &intel_idle,
503  		.enter_s2idle = intel_idle_s2idle, },
504  	{
505  		.name = "C9",
506  		.desc = "MWAIT 0x50",
507  		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
508  		.exit_latency = 600,
509  		.target_residency = 1800,
510  		.enter = &intel_idle,
511  		.enter_s2idle = intel_idle_s2idle, },
512  	{
513  		.name = "C10",
514  		.desc = "MWAIT 0x60",
515  		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
516  		.exit_latency = 2600,
517  		.target_residency = 7700,
518  		.enter = &intel_idle,
519  		.enter_s2idle = intel_idle_s2idle, },
520  	{
521  		.enter = NULL }
522  };
523  static struct cpuidle_state bdw_cstates[] = {
524  	{
525  		.name = "C1",
526  		.desc = "MWAIT 0x00",
527  		.flags = MWAIT2flg(0x00),
528  		.exit_latency = 2,
529  		.target_residency = 2,
530  		.enter = &intel_idle,
531  		.enter_s2idle = intel_idle_s2idle, },
532  	{
533  		.name = "C1E",
534  		.desc = "MWAIT 0x01",
535  		.flags = MWAIT2flg(0x01),
536  		.exit_latency = 10,
537  		.target_residency = 20,
538  		.enter = &intel_idle,
539  		.enter_s2idle = intel_idle_s2idle, },
540  	{
541  		.name = "C3",
542  		.desc = "MWAIT 0x10",
543  		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
544  		.exit_latency = 40,
545  		.target_residency = 100,
546  		.enter = &intel_idle,
547  		.enter_s2idle = intel_idle_s2idle, },
548  	{
549  		.name = "C6",
550  		.desc = "MWAIT 0x20",
551  		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
552  		.exit_latency = 133,
553  		.target_residency = 400,
554  		.enter = &intel_idle,
555  		.enter_s2idle = intel_idle_s2idle, },
556  	{
557  		.name = "C7s",
558  		.desc = "MWAIT 0x32",
559  		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
560  		.exit_latency = 166,
561  		.target_residency = 500,
562  		.enter = &intel_idle,
563  		.enter_s2idle = intel_idle_s2idle, },
564  	{
565  		.name = "C8",
566  		.desc = "MWAIT 0x40",
567  		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
568  		.exit_latency = 300,
569  		.target_residency = 900,
570  		.enter = &intel_idle,
571  		.enter_s2idle = intel_idle_s2idle, },
572  	{
573  		.name = "C9",
574  		.desc = "MWAIT 0x50",
575  		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
576  		.exit_latency = 600,
577  		.target_residency = 1800,
578  		.enter = &intel_idle,
579  		.enter_s2idle = intel_idle_s2idle, },
580  	{
581  		.name = "C10",
582  		.desc = "MWAIT 0x60",
583  		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
584  		.exit_latency = 2600,
585  		.target_residency = 7700,
586  		.enter = &intel_idle,
587  		.enter_s2idle = intel_idle_s2idle, },
588  	{
589  		.enter = NULL }
590  };
591  
592  static struct cpuidle_state skl_cstates[] = {
593  	{
594  		.name = "C1",
595  		.desc = "MWAIT 0x00",
596  		.flags = MWAIT2flg(0x00),
597  		.exit_latency = 2,
598  		.target_residency = 2,
599  		.enter = &intel_idle,
600  		.enter_s2idle = intel_idle_s2idle, },
601  	{
602  		.name = "C1E",
603  		.desc = "MWAIT 0x01",
604  		.flags = MWAIT2flg(0x01),
605  		.exit_latency = 10,
606  		.target_residency = 20,
607  		.enter = &intel_idle,
608  		.enter_s2idle = intel_idle_s2idle, },
609  	{
610  		.name = "C3",
611  		.desc = "MWAIT 0x10",
612  		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
613  		.exit_latency = 70,
614  		.target_residency = 100,
615  		.enter = &intel_idle,
616  		.enter_s2idle = intel_idle_s2idle, },
617  	{
618  		.name = "C6",
619  		.desc = "MWAIT 0x20",
620  		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
621  		.exit_latency = 85,
622  		.target_residency = 200,
623  		.enter = &intel_idle,
624  		.enter_s2idle = intel_idle_s2idle, },
625  	{
626  		.name = "C7s",
627  		.desc = "MWAIT 0x33",
628  		.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
629  		.exit_latency = 124,
630  		.target_residency = 800,
631  		.enter = &intel_idle,
632  		.enter_s2idle = intel_idle_s2idle, },
633  	{
634  		.name = "C8",
635  		.desc = "MWAIT 0x40",
636  		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
637  		.exit_latency = 200,
638  		.target_residency = 800,
639  		.enter = &intel_idle,
640  		.enter_s2idle = intel_idle_s2idle, },
641  	{
642  		.name = "C9",
643  		.desc = "MWAIT 0x50",
644  		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
645  		.exit_latency = 480,
646  		.target_residency = 5000,
647  		.enter = &intel_idle,
648  		.enter_s2idle = intel_idle_s2idle, },
649  	{
650  		.name = "C10",
651  		.desc = "MWAIT 0x60",
652  		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
653  		.exit_latency = 890,
654  		.target_residency = 5000,
655  		.enter = &intel_idle,
656  		.enter_s2idle = intel_idle_s2idle, },
657  	{
658  		.enter = NULL }
659  };
660  
661  static struct cpuidle_state skx_cstates[] = {
662  	{
663  		.name = "C1",
664  		.desc = "MWAIT 0x00",
665  		.flags = MWAIT2flg(0x00),
666  		.exit_latency = 2,
667  		.target_residency = 2,
668  		.enter = &intel_idle,
669  		.enter_s2idle = intel_idle_s2idle, },
670  	{
671  		.name = "C1E",
672  		.desc = "MWAIT 0x01",
673  		.flags = MWAIT2flg(0x01),
674  		.exit_latency = 10,
675  		.target_residency = 20,
676  		.enter = &intel_idle,
677  		.enter_s2idle = intel_idle_s2idle, },
678  	{
679  		.name = "C6",
680  		.desc = "MWAIT 0x20",
681  		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
682  		.exit_latency = 133,
683  		.target_residency = 600,
684  		.enter = &intel_idle,
685  		.enter_s2idle = intel_idle_s2idle, },
686  	{
687  		.enter = NULL }
688  };
689  
690  static struct cpuidle_state atom_cstates[] = {
691  	{
692  		.name = "C1E",
693  		.desc = "MWAIT 0x00",
694  		.flags = MWAIT2flg(0x00),
695  		.exit_latency = 10,
696  		.target_residency = 20,
697  		.enter = &intel_idle,
698  		.enter_s2idle = intel_idle_s2idle, },
699  	{
700  		.name = "C2",
701  		.desc = "MWAIT 0x10",
702  		.flags = MWAIT2flg(0x10),
703  		.exit_latency = 20,
704  		.target_residency = 80,
705  		.enter = &intel_idle,
706  		.enter_s2idle = intel_idle_s2idle, },
707  	{
708  		.name = "C4",
709  		.desc = "MWAIT 0x30",
710  		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
711  		.exit_latency = 100,
712  		.target_residency = 400,
713  		.enter = &intel_idle,
714  		.enter_s2idle = intel_idle_s2idle, },
715  	{
716  		.name = "C6",
717  		.desc = "MWAIT 0x52",
718  		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
719  		.exit_latency = 140,
720  		.target_residency = 560,
721  		.enter = &intel_idle,
722  		.enter_s2idle = intel_idle_s2idle, },
723  	{
724  		.enter = NULL }
725  };
726  static struct cpuidle_state tangier_cstates[] = {
727  	{
728  		.name = "C1",
729  		.desc = "MWAIT 0x00",
730  		.flags = MWAIT2flg(0x00),
731  		.exit_latency = 1,
732  		.target_residency = 4,
733  		.enter = &intel_idle,
734  		.enter_s2idle = intel_idle_s2idle, },
735  	{
736  		.name = "C4",
737  		.desc = "MWAIT 0x30",
738  		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
739  		.exit_latency = 100,
740  		.target_residency = 400,
741  		.enter = &intel_idle,
742  		.enter_s2idle = intel_idle_s2idle, },
743  	{
744  		.name = "C6",
745  		.desc = "MWAIT 0x52",
746  		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
747  		.exit_latency = 140,
748  		.target_residency = 560,
749  		.enter = &intel_idle,
750  		.enter_s2idle = intel_idle_s2idle, },
751  	{
752  		.name = "C7",
753  		.desc = "MWAIT 0x60",
754  		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
755  		.exit_latency = 1200,
756  		.target_residency = 4000,
757  		.enter = &intel_idle,
758  		.enter_s2idle = intel_idle_s2idle, },
759  	{
760  		.name = "C9",
761  		.desc = "MWAIT 0x64",
762  		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
763  		.exit_latency = 10000,
764  		.target_residency = 20000,
765  		.enter = &intel_idle,
766  		.enter_s2idle = intel_idle_s2idle, },
767  	{
768  		.enter = NULL }
769  };
770  static struct cpuidle_state avn_cstates[] = {
771  	{
772  		.name = "C1",
773  		.desc = "MWAIT 0x00",
774  		.flags = MWAIT2flg(0x00),
775  		.exit_latency = 2,
776  		.target_residency = 2,
777  		.enter = &intel_idle,
778  		.enter_s2idle = intel_idle_s2idle, },
779  	{
780  		.name = "C6",
781  		.desc = "MWAIT 0x51",
782  		.flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
783  		.exit_latency = 15,
784  		.target_residency = 45,
785  		.enter = &intel_idle,
786  		.enter_s2idle = intel_idle_s2idle, },
787  	{
788  		.enter = NULL }
789  };
790  static struct cpuidle_state knl_cstates[] = {
791  	{
792  		.name = "C1",
793  		.desc = "MWAIT 0x00",
794  		.flags = MWAIT2flg(0x00),
795  		.exit_latency = 1,
796  		.target_residency = 2,
797  		.enter = &intel_idle,
798  		.enter_s2idle = intel_idle_s2idle },
799  	{
800  		.name = "C6",
801  		.desc = "MWAIT 0x10",
802  		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
803  		.exit_latency = 120,
804  		.target_residency = 500,
805  		.enter = &intel_idle,
806  		.enter_s2idle = intel_idle_s2idle },
807  	{
808  		.enter = NULL }
809  };
810  
811  static struct cpuidle_state bxt_cstates[] = {
812  	{
813  		.name = "C1",
814  		.desc = "MWAIT 0x00",
815  		.flags = MWAIT2flg(0x00),
816  		.exit_latency = 2,
817  		.target_residency = 2,
818  		.enter = &intel_idle,
819  		.enter_s2idle = intel_idle_s2idle, },
820  	{
821  		.name = "C1E",
822  		.desc = "MWAIT 0x01",
823  		.flags = MWAIT2flg(0x01),
824  		.exit_latency = 10,
825  		.target_residency = 20,
826  		.enter = &intel_idle,
827  		.enter_s2idle = intel_idle_s2idle, },
828  	{
829  		.name = "C6",
830  		.desc = "MWAIT 0x20",
831  		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
832  		.exit_latency = 133,
833  		.target_residency = 133,
834  		.enter = &intel_idle,
835  		.enter_s2idle = intel_idle_s2idle, },
836  	{
837  		.name = "C7s",
838  		.desc = "MWAIT 0x31",
839  		.flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
840  		.exit_latency = 155,
841  		.target_residency = 155,
842  		.enter = &intel_idle,
843  		.enter_s2idle = intel_idle_s2idle, },
844  	{
845  		.name = "C8",
846  		.desc = "MWAIT 0x40",
847  		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
848  		.exit_latency = 1000,
849  		.target_residency = 1000,
850  		.enter = &intel_idle,
851  		.enter_s2idle = intel_idle_s2idle, },
852  	{
853  		.name = "C9",
854  		.desc = "MWAIT 0x50",
855  		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
856  		.exit_latency = 2000,
857  		.target_residency = 2000,
858  		.enter = &intel_idle,
859  		.enter_s2idle = intel_idle_s2idle, },
860  	{
861  		.name = "C10",
862  		.desc = "MWAIT 0x60",
863  		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
864  		.exit_latency = 10000,
865  		.target_residency = 10000,
866  		.enter = &intel_idle,
867  		.enter_s2idle = intel_idle_s2idle, },
868  	{
869  		.enter = NULL }
870  };
871  
872  static struct cpuidle_state dnv_cstates[] = {
873  	{
874  		.name = "C1",
875  		.desc = "MWAIT 0x00",
876  		.flags = MWAIT2flg(0x00),
877  		.exit_latency = 2,
878  		.target_residency = 2,
879  		.enter = &intel_idle,
880  		.enter_s2idle = intel_idle_s2idle, },
881  	{
882  		.name = "C1E",
883  		.desc = "MWAIT 0x01",
884  		.flags = MWAIT2flg(0x01),
885  		.exit_latency = 10,
886  		.target_residency = 20,
887  		.enter = &intel_idle,
888  		.enter_s2idle = intel_idle_s2idle, },
889  	{
890  		.name = "C6",
891  		.desc = "MWAIT 0x20",
892  		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
893  		.exit_latency = 50,
894  		.target_residency = 500,
895  		.enter = &intel_idle,
896  		.enter_s2idle = intel_idle_s2idle, },
897  	{
898  		.enter = NULL }
899  };
900  
901  /**
902   * intel_idle
903   * @dev: cpuidle_device
904   * @drv: cpuidle driver
905   * @index: index of cpuidle state
906   *
907   * Must be called under local_irq_disable().
908   */
909  static __cpuidle int intel_idle(struct cpuidle_device *dev,
910  				struct cpuidle_driver *drv, int index)
911  {
912  	unsigned long ecx = 1; /* break on interrupt flag */
913  	struct cpuidle_state *state = &drv->states[index];
914  	unsigned long eax = flg2MWAIT(state->flags);
915  	unsigned int cstate;
916  	bool uninitialized_var(tick);
917  	int cpu = smp_processor_id();
918  
919  	/*
920  	 * leave_mm() to avoid costly and often unnecessary wakeups
921  	 * for flushing the user TLB's associated with the active mm.
922  	 */
923  	if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
924  		leave_mm(cpu);
925  
926  	if (!static_cpu_has(X86_FEATURE_ARAT)) {
927  		cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) &
928  				MWAIT_CSTATE_MASK) + 1;
929  		tick = false;
930  		if (!(lapic_timer_reliable_states & (1 << (cstate)))) {
931  			tick = true;
932  			tick_broadcast_enter();
933  		}
934  	}
935  
936  	mwait_idle_with_hints(eax, ecx);
937  
938  	if (!static_cpu_has(X86_FEATURE_ARAT) && tick)
939  		tick_broadcast_exit();
940  
941  	return index;
942  }
943  
944  /**
945   * intel_idle_s2idle - simplified "enter" callback routine for suspend-to-idle
946   * @dev: cpuidle_device
947   * @drv: cpuidle driver
948   * @index: state index
949   */
950  static void intel_idle_s2idle(struct cpuidle_device *dev,
951  			     struct cpuidle_driver *drv, int index)
952  {
953  	unsigned long ecx = 1; /* break on interrupt flag */
954  	unsigned long eax = flg2MWAIT(drv->states[index].flags);
955  
956  	mwait_idle_with_hints(eax, ecx);
957  }
958  
959  static void __setup_broadcast_timer(bool on)
960  {
961  	if (on)
962  		tick_broadcast_enable();
963  	else
964  		tick_broadcast_disable();
965  }
966  
967  static void auto_demotion_disable(void)
968  {
969  	unsigned long long msr_bits;
970  
971  	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
972  	msr_bits &= ~(icpu->auto_demotion_disable_flags);
973  	wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
974  }
975  static void c1e_promotion_disable(void)
976  {
977  	unsigned long long msr_bits;
978  
979  	rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
980  	msr_bits &= ~0x2;	/* clear C1E promotion enable (bit 1) */
981  	wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
982  }
983  
984  static const struct idle_cpu idle_cpu_nehalem = {
985  	.state_table = nehalem_cstates,
986  	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
987  	.disable_promotion_to_c1e = true,
988  };
989  
990  static const struct idle_cpu idle_cpu_atom = {
991  	.state_table = atom_cstates,
992  };
993  
994  static const struct idle_cpu idle_cpu_tangier = {
995  	.state_table = tangier_cstates,
996  };
997  
998  static const struct idle_cpu idle_cpu_lincroft = {
999  	.state_table = atom_cstates,
1000  	.auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
1001  };
1002  
1003  static const struct idle_cpu idle_cpu_snb = {
1004  	.state_table = snb_cstates,
1005  	.disable_promotion_to_c1e = true,
1006  };
1007  
1008  static const struct idle_cpu idle_cpu_byt = {
1009  	.state_table = byt_cstates,
1010  	.disable_promotion_to_c1e = true,
1011  	.byt_auto_demotion_disable_flag = true,
1012  };
1013  
1014  static const struct idle_cpu idle_cpu_cht = {
1015  	.state_table = cht_cstates,
1016  	.disable_promotion_to_c1e = true,
1017  	.byt_auto_demotion_disable_flag = true,
1018  };
1019  
1020  static const struct idle_cpu idle_cpu_ivb = {
1021  	.state_table = ivb_cstates,
1022  	.disable_promotion_to_c1e = true,
1023  };
1024  
1025  static const struct idle_cpu idle_cpu_ivt = {
1026  	.state_table = ivt_cstates,
1027  	.disable_promotion_to_c1e = true,
1028  };
1029  
1030  static const struct idle_cpu idle_cpu_hsw = {
1031  	.state_table = hsw_cstates,
1032  	.disable_promotion_to_c1e = true,
1033  };
1034  
1035  static const struct idle_cpu idle_cpu_bdw = {
1036  	.state_table = bdw_cstates,
1037  	.disable_promotion_to_c1e = true,
1038  };
1039  
1040  static const struct idle_cpu idle_cpu_skl = {
1041  	.state_table = skl_cstates,
1042  	.disable_promotion_to_c1e = true,
1043  };
1044  
1045  static const struct idle_cpu idle_cpu_skx = {
1046  	.state_table = skx_cstates,
1047  	.disable_promotion_to_c1e = true,
1048  };
1049  
1050  static const struct idle_cpu idle_cpu_avn = {
1051  	.state_table = avn_cstates,
1052  	.disable_promotion_to_c1e = true,
1053  };
1054  
1055  static const struct idle_cpu idle_cpu_knl = {
1056  	.state_table = knl_cstates,
1057  };
1058  
1059  static const struct idle_cpu idle_cpu_bxt = {
1060  	.state_table = bxt_cstates,
1061  	.disable_promotion_to_c1e = true,
1062  };
1063  
1064  static const struct idle_cpu idle_cpu_dnv = {
1065  	.state_table = dnv_cstates,
1066  	.disable_promotion_to_c1e = true,
1067  };
1068  
1069  #define ICPU(model, cpu) \
1070  	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&cpu }
1071  
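/*
 * For illustration, ICPU(INTEL_FAM6_NEHALEM_EP, idle_cpu_nehalem) below
 * expands to
 *   { X86_VENDOR_INTEL, 6, INTEL_FAM6_NEHALEM_EP, X86_FEATURE_ANY,
 *     (unsigned long)&idle_cpu_nehalem }
 * so x86_match_cpu() can hand back the matching struct idle_cpu through
 * id->driver_data in intel_idle_probe().
 */
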
1072  static const struct x86_cpu_id intel_idle_ids[] __initconst = {
1073  	ICPU(INTEL_FAM6_NEHALEM_EP,		idle_cpu_nehalem),
1074  	ICPU(INTEL_FAM6_NEHALEM,		idle_cpu_nehalem),
1075  	ICPU(INTEL_FAM6_NEHALEM_G,		idle_cpu_nehalem),
1076  	ICPU(INTEL_FAM6_WESTMERE,		idle_cpu_nehalem),
1077  	ICPU(INTEL_FAM6_WESTMERE_EP,		idle_cpu_nehalem),
1078  	ICPU(INTEL_FAM6_NEHALEM_EX,		idle_cpu_nehalem),
1079  	ICPU(INTEL_FAM6_ATOM_PINEVIEW,		idle_cpu_atom),
1080  	ICPU(INTEL_FAM6_ATOM_LINCROFT,		idle_cpu_lincroft),
1081  	ICPU(INTEL_FAM6_WESTMERE_EX,		idle_cpu_nehalem),
1082  	ICPU(INTEL_FAM6_SANDYBRIDGE,		idle_cpu_snb),
1083  	ICPU(INTEL_FAM6_SANDYBRIDGE_X,		idle_cpu_snb),
1084  	ICPU(INTEL_FAM6_ATOM_CEDARVIEW,		idle_cpu_atom),
1085  	ICPU(INTEL_FAM6_ATOM_SILVERMONT1,	idle_cpu_byt),
1086  	ICPU(INTEL_FAM6_ATOM_MERRIFIELD,	idle_cpu_tangier),
1087  	ICPU(INTEL_FAM6_ATOM_AIRMONT,		idle_cpu_cht),
1088  	ICPU(INTEL_FAM6_IVYBRIDGE,		idle_cpu_ivb),
1089  	ICPU(INTEL_FAM6_IVYBRIDGE_X,		idle_cpu_ivt),
1090  	ICPU(INTEL_FAM6_HASWELL_CORE,		idle_cpu_hsw),
1091  	ICPU(INTEL_FAM6_HASWELL_X,		idle_cpu_hsw),
1092  	ICPU(INTEL_FAM6_HASWELL_ULT,		idle_cpu_hsw),
1093  	ICPU(INTEL_FAM6_HASWELL_GT3E,		idle_cpu_hsw),
1094  	ICPU(INTEL_FAM6_ATOM_SILVERMONT2,	idle_cpu_avn),
1095  	ICPU(INTEL_FAM6_BROADWELL_CORE,		idle_cpu_bdw),
1096  	ICPU(INTEL_FAM6_BROADWELL_GT3E,		idle_cpu_bdw),
1097  	ICPU(INTEL_FAM6_BROADWELL_X,		idle_cpu_bdw),
1098  	ICPU(INTEL_FAM6_BROADWELL_XEON_D,	idle_cpu_bdw),
1099  	ICPU(INTEL_FAM6_SKYLAKE_MOBILE,		idle_cpu_skl),
1100  	ICPU(INTEL_FAM6_SKYLAKE_DESKTOP,	idle_cpu_skl),
1101  	ICPU(INTEL_FAM6_KABYLAKE_MOBILE,	idle_cpu_skl),
1102  	ICPU(INTEL_FAM6_KABYLAKE_DESKTOP,	idle_cpu_skl),
1103  	ICPU(INTEL_FAM6_SKYLAKE_X,		idle_cpu_skx),
1104  	ICPU(INTEL_FAM6_XEON_PHI_KNL,		idle_cpu_knl),
1105  	ICPU(INTEL_FAM6_XEON_PHI_KNM,		idle_cpu_knl),
1106  	ICPU(INTEL_FAM6_ATOM_GOLDMONT,		idle_cpu_bxt),
1107  	ICPU(INTEL_FAM6_ATOM_GEMINI_LAKE,	idle_cpu_bxt),
1108  	ICPU(INTEL_FAM6_ATOM_DENVERTON,		idle_cpu_dnv),
1109  	{}
1110  };
1111  
1112  /*
1113   * intel_idle_probe()
1114   */
1115  static int __init intel_idle_probe(void)
1116  {
1117  	unsigned int eax, ebx, ecx;
1118  	const struct x86_cpu_id *id;
1119  
1120  	if (max_cstate == 0) {
1121  		pr_debug("disabled\n");
1122  		return -EPERM;
1123  	}
1124  
1125  	id = x86_match_cpu(intel_idle_ids);
1126  	if (!id) {
1127  		if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
1128  		    boot_cpu_data.x86 == 6)
1129  			pr_debug("does not run on family %d model %d\n",
1130  				 boot_cpu_data.x86, boot_cpu_data.x86_model);
1131  		return -ENODEV;
1132  	}
1133  
1134  	if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
1135  		pr_debug("Please enable MWAIT in BIOS SETUP\n");
1136  		return -ENODEV;
1137  	}
1138  
1139  	if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
1140  		return -ENODEV;
1141  
1142  	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);
1143  
1144  	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
1145  	    !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
1146  	    !mwait_substates)
1147  			return -ENODEV;
1148  
1149  	pr_debug("MWAIT substates: 0x%x\n", mwait_substates);
1150  
1151  	icpu = (const struct idle_cpu *)id->driver_data;
1152  	cpuidle_state_table = icpu->state_table;
1153  
1154  	pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
1155  		 boot_cpu_data.x86_model);
1156  
1157  	return 0;
1158  }
1159  
1160  /*
1161   * intel_idle_cpuidle_devices_uninit()
1162   * Unregisters the cpuidle devices.
1163   */
1164  static void intel_idle_cpuidle_devices_uninit(void)
1165  {
1166  	int i;
1167  	struct cpuidle_device *dev;
1168  
1169  	for_each_online_cpu(i) {
1170  		dev = per_cpu_ptr(intel_idle_cpuidle_devices, i);
1171  		cpuidle_unregister_device(dev);
1172  	}
1173  }
1174  
1175  /*
1176   * ivt_idle_state_table_update(void)
1177   *
1178   * Tune IVT multi-socket targets
1179   * Assumption: num_sockets == (max_package_num + 1)
1180   */
1181  static void ivt_idle_state_table_update(void)
1182  {
1183  	/* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
1184  	int cpu, package_num, num_sockets = 1;
1185  
1186  	for_each_online_cpu(cpu) {
1187  		package_num = topology_physical_package_id(cpu);
1188  		if (package_num + 1 > num_sockets) {
1189  			num_sockets = package_num + 1;
1190  
1191  			if (num_sockets > 4) {
1192  				cpuidle_state_table = ivt_cstates_8s;
1193  				return;
1194  			}
1195  		}
1196  	}
1197  
1198  	if (num_sockets > 2)
1199  		cpuidle_state_table = ivt_cstates_4s;
1200  
1201  	/* else, 1 and 2 socket systems use default ivt_cstates */
1202  }
1203  
1204  /*
1205   * Translate IRTL (Interrupt Response Time Limit) MSR to usec
1206   */
1207  
1208  static unsigned int irtl_ns_units[] = {
1209  	1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };
1210  
1211  static unsigned long long irtl_2_usec(unsigned long long irtl)
1212  {
1213  	unsigned long long ns;
1214  
1215  	if (!irtl)
1216  		return 0;
1217  
1218  	ns = irtl_ns_units[(irtl >> 10) & 0x7];
1219  
1220  	return div64_u64((irtl & 0x3FF) * ns, 1000);
1221  }
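
/*
 * Worked example (hypothetical register value): for irtl == 0x864 the
 * time-unit field (bits 12:10) is 2, selecting irtl_ns_units[2] = 1024 ns,
 * and the interval (bits 9:0) is 0x64 = 100 units, so irtl_2_usec()
 * returns 100 * 1024 / 1000 = 102 usec.
 */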
1222  /*
1223   * bxt_idle_state_table_update(void)
1224   *
1225   * On BXT, we trust the IRTL to show the definitive maximum latency
1226   * We use the same value for target_residency.
1227   */
1228  static void bxt_idle_state_table_update(void)
1229  {
1230  	unsigned long long msr;
1231  	unsigned int usec;
1232  
1233  	rdmsrl(MSR_PKGC6_IRTL, msr);
1234  	usec = irtl_2_usec(msr);
1235  	if (usec) {
1236  		bxt_cstates[2].exit_latency = usec;
1237  		bxt_cstates[2].target_residency = usec;
1238  	}
1239  
1240  	rdmsrl(MSR_PKGC7_IRTL, msr);
1241  	usec = irtl_2_usec(msr);
1242  	if (usec) {
1243  		bxt_cstates[3].exit_latency = usec;
1244  		bxt_cstates[3].target_residency = usec;
1245  	}
1246  
1247  	rdmsrl(MSR_PKGC8_IRTL, msr);
1248  	usec = irtl_2_usec(msr);
1249  	if (usec) {
1250  		bxt_cstates[4].exit_latency = usec;
1251  		bxt_cstates[4].target_residency = usec;
1252  	}
1253  
1254  	rdmsrl(MSR_PKGC9_IRTL, msr);
1255  	usec = irtl_2_usec(msr);
1256  	if (usec) {
1257  		bxt_cstates[5].exit_latency = usec;
1258  		bxt_cstates[5].target_residency = usec;
1259  	}
1260  
1261  	rdmsrl(MSR_PKGC10_IRTL, msr);
1262  	usec = irtl_2_usec(msr);
1263  	if (usec) {
1264  		bxt_cstates[6].exit_latency = usec;
1265  		bxt_cstates[6].target_residency = usec;
1266  	}
1267  
1268  }
1269  /*
1270   * sklh_idle_state_table_update(void)
1271   *
1272   * On SKL-H (model 0x5e) disable C8 and C9 if:
1273   * C10 is enabled and SGX disabled
1274   */
1275  static void sklh_idle_state_table_update(void)
1276  {
1277  	unsigned long long msr;
1278  	unsigned int eax, ebx, ecx, edx;
1279  
1280  
1281  	/* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
1282  	if (max_cstate <= 7)
1283  		return;
1284  
1285  	/* if PC10 not present in CPUID.MWAIT.EDX */
1286  	if ((mwait_substates & (0xF << 28)) == 0)
1287  		return;
1288  
1289  	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1290  
1291  	/* PC10 is not enabled in PKG C-state limit */
1292  	if ((msr & 0xF) != 8)
1293  		return;
1294  
1295  	ecx = 0;
1296  	cpuid(7, &eax, &ebx, &ecx, &edx);
1297  
1298  	/* if SGX is present */
1299  	if (ebx & (1 << 2)) {
1300  
1301  		rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
1302  
1303  		/* if SGX is enabled */
1304  		if (msr & (1 << 18))
1305  			return;
1306  	}
1307  
1308  	skl_cstates[5].disabled = 1;	/* C8-SKL */
1309  	skl_cstates[6].disabled = 1;	/* C9-SKL */
1310  }
1311  /*
1312   * intel_idle_state_table_update()
1313   *
1314   * Update the default state_table for this CPU-id
1315   */
1316  
1317  static void intel_idle_state_table_update(void)
1318  {
1319  	switch (boot_cpu_data.x86_model) {
1320  
1321  	case INTEL_FAM6_IVYBRIDGE_X:
1322  		ivt_idle_state_table_update();
1323  		break;
1324  	case INTEL_FAM6_ATOM_GOLDMONT:
1325  	case INTEL_FAM6_ATOM_GEMINI_LAKE:
1326  		bxt_idle_state_table_update();
1327  		break;
1328  	case INTEL_FAM6_SKYLAKE_DESKTOP:
1329  		sklh_idle_state_table_update();
1330  		break;
1331  	}
1332  }
1333  
1334  /*
1335   * intel_idle_cpuidle_driver_init()
1336   * allocate, initialize cpuidle_states
1337   */
1338  static void __init intel_idle_cpuidle_driver_init(void)
1339  {
1340  	int cstate;
1341  	struct cpuidle_driver *drv = &intel_idle_driver;
1342  
1343  	intel_idle_state_table_update();
1344  
1345  	cpuidle_poll_state_init(drv);
1346  	drv->state_count = 1;
1347  
1348  	for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
1349  		int num_substates, mwait_hint, mwait_cstate;
1350  
1351  		if ((cpuidle_state_table[cstate].enter == NULL) &&
1352  		    (cpuidle_state_table[cstate].enter_s2idle == NULL))
1353  			break;
1354  
1355  		if (cstate + 1 > max_cstate) {
1356  			pr_info("max_cstate %d reached\n", max_cstate);
1357  			break;
1358  		}
1359  
1360  		mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
1361  		mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint);
1362  
1363  		/* number of sub-states for this state in CPUID.MWAIT */
1364  		num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4))
1365  					& MWAIT_SUBSTATE_MASK;
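		/*
		 * Example (hypothetical CPUID value): with mwait_substates ==
		 * 0x00000120, a state whose hint is 0x10 (mwait_cstate == 1)
		 * reads nibble 2 and gets num_substates == 1, while a hint of
		 * 0x20 (mwait_cstate == 2) reads nibble 3 and gets 0, so that
		 * state is skipped by the check below.
		 */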
1366  
1367  		/* if NO sub-states for this state in CPUID, skip it */
1368  		if (num_substates == 0)
1369  			continue;
1370  
1371  		/* if state marked as disabled, skip it */
1372  		if (cpuidle_state_table[cstate].disabled != 0) {
1373  			pr_debug("state %s is disabled\n",
1374  				 cpuidle_state_table[cstate].name);
1375  			continue;
1376  		}
1377  
1378  
1379  		if (((mwait_cstate + 1) > 2) &&
1380  			!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
1381  			mark_tsc_unstable("TSC halts in idle"
1382  					" states deeper than C2");
1383  
1384  		drv->states[drv->state_count] =	/* structure copy */
1385  			cpuidle_state_table[cstate];
1386  
1387  		drv->state_count += 1;
1388  	}
1389  
1390  	if (icpu->byt_auto_demotion_disable_flag) {
1391  		wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
1392  		wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
1393  	}
1394  }
1395  
1396  
1397  /*
1398   * intel_idle_cpu_init()
1399   * allocate, initialize, register cpuidle_devices
1400   * @cpu: cpu/core to initialize
1401   */
1402  static int intel_idle_cpu_init(unsigned int cpu)
1403  {
1404  	struct cpuidle_device *dev;
1405  
1406  	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1407  	dev->cpu = cpu;
1408  
1409  	if (cpuidle_register_device(dev)) {
1410  		pr_debug("cpuidle_register_device %d failed!\n", cpu);
1411  		return -EIO;
1412  	}
1413  
1414  	if (icpu->auto_demotion_disable_flags)
1415  		auto_demotion_disable();
1416  
1417  	if (icpu->disable_promotion_to_c1e)
1418  		c1e_promotion_disable();
1419  
1420  	return 0;
1421  }
1422  
1423  static int intel_idle_cpu_online(unsigned int cpu)
1424  {
1425  	struct cpuidle_device *dev;
1426  
1427  	if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE)
1428  		__setup_broadcast_timer(true);
1429  
1430  	/*
1431  	 * Some systems can hotplug a cpu at runtime after
1432  	 * the kernel has booted, we have to initialize the
1433  	 * driver in this case
1434  	 */
1435  	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1436  	if (!dev->registered)
1437  		return intel_idle_cpu_init(cpu);
1438  
1439  	return 0;
1440  }
1441  
1442  static int __init intel_idle_init(void)
1443  {
1444  	int retval;
1445  
1446  	/* Do not load intel_idle at all for now if idle= is passed */
1447  	if (boot_option_idle_override != IDLE_NO_OVERRIDE)
1448  		return -ENODEV;
1449  
1450  	retval = intel_idle_probe();
1451  	if (retval)
1452  		return retval;
1453  
1454  	intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
1455  	if (intel_idle_cpuidle_devices == NULL)
1456  		return -ENOMEM;
1457  
1458  	intel_idle_cpuidle_driver_init();
1459  	retval = cpuidle_register_driver(&intel_idle_driver);
1460  	if (retval) {
1461  		struct cpuidle_driver *drv = cpuidle_get_driver();
1462  		printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
1463  		       drv ? drv->name : "none");
1464  		goto init_driver_fail;
1465  	}
1466  
1467  	if (boot_cpu_has(X86_FEATURE_ARAT))	/* Always Reliable APIC Timer */
1468  		lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE;
1469  
1470  	retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
1471  				   intel_idle_cpu_online, NULL);
1472  	if (retval < 0)
1473  		goto hp_setup_fail;
1474  
1475  	pr_debug("lapic_timer_reliable_states 0x%x\n",
1476  		 lapic_timer_reliable_states);
1477  
1478  	return 0;
1479  
1480  hp_setup_fail:
1481  	intel_idle_cpuidle_devices_uninit();
1482  	cpuidle_unregister_driver(&intel_idle_driver);
1483  init_driver_fail:
1484  	free_percpu(intel_idle_cpuidle_devices);
1485  	return retval;
1486  
1487  }
1488  device_initcall(intel_idle_init);
1489  
1490  /*
1491   * We are not really modular, but we used to support that.  Meaning we also
1492   * support "intel_idle.max_cstate=..." at boot and also a read-only export of
1493   * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
1494   * is the easiest way (currently) to continue doing that.
1495   */
1496  module_param(max_cstate, int, 0444);
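
/*
 * Usage example (illustrative): booting with "intel_idle.max_cstate=0"
 * keeps the driver from registering at all, while "intel_idle.max_cstate=1"
 * leaves only the shallowest table state (typically C1) plus POLL.  The
 * current value is readable at /sys/module/intel_idle/parameters/max_cstate.
 */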
1497