xref: /linux/drivers/pci/pcie/aspm.c (revision 6863aaa88516292b885fdce5dd91925a00c3a3de)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Enable PCIe link L0s/L1 state and Clock Power Management
4  *
5  * Copyright (C) 2007 Intel
6  * Copyright (C) Zhang Yanmin (yanmin.zhang@intel.com)
7  * Copyright (C) Shaohua Li (shaohua.li@intel.com)
8  */
9 
10 #include <linux/kernel.h>
11 #include <linux/math.h>
12 #include <linux/module.h>
13 #include <linux/moduleparam.h>
14 #include <linux/pci.h>
15 #include <linux/pci_regs.h>
16 #include <linux/errno.h>
17 #include <linux/pm.h>
18 #include <linux/init.h>
19 #include <linux/slab.h>
20 #include <linux/jiffies.h>
21 #include <linux/delay.h>
22 #include "../pci.h"
23 
24 #ifdef MODULE_PARAM_PREFIX
25 #undef MODULE_PARAM_PREFIX
26 #endif
27 #define MODULE_PARAM_PREFIX "pcie_aspm."
28 
29 /* Note: those are not register definitions */
30 #define ASPM_STATE_L0S_UP	(1)	/* Upstream direction L0s state */
31 #define ASPM_STATE_L0S_DW	(2)	/* Downstream direction L0s state */
32 #define ASPM_STATE_L1		(4)	/* L1 state */
33 #define ASPM_STATE_L1_1		(8)	/* ASPM L1.1 state */
34 #define ASPM_STATE_L1_2		(0x10)	/* ASPM L1.2 state */
35 #define ASPM_STATE_L1_1_PCIPM	(0x20)	/* PCI PM L1.1 state */
36 #define ASPM_STATE_L1_2_PCIPM	(0x40)	/* PCI PM L1.2 state */
37 #define ASPM_STATE_L1_SS_PCIPM	(ASPM_STATE_L1_1_PCIPM | ASPM_STATE_L1_2_PCIPM)
38 #define ASPM_STATE_L1_2_MASK	(ASPM_STATE_L1_2 | ASPM_STATE_L1_2_PCIPM)
39 #define ASPM_STATE_L1SS		(ASPM_STATE_L1_1 | ASPM_STATE_L1_1_PCIPM |\
40 				 ASPM_STATE_L1_2_MASK)
41 #define ASPM_STATE_L0S		(ASPM_STATE_L0S_UP | ASPM_STATE_L0S_DW)
42 #define ASPM_STATE_ALL		(ASPM_STATE_L0S | ASPM_STATE_L1 |	\
43 				 ASPM_STATE_L1SS)
44 
45 struct pcie_link_state {
46 	struct pci_dev *pdev;		/* Upstream component of the Link */
47 	struct pci_dev *downstream;	/* Downstream component, function 0 */
48 	struct pcie_link_state *root;	/* pointer to the root port link */
49 	struct pcie_link_state *parent;	/* pointer to the parent Link state */
50 	struct list_head sibling;	/* node in link_list */
51 
52 	/* ASPM state */
53 	u32 aspm_support:7;		/* Supported ASPM state */
54 	u32 aspm_enabled:7;		/* Enabled ASPM state */
55 	u32 aspm_capable:7;		/* Capable ASPM state with latency */
56 	u32 aspm_default:7;		/* Default ASPM state by BIOS */
57 	u32 aspm_disable:7;		/* Disabled ASPM state */
58 
59 	/* Clock PM state */
60 	u32 clkpm_capable:1;		/* Clock PM capable? */
61 	u32 clkpm_enabled:1;		/* Current Clock PM state */
62 	u32 clkpm_default:1;		/* Default Clock PM state by BIOS */
63 	u32 clkpm_disable:1;		/* Clock PM disabled */
64 };
65 
66 static int aspm_disabled, aspm_force;
67 static bool aspm_support_enabled = true;
68 static DEFINE_MUTEX(aspm_lock);
69 static LIST_HEAD(link_list);
70 
71 #define POLICY_DEFAULT 0	/* BIOS default setting */
72 #define POLICY_PERFORMANCE 1	/* high performance */
73 #define POLICY_POWERSAVE 2	/* high power saving */
74 #define POLICY_POWER_SUPERSAVE 3 /* possibly even more power saving */
75 
76 #ifdef CONFIG_PCIEASPM_PERFORMANCE
77 static int aspm_policy = POLICY_PERFORMANCE;
78 #elif defined CONFIG_PCIEASPM_POWERSAVE
79 static int aspm_policy = POLICY_POWERSAVE;
80 #elif defined CONFIG_PCIEASPM_POWER_SUPERSAVE
81 static int aspm_policy = POLICY_POWER_SUPERSAVE;
82 #else
83 static int aspm_policy;
84 #endif
85 
86 static const char *policy_str[] = {
87 	[POLICY_DEFAULT] = "default",
88 	[POLICY_PERFORMANCE] = "performance",
89 	[POLICY_POWERSAVE] = "powersave",
90 	[POLICY_POWER_SUPERSAVE] = "powersupersave"
91 };
92 
93 #define LINK_RETRAIN_TIMEOUT HZ
94 
95 /*
96  * The L1 PM substate capability is only implemented in function 0 in a
97  * multi function device.
98  */
99 static struct pci_dev *pci_function_0(struct pci_bus *linkbus)
100 {
101 	struct pci_dev *child;
102 
103 	list_for_each_entry(child, &linkbus->devices, bus_list)
104 		if (PCI_FUNC(child->devfn) == 0)
105 			return child;
106 	return NULL;
107 }
108 
109 static int policy_to_aspm_state(struct pcie_link_state *link)
110 {
111 	switch (aspm_policy) {
112 	case POLICY_PERFORMANCE:
113 		/* Disable ASPM and Clock PM */
114 		return 0;
115 	case POLICY_POWERSAVE:
116 		/* Enable ASPM L0s/L1 */
117 		return (ASPM_STATE_L0S | ASPM_STATE_L1);
118 	case POLICY_POWER_SUPERSAVE:
119 		/* Enable Everything */
120 		return ASPM_STATE_ALL;
121 	case POLICY_DEFAULT:
122 		return link->aspm_default;
123 	}
124 	return 0;
125 }
126 
127 static int policy_to_clkpm_state(struct pcie_link_state *link)
128 {
129 	switch (aspm_policy) {
130 	case POLICY_PERFORMANCE:
131 		/* Disable ASPM and Clock PM */
132 		return 0;
133 	case POLICY_POWERSAVE:
134 	case POLICY_POWER_SUPERSAVE:
135 		/* Enable Clock PM */
136 		return 1;
137 	case POLICY_DEFAULT:
138 		return link->clkpm_default;
139 	}
140 	return 0;
141 }
142 
143 static void pcie_set_clkpm_nocheck(struct pcie_link_state *link, int enable)
144 {
145 	struct pci_dev *child;
146 	struct pci_bus *linkbus = link->pdev->subordinate;
147 	u32 val = enable ? PCI_EXP_LNKCTL_CLKREQ_EN : 0;
148 
149 	list_for_each_entry(child, &linkbus->devices, bus_list)
150 		pcie_capability_clear_and_set_word(child, PCI_EXP_LNKCTL,
151 						   PCI_EXP_LNKCTL_CLKREQ_EN,
152 						   val);
153 	link->clkpm_enabled = !!enable;
154 }
155 
156 static void pcie_set_clkpm(struct pcie_link_state *link, int enable)
157 {
158 	/*
159 	 * Don't enable Clock PM if the link is not Clock PM capable
160 	 * or Clock PM is disabled
161 	 */
162 	if (!link->clkpm_capable || link->clkpm_disable)
163 		enable = 0;
164 	/* Need nothing if the specified equals to current state */
165 	if (link->clkpm_enabled == enable)
166 		return;
167 	pcie_set_clkpm_nocheck(link, enable);
168 }
169 
170 static void pcie_clkpm_cap_init(struct pcie_link_state *link, int blacklist)
171 {
172 	int capable = 1, enabled = 1;
173 	u32 reg32;
174 	u16 reg16;
175 	struct pci_dev *child;
176 	struct pci_bus *linkbus = link->pdev->subordinate;
177 
178 	/* All functions should have the same cap and state, take the worst */
179 	list_for_each_entry(child, &linkbus->devices, bus_list) {
180 		pcie_capability_read_dword(child, PCI_EXP_LNKCAP, &reg32);
181 		if (!(reg32 & PCI_EXP_LNKCAP_CLKPM)) {
182 			capable = 0;
183 			enabled = 0;
184 			break;
185 		}
186 		pcie_capability_read_word(child, PCI_EXP_LNKCTL, &reg16);
187 		if (!(reg16 & PCI_EXP_LNKCTL_CLKREQ_EN))
188 			enabled = 0;
189 	}
190 	link->clkpm_enabled = enabled;
191 	link->clkpm_default = enabled;
192 	link->clkpm_capable = capable;
193 	link->clkpm_disable = blacklist ? 1 : 0;
194 }
195 
196 static bool pcie_retrain_link(struct pcie_link_state *link)
197 {
198 	struct pci_dev *parent = link->pdev;
199 	unsigned long end_jiffies;
200 	u16 reg16;
201 
202 	pcie_capability_read_word(parent, PCI_EXP_LNKCTL, &reg16);
203 	reg16 |= PCI_EXP_LNKCTL_RL;
204 	pcie_capability_write_word(parent, PCI_EXP_LNKCTL, reg16);
205 	if (parent->clear_retrain_link) {
206 		/*
207 		 * Due to an erratum in some devices the Retrain Link bit
208 		 * needs to be cleared again manually to allow the link
209 		 * training to succeed.
210 		 */
211 		reg16 &= ~PCI_EXP_LNKCTL_RL;
212 		pcie_capability_write_word(parent, PCI_EXP_LNKCTL, reg16);
213 	}
214 
215 	/* Wait for link training end. Break out after waiting for timeout */
216 	end_jiffies = jiffies + LINK_RETRAIN_TIMEOUT;
217 	do {
218 		pcie_capability_read_word(parent, PCI_EXP_LNKSTA, &reg16);
219 		if (!(reg16 & PCI_EXP_LNKSTA_LT))
220 			break;
221 		msleep(1);
222 	} while (time_before(jiffies, end_jiffies));
223 	return !(reg16 & PCI_EXP_LNKSTA_LT);
224 }
225 
226 /*
227  * pcie_aspm_configure_common_clock: check if the 2 ends of a link
228  *   could use common clock. If they are, configure them to use the
229  *   common clock. That will reduce the ASPM state exit latency.
230  */
231 static void pcie_aspm_configure_common_clock(struct pcie_link_state *link)
232 {
233 	int same_clock = 1;
234 	u16 reg16, parent_reg, child_reg[8];
235 	struct pci_dev *child, *parent = link->pdev;
236 	struct pci_bus *linkbus = parent->subordinate;
237 	/*
238 	 * All functions of a slot should have the same Slot Clock
239 	 * Configuration, so just check one function
240 	 */
241 	child = list_entry(linkbus->devices.next, struct pci_dev, bus_list);
242 	BUG_ON(!pci_is_pcie(child));
243 
244 	/* Check downstream component if bit Slot Clock Configuration is 1 */
245 	pcie_capability_read_word(child, PCI_EXP_LNKSTA, &reg16);
246 	if (!(reg16 & PCI_EXP_LNKSTA_SLC))
247 		same_clock = 0;
248 
249 	/* Check upstream component if bit Slot Clock Configuration is 1 */
250 	pcie_capability_read_word(parent, PCI_EXP_LNKSTA, &reg16);
251 	if (!(reg16 & PCI_EXP_LNKSTA_SLC))
252 		same_clock = 0;
253 
254 	/* Port might be already in common clock mode */
255 	pcie_capability_read_word(parent, PCI_EXP_LNKCTL, &reg16);
256 	if (same_clock && (reg16 & PCI_EXP_LNKCTL_CCC)) {
257 		bool consistent = true;
258 
259 		list_for_each_entry(child, &linkbus->devices, bus_list) {
260 			pcie_capability_read_word(child, PCI_EXP_LNKCTL,
261 						  &reg16);
262 			if (!(reg16 & PCI_EXP_LNKCTL_CCC)) {
263 				consistent = false;
264 				break;
265 			}
266 		}
267 		if (consistent)
268 			return;
269 		pci_info(parent, "ASPM: current common clock configuration is inconsistent, reconfiguring\n");
270 	}
271 
272 	/* Configure downstream component, all functions */
273 	list_for_each_entry(child, &linkbus->devices, bus_list) {
274 		pcie_capability_read_word(child, PCI_EXP_LNKCTL, &reg16);
275 		child_reg[PCI_FUNC(child->devfn)] = reg16;
276 		if (same_clock)
277 			reg16 |= PCI_EXP_LNKCTL_CCC;
278 		else
279 			reg16 &= ~PCI_EXP_LNKCTL_CCC;
280 		pcie_capability_write_word(child, PCI_EXP_LNKCTL, reg16);
281 	}
282 
283 	/* Configure upstream component */
284 	pcie_capability_read_word(parent, PCI_EXP_LNKCTL, &reg16);
285 	parent_reg = reg16;
286 	if (same_clock)
287 		reg16 |= PCI_EXP_LNKCTL_CCC;
288 	else
289 		reg16 &= ~PCI_EXP_LNKCTL_CCC;
290 	pcie_capability_write_word(parent, PCI_EXP_LNKCTL, reg16);
291 
292 	if (pcie_retrain_link(link))
293 		return;
294 
295 	/* Training failed. Restore common clock configurations */
296 	pci_err(parent, "ASPM: Could not configure common clock\n");
297 	list_for_each_entry(child, &linkbus->devices, bus_list)
298 		pcie_capability_write_word(child, PCI_EXP_LNKCTL,
299 					   child_reg[PCI_FUNC(child->devfn)]);
300 	pcie_capability_write_word(parent, PCI_EXP_LNKCTL, parent_reg);
301 }
302 
303 /* Convert L0s latency encoding to ns */
304 static u32 calc_l0s_latency(u32 lnkcap)
305 {
306 	u32 encoding = (lnkcap & PCI_EXP_LNKCAP_L0SEL) >> 12;
307 
308 	if (encoding == 0x7)
309 		return (5 * 1000);	/* > 4us */
310 	return (64 << encoding);
311 }
312 
313 /* Convert L0s acceptable latency encoding to ns */
314 static u32 calc_l0s_acceptable(u32 encoding)
315 {
316 	if (encoding == 0x7)
317 		return -1U;
318 	return (64 << encoding);
319 }
320 
321 /* Convert L1 latency encoding to ns */
322 static u32 calc_l1_latency(u32 lnkcap)
323 {
324 	u32 encoding = (lnkcap & PCI_EXP_LNKCAP_L1EL) >> 15;
325 
326 	if (encoding == 0x7)
327 		return (65 * 1000);	/* > 64us */
328 	return (1000 << encoding);
329 }
330 
331 /* Convert L1 acceptable latency encoding to ns */
332 static u32 calc_l1_acceptable(u32 encoding)
333 {
334 	if (encoding == 0x7)
335 		return -1U;
336 	return (1000 << encoding);
337 }
338 
339 /* Convert L1SS T_pwr encoding to usec */
340 static u32 calc_l1ss_pwron(struct pci_dev *pdev, u32 scale, u32 val)
341 {
342 	switch (scale) {
343 	case 0:
344 		return val * 2;
345 	case 1:
346 		return val * 10;
347 	case 2:
348 		return val * 100;
349 	}
350 	pci_err(pdev, "%s: Invalid T_PwrOn scale: %u\n", __func__, scale);
351 	return 0;
352 }
353 
354 /*
355  * Encode an LTR_L1.2_THRESHOLD value for the L1 PM Substates Control 1
356  * register.  Ports enter L1.2 when the most recent LTR value is greater
357  * than or equal to LTR_L1.2_THRESHOLD, so we round up to make sure we
358  * don't enter L1.2 too aggressively.
359  *
360  * See PCIe r6.0, sec 5.5.1, 6.18, 7.8.3.3.
361  */
362 static void encode_l12_threshold(u32 threshold_us, u32 *scale, u32 *value)
363 {
364 	u64 threshold_ns = (u64) threshold_us * 1000;
365 
366 	/*
367 	 * LTR_L1.2_THRESHOLD_Value ("value") is a 10-bit field with max
368 	 * value of 0x3ff.
369 	 */
370 	if (threshold_ns <= 0x3ff * 1) {
371 		*scale = 0;		/* Value times 1ns */
372 		*value = threshold_ns;
373 	} else if (threshold_ns <= 0x3ff * 32) {
374 		*scale = 1;		/* Value times 32ns */
375 		*value = roundup(threshold_ns, 32) / 32;
376 	} else if (threshold_ns <= 0x3ff * 1024) {
377 		*scale = 2;		/* Value times 1024ns */
378 		*value = roundup(threshold_ns, 1024) / 1024;
379 	} else if (threshold_ns <= 0x3ff * 32768) {
380 		*scale = 3;		/* Value times 32768ns */
381 		*value = roundup(threshold_ns, 32768) / 32768;
382 	} else if (threshold_ns <= 0x3ff * 1048576) {
383 		*scale = 4;		/* Value times 1048576ns */
384 		*value = roundup(threshold_ns, 1048576) / 1048576;
385 	} else if (threshold_ns <= 0x3ff * (u64) 33554432) {
386 		*scale = 5;		/* Value times 33554432ns */
387 		*value = roundup(threshold_ns, 33554432) / 33554432;
388 	} else {
389 		*scale = 5;
390 		*value = 0x3ff;		/* Max representable value */
391 	}
392 }
393 
394 static void pcie_aspm_check_latency(struct pci_dev *endpoint)
395 {
396 	u32 latency, encoding, lnkcap_up, lnkcap_dw;
397 	u32 l1_switch_latency = 0, latency_up_l0s;
398 	u32 latency_up_l1, latency_dw_l0s, latency_dw_l1;
399 	u32 acceptable_l0s, acceptable_l1;
400 	struct pcie_link_state *link;
401 
402 	/* Device not in D0 doesn't need latency check */
403 	if ((endpoint->current_state != PCI_D0) &&
404 	    (endpoint->current_state != PCI_UNKNOWN))
405 		return;
406 
407 	link = endpoint->bus->self->link_state;
408 
409 	/* Calculate endpoint L0s acceptable latency */
410 	encoding = (endpoint->devcap & PCI_EXP_DEVCAP_L0S) >> 6;
411 	acceptable_l0s = calc_l0s_acceptable(encoding);
412 
413 	/* Calculate endpoint L1 acceptable latency */
414 	encoding = (endpoint->devcap & PCI_EXP_DEVCAP_L1) >> 9;
415 	acceptable_l1 = calc_l1_acceptable(encoding);
416 
417 	while (link) {
418 		struct pci_dev *dev = pci_function_0(link->pdev->subordinate);
419 
420 		/* Read direction exit latencies */
421 		pcie_capability_read_dword(link->pdev, PCI_EXP_LNKCAP,
422 					   &lnkcap_up);
423 		pcie_capability_read_dword(dev, PCI_EXP_LNKCAP,
424 					   &lnkcap_dw);
425 		latency_up_l0s = calc_l0s_latency(lnkcap_up);
426 		latency_up_l1 = calc_l1_latency(lnkcap_up);
427 		latency_dw_l0s = calc_l0s_latency(lnkcap_dw);
428 		latency_dw_l1 = calc_l1_latency(lnkcap_dw);
429 
430 		/* Check upstream direction L0s latency */
431 		if ((link->aspm_capable & ASPM_STATE_L0S_UP) &&
432 		    (latency_up_l0s > acceptable_l0s))
433 			link->aspm_capable &= ~ASPM_STATE_L0S_UP;
434 
435 		/* Check downstream direction L0s latency */
436 		if ((link->aspm_capable & ASPM_STATE_L0S_DW) &&
437 		    (latency_dw_l0s > acceptable_l0s))
438 			link->aspm_capable &= ~ASPM_STATE_L0S_DW;
439 		/*
440 		 * Check L1 latency.
441 		 * Every switch on the path to root complex need 1
442 		 * more microsecond for L1. Spec doesn't mention L0s.
443 		 *
444 		 * The exit latencies for L1 substates are not advertised
445 		 * by a device.  Since the spec also doesn't mention a way
446 		 * to determine max latencies introduced by enabling L1
447 		 * substates on the components, it is not clear how to do
448 		 * a L1 substate exit latency check.  We assume that the
449 		 * L1 exit latencies advertised by a device include L1
450 		 * substate latencies (and hence do not do any check).
451 		 */
452 		latency = max_t(u32, latency_up_l1, latency_dw_l1);
453 		if ((link->aspm_capable & ASPM_STATE_L1) &&
454 		    (latency + l1_switch_latency > acceptable_l1))
455 			link->aspm_capable &= ~ASPM_STATE_L1;
456 		l1_switch_latency += 1000;
457 
458 		link = link->parent;
459 	}
460 }
461 
462 static void pci_clear_and_set_dword(struct pci_dev *pdev, int pos,
463 				    u32 clear, u32 set)
464 {
465 	u32 val;
466 
467 	pci_read_config_dword(pdev, pos, &val);
468 	val &= ~clear;
469 	val |= set;
470 	pci_write_config_dword(pdev, pos, val);
471 }
472 
473 static void aspm_program_l1ss(struct pci_dev *dev, u32 ctl1, u32 ctl2)
474 {
475 	u16 l1ss = dev->l1ss;
476 	u32 l1_2_enable;
477 
478 	/*
479 	 * Per PCIe r6.0, sec 5.5.4, T_POWER_ON in PCI_L1SS_CTL2 must be
480 	 * programmed prior to setting the L1.2 enable bits in PCI_L1SS_CTL1.
481 	 */
482 	pci_write_config_dword(dev, l1ss + PCI_L1SS_CTL2, ctl2);
483 
484 	/*
485 	 * In addition, Common_Mode_Restore_Time and LTR_L1.2_THRESHOLD in
486 	 * PCI_L1SS_CTL1 must be programmed *before* setting the L1.2
487 	 * enable bits, even though they're all in PCI_L1SS_CTL1.
488 	 */
489 	l1_2_enable = ctl1 & PCI_L1SS_CTL1_L1_2_MASK;
490 	ctl1 &= ~PCI_L1SS_CTL1_L1_2_MASK;
491 
492 	pci_write_config_dword(dev, l1ss + PCI_L1SS_CTL1, ctl1);
493 	if (l1_2_enable)
494 		pci_write_config_dword(dev, l1ss + PCI_L1SS_CTL1,
495 				       ctl1 | l1_2_enable);
496 }
497 
498 /* Calculate L1.2 PM substate timing parameters */
499 static void aspm_calc_l1ss_info(struct pcie_link_state *link,
500 				u32 parent_l1ss_cap, u32 child_l1ss_cap)
501 {
502 	struct pci_dev *child = link->downstream, *parent = link->pdev;
503 	u32 val1, val2, scale1, scale2;
504 	u32 t_common_mode, t_power_on, l1_2_threshold, scale, value;
505 	u32 ctl1 = 0, ctl2 = 0;
506 	u32 pctl1, pctl2, cctl1, cctl2;
507 
508 	if (!(link->aspm_support & ASPM_STATE_L1_2_MASK))
509 		return;
510 
511 	/* Choose the greater of the two Port Common_Mode_Restore_Times */
512 	val1 = (parent_l1ss_cap & PCI_L1SS_CAP_CM_RESTORE_TIME) >> 8;
513 	val2 = (child_l1ss_cap & PCI_L1SS_CAP_CM_RESTORE_TIME) >> 8;
514 	t_common_mode = max(val1, val2);
515 
516 	/* Choose the greater of the two Port T_POWER_ON times */
517 	val1   = (parent_l1ss_cap & PCI_L1SS_CAP_P_PWR_ON_VALUE) >> 19;
518 	scale1 = (parent_l1ss_cap & PCI_L1SS_CAP_P_PWR_ON_SCALE) >> 16;
519 	val2   = (child_l1ss_cap & PCI_L1SS_CAP_P_PWR_ON_VALUE) >> 19;
520 	scale2 = (child_l1ss_cap & PCI_L1SS_CAP_P_PWR_ON_SCALE) >> 16;
521 
522 	if (calc_l1ss_pwron(parent, scale1, val1) >
523 	    calc_l1ss_pwron(child, scale2, val2)) {
524 		ctl2 |= scale1 | (val1 << 3);
525 		t_power_on = calc_l1ss_pwron(parent, scale1, val1);
526 	} else {
527 		ctl2 |= scale2 | (val2 << 3);
528 		t_power_on = calc_l1ss_pwron(child, scale2, val2);
529 	}
530 
531 	/*
532 	 * Set LTR_L1.2_THRESHOLD to the time required to transition the
533 	 * Link from L0 to L1.2 and back to L0 so we enter L1.2 only if
534 	 * downstream devices report (via LTR) that they can tolerate at
535 	 * least that much latency.
536 	 *
537 	 * Based on PCIe r3.1, sec 5.5.3.3.1, Figures 5-16 and 5-17, and
538 	 * Table 5-11.  T(POWER_OFF) is at most 2us and T(L1.2) is at
539 	 * least 4us.
540 	 */
541 	l1_2_threshold = 2 + 4 + t_common_mode + t_power_on;
542 	encode_l12_threshold(l1_2_threshold, &scale, &value);
543 	ctl1 |= t_common_mode << 8 | scale << 29 | value << 16;
544 
545 	/* Some broken devices only support dword access to L1 SS */
546 	pci_read_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1, &pctl1);
547 	pci_read_config_dword(parent, parent->l1ss + PCI_L1SS_CTL2, &pctl2);
548 	pci_read_config_dword(child, child->l1ss + PCI_L1SS_CTL1, &cctl1);
549 	pci_read_config_dword(child, child->l1ss + PCI_L1SS_CTL2, &cctl2);
550 
551 	if (ctl1 == pctl1 && ctl1 == cctl1 &&
552 	    ctl2 == pctl2 && ctl2 == cctl2)
553 		return;
554 
555 	pctl1 &= ~(PCI_L1SS_CTL1_CM_RESTORE_TIME |
556 		   PCI_L1SS_CTL1_LTR_L12_TH_VALUE |
557 		   PCI_L1SS_CTL1_LTR_L12_TH_SCALE);
558 	pctl1 |= (ctl1 & (PCI_L1SS_CTL1_CM_RESTORE_TIME |
559 			  PCI_L1SS_CTL1_LTR_L12_TH_VALUE |
560 			  PCI_L1SS_CTL1_LTR_L12_TH_SCALE));
561 	aspm_program_l1ss(parent, pctl1, ctl2);
562 
563 	cctl1 &= ~(PCI_L1SS_CTL1_CM_RESTORE_TIME |
564 		   PCI_L1SS_CTL1_LTR_L12_TH_VALUE |
565 		   PCI_L1SS_CTL1_LTR_L12_TH_SCALE);
566 	cctl1 |= (ctl1 & (PCI_L1SS_CTL1_CM_RESTORE_TIME |
567 			  PCI_L1SS_CTL1_LTR_L12_TH_VALUE |
568 			  PCI_L1SS_CTL1_LTR_L12_TH_SCALE));
569 	aspm_program_l1ss(child, cctl1, ctl2);
570 }
571 
572 static void aspm_l1ss_init(struct pcie_link_state *link)
573 {
574 	struct pci_dev *child = link->downstream, *parent = link->pdev;
575 	u32 parent_l1ss_cap, child_l1ss_cap;
576 	u32 parent_l1ss_ctl1 = 0, child_l1ss_ctl1 = 0;
577 
578 	if (!parent->l1ss || !child->l1ss)
579 		return;
580 
581 	/* Setup L1 substate */
582 	pci_read_config_dword(parent, parent->l1ss + PCI_L1SS_CAP,
583 			      &parent_l1ss_cap);
584 	pci_read_config_dword(child, child->l1ss + PCI_L1SS_CAP,
585 			      &child_l1ss_cap);
586 
587 	if (!(parent_l1ss_cap & PCI_L1SS_CAP_L1_PM_SS))
588 		parent_l1ss_cap = 0;
589 	if (!(child_l1ss_cap & PCI_L1SS_CAP_L1_PM_SS))
590 		child_l1ss_cap = 0;
591 
592 	/*
593 	 * If we don't have LTR for the entire path from the Root Complex
594 	 * to this device, we can't use ASPM L1.2 because it relies on the
595 	 * LTR_L1.2_THRESHOLD.  See PCIe r4.0, secs 5.5.4, 6.18.
596 	 */
597 	if (!child->ltr_path)
598 		child_l1ss_cap &= ~PCI_L1SS_CAP_ASPM_L1_2;
599 
600 	if (parent_l1ss_cap & child_l1ss_cap & PCI_L1SS_CAP_ASPM_L1_1)
601 		link->aspm_support |= ASPM_STATE_L1_1;
602 	if (parent_l1ss_cap & child_l1ss_cap & PCI_L1SS_CAP_ASPM_L1_2)
603 		link->aspm_support |= ASPM_STATE_L1_2;
604 	if (parent_l1ss_cap & child_l1ss_cap & PCI_L1SS_CAP_PCIPM_L1_1)
605 		link->aspm_support |= ASPM_STATE_L1_1_PCIPM;
606 	if (parent_l1ss_cap & child_l1ss_cap & PCI_L1SS_CAP_PCIPM_L1_2)
607 		link->aspm_support |= ASPM_STATE_L1_2_PCIPM;
608 
609 	if (parent_l1ss_cap)
610 		pci_read_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
611 				      &parent_l1ss_ctl1);
612 	if (child_l1ss_cap)
613 		pci_read_config_dword(child, child->l1ss + PCI_L1SS_CTL1,
614 				      &child_l1ss_ctl1);
615 
616 	if (parent_l1ss_ctl1 & child_l1ss_ctl1 & PCI_L1SS_CTL1_ASPM_L1_1)
617 		link->aspm_enabled |= ASPM_STATE_L1_1;
618 	if (parent_l1ss_ctl1 & child_l1ss_ctl1 & PCI_L1SS_CTL1_ASPM_L1_2)
619 		link->aspm_enabled |= ASPM_STATE_L1_2;
620 	if (parent_l1ss_ctl1 & child_l1ss_ctl1 & PCI_L1SS_CTL1_PCIPM_L1_1)
621 		link->aspm_enabled |= ASPM_STATE_L1_1_PCIPM;
622 	if (parent_l1ss_ctl1 & child_l1ss_ctl1 & PCI_L1SS_CTL1_PCIPM_L1_2)
623 		link->aspm_enabled |= ASPM_STATE_L1_2_PCIPM;
624 
625 	if (link->aspm_support & ASPM_STATE_L1SS)
626 		aspm_calc_l1ss_info(link, parent_l1ss_cap, child_l1ss_cap);
627 }
628 
629 static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
630 {
631 	struct pci_dev *child = link->downstream, *parent = link->pdev;
632 	u32 parent_lnkcap, child_lnkcap;
633 	u16 parent_lnkctl, child_lnkctl;
634 	struct pci_bus *linkbus = parent->subordinate;
635 
636 	if (blacklist) {
637 		/* Set enabled/disable so that we will disable ASPM later */
638 		link->aspm_enabled = ASPM_STATE_ALL;
639 		link->aspm_disable = ASPM_STATE_ALL;
640 		return;
641 	}
642 
643 	/*
644 	 * If ASPM not supported, don't mess with the clocks and link,
645 	 * bail out now.
646 	 */
647 	pcie_capability_read_dword(parent, PCI_EXP_LNKCAP, &parent_lnkcap);
648 	pcie_capability_read_dword(child, PCI_EXP_LNKCAP, &child_lnkcap);
649 	if (!(parent_lnkcap & child_lnkcap & PCI_EXP_LNKCAP_ASPMS))
650 		return;
651 
652 	/* Configure common clock before checking latencies */
653 	pcie_aspm_configure_common_clock(link);
654 
655 	/*
656 	 * Re-read upstream/downstream components' register state after
657 	 * clock configuration.  L0s & L1 exit latencies in the otherwise
658 	 * read-only Link Capabilities may change depending on common clock
659 	 * configuration (PCIe r5.0, sec 7.5.3.6).
660 	 */
661 	pcie_capability_read_dword(parent, PCI_EXP_LNKCAP, &parent_lnkcap);
662 	pcie_capability_read_dword(child, PCI_EXP_LNKCAP, &child_lnkcap);
663 	pcie_capability_read_word(parent, PCI_EXP_LNKCTL, &parent_lnkctl);
664 	pcie_capability_read_word(child, PCI_EXP_LNKCTL, &child_lnkctl);
665 
666 	/*
667 	 * Setup L0s state
668 	 *
669 	 * Note that we must not enable L0s in either direction on a
670 	 * given link unless components on both sides of the link each
671 	 * support L0s.
672 	 */
673 	if (parent_lnkcap & child_lnkcap & PCI_EXP_LNKCAP_ASPM_L0S)
674 		link->aspm_support |= ASPM_STATE_L0S;
675 
676 	if (child_lnkctl & PCI_EXP_LNKCTL_ASPM_L0S)
677 		link->aspm_enabled |= ASPM_STATE_L0S_UP;
678 	if (parent_lnkctl & PCI_EXP_LNKCTL_ASPM_L0S)
679 		link->aspm_enabled |= ASPM_STATE_L0S_DW;
680 
681 	/* Setup L1 state */
682 	if (parent_lnkcap & child_lnkcap & PCI_EXP_LNKCAP_ASPM_L1)
683 		link->aspm_support |= ASPM_STATE_L1;
684 
685 	if (parent_lnkctl & child_lnkctl & PCI_EXP_LNKCTL_ASPM_L1)
686 		link->aspm_enabled |= ASPM_STATE_L1;
687 
688 	aspm_l1ss_init(link);
689 
690 	/* Save default state */
691 	link->aspm_default = link->aspm_enabled;
692 
693 	/* Setup initial capable state. Will be updated later */
694 	link->aspm_capable = link->aspm_support;
695 
696 	/* Get and check endpoint acceptable latencies */
697 	list_for_each_entry(child, &linkbus->devices, bus_list) {
698 		if (pci_pcie_type(child) != PCI_EXP_TYPE_ENDPOINT &&
699 		    pci_pcie_type(child) != PCI_EXP_TYPE_LEG_END)
700 			continue;
701 
702 		pcie_aspm_check_latency(child);
703 	}
704 }
705 
706 /* Configure the ASPM L1 substates */
707 static void pcie_config_aspm_l1ss(struct pcie_link_state *link, u32 state)
708 {
709 	u32 val, enable_req;
710 	struct pci_dev *child = link->downstream, *parent = link->pdev;
711 
712 	enable_req = (link->aspm_enabled ^ state) & state;
713 
714 	/*
715 	 * Here are the rules specified in the PCIe spec for enabling L1SS:
716 	 * - When enabling L1.x, enable bit at parent first, then at child
717 	 * - When disabling L1.x, disable bit at child first, then at parent
718 	 * - When enabling ASPM L1.x, need to disable L1
719 	 *   (at child followed by parent).
720 	 * - The ASPM/PCIPM L1.2 must be disabled while programming timing
721 	 *   parameters
722 	 *
723 	 * To keep it simple, disable all L1SS bits first, and later enable
724 	 * what is needed.
725 	 */
726 
727 	/* Disable all L1 substates */
728 	pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1,
729 				PCI_L1SS_CTL1_L1SS_MASK, 0);
730 	pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
731 				PCI_L1SS_CTL1_L1SS_MASK, 0);
732 	/*
733 	 * If needed, disable L1, and it gets enabled later
734 	 * in pcie_config_aspm_link().
735 	 */
736 	if (enable_req & (ASPM_STATE_L1_1 | ASPM_STATE_L1_2)) {
737 		pcie_capability_clear_and_set_word(child, PCI_EXP_LNKCTL,
738 						   PCI_EXP_LNKCTL_ASPM_L1, 0);
739 		pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL,
740 						   PCI_EXP_LNKCTL_ASPM_L1, 0);
741 	}
742 
743 	val = 0;
744 	if (state & ASPM_STATE_L1_1)
745 		val |= PCI_L1SS_CTL1_ASPM_L1_1;
746 	if (state & ASPM_STATE_L1_2)
747 		val |= PCI_L1SS_CTL1_ASPM_L1_2;
748 	if (state & ASPM_STATE_L1_1_PCIPM)
749 		val |= PCI_L1SS_CTL1_PCIPM_L1_1;
750 	if (state & ASPM_STATE_L1_2_PCIPM)
751 		val |= PCI_L1SS_CTL1_PCIPM_L1_2;
752 
753 	/* Enable what we need to enable */
754 	pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
755 				PCI_L1SS_CTL1_L1SS_MASK, val);
756 	pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1,
757 				PCI_L1SS_CTL1_L1SS_MASK, val);
758 }
759 
760 void pci_save_aspm_l1ss_state(struct pci_dev *dev)
761 {
762 	struct pci_cap_saved_state *save_state;
763 	u16 l1ss = dev->l1ss;
764 	u32 *cap;
765 
766 	if (!l1ss)
767 		return;
768 
769 	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_L1SS);
770 	if (!save_state)
771 		return;
772 
773 	cap = (u32 *)&save_state->cap.data[0];
774 	pci_read_config_dword(dev, l1ss + PCI_L1SS_CTL2, cap++);
775 	pci_read_config_dword(dev, l1ss + PCI_L1SS_CTL1, cap++);
776 }
777 
778 void pci_restore_aspm_l1ss_state(struct pci_dev *dev)
779 {
780 	struct pci_cap_saved_state *save_state;
781 	u32 *cap, ctl1, ctl2;
782 	u16 l1ss = dev->l1ss;
783 
784 	if (!l1ss)
785 		return;
786 
787 	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_L1SS);
788 	if (!save_state)
789 		return;
790 
791 	cap = (u32 *)&save_state->cap.data[0];
792 	ctl2 = *cap++;
793 	ctl1 = *cap;
794 	aspm_program_l1ss(dev, ctl1, ctl2);
795 }
796 
797 static void pcie_config_aspm_dev(struct pci_dev *pdev, u32 val)
798 {
799 	pcie_capability_clear_and_set_word(pdev, PCI_EXP_LNKCTL,
800 					   PCI_EXP_LNKCTL_ASPMC, val);
801 }
802 
803 static void pcie_config_aspm_link(struct pcie_link_state *link, u32 state)
804 {
805 	u32 upstream = 0, dwstream = 0;
806 	struct pci_dev *child = link->downstream, *parent = link->pdev;
807 	struct pci_bus *linkbus = parent->subordinate;
808 
809 	/* Enable only the states that were not explicitly disabled */
810 	state &= (link->aspm_capable & ~link->aspm_disable);
811 
812 	/* Can't enable any substates if L1 is not enabled */
813 	if (!(state & ASPM_STATE_L1))
814 		state &= ~ASPM_STATE_L1SS;
815 
816 	/* Spec says both ports must be in D0 before enabling PCI PM substates*/
817 	if (parent->current_state != PCI_D0 || child->current_state != PCI_D0) {
818 		state &= ~ASPM_STATE_L1_SS_PCIPM;
819 		state |= (link->aspm_enabled & ASPM_STATE_L1_SS_PCIPM);
820 	}
821 
822 	/* Nothing to do if the link is already in the requested state */
823 	if (link->aspm_enabled == state)
824 		return;
825 	/* Convert ASPM state to upstream/downstream ASPM register state */
826 	if (state & ASPM_STATE_L0S_UP)
827 		dwstream |= PCI_EXP_LNKCTL_ASPM_L0S;
828 	if (state & ASPM_STATE_L0S_DW)
829 		upstream |= PCI_EXP_LNKCTL_ASPM_L0S;
830 	if (state & ASPM_STATE_L1) {
831 		upstream |= PCI_EXP_LNKCTL_ASPM_L1;
832 		dwstream |= PCI_EXP_LNKCTL_ASPM_L1;
833 	}
834 
835 	if (link->aspm_capable & ASPM_STATE_L1SS)
836 		pcie_config_aspm_l1ss(link, state);
837 
838 	/*
839 	 * Spec 2.0 suggests all functions should be configured the
840 	 * same setting for ASPM. Enabling ASPM L1 should be done in
841 	 * upstream component first and then downstream, and vice
842 	 * versa for disabling ASPM L1. Spec doesn't mention L0S.
843 	 */
844 	if (state & ASPM_STATE_L1)
845 		pcie_config_aspm_dev(parent, upstream);
846 	list_for_each_entry(child, &linkbus->devices, bus_list)
847 		pcie_config_aspm_dev(child, dwstream);
848 	if (!(state & ASPM_STATE_L1))
849 		pcie_config_aspm_dev(parent, upstream);
850 
851 	link->aspm_enabled = state;
852 }
853 
854 static void pcie_config_aspm_path(struct pcie_link_state *link)
855 {
856 	while (link) {
857 		pcie_config_aspm_link(link, policy_to_aspm_state(link));
858 		link = link->parent;
859 	}
860 }
861 
862 static void free_link_state(struct pcie_link_state *link)
863 {
864 	link->pdev->link_state = NULL;
865 	kfree(link);
866 }
867 
868 static int pcie_aspm_sanity_check(struct pci_dev *pdev)
869 {
870 	struct pci_dev *child;
871 	u32 reg32;
872 
873 	/*
874 	 * Some functions in a slot might not all be PCIe functions,
875 	 * very strange. Disable ASPM for the whole slot
876 	 */
877 	list_for_each_entry(child, &pdev->subordinate->devices, bus_list) {
878 		if (!pci_is_pcie(child))
879 			return -EINVAL;
880 
881 		/*
882 		 * If ASPM is disabled then we're not going to change
883 		 * the BIOS state. It's safe to continue even if it's a
884 		 * pre-1.1 device
885 		 */
886 
887 		if (aspm_disabled)
888 			continue;
889 
890 		/*
891 		 * Disable ASPM for pre-1.1 PCIe device, we follow MS to use
892 		 * RBER bit to determine if a function is 1.1 version device
893 		 */
894 		pcie_capability_read_dword(child, PCI_EXP_DEVCAP, &reg32);
895 		if (!(reg32 & PCI_EXP_DEVCAP_RBER) && !aspm_force) {
896 			pci_info(child, "disabling ASPM on pre-1.1 PCIe device.  You can enable it with 'pcie_aspm=force'\n");
897 			return -EINVAL;
898 		}
899 	}
900 	return 0;
901 }
902 
903 static struct pcie_link_state *alloc_pcie_link_state(struct pci_dev *pdev)
904 {
905 	struct pcie_link_state *link;
906 
907 	link = kzalloc(sizeof(*link), GFP_KERNEL);
908 	if (!link)
909 		return NULL;
910 
911 	INIT_LIST_HEAD(&link->sibling);
912 	link->pdev = pdev;
913 	link->downstream = pci_function_0(pdev->subordinate);
914 
915 	/*
916 	 * Root Ports and PCI/PCI-X to PCIe Bridges are roots of PCIe
917 	 * hierarchies.  Note that some PCIe host implementations omit
918 	 * the root ports entirely, in which case a downstream port on
919 	 * a switch may become the root of the link state chain for all
920 	 * its subordinate endpoints.
921 	 */
922 	if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT ||
923 	    pci_pcie_type(pdev) == PCI_EXP_TYPE_PCIE_BRIDGE ||
924 	    !pdev->bus->parent->self) {
925 		link->root = link;
926 	} else {
927 		struct pcie_link_state *parent;
928 
929 		parent = pdev->bus->parent->self->link_state;
930 		if (!parent) {
931 			kfree(link);
932 			return NULL;
933 		}
934 
935 		link->parent = parent;
936 		link->root = link->parent->root;
937 	}
938 
939 	list_add(&link->sibling, &link_list);
940 	pdev->link_state = link;
941 	return link;
942 }
943 
944 static void pcie_aspm_update_sysfs_visibility(struct pci_dev *pdev)
945 {
946 	struct pci_dev *child;
947 
948 	list_for_each_entry(child, &pdev->subordinate->devices, bus_list)
949 		sysfs_update_group(&child->dev.kobj, &aspm_ctrl_attr_group);
950 }
951 
952 /*
953  * pcie_aspm_init_link_state: Initiate PCI express link state.
954  * It is called after the pcie and its children devices are scanned.
955  * @pdev: the root port or switch downstream port
956  */
957 void pcie_aspm_init_link_state(struct pci_dev *pdev)
958 {
959 	struct pcie_link_state *link;
960 	int blacklist = !!pcie_aspm_sanity_check(pdev);
961 
962 	if (!aspm_support_enabled)
963 		return;
964 
965 	if (pdev->link_state)
966 		return;
967 
968 	/*
969 	 * We allocate pcie_link_state for the component on the upstream
970 	 * end of a Link, so there's nothing to do unless this device is
971 	 * downstream port.
972 	 */
973 	if (!pcie_downstream_port(pdev))
974 		return;
975 
976 	/* VIA has a strange chipset, root port is under a bridge */
977 	if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT &&
978 	    pdev->bus->self)
979 		return;
980 
981 	down_read(&pci_bus_sem);
982 	if (list_empty(&pdev->subordinate->devices))
983 		goto out;
984 
985 	mutex_lock(&aspm_lock);
986 	link = alloc_pcie_link_state(pdev);
987 	if (!link)
988 		goto unlock;
989 	/*
990 	 * Setup initial ASPM state. Note that we need to configure
991 	 * upstream links also because capable state of them can be
992 	 * update through pcie_aspm_cap_init().
993 	 */
994 	pcie_aspm_cap_init(link, blacklist);
995 
996 	/* Setup initial Clock PM state */
997 	pcie_clkpm_cap_init(link, blacklist);
998 
999 	/*
1000 	 * At this stage drivers haven't had an opportunity to change the
1001 	 * link policy setting. Enabling ASPM on broken hardware can cripple
1002 	 * it even before the driver has had a chance to disable ASPM, so
1003 	 * default to a safe level right now. If we're enabling ASPM beyond
1004 	 * the BIOS's expectation, we'll do so once pci_enable_device() is
1005 	 * called.
1006 	 */
1007 	if (aspm_policy != POLICY_POWERSAVE &&
1008 	    aspm_policy != POLICY_POWER_SUPERSAVE) {
1009 		pcie_config_aspm_path(link);
1010 		pcie_set_clkpm(link, policy_to_clkpm_state(link));
1011 	}
1012 
1013 	pcie_aspm_update_sysfs_visibility(pdev);
1014 
1015 unlock:
1016 	mutex_unlock(&aspm_lock);
1017 out:
1018 	up_read(&pci_bus_sem);
1019 }
1020 
1021 /* Recheck latencies and update aspm_capable for links under the root */
1022 static void pcie_update_aspm_capable(struct pcie_link_state *root)
1023 {
1024 	struct pcie_link_state *link;
1025 	BUG_ON(root->parent);
1026 	list_for_each_entry(link, &link_list, sibling) {
1027 		if (link->root != root)
1028 			continue;
1029 		link->aspm_capable = link->aspm_support;
1030 	}
1031 	list_for_each_entry(link, &link_list, sibling) {
1032 		struct pci_dev *child;
1033 		struct pci_bus *linkbus = link->pdev->subordinate;
1034 		if (link->root != root)
1035 			continue;
1036 		list_for_each_entry(child, &linkbus->devices, bus_list) {
1037 			if ((pci_pcie_type(child) != PCI_EXP_TYPE_ENDPOINT) &&
1038 			    (pci_pcie_type(child) != PCI_EXP_TYPE_LEG_END))
1039 				continue;
1040 			pcie_aspm_check_latency(child);
1041 		}
1042 	}
1043 }
1044 
1045 /* @pdev: the endpoint device */
1046 void pcie_aspm_exit_link_state(struct pci_dev *pdev)
1047 {
1048 	struct pci_dev *parent = pdev->bus->self;
1049 	struct pcie_link_state *link, *root, *parent_link;
1050 
1051 	if (!parent || !parent->link_state)
1052 		return;
1053 
1054 	down_read(&pci_bus_sem);
1055 	mutex_lock(&aspm_lock);
1056 	/*
1057 	 * All PCIe functions are in one slot, remove one function will remove
1058 	 * the whole slot, so just wait until we are the last function left.
1059 	 */
1060 	if (!list_empty(&parent->subordinate->devices))
1061 		goto out;
1062 
1063 	link = parent->link_state;
1064 	root = link->root;
1065 	parent_link = link->parent;
1066 
1067 	/* All functions are removed, so just disable ASPM for the link */
1068 	pcie_config_aspm_link(link, 0);
1069 	list_del(&link->sibling);
1070 	/* Clock PM is for endpoint device */
1071 	free_link_state(link);
1072 
1073 	/* Recheck latencies and configure upstream links */
1074 	if (parent_link) {
1075 		pcie_update_aspm_capable(root);
1076 		pcie_config_aspm_path(parent_link);
1077 	}
1078 out:
1079 	mutex_unlock(&aspm_lock);
1080 	up_read(&pci_bus_sem);
1081 }
1082 
1083 void pcie_aspm_powersave_config_link(struct pci_dev *pdev)
1084 {
1085 	struct pcie_link_state *link = pdev->link_state;
1086 
1087 	if (aspm_disabled || !link)
1088 		return;
1089 
1090 	if (aspm_policy != POLICY_POWERSAVE &&
1091 	    aspm_policy != POLICY_POWER_SUPERSAVE)
1092 		return;
1093 
1094 	down_read(&pci_bus_sem);
1095 	mutex_lock(&aspm_lock);
1096 	pcie_config_aspm_path(link);
1097 	pcie_set_clkpm(link, policy_to_clkpm_state(link));
1098 	mutex_unlock(&aspm_lock);
1099 	up_read(&pci_bus_sem);
1100 }
1101 
1102 static struct pcie_link_state *pcie_aspm_get_link(struct pci_dev *pdev)
1103 {
1104 	struct pci_dev *bridge;
1105 
1106 	if (!pci_is_pcie(pdev))
1107 		return NULL;
1108 
1109 	bridge = pci_upstream_bridge(pdev);
1110 	if (!bridge || !pci_is_pcie(bridge))
1111 		return NULL;
1112 
1113 	return bridge->link_state;
1114 }
1115 
1116 static int __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem)
1117 {
1118 	struct pcie_link_state *link = pcie_aspm_get_link(pdev);
1119 
1120 	if (!link)
1121 		return -EINVAL;
1122 	/*
1123 	 * A driver requested that ASPM be disabled on this device, but
1124 	 * if we don't have permission to manage ASPM (e.g., on ACPI
1125 	 * systems we have to observe the FADT ACPI_FADT_NO_ASPM bit and
1126 	 * the _OSC method), we can't honor that request.  Windows has
1127 	 * a similar mechanism using "PciASPMOptOut", which is also
1128 	 * ignored in this situation.
1129 	 */
1130 	if (aspm_disabled) {
1131 		pci_warn(pdev, "can't disable ASPM; OS doesn't have ASPM control\n");
1132 		return -EPERM;
1133 	}
1134 
1135 	if (sem)
1136 		down_read(&pci_bus_sem);
1137 	mutex_lock(&aspm_lock);
1138 	if (state & PCIE_LINK_STATE_L0S)
1139 		link->aspm_disable |= ASPM_STATE_L0S;
1140 	if (state & PCIE_LINK_STATE_L1)
1141 		/* L1 PM substates require L1 */
1142 		link->aspm_disable |= ASPM_STATE_L1 | ASPM_STATE_L1SS;
1143 	if (state & PCIE_LINK_STATE_L1_1)
1144 		link->aspm_disable |= ASPM_STATE_L1_1;
1145 	if (state & PCIE_LINK_STATE_L1_2)
1146 		link->aspm_disable |= ASPM_STATE_L1_2;
1147 	if (state & PCIE_LINK_STATE_L1_1_PCIPM)
1148 		link->aspm_disable |= ASPM_STATE_L1_1_PCIPM;
1149 	if (state & PCIE_LINK_STATE_L1_2_PCIPM)
1150 		link->aspm_disable |= ASPM_STATE_L1_2_PCIPM;
1151 	pcie_config_aspm_link(link, policy_to_aspm_state(link));
1152 
1153 	if (state & PCIE_LINK_STATE_CLKPM)
1154 		link->clkpm_disable = 1;
1155 	pcie_set_clkpm(link, policy_to_clkpm_state(link));
1156 	mutex_unlock(&aspm_lock);
1157 	if (sem)
1158 		up_read(&pci_bus_sem);
1159 
1160 	return 0;
1161 }
1162 
1163 int pci_disable_link_state_locked(struct pci_dev *pdev, int state)
1164 {
1165 	return __pci_disable_link_state(pdev, state, false);
1166 }
1167 EXPORT_SYMBOL(pci_disable_link_state_locked);
1168 
1169 /**
1170  * pci_disable_link_state - Disable device's link state, so the link will
1171  * never enter specific states.  Note that if the BIOS didn't grant ASPM
1172  * control to the OS, this does nothing because we can't touch the LNKCTL
1173  * register. Returns 0 or a negative errno.
1174  *
1175  * @pdev: PCI device
1176  * @state: ASPM link state to disable
1177  */
1178 int pci_disable_link_state(struct pci_dev *pdev, int state)
1179 {
1180 	return __pci_disable_link_state(pdev, state, true);
1181 }
1182 EXPORT_SYMBOL(pci_disable_link_state);
1183 
1184 static int pcie_aspm_set_policy(const char *val,
1185 				const struct kernel_param *kp)
1186 {
1187 	int i;
1188 	struct pcie_link_state *link;
1189 
1190 	if (aspm_disabled)
1191 		return -EPERM;
1192 	i = sysfs_match_string(policy_str, val);
1193 	if (i < 0)
1194 		return i;
1195 	if (i == aspm_policy)
1196 		return 0;
1197 
1198 	down_read(&pci_bus_sem);
1199 	mutex_lock(&aspm_lock);
1200 	aspm_policy = i;
1201 	list_for_each_entry(link, &link_list, sibling) {
1202 		pcie_config_aspm_link(link, policy_to_aspm_state(link));
1203 		pcie_set_clkpm(link, policy_to_clkpm_state(link));
1204 	}
1205 	mutex_unlock(&aspm_lock);
1206 	up_read(&pci_bus_sem);
1207 	return 0;
1208 }
1209 
1210 static int pcie_aspm_get_policy(char *buffer, const struct kernel_param *kp)
1211 {
1212 	int i, cnt = 0;
1213 	for (i = 0; i < ARRAY_SIZE(policy_str); i++)
1214 		if (i == aspm_policy)
1215 			cnt += sprintf(buffer + cnt, "[%s] ", policy_str[i]);
1216 		else
1217 			cnt += sprintf(buffer + cnt, "%s ", policy_str[i]);
1218 	cnt += sprintf(buffer + cnt, "\n");
1219 	return cnt;
1220 }
1221 
1222 module_param_call(policy, pcie_aspm_set_policy, pcie_aspm_get_policy,
1223 	NULL, 0644);
1224 
1225 /**
1226  * pcie_aspm_enabled - Check if PCIe ASPM has been enabled for a device.
1227  * @pdev: Target device.
1228  *
1229  * Relies on the upstream bridge's link_state being valid.  The link_state
1230  * is deallocated only when the last child of the bridge (i.e., @pdev or a
1231  * sibling) is removed, and the caller should be holding a reference to
1232  * @pdev, so this should be safe.
1233  */
1234 bool pcie_aspm_enabled(struct pci_dev *pdev)
1235 {
1236 	struct pcie_link_state *link = pcie_aspm_get_link(pdev);
1237 
1238 	if (!link)
1239 		return false;
1240 
1241 	return link->aspm_enabled;
1242 }
1243 EXPORT_SYMBOL_GPL(pcie_aspm_enabled);
1244 
1245 static ssize_t aspm_attr_show_common(struct device *dev,
1246 				     struct device_attribute *attr,
1247 				     char *buf, u8 state)
1248 {
1249 	struct pci_dev *pdev = to_pci_dev(dev);
1250 	struct pcie_link_state *link = pcie_aspm_get_link(pdev);
1251 
1252 	return sysfs_emit(buf, "%d\n", (link->aspm_enabled & state) ? 1 : 0);
1253 }
1254 
1255 static ssize_t aspm_attr_store_common(struct device *dev,
1256 				      struct device_attribute *attr,
1257 				      const char *buf, size_t len, u8 state)
1258 {
1259 	struct pci_dev *pdev = to_pci_dev(dev);
1260 	struct pcie_link_state *link = pcie_aspm_get_link(pdev);
1261 	bool state_enable;
1262 
1263 	if (kstrtobool(buf, &state_enable) < 0)
1264 		return -EINVAL;
1265 
1266 	down_read(&pci_bus_sem);
1267 	mutex_lock(&aspm_lock);
1268 
1269 	if (state_enable) {
1270 		link->aspm_disable &= ~state;
1271 		/* need to enable L1 for substates */
1272 		if (state & ASPM_STATE_L1SS)
1273 			link->aspm_disable &= ~ASPM_STATE_L1;
1274 	} else {
1275 		link->aspm_disable |= state;
1276 	}
1277 
1278 	pcie_config_aspm_link(link, policy_to_aspm_state(link));
1279 
1280 	mutex_unlock(&aspm_lock);
1281 	up_read(&pci_bus_sem);
1282 
1283 	return len;
1284 }
1285 
1286 #define ASPM_ATTR(_f, _s)						\
1287 static ssize_t _f##_show(struct device *dev,				\
1288 			 struct device_attribute *attr, char *buf)	\
1289 { return aspm_attr_show_common(dev, attr, buf, ASPM_STATE_##_s); }	\
1290 									\
1291 static ssize_t _f##_store(struct device *dev,				\
1292 			  struct device_attribute *attr,		\
1293 			  const char *buf, size_t len)			\
1294 { return aspm_attr_store_common(dev, attr, buf, len, ASPM_STATE_##_s); }
1295 
1296 ASPM_ATTR(l0s_aspm, L0S)
1297 ASPM_ATTR(l1_aspm, L1)
1298 ASPM_ATTR(l1_1_aspm, L1_1)
1299 ASPM_ATTR(l1_2_aspm, L1_2)
1300 ASPM_ATTR(l1_1_pcipm, L1_1_PCIPM)
1301 ASPM_ATTR(l1_2_pcipm, L1_2_PCIPM)
1302 
1303 static ssize_t clkpm_show(struct device *dev,
1304 			  struct device_attribute *attr, char *buf)
1305 {
1306 	struct pci_dev *pdev = to_pci_dev(dev);
1307 	struct pcie_link_state *link = pcie_aspm_get_link(pdev);
1308 
1309 	return sysfs_emit(buf, "%d\n", link->clkpm_enabled);
1310 }
1311 
1312 static ssize_t clkpm_store(struct device *dev,
1313 			   struct device_attribute *attr,
1314 			   const char *buf, size_t len)
1315 {
1316 	struct pci_dev *pdev = to_pci_dev(dev);
1317 	struct pcie_link_state *link = pcie_aspm_get_link(pdev);
1318 	bool state_enable;
1319 
1320 	if (kstrtobool(buf, &state_enable) < 0)
1321 		return -EINVAL;
1322 
1323 	down_read(&pci_bus_sem);
1324 	mutex_lock(&aspm_lock);
1325 
1326 	link->clkpm_disable = !state_enable;
1327 	pcie_set_clkpm(link, policy_to_clkpm_state(link));
1328 
1329 	mutex_unlock(&aspm_lock);
1330 	up_read(&pci_bus_sem);
1331 
1332 	return len;
1333 }
1334 
1335 static DEVICE_ATTR_RW(clkpm);
1336 static DEVICE_ATTR_RW(l0s_aspm);
1337 static DEVICE_ATTR_RW(l1_aspm);
1338 static DEVICE_ATTR_RW(l1_1_aspm);
1339 static DEVICE_ATTR_RW(l1_2_aspm);
1340 static DEVICE_ATTR_RW(l1_1_pcipm);
1341 static DEVICE_ATTR_RW(l1_2_pcipm);
1342 
1343 static struct attribute *aspm_ctrl_attrs[] = {
1344 	&dev_attr_clkpm.attr,
1345 	&dev_attr_l0s_aspm.attr,
1346 	&dev_attr_l1_aspm.attr,
1347 	&dev_attr_l1_1_aspm.attr,
1348 	&dev_attr_l1_2_aspm.attr,
1349 	&dev_attr_l1_1_pcipm.attr,
1350 	&dev_attr_l1_2_pcipm.attr,
1351 	NULL
1352 };
1353 
1354 static umode_t aspm_ctrl_attrs_are_visible(struct kobject *kobj,
1355 					   struct attribute *a, int n)
1356 {
1357 	struct device *dev = kobj_to_dev(kobj);
1358 	struct pci_dev *pdev = to_pci_dev(dev);
1359 	struct pcie_link_state *link = pcie_aspm_get_link(pdev);
1360 	static const u8 aspm_state_map[] = {
1361 		ASPM_STATE_L0S,
1362 		ASPM_STATE_L1,
1363 		ASPM_STATE_L1_1,
1364 		ASPM_STATE_L1_2,
1365 		ASPM_STATE_L1_1_PCIPM,
1366 		ASPM_STATE_L1_2_PCIPM,
1367 	};
1368 
1369 	if (aspm_disabled || !link)
1370 		return 0;
1371 
1372 	if (n == 0)
1373 		return link->clkpm_capable ? a->mode : 0;
1374 
1375 	return link->aspm_capable & aspm_state_map[n - 1] ? a->mode : 0;
1376 }
1377 
1378 const struct attribute_group aspm_ctrl_attr_group = {
1379 	.name = "link",
1380 	.attrs = aspm_ctrl_attrs,
1381 	.is_visible = aspm_ctrl_attrs_are_visible,
1382 };
1383 
1384 static int __init pcie_aspm_disable(char *str)
1385 {
1386 	if (!strcmp(str, "off")) {
1387 		aspm_policy = POLICY_DEFAULT;
1388 		aspm_disabled = 1;
1389 		aspm_support_enabled = false;
1390 		printk(KERN_INFO "PCIe ASPM is disabled\n");
1391 	} else if (!strcmp(str, "force")) {
1392 		aspm_force = 1;
1393 		printk(KERN_INFO "PCIe ASPM is forcibly enabled\n");
1394 	}
1395 	return 1;
1396 }
1397 
1398 __setup("pcie_aspm=", pcie_aspm_disable);
1399 
1400 void pcie_no_aspm(void)
1401 {
1402 	/*
1403 	 * Disabling ASPM is intended to prevent the kernel from modifying
1404 	 * existing hardware state, not to clear existing state. To that end:
1405 	 * (a) set policy to POLICY_DEFAULT in order to avoid changing state
1406 	 * (b) prevent userspace from changing policy
1407 	 */
1408 	if (!aspm_force) {
1409 		aspm_policy = POLICY_DEFAULT;
1410 		aspm_disabled = 1;
1411 	}
1412 }
1413 
1414 bool pcie_aspm_support_enabled(void)
1415 {
1416 	return aspm_support_enabled;
1417 }
1418