xref: /linux/drivers/pci/pcie/bwctrl.c (revision 7f81907b7e3f93dfed2e903af52659baa4944341)
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * PCIe bandwidth controller
4  *
5  * Author: Alexandru Gagniuc <mr.nuke.me@gmail.com>
6  *
7  * Copyright (C) 2019 Dell Inc
8  * Copyright (C) 2023-2024 Intel Corporation
9  *
10  * The PCIe bandwidth controller provides a way to alter PCIe Link Speeds
11  * and notify the operating system when the Link Width or Speed changes. The
12  * notification capability is required for all Root Ports and Downstream
13  * Ports supporting Link Width wider than x1 and/or multiple Link Speeds.
14  *
15  * This service port driver hooks into the Bandwidth Notification interrupt
16  * watching for changes or links becoming degraded in operation. It updates
17  * the cached Current Link Speed that is exposed to user space through sysfs.
18  */
19 
20 #define dev_fmt(fmt) "bwctrl: " fmt
21 
22 #include <linux/atomic.h>
23 #include <linux/bitops.h>
24 #include <linux/bits.h>
25 #include <linux/cleanup.h>
26 #include <linux/errno.h>
27 #include <linux/interrupt.h>
28 #include <linux/mutex.h>
29 #include <linux/pci.h>
30 #include <linux/pci-bwctrl.h>
31 #include <linux/rwsem.h>
32 #include <linux/slab.h>
33 #include <linux/types.h>
34 
35 #include "../pci.h"
36 #include "portdrv.h"
37 
38 /**
39  * struct pcie_bwctrl_data - PCIe bandwidth controller
40  * @set_speed_mutex:	Serializes link speed changes
41  * @cdev:		Thermal cooling device associated with the port
42  */
43 struct pcie_bwctrl_data {
44 	struct mutex set_speed_mutex;
45 	struct thermal_cooling_device *cdev;
46 };
47 
/*
 * Link Speed changes hold this for read; service probe and remove hold it
 * for write. A port therefore cannot be removed while its Link Speed is
 * being changed.
 */
static DECLARE_RWSEM(pcie_bwctrl_setspeed_rwsem);
50 
51 static bool pcie_valid_speed(enum pci_bus_speed speed)
52 {
53 	return (speed >= PCIE_SPEED_2_5GT) && (speed <= PCIE_SPEED_64_0GT);
54 }
55 
56 static u16 pci_bus_speed2lnkctl2(enum pci_bus_speed speed)
57 {
58 	static const u8 speed_conv[] = {
59 		[PCIE_SPEED_2_5GT] = PCI_EXP_LNKCTL2_TLS_2_5GT,
60 		[PCIE_SPEED_5_0GT] = PCI_EXP_LNKCTL2_TLS_5_0GT,
61 		[PCIE_SPEED_8_0GT] = PCI_EXP_LNKCTL2_TLS_8_0GT,
62 		[PCIE_SPEED_16_0GT] = PCI_EXP_LNKCTL2_TLS_16_0GT,
63 		[PCIE_SPEED_32_0GT] = PCI_EXP_LNKCTL2_TLS_32_0GT,
64 		[PCIE_SPEED_64_0GT] = PCI_EXP_LNKCTL2_TLS_64_0GT,
65 	};
66 
67 	if (WARN_ON_ONCE(!pcie_valid_speed(speed)))
68 		return 0;
69 
70 	return speed_conv[speed];
71 }
72 
73 static inline u16 pcie_supported_speeds2target_speed(u8 supported_speeds)
74 {
75 	return __fls(supported_speeds);
76 }
77 
78 /**
79  * pcie_bwctrl_select_speed - Select Target Link Speed
80  * @port:	PCIe Port
81  * @speed_req:	Requested PCIe Link Speed
82  *
83  * Select Target Link Speed by take into account Supported Link Speeds of
84  * both the Root Port and the Endpoint.
85  *
86  * Return: Target Link Speed (1=2.5GT/s, 2=5GT/s, 3=8GT/s, etc.)
87  */
88 static u16 pcie_bwctrl_select_speed(struct pci_dev *port, enum pci_bus_speed speed_req)
89 {
90 	struct pci_bus *bus = port->subordinate;
91 	u8 desired_speeds, supported_speeds;
92 	struct pci_dev *dev;
93 
94 	desired_speeds = GENMASK(pci_bus_speed2lnkctl2(speed_req),
95 				 __fls(PCI_EXP_LNKCAP2_SLS_2_5GB));
96 
97 	supported_speeds = port->supported_speeds;
98 	if (bus) {
99 		down_read(&pci_bus_sem);
100 		dev = list_first_entry_or_null(&bus->devices, struct pci_dev, bus_list);
101 		if (dev)
102 			supported_speeds &= dev->supported_speeds;
103 		up_read(&pci_bus_sem);
104 	}
105 	if (!supported_speeds)
106 		supported_speeds = PCI_EXP_LNKCAP2_SLS_2_5GB;
107 
108 	return pcie_supported_speeds2target_speed(supported_speeds & desired_speeds);
109 }
110 
111 static int pcie_bwctrl_change_speed(struct pci_dev *port, u16 target_speed, bool use_lt)
112 {
113 	int ret;
114 
115 	ret = pcie_capability_clear_and_set_word(port, PCI_EXP_LNKCTL2,
116 						 PCI_EXP_LNKCTL2_TLS, target_speed);
117 	if (ret != PCIBIOS_SUCCESSFUL)
118 		return pcibios_err_to_errno(ret);
119 
120 	return pcie_retrain_link(port, use_lt);
121 }
122 
123 /**
124  * pcie_set_target_speed - Set downstream Link Speed for PCIe Port
125  * @port:	PCIe Port
126  * @speed_req:	Requested PCIe Link Speed
127  * @use_lt:	Wait for the LT or DLLLA bit to detect the end of link training
128  *
129  * Attempt to set PCIe Port Link Speed to @speed_req. @speed_req may be
130  * adjusted downwards to the best speed supported by both the Port and PCIe
131  * Device underneath it.
132  *
133  * Return:
134  * * 0		- on success
135  * * -EINVAL	- @speed_req is not a PCIe Link Speed
136  * * -ENODEV	- @port is not controllable
137  * * -ETIMEDOUT	- changing Link Speed took too long
138  * * -EAGAIN	- Link Speed was changed but @speed_req was not achieved
139  */
140 int pcie_set_target_speed(struct pci_dev *port, enum pci_bus_speed speed_req,
141 			  bool use_lt)
142 {
143 	struct pci_bus *bus = port->subordinate;
144 	u16 target_speed;
145 	int ret;
146 
147 	if (WARN_ON_ONCE(!pcie_valid_speed(speed_req)))
148 		return -EINVAL;
149 
150 	if (bus && bus->cur_bus_speed == speed_req)
151 		return 0;
152 
153 	target_speed = pcie_bwctrl_select_speed(port, speed_req);
154 
155 	scoped_guard(rwsem_read, &pcie_bwctrl_setspeed_rwsem) {
156 		struct pcie_bwctrl_data *data = port->link_bwctrl;
157 
158 		/*
159 		 * port->link_bwctrl is NULL during initial scan when called
160 		 * e.g. from the Target Speed quirk.
161 		 */
162 		if (data)
163 			mutex_lock(&data->set_speed_mutex);
164 
165 		ret = pcie_bwctrl_change_speed(port, target_speed, use_lt);
166 
167 		if (data)
168 			mutex_unlock(&data->set_speed_mutex);
169 	}
170 
171 	/*
172 	 * Despite setting higher speed into the Target Link Speed, empty
173 	 * bus won't train to 5GT+ speeds.
174 	 */
175 	if (!ret && bus && bus->cur_bus_speed != speed_req &&
176 	    !list_empty(&bus->devices))
177 		ret = -EAGAIN;
178 
179 	return ret;
180 }
181 
182 static void pcie_bwnotif_enable(struct pcie_device *srv)
183 {
184 	struct pci_dev *port = srv->port;
185 	u16 link_status;
186 	int ret;
187 
188 	/* Note if LBMS has been seen so far */
189 	ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status);
190 	if (ret == PCIBIOS_SUCCESSFUL && link_status & PCI_EXP_LNKSTA_LBMS)
191 		set_bit(PCI_LINK_LBMS_SEEN, &port->priv_flags);
192 
193 	pcie_capability_set_word(port, PCI_EXP_LNKCTL,
194 				 PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE);
195 	pcie_capability_write_word(port, PCI_EXP_LNKSTA,
196 				   PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS);
197 
198 	/*
199 	 * Update after enabling notifications & clearing status bits ensures
200 	 * link speed is up to date.
201 	 */
202 	pcie_update_link_speed(port->subordinate);
203 }
204 
205 static void pcie_bwnotif_disable(struct pci_dev *port)
206 {
207 	pcie_capability_clear_word(port, PCI_EXP_LNKCTL,
208 				   PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE);
209 }
210 
211 static irqreturn_t pcie_bwnotif_irq(int irq, void *context)
212 {
213 	struct pcie_device *srv = context;
214 	struct pci_dev *port = srv->port;
215 	u16 link_status, events;
216 	int ret;
217 
218 	ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status);
219 	if (ret != PCIBIOS_SUCCESSFUL)
220 		return IRQ_NONE;
221 
222 	events = link_status & (PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS);
223 	if (!events)
224 		return IRQ_NONE;
225 
226 	if (events & PCI_EXP_LNKSTA_LBMS)
227 		set_bit(PCI_LINK_LBMS_SEEN, &port->priv_flags);
228 
229 	pcie_capability_write_word(port, PCI_EXP_LNKSTA, events);
230 
231 	/*
232 	 * Interrupts will not be triggered from any further Link Speed
233 	 * change until LBMS is cleared by the write. Therefore, re-read the
234 	 * speed (inside pcie_update_link_speed()) after LBMS has been
235 	 * cleared to avoid missing link speed changes.
236 	 */
237 	pcie_update_link_speed(port->subordinate);
238 
239 	return IRQ_HANDLED;
240 }
241 
242 void pcie_reset_lbms(struct pci_dev *port)
243 {
244 	clear_bit(PCI_LINK_LBMS_SEEN, &port->priv_flags);
245 	pcie_capability_write_word(port, PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_LBMS);
246 }
247 
248 static int pcie_bwnotif_probe(struct pcie_device *srv)
249 {
250 	struct pci_dev *port = srv->port;
251 	int ret;
252 
253 	/* Can happen if we run out of bus numbers during enumeration. */
254 	if (!port->subordinate)
255 		return -ENODEV;
256 
257 	struct pcie_bwctrl_data *data = devm_kzalloc(&srv->device,
258 						     sizeof(*data), GFP_KERNEL);
259 	if (!data)
260 		return -ENOMEM;
261 
262 	ret = devm_mutex_init(&srv->device, &data->set_speed_mutex);
263 	if (ret)
264 		return ret;
265 
266 	scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) {
267 		port->link_bwctrl = data;
268 
269 		ret = request_irq(srv->irq, pcie_bwnotif_irq,
270 				  IRQF_SHARED, "PCIe bwctrl", srv);
271 		if (ret) {
272 			port->link_bwctrl = NULL;
273 			return ret;
274 		}
275 
276 		pcie_bwnotif_enable(srv);
277 	}
278 
279 	pci_dbg(port, "enabled with IRQ %d\n", srv->irq);
280 
281 	/* Don't fail on errors. Don't leave IS_ERR() "pointer" into ->cdev */
282 	port->link_bwctrl->cdev = pcie_cooling_device_register(port);
283 	if (IS_ERR(port->link_bwctrl->cdev))
284 		port->link_bwctrl->cdev = NULL;
285 
286 	return 0;
287 }
288 
289 static void pcie_bwnotif_remove(struct pcie_device *srv)
290 {
291 	struct pcie_bwctrl_data *data = srv->port->link_bwctrl;
292 
293 	pcie_cooling_device_unregister(data->cdev);
294 
295 	scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) {
296 		pcie_bwnotif_disable(srv->port);
297 
298 		free_irq(srv->irq, srv);
299 
300 		srv->port->link_bwctrl = NULL;
301 	}
302 }
303 
304 static int pcie_bwnotif_suspend(struct pcie_device *srv)
305 {
306 	pcie_bwnotif_disable(srv->port);
307 	return 0;
308 }
309 
/* Resume callback: re-enable bandwidth notifications. Always returns 0. */
static int pcie_bwnotif_resume(struct pcie_device *srv)
{
	pcie_bwnotif_enable(srv);

	return 0;
}
315 
316 static struct pcie_port_service_driver pcie_bwctrl_driver = {
317 	.name		= "pcie_bwctrl",
318 	.port_type	= PCIE_ANY_PORT,
319 	.service	= PCIE_PORT_SERVICE_BWCTRL,
320 	.probe		= pcie_bwnotif_probe,
321 	.suspend	= pcie_bwnotif_suspend,
322 	.resume		= pcie_bwnotif_resume,
323 	.remove		= pcie_bwnotif_remove,
324 };
325 
326 int __init pcie_bwctrl_init(void)
327 {
328 	return pcie_port_service_register(&pcie_bwctrl_driver);
329 }
330