xref: /linux/drivers/pci/pcie/bwctrl.c (revision 7f71507851fc7764b36a3221839607d3a45c2025)
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * PCIe bandwidth controller
4  *
5  * Author: Alexandru Gagniuc <mr.nuke.me@gmail.com>
6  *
7  * Copyright (C) 2019 Dell Inc
8  * Copyright (C) 2023-2024 Intel Corporation
9  *
10  * The PCIe bandwidth controller provides a way to alter PCIe Link Speeds
11  * and notify the operating system when the Link Width or Speed changes. The
12  * notification capability is required for all Root Ports and Downstream
13  * Ports supporting Link Width wider than x1 and/or multiple Link Speeds.
14  *
15  * This service port driver hooks into the Bandwidth Notification interrupt
16  * watching for changes or links becoming degraded in operation. It updates
17  * the cached Current Link Speed that is exposed to user space through sysfs.
18  */
19 
20 #define dev_fmt(fmt) "bwctrl: " fmt
21 
22 #include <linux/atomic.h>
23 #include <linux/bitops.h>
24 #include <linux/bits.h>
25 #include <linux/cleanup.h>
26 #include <linux/errno.h>
27 #include <linux/interrupt.h>
28 #include <linux/mutex.h>
29 #include <linux/pci.h>
30 #include <linux/pci-bwctrl.h>
31 #include <linux/rwsem.h>
32 #include <linux/slab.h>
33 #include <linux/types.h>
34 
35 #include "../pci.h"
36 #include "portdrv.h"
37 
/**
 * struct pcie_bwctrl_data - PCIe bandwidth controller
 * @set_speed_mutex:	Serializes link speed changes
 * @lbms_count:		Count for LBMS (since last reset)
 * @cdev:		Thermal cooling device associated with the port
 *
 * One instance is allocated per port by pcie_bwnotif_probe() and is reachable
 * through port->link_bwctrl until pcie_bwnotif_remove() clears that pointer.
 *
 * @lbms_count is incremented whenever the LBMS bit is found set in the Link
 * Status register (when notifications are enabled and in the interrupt
 * handler) and is zeroed by pcie_reset_lbms_count().
 *
 * @cdev is NULL when registering the cooling device failed; probe does not
 * treat that as an error.
 */
struct pcie_bwctrl_data {
	struct mutex set_speed_mutex;
	atomic_t lbms_count;
	struct thermal_cooling_device *cdev;
};
49 
/*
 * Prevent port removal during LBMS count accessors and Link Speed changes.
 *
 * These have to be differentiated because pcie_bwctrl_change_speed() calls
 * pcie_retrain_link() which uses LBMS count reset accessor on success
 * (using just one rwsem triggers "possible recursive locking detected"
 * warning).
 *
 * Readers: pcie_reset_lbms_count() and pcie_lbms_count() hold
 * pcie_bwctrl_lbms_rwsem, pcie_set_target_speed() holds
 * pcie_bwctrl_setspeed_rwsem.
 *
 * Writers: pcie_bwnotif_probe() and pcie_bwnotif_remove() take both for
 * write while they change port->link_bwctrl, always acquiring
 * pcie_bwctrl_setspeed_rwsem first and pcie_bwctrl_lbms_rwsem second.
 */
static DECLARE_RWSEM(pcie_bwctrl_lbms_rwsem);
static DECLARE_RWSEM(pcie_bwctrl_setspeed_rwsem);
60 
61 static bool pcie_valid_speed(enum pci_bus_speed speed)
62 {
63 	return (speed >= PCIE_SPEED_2_5GT) && (speed <= PCIE_SPEED_64_0GT);
64 }
65 
66 static u16 pci_bus_speed2lnkctl2(enum pci_bus_speed speed)
67 {
68 	static const u8 speed_conv[] = {
69 		[PCIE_SPEED_2_5GT] = PCI_EXP_LNKCTL2_TLS_2_5GT,
70 		[PCIE_SPEED_5_0GT] = PCI_EXP_LNKCTL2_TLS_5_0GT,
71 		[PCIE_SPEED_8_0GT] = PCI_EXP_LNKCTL2_TLS_8_0GT,
72 		[PCIE_SPEED_16_0GT] = PCI_EXP_LNKCTL2_TLS_16_0GT,
73 		[PCIE_SPEED_32_0GT] = PCI_EXP_LNKCTL2_TLS_32_0GT,
74 		[PCIE_SPEED_64_0GT] = PCI_EXP_LNKCTL2_TLS_64_0GT,
75 	};
76 
77 	if (WARN_ON_ONCE(!pcie_valid_speed(speed)))
78 		return 0;
79 
80 	return speed_conv[speed];
81 }
82 
83 static inline u16 pcie_supported_speeds2target_speed(u8 supported_speeds)
84 {
85 	return __fls(supported_speeds);
86 }
87 
88 /**
89  * pcie_bwctrl_select_speed - Select Target Link Speed
90  * @port:	PCIe Port
91  * @speed_req:	Requested PCIe Link Speed
92  *
93  * Select Target Link Speed by take into account Supported Link Speeds of
94  * both the Root Port and the Endpoint.
95  *
96  * Return: Target Link Speed (1=2.5GT/s, 2=5GT/s, 3=8GT/s, etc.)
97  */
98 static u16 pcie_bwctrl_select_speed(struct pci_dev *port, enum pci_bus_speed speed_req)
99 {
100 	struct pci_bus *bus = port->subordinate;
101 	u8 desired_speeds, supported_speeds;
102 	struct pci_dev *dev;
103 
104 	desired_speeds = GENMASK(pci_bus_speed2lnkctl2(speed_req),
105 				 __fls(PCI_EXP_LNKCAP2_SLS_2_5GB));
106 
107 	supported_speeds = port->supported_speeds;
108 	if (bus) {
109 		down_read(&pci_bus_sem);
110 		dev = list_first_entry_or_null(&bus->devices, struct pci_dev, bus_list);
111 		if (dev)
112 			supported_speeds &= dev->supported_speeds;
113 		up_read(&pci_bus_sem);
114 	}
115 	if (!supported_speeds)
116 		return PCI_EXP_LNKCAP2_SLS_2_5GB;
117 
118 	return pcie_supported_speeds2target_speed(supported_speeds & desired_speeds);
119 }
120 
121 static int pcie_bwctrl_change_speed(struct pci_dev *port, u16 target_speed, bool use_lt)
122 {
123 	int ret;
124 
125 	ret = pcie_capability_clear_and_set_word(port, PCI_EXP_LNKCTL2,
126 						 PCI_EXP_LNKCTL2_TLS, target_speed);
127 	if (ret != PCIBIOS_SUCCESSFUL)
128 		return pcibios_err_to_errno(ret);
129 
130 	ret = pcie_retrain_link(port, use_lt);
131 	if (ret < 0)
132 		return ret;
133 
134 	/*
135 	 * Ensure link speed updates also with platforms that have problems
136 	 * with notifications.
137 	 */
138 	if (port->subordinate)
139 		pcie_update_link_speed(port->subordinate);
140 
141 	return 0;
142 }
143 
144 /**
145  * pcie_set_target_speed - Set downstream Link Speed for PCIe Port
146  * @port:	PCIe Port
147  * @speed_req:	Requested PCIe Link Speed
148  * @use_lt:	Wait for the LT or DLLLA bit to detect the end of link training
149  *
150  * Attempt to set PCIe Port Link Speed to @speed_req. @speed_req may be
151  * adjusted downwards to the best speed supported by both the Port and PCIe
152  * Device underneath it.
153  *
154  * Return:
155  * * 0		- on success
156  * * -EINVAL	- @speed_req is not a PCIe Link Speed
157  * * -ENODEV	- @port is not controllable
158  * * -ETIMEDOUT	- changing Link Speed took too long
159  * * -EAGAIN	- Link Speed was changed but @speed_req was not achieved
160  */
161 int pcie_set_target_speed(struct pci_dev *port, enum pci_bus_speed speed_req,
162 			  bool use_lt)
163 {
164 	struct pci_bus *bus = port->subordinate;
165 	u16 target_speed;
166 	int ret;
167 
168 	if (WARN_ON_ONCE(!pcie_valid_speed(speed_req)))
169 		return -EINVAL;
170 
171 	if (bus && bus->cur_bus_speed == speed_req)
172 		return 0;
173 
174 	target_speed = pcie_bwctrl_select_speed(port, speed_req);
175 
176 	scoped_guard(rwsem_read, &pcie_bwctrl_setspeed_rwsem) {
177 		struct pcie_bwctrl_data *data = port->link_bwctrl;
178 
179 		/*
180 		 * port->link_bwctrl is NULL during initial scan when called
181 		 * e.g. from the Target Speed quirk.
182 		 */
183 		if (data)
184 			mutex_lock(&data->set_speed_mutex);
185 
186 		ret = pcie_bwctrl_change_speed(port, target_speed, use_lt);
187 
188 		if (data)
189 			mutex_unlock(&data->set_speed_mutex);
190 	}
191 
192 	/*
193 	 * Despite setting higher speed into the Target Link Speed, empty
194 	 * bus won't train to 5GT+ speeds.
195 	 */
196 	if (!ret && bus && bus->cur_bus_speed != speed_req &&
197 	    !list_empty(&bus->devices))
198 		ret = -EAGAIN;
199 
200 	return ret;
201 }
202 
203 static void pcie_bwnotif_enable(struct pcie_device *srv)
204 {
205 	struct pcie_bwctrl_data *data = srv->port->link_bwctrl;
206 	struct pci_dev *port = srv->port;
207 	u16 link_status;
208 	int ret;
209 
210 	/* Count LBMS seen so far as one */
211 	ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status);
212 	if (ret == PCIBIOS_SUCCESSFUL && link_status & PCI_EXP_LNKSTA_LBMS)
213 		atomic_inc(&data->lbms_count);
214 
215 	pcie_capability_set_word(port, PCI_EXP_LNKCTL,
216 				 PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE);
217 	pcie_capability_write_word(port, PCI_EXP_LNKSTA,
218 				   PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS);
219 
220 	/*
221 	 * Update after enabling notifications & clearing status bits ensures
222 	 * link speed is up to date.
223 	 */
224 	pcie_update_link_speed(port->subordinate);
225 }
226 
227 static void pcie_bwnotif_disable(struct pci_dev *port)
228 {
229 	pcie_capability_clear_word(port, PCI_EXP_LNKCTL,
230 				   PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE);
231 }
232 
233 static irqreturn_t pcie_bwnotif_irq(int irq, void *context)
234 {
235 	struct pcie_device *srv = context;
236 	struct pcie_bwctrl_data *data = srv->port->link_bwctrl;
237 	struct pci_dev *port = srv->port;
238 	u16 link_status, events;
239 	int ret;
240 
241 	ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status);
242 	if (ret != PCIBIOS_SUCCESSFUL)
243 		return IRQ_NONE;
244 
245 	events = link_status & (PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS);
246 	if (!events)
247 		return IRQ_NONE;
248 
249 	if (events & PCI_EXP_LNKSTA_LBMS)
250 		atomic_inc(&data->lbms_count);
251 
252 	pcie_capability_write_word(port, PCI_EXP_LNKSTA, events);
253 
254 	/*
255 	 * Interrupts will not be triggered from any further Link Speed
256 	 * change until LBMS is cleared by the write. Therefore, re-read the
257 	 * speed (inside pcie_update_link_speed()) after LBMS has been
258 	 * cleared to avoid missing link speed changes.
259 	 */
260 	pcie_update_link_speed(port->subordinate);
261 
262 	return IRQ_HANDLED;
263 }
264 
265 void pcie_reset_lbms_count(struct pci_dev *port)
266 {
267 	struct pcie_bwctrl_data *data;
268 
269 	guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem);
270 	data = port->link_bwctrl;
271 	if (data)
272 		atomic_set(&data->lbms_count, 0);
273 	else
274 		pcie_capability_write_word(port, PCI_EXP_LNKSTA,
275 					   PCI_EXP_LNKSTA_LBMS);
276 }
277 
278 int pcie_lbms_count(struct pci_dev *port, unsigned long *val)
279 {
280 	struct pcie_bwctrl_data *data;
281 
282 	guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem);
283 	data = port->link_bwctrl;
284 	if (!data)
285 		return -ENOTTY;
286 
287 	*val = atomic_read(&data->lbms_count);
288 
289 	return 0;
290 }
291 
292 static int pcie_bwnotif_probe(struct pcie_device *srv)
293 {
294 	struct pci_dev *port = srv->port;
295 	int ret;
296 
297 	struct pcie_bwctrl_data *data = devm_kzalloc(&srv->device,
298 						     sizeof(*data), GFP_KERNEL);
299 	if (!data)
300 		return -ENOMEM;
301 
302 	ret = devm_mutex_init(&srv->device, &data->set_speed_mutex);
303 	if (ret)
304 		return ret;
305 
306 	ret = devm_request_irq(&srv->device, srv->irq, pcie_bwnotif_irq,
307 			       IRQF_SHARED, "PCIe bwctrl", srv);
308 	if (ret)
309 		return ret;
310 
311 	scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) {
312 		scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) {
313 			port->link_bwctrl = no_free_ptr(data);
314 			pcie_bwnotif_enable(srv);
315 		}
316 	}
317 
318 	pci_dbg(port, "enabled with IRQ %d\n", srv->irq);
319 
320 	/* Don't fail on errors. Don't leave IS_ERR() "pointer" into ->cdev */
321 	port->link_bwctrl->cdev = pcie_cooling_device_register(port);
322 	if (IS_ERR(port->link_bwctrl->cdev))
323 		port->link_bwctrl->cdev = NULL;
324 
325 	return 0;
326 }
327 
328 static void pcie_bwnotif_remove(struct pcie_device *srv)
329 {
330 	struct pcie_bwctrl_data *data = srv->port->link_bwctrl;
331 
332 	pcie_cooling_device_unregister(data->cdev);
333 
334 	pcie_bwnotif_disable(srv->port);
335 
336 	scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem)
337 		scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem)
338 			srv->port->link_bwctrl = NULL;
339 }
340 
341 static int pcie_bwnotif_suspend(struct pcie_device *srv)
342 {
343 	pcie_bwnotif_disable(srv->port);
344 	return 0;
345 }
346 
/* Resume callback: enable bandwidth notifications of the port again. Always returns 0. */
static int pcie_bwnotif_resume(struct pcie_device *srv)
{
	pcie_bwnotif_enable(srv);

	return 0;
}
352 
353 static struct pcie_port_service_driver pcie_bwctrl_driver = {
354 	.name		= "pcie_bwctrl",
355 	.port_type	= PCIE_ANY_PORT,
356 	.service	= PCIE_PORT_SERVICE_BWCTRL,
357 	.probe		= pcie_bwnotif_probe,
358 	.suspend	= pcie_bwnotif_suspend,
359 	.resume		= pcie_bwnotif_resume,
360 	.remove		= pcie_bwnotif_remove,
361 };
362 
363 int __init pcie_bwctrl_init(void)
364 {
365 	return pcie_port_service_register(&pcie_bwctrl_driver);
366 }
367