1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3 * PCIe bandwidth controller
4 *
5 * Author: Alexandru Gagniuc <mr.nuke.me@gmail.com>
6 *
7 * Copyright (C) 2019 Dell Inc
8 * Copyright (C) 2023-2024 Intel Corporation
9 *
10 * The PCIe bandwidth controller provides a way to alter PCIe Link Speeds
11 * and notify the operating system when the Link Width or Speed changes. The
12 * notification capability is required for all Root Ports and Downstream
13 * Ports supporting Link Width wider than x1 and/or multiple Link Speeds.
14 *
15 * This service port driver hooks into the Bandwidth Notification interrupt
16 * watching for changes or links becoming degraded in operation. It updates
17 * the cached Current Link Speed that is exposed to user space through sysfs.
18 */
19
20 #define dev_fmt(fmt) "bwctrl: " fmt
21
22 #include <linux/atomic.h>
23 #include <linux/bitops.h>
24 #include <linux/bits.h>
25 #include <linux/cleanup.h>
26 #include <linux/errno.h>
27 #include <linux/interrupt.h>
28 #include <linux/mutex.h>
29 #include <linux/pci.h>
30 #include <linux/pci-bwctrl.h>
31 #include <linux/rwsem.h>
32 #include <linux/slab.h>
33 #include <linux/types.h>
34
35 #include "../pci.h"
36 #include "portdrv.h"
37
38 /**
39 * struct pcie_bwctrl_data - PCIe bandwidth controller
40 * @set_speed_mutex: Serializes link speed changes
41 * @cdev: Thermal cooling device associated with the port
42 */
43 struct pcie_bwctrl_data {
44 struct mutex set_speed_mutex;
45 struct thermal_cooling_device *cdev;
46 };
47
/*
 * Prevent port removal during Link Speed changes: speed changes hold this
 * for read, while probe and remove hold it for write when they set or clear
 * port->link_bwctrl.
 */
static DECLARE_RWSEM(pcie_bwctrl_setspeed_rwsem);
50
pcie_valid_speed(enum pci_bus_speed speed)51 static bool pcie_valid_speed(enum pci_bus_speed speed)
52 {
53 return (speed >= PCIE_SPEED_2_5GT) && (speed <= PCIE_SPEED_64_0GT);
54 }
55
pci_bus_speed2lnkctl2(enum pci_bus_speed speed)56 static u16 pci_bus_speed2lnkctl2(enum pci_bus_speed speed)
57 {
58 static const u8 speed_conv[] = {
59 [PCIE_SPEED_2_5GT] = PCI_EXP_LNKCTL2_TLS_2_5GT,
60 [PCIE_SPEED_5_0GT] = PCI_EXP_LNKCTL2_TLS_5_0GT,
61 [PCIE_SPEED_8_0GT] = PCI_EXP_LNKCTL2_TLS_8_0GT,
62 [PCIE_SPEED_16_0GT] = PCI_EXP_LNKCTL2_TLS_16_0GT,
63 [PCIE_SPEED_32_0GT] = PCI_EXP_LNKCTL2_TLS_32_0GT,
64 [PCIE_SPEED_64_0GT] = PCI_EXP_LNKCTL2_TLS_64_0GT,
65 };
66
67 if (WARN_ON_ONCE(!pcie_valid_speed(speed)))
68 return 0;
69
70 return speed_conv[speed];
71 }
72
pcie_supported_speeds2target_speed(u8 supported_speeds)73 static inline u16 pcie_supported_speeds2target_speed(u8 supported_speeds)
74 {
75 return __fls(supported_speeds);
76 }
77
78 /**
79 * pcie_bwctrl_select_speed - Select Target Link Speed
80 * @port: PCIe Port
81 * @speed_req: Requested PCIe Link Speed
82 *
83 * Select Target Link Speed by take into account Supported Link Speeds of
84 * both the Root Port and the Endpoint.
85 *
86 * Return: Target Link Speed (1=2.5GT/s, 2=5GT/s, 3=8GT/s, etc.)
87 */
pcie_bwctrl_select_speed(struct pci_dev * port,enum pci_bus_speed speed_req)88 static u16 pcie_bwctrl_select_speed(struct pci_dev *port, enum pci_bus_speed speed_req)
89 {
90 struct pci_bus *bus = port->subordinate;
91 u8 desired_speeds, supported_speeds;
92 struct pci_dev *dev;
93
94 desired_speeds = GENMASK(pci_bus_speed2lnkctl2(speed_req),
95 __fls(PCI_EXP_LNKCAP2_SLS_2_5GB));
96
97 supported_speeds = port->supported_speeds;
98 if (bus) {
99 down_read(&pci_bus_sem);
100 dev = list_first_entry_or_null(&bus->devices, struct pci_dev, bus_list);
101 if (dev)
102 supported_speeds &= dev->supported_speeds;
103 up_read(&pci_bus_sem);
104 }
105 if (!supported_speeds)
106 supported_speeds = PCI_EXP_LNKCAP2_SLS_2_5GB;
107
108 return pcie_supported_speeds2target_speed(supported_speeds & desired_speeds);
109 }
110
/*
 * Write @target_speed into the Target Link Speed field of Link Control 2 and
 * then retrain the link.
 *
 * Returns the result of pcie_retrain_link(), or the config access error
 * converted with pcibios_err_to_errno() if the register update failed.
 */
static int pcie_bwctrl_change_speed(struct pci_dev *port, u16 target_speed, bool use_lt)
{
	int err = pcie_capability_clear_and_set_word(port, PCI_EXP_LNKCTL2,
						      PCI_EXP_LNKCTL2_TLS,
						      target_speed);

	if (err == PCIBIOS_SUCCESSFUL)
		return pcie_retrain_link(port, use_lt);

	return pcibios_err_to_errno(err);
}
122
123 /**
124 * pcie_set_target_speed - Set downstream Link Speed for PCIe Port
125 * @port: PCIe Port
126 * @speed_req: Requested PCIe Link Speed
127 * @use_lt: Wait for the LT or DLLLA bit to detect the end of link training
128 *
129 * Attempt to set PCIe Port Link Speed to @speed_req. @speed_req may be
130 * adjusted downwards to the best speed supported by both the Port and PCIe
131 * Device underneath it.
132 *
133 * Return:
134 * * 0 - on success
135 * * -EINVAL - @speed_req is not a PCIe Link Speed
136 * * -ENODEV - @port is not controllable
137 * * -ETIMEDOUT - changing Link Speed took too long
138 * * -EAGAIN - Link Speed was changed but @speed_req was not achieved
139 */
pcie_set_target_speed(struct pci_dev * port,enum pci_bus_speed speed_req,bool use_lt)140 int pcie_set_target_speed(struct pci_dev *port, enum pci_bus_speed speed_req,
141 bool use_lt)
142 {
143 struct pci_bus *bus = port->subordinate;
144 u16 target_speed;
145 int ret;
146
147 if (WARN_ON_ONCE(!pcie_valid_speed(speed_req)))
148 return -EINVAL;
149
150 if (bus && bus->cur_bus_speed == speed_req)
151 return 0;
152
153 target_speed = pcie_bwctrl_select_speed(port, speed_req);
154
155 scoped_guard(rwsem_read, &pcie_bwctrl_setspeed_rwsem) {
156 struct pcie_bwctrl_data *data = port->link_bwctrl;
157
158 /*
159 * port->link_bwctrl is NULL during initial scan when called
160 * e.g. from the Target Speed quirk.
161 */
162 if (data)
163 mutex_lock(&data->set_speed_mutex);
164
165 ret = pcie_bwctrl_change_speed(port, target_speed, use_lt);
166
167 if (data)
168 mutex_unlock(&data->set_speed_mutex);
169 }
170
171 /*
172 * Despite setting higher speed into the Target Link Speed, empty
173 * bus won't train to 5GT+ speeds.
174 */
175 if (!ret && bus && bus->cur_bus_speed != speed_req &&
176 !list_empty(&bus->devices))
177 ret = -EAGAIN;
178
179 return ret;
180 }
181
pcie_bwnotif_enable(struct pcie_device * srv)182 static void pcie_bwnotif_enable(struct pcie_device *srv)
183 {
184 struct pci_dev *port = srv->port;
185 u16 link_status;
186 int ret;
187
188 /* Note if LBMS has been seen so far */
189 ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status);
190 if (ret == PCIBIOS_SUCCESSFUL && link_status & PCI_EXP_LNKSTA_LBMS)
191 set_bit(PCI_LINK_LBMS_SEEN, &port->priv_flags);
192
193 pcie_capability_set_word(port, PCI_EXP_LNKCTL,
194 PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE);
195 pcie_capability_write_word(port, PCI_EXP_LNKSTA,
196 PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS);
197
198 /*
199 * Update after enabling notifications & clearing status bits ensures
200 * link speed is up to date.
201 */
202 pcie_update_link_speed(port->subordinate);
203 }
204
pcie_bwnotif_disable(struct pci_dev * port)205 static void pcie_bwnotif_disable(struct pci_dev *port)
206 {
207 pcie_capability_clear_word(port, PCI_EXP_LNKCTL,
208 PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE);
209 }
210
pcie_bwnotif_irq(int irq,void * context)211 static irqreturn_t pcie_bwnotif_irq(int irq, void *context)
212 {
213 struct pcie_device *srv = context;
214 struct pci_dev *port = srv->port;
215 u16 link_status, events;
216 int ret;
217
218 ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status);
219 if (ret != PCIBIOS_SUCCESSFUL)
220 return IRQ_NONE;
221
222 events = link_status & (PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS);
223 if (!events)
224 return IRQ_NONE;
225
226 if (events & PCI_EXP_LNKSTA_LBMS)
227 set_bit(PCI_LINK_LBMS_SEEN, &port->priv_flags);
228
229 pcie_capability_write_word(port, PCI_EXP_LNKSTA, events);
230
231 /*
232 * Interrupts will not be triggered from any further Link Speed
233 * change until LBMS is cleared by the write. Therefore, re-read the
234 * speed (inside pcie_update_link_speed()) after LBMS has been
235 * cleared to avoid missing link speed changes.
236 */
237 pcie_update_link_speed(port->subordinate);
238
239 return IRQ_HANDLED;
240 }
241
pcie_reset_lbms(struct pci_dev * port)242 void pcie_reset_lbms(struct pci_dev *port)
243 {
244 clear_bit(PCI_LINK_LBMS_SEEN, &port->priv_flags);
245 pcie_capability_write_word(port, PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_LBMS);
246 }
247
pcie_bwnotif_probe(struct pcie_device * srv)248 static int pcie_bwnotif_probe(struct pcie_device *srv)
249 {
250 struct pci_dev *port = srv->port;
251 int ret;
252
253 /* Can happen if we run out of bus numbers during enumeration. */
254 if (!port->subordinate)
255 return -ENODEV;
256
257 struct pcie_bwctrl_data *data = devm_kzalloc(&srv->device,
258 sizeof(*data), GFP_KERNEL);
259 if (!data)
260 return -ENOMEM;
261
262 ret = devm_mutex_init(&srv->device, &data->set_speed_mutex);
263 if (ret)
264 return ret;
265
266 scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) {
267 port->link_bwctrl = data;
268
269 ret = request_irq(srv->irq, pcie_bwnotif_irq,
270 IRQF_SHARED, "PCIe bwctrl", srv);
271 if (ret) {
272 port->link_bwctrl = NULL;
273 return ret;
274 }
275
276 pcie_bwnotif_enable(srv);
277 }
278
279 pci_dbg(port, "enabled with IRQ %d\n", srv->irq);
280
281 /* Don't fail on errors. Don't leave IS_ERR() "pointer" into ->cdev */
282 port->link_bwctrl->cdev = pcie_cooling_device_register(port);
283 if (IS_ERR(port->link_bwctrl->cdev))
284 port->link_bwctrl->cdev = NULL;
285
286 return 0;
287 }
288
pcie_bwnotif_remove(struct pcie_device * srv)289 static void pcie_bwnotif_remove(struct pcie_device *srv)
290 {
291 struct pcie_bwctrl_data *data = srv->port->link_bwctrl;
292
293 pcie_cooling_device_unregister(data->cdev);
294
295 scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) {
296 pcie_bwnotif_disable(srv->port);
297
298 free_irq(srv->irq, srv);
299
300 srv->port->link_bwctrl = NULL;
301 }
302 }
303
pcie_bwnotif_suspend(struct pcie_device * srv)304 static int pcie_bwnotif_suspend(struct pcie_device *srv)
305 {
306 pcie_bwnotif_disable(srv->port);
307 return 0;
308 }
309
/*
 * Resume callback: enable the bandwidth notifications again, which also
 * refreshes the cached link speed. Always returns 0.
 */
static int pcie_bwnotif_resume(struct pcie_device *srv)
{
	pcie_bwnotif_enable(srv);

	return 0;
}
315
316 static struct pcie_port_service_driver pcie_bwctrl_driver = {
317 .name = "pcie_bwctrl",
318 .port_type = PCIE_ANY_PORT,
319 .service = PCIE_PORT_SERVICE_BWCTRL,
320 .probe = pcie_bwnotif_probe,
321 .suspend = pcie_bwnotif_suspend,
322 .resume = pcie_bwnotif_resume,
323 .remove = pcie_bwnotif_remove,
324 };
325
pcie_bwctrl_init(void)326 int __init pcie_bwctrl_init(void)
327 {
328 return pcie_port_service_register(&pcie_bwctrl_driver);
329 }
330