1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * PCIe bandwidth controller 4 * 5 * Author: Alexandru Gagniuc <mr.nuke.me@gmail.com> 6 * 7 * Copyright (C) 2019 Dell Inc 8 * Copyright (C) 2023-2024 Intel Corporation 9 * 10 * The PCIe bandwidth controller provides a way to alter PCIe Link Speeds 11 * and notify the operating system when the Link Width or Speed changes. The 12 * notification capability is required for all Root Ports and Downstream 13 * Ports supporting Link Width wider than x1 and/or multiple Link Speeds. 14 * 15 * This service port driver hooks into the Bandwidth Notification interrupt 16 * watching for changes or links becoming degraded in operation. It updates 17 * the cached Current Link Speed that is exposed to user space through sysfs. 18 */ 19 20 #define dev_fmt(fmt) "bwctrl: " fmt 21 22 #include <linux/atomic.h> 23 #include <linux/bitops.h> 24 #include <linux/bits.h> 25 #include <linux/cleanup.h> 26 #include <linux/errno.h> 27 #include <linux/interrupt.h> 28 #include <linux/mutex.h> 29 #include <linux/pci.h> 30 #include <linux/pci-bwctrl.h> 31 #include <linux/rwsem.h> 32 #include <linux/slab.h> 33 #include <linux/types.h> 34 35 #include "../pci.h" 36 #include "portdrv.h" 37 38 /** 39 * struct pcie_bwctrl_data - PCIe bandwidth controller 40 * @set_speed_mutex: Serializes link speed changes 41 * @lbms_count: Count for LBMS (since last reset) 42 * @cdev: Thermal cooling device associated with the port 43 */ 44 struct pcie_bwctrl_data { 45 struct mutex set_speed_mutex; 46 atomic_t lbms_count; 47 struct thermal_cooling_device *cdev; 48 }; 49 50 /* 51 * Prevent port removal during LBMS count accessors and Link Speed changes. 52 * 53 * These have to be differentiated because pcie_bwctrl_change_speed() calls 54 * pcie_retrain_link() which uses LBMS count reset accessor on success 55 * (using just one rwsem triggers "possible recursive locking detected" 56 * warning). 57 */ 58 static DECLARE_RWSEM(pcie_bwctrl_lbms_rwsem); 59 static DECLARE_RWSEM(pcie_bwctrl_setspeed_rwsem); 60 61 static bool pcie_valid_speed(enum pci_bus_speed speed) 62 { 63 return (speed >= PCIE_SPEED_2_5GT) && (speed <= PCIE_SPEED_64_0GT); 64 } 65 66 static u16 pci_bus_speed2lnkctl2(enum pci_bus_speed speed) 67 { 68 static const u8 speed_conv[] = { 69 [PCIE_SPEED_2_5GT] = PCI_EXP_LNKCTL2_TLS_2_5GT, 70 [PCIE_SPEED_5_0GT] = PCI_EXP_LNKCTL2_TLS_5_0GT, 71 [PCIE_SPEED_8_0GT] = PCI_EXP_LNKCTL2_TLS_8_0GT, 72 [PCIE_SPEED_16_0GT] = PCI_EXP_LNKCTL2_TLS_16_0GT, 73 [PCIE_SPEED_32_0GT] = PCI_EXP_LNKCTL2_TLS_32_0GT, 74 [PCIE_SPEED_64_0GT] = PCI_EXP_LNKCTL2_TLS_64_0GT, 75 }; 76 77 if (WARN_ON_ONCE(!pcie_valid_speed(speed))) 78 return 0; 79 80 return speed_conv[speed]; 81 } 82 83 static inline u16 pcie_supported_speeds2target_speed(u8 supported_speeds) 84 { 85 return __fls(supported_speeds); 86 } 87 88 /** 89 * pcie_bwctrl_select_speed - Select Target Link Speed 90 * @port: PCIe Port 91 * @speed_req: Requested PCIe Link Speed 92 * 93 * Select Target Link Speed by take into account Supported Link Speeds of 94 * both the Root Port and the Endpoint. 95 * 96 * Return: Target Link Speed (1=2.5GT/s, 2=5GT/s, 3=8GT/s, etc.) 97 */ 98 static u16 pcie_bwctrl_select_speed(struct pci_dev *port, enum pci_bus_speed speed_req) 99 { 100 struct pci_bus *bus = port->subordinate; 101 u8 desired_speeds, supported_speeds; 102 struct pci_dev *dev; 103 104 desired_speeds = GENMASK(pci_bus_speed2lnkctl2(speed_req), 105 __fls(PCI_EXP_LNKCAP2_SLS_2_5GB)); 106 107 supported_speeds = port->supported_speeds; 108 if (bus) { 109 down_read(&pci_bus_sem); 110 dev = list_first_entry_or_null(&bus->devices, struct pci_dev, bus_list); 111 if (dev) 112 supported_speeds &= dev->supported_speeds; 113 up_read(&pci_bus_sem); 114 } 115 if (!supported_speeds) 116 return PCI_EXP_LNKCAP2_SLS_2_5GB; 117 118 return pcie_supported_speeds2target_speed(supported_speeds & desired_speeds); 119 } 120 121 static int pcie_bwctrl_change_speed(struct pci_dev *port, u16 target_speed, bool use_lt) 122 { 123 int ret; 124 125 ret = pcie_capability_clear_and_set_word(port, PCI_EXP_LNKCTL2, 126 PCI_EXP_LNKCTL2_TLS, target_speed); 127 if (ret != PCIBIOS_SUCCESSFUL) 128 return pcibios_err_to_errno(ret); 129 130 ret = pcie_retrain_link(port, use_lt); 131 if (ret < 0) 132 return ret; 133 134 /* 135 * Ensure link speed updates also with platforms that have problems 136 * with notifications. 137 */ 138 if (port->subordinate) 139 pcie_update_link_speed(port->subordinate); 140 141 return 0; 142 } 143 144 /** 145 * pcie_set_target_speed - Set downstream Link Speed for PCIe Port 146 * @port: PCIe Port 147 * @speed_req: Requested PCIe Link Speed 148 * @use_lt: Wait for the LT or DLLLA bit to detect the end of link training 149 * 150 * Attempt to set PCIe Port Link Speed to @speed_req. @speed_req may be 151 * adjusted downwards to the best speed supported by both the Port and PCIe 152 * Device underneath it. 153 * 154 * Return: 155 * * 0 - on success 156 * * -EINVAL - @speed_req is not a PCIe Link Speed 157 * * -ENODEV - @port is not controllable 158 * * -ETIMEDOUT - changing Link Speed took too long 159 * * -EAGAIN - Link Speed was changed but @speed_req was not achieved 160 */ 161 int pcie_set_target_speed(struct pci_dev *port, enum pci_bus_speed speed_req, 162 bool use_lt) 163 { 164 struct pci_bus *bus = port->subordinate; 165 u16 target_speed; 166 int ret; 167 168 if (WARN_ON_ONCE(!pcie_valid_speed(speed_req))) 169 return -EINVAL; 170 171 if (bus && bus->cur_bus_speed == speed_req) 172 return 0; 173 174 target_speed = pcie_bwctrl_select_speed(port, speed_req); 175 176 scoped_guard(rwsem_read, &pcie_bwctrl_setspeed_rwsem) { 177 struct pcie_bwctrl_data *data = port->link_bwctrl; 178 179 /* 180 * port->link_bwctrl is NULL during initial scan when called 181 * e.g. from the Target Speed quirk. 182 */ 183 if (data) 184 mutex_lock(&data->set_speed_mutex); 185 186 ret = pcie_bwctrl_change_speed(port, target_speed, use_lt); 187 188 if (data) 189 mutex_unlock(&data->set_speed_mutex); 190 } 191 192 /* 193 * Despite setting higher speed into the Target Link Speed, empty 194 * bus won't train to 5GT+ speeds. 195 */ 196 if (!ret && bus && bus->cur_bus_speed != speed_req && 197 !list_empty(&bus->devices)) 198 ret = -EAGAIN; 199 200 return ret; 201 } 202 203 static void pcie_bwnotif_enable(struct pcie_device *srv) 204 { 205 struct pcie_bwctrl_data *data = srv->port->link_bwctrl; 206 struct pci_dev *port = srv->port; 207 u16 link_status; 208 int ret; 209 210 /* Count LBMS seen so far as one */ 211 ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status); 212 if (ret == PCIBIOS_SUCCESSFUL && link_status & PCI_EXP_LNKSTA_LBMS) 213 atomic_inc(&data->lbms_count); 214 215 pcie_capability_set_word(port, PCI_EXP_LNKCTL, 216 PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE); 217 pcie_capability_write_word(port, PCI_EXP_LNKSTA, 218 PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS); 219 220 /* 221 * Update after enabling notifications & clearing status bits ensures 222 * link speed is up to date. 223 */ 224 pcie_update_link_speed(port->subordinate); 225 } 226 227 static void pcie_bwnotif_disable(struct pci_dev *port) 228 { 229 pcie_capability_clear_word(port, PCI_EXP_LNKCTL, 230 PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE); 231 } 232 233 static irqreturn_t pcie_bwnotif_irq(int irq, void *context) 234 { 235 struct pcie_device *srv = context; 236 struct pcie_bwctrl_data *data = srv->port->link_bwctrl; 237 struct pci_dev *port = srv->port; 238 u16 link_status, events; 239 int ret; 240 241 ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status); 242 if (ret != PCIBIOS_SUCCESSFUL) 243 return IRQ_NONE; 244 245 events = link_status & (PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS); 246 if (!events) 247 return IRQ_NONE; 248 249 if (events & PCI_EXP_LNKSTA_LBMS) 250 atomic_inc(&data->lbms_count); 251 252 pcie_capability_write_word(port, PCI_EXP_LNKSTA, events); 253 254 /* 255 * Interrupts will not be triggered from any further Link Speed 256 * change until LBMS is cleared by the write. Therefore, re-read the 257 * speed (inside pcie_update_link_speed()) after LBMS has been 258 * cleared to avoid missing link speed changes. 259 */ 260 pcie_update_link_speed(port->subordinate); 261 262 return IRQ_HANDLED; 263 } 264 265 void pcie_reset_lbms_count(struct pci_dev *port) 266 { 267 struct pcie_bwctrl_data *data; 268 269 guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem); 270 data = port->link_bwctrl; 271 if (data) 272 atomic_set(&data->lbms_count, 0); 273 else 274 pcie_capability_write_word(port, PCI_EXP_LNKSTA, 275 PCI_EXP_LNKSTA_LBMS); 276 } 277 278 int pcie_lbms_count(struct pci_dev *port, unsigned long *val) 279 { 280 struct pcie_bwctrl_data *data; 281 282 guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem); 283 data = port->link_bwctrl; 284 if (!data) 285 return -ENOTTY; 286 287 *val = atomic_read(&data->lbms_count); 288 289 return 0; 290 } 291 292 static int pcie_bwnotif_probe(struct pcie_device *srv) 293 { 294 struct pci_dev *port = srv->port; 295 int ret; 296 297 struct pcie_bwctrl_data *data = devm_kzalloc(&srv->device, 298 sizeof(*data), GFP_KERNEL); 299 if (!data) 300 return -ENOMEM; 301 302 ret = devm_mutex_init(&srv->device, &data->set_speed_mutex); 303 if (ret) 304 return ret; 305 306 ret = devm_request_irq(&srv->device, srv->irq, pcie_bwnotif_irq, 307 IRQF_SHARED, "PCIe bwctrl", srv); 308 if (ret) 309 return ret; 310 311 scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) { 312 scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) { 313 port->link_bwctrl = no_free_ptr(data); 314 pcie_bwnotif_enable(srv); 315 } 316 } 317 318 pci_dbg(port, "enabled with IRQ %d\n", srv->irq); 319 320 /* Don't fail on errors. Don't leave IS_ERR() "pointer" into ->cdev */ 321 port->link_bwctrl->cdev = pcie_cooling_device_register(port); 322 if (IS_ERR(port->link_bwctrl->cdev)) 323 port->link_bwctrl->cdev = NULL; 324 325 return 0; 326 } 327 328 static void pcie_bwnotif_remove(struct pcie_device *srv) 329 { 330 struct pcie_bwctrl_data *data = srv->port->link_bwctrl; 331 332 pcie_cooling_device_unregister(data->cdev); 333 334 pcie_bwnotif_disable(srv->port); 335 336 scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) 337 scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) 338 srv->port->link_bwctrl = NULL; 339 } 340 341 static int pcie_bwnotif_suspend(struct pcie_device *srv) 342 { 343 pcie_bwnotif_disable(srv->port); 344 return 0; 345 } 346 347 static int pcie_bwnotif_resume(struct pcie_device *srv) 348 { 349 pcie_bwnotif_enable(srv); 350 return 0; 351 } 352 353 static struct pcie_port_service_driver pcie_bwctrl_driver = { 354 .name = "pcie_bwctrl", 355 .port_type = PCIE_ANY_PORT, 356 .service = PCIE_PORT_SERVICE_BWCTRL, 357 .probe = pcie_bwnotif_probe, 358 .suspend = pcie_bwnotif_suspend, 359 .resume = pcie_bwnotif_resume, 360 .remove = pcie_bwnotif_remove, 361 }; 362 363 int __init pcie_bwctrl_init(void) 364 { 365 return pcie_port_service_register(&pcie_bwctrl_driver); 366 } 367