1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * PCIe bandwidth controller 4 * 5 * Author: Alexandru Gagniuc <mr.nuke.me@gmail.com> 6 * 7 * Copyright (C) 2019 Dell Inc 8 * Copyright (C) 2023-2024 Intel Corporation 9 * 10 * The PCIe bandwidth controller provides a way to alter PCIe Link Speeds 11 * and notify the operating system when the Link Width or Speed changes. The 12 * notification capability is required for all Root Ports and Downstream 13 * Ports supporting Link Width wider than x1 and/or multiple Link Speeds. 14 * 15 * This service port driver hooks into the Bandwidth Notification interrupt 16 * watching for changes or links becoming degraded in operation. It updates 17 * the cached Current Link Speed that is exposed to user space through sysfs. 18 */ 19 20 #define dev_fmt(fmt) "bwctrl: " fmt 21 22 #include <linux/atomic.h> 23 #include <linux/bitops.h> 24 #include <linux/bits.h> 25 #include <linux/cleanup.h> 26 #include <linux/errno.h> 27 #include <linux/interrupt.h> 28 #include <linux/mutex.h> 29 #include <linux/pci.h> 30 #include <linux/pci-bwctrl.h> 31 #include <linux/rwsem.h> 32 #include <linux/slab.h> 33 #include <linux/types.h> 34 35 #include "../pci.h" 36 #include "portdrv.h" 37 38 /** 39 * struct pcie_bwctrl_data - PCIe bandwidth controller 40 * @set_speed_mutex: Serializes link speed changes 41 * @cdev: Thermal cooling device associated with the port 42 */ 43 struct pcie_bwctrl_data { 44 struct mutex set_speed_mutex; 45 struct thermal_cooling_device *cdev; 46 }; 47 48 /* Prevent port removal during Link Speed changes. */ 49 static DECLARE_RWSEM(pcie_bwctrl_setspeed_rwsem); 50 51 static bool pcie_valid_speed(enum pci_bus_speed speed) 52 { 53 return (speed >= PCIE_SPEED_2_5GT) && (speed <= PCIE_SPEED_64_0GT); 54 } 55 56 static u16 pci_bus_speed2lnkctl2(enum pci_bus_speed speed) 57 { 58 static const u8 speed_conv[] = { 59 [PCIE_SPEED_2_5GT] = PCI_EXP_LNKCTL2_TLS_2_5GT, 60 [PCIE_SPEED_5_0GT] = PCI_EXP_LNKCTL2_TLS_5_0GT, 61 [PCIE_SPEED_8_0GT] = PCI_EXP_LNKCTL2_TLS_8_0GT, 62 [PCIE_SPEED_16_0GT] = PCI_EXP_LNKCTL2_TLS_16_0GT, 63 [PCIE_SPEED_32_0GT] = PCI_EXP_LNKCTL2_TLS_32_0GT, 64 [PCIE_SPEED_64_0GT] = PCI_EXP_LNKCTL2_TLS_64_0GT, 65 }; 66 67 if (WARN_ON_ONCE(!pcie_valid_speed(speed))) 68 return 0; 69 70 return speed_conv[speed]; 71 } 72 73 static inline u16 pcie_supported_speeds2target_speed(u8 supported_speeds) 74 { 75 return __fls(supported_speeds); 76 } 77 78 /** 79 * pcie_bwctrl_select_speed - Select Target Link Speed 80 * @port: PCIe Port 81 * @speed_req: Requested PCIe Link Speed 82 * 83 * Select Target Link Speed by take into account Supported Link Speeds of 84 * both the Root Port and the Endpoint. 85 * 86 * Return: Target Link Speed (1=2.5GT/s, 2=5GT/s, 3=8GT/s, etc.) 87 */ 88 static u16 pcie_bwctrl_select_speed(struct pci_dev *port, enum pci_bus_speed speed_req) 89 { 90 struct pci_bus *bus = port->subordinate; 91 u8 desired_speeds, supported_speeds; 92 struct pci_dev *dev; 93 94 desired_speeds = GENMASK(pci_bus_speed2lnkctl2(speed_req), 95 __fls(PCI_EXP_LNKCAP2_SLS_2_5GB)); 96 97 supported_speeds = port->supported_speeds; 98 if (bus) { 99 down_read(&pci_bus_sem); 100 dev = list_first_entry_or_null(&bus->devices, struct pci_dev, bus_list); 101 if (dev) 102 supported_speeds &= dev->supported_speeds; 103 up_read(&pci_bus_sem); 104 } 105 if (!supported_speeds) 106 supported_speeds = PCI_EXP_LNKCAP2_SLS_2_5GB; 107 108 return pcie_supported_speeds2target_speed(supported_speeds & desired_speeds); 109 } 110 111 static int pcie_bwctrl_change_speed(struct pci_dev *port, u16 target_speed, bool use_lt) 112 { 113 int ret; 114 115 ret = pcie_capability_clear_and_set_word(port, PCI_EXP_LNKCTL2, 116 PCI_EXP_LNKCTL2_TLS, target_speed); 117 if (ret != PCIBIOS_SUCCESSFUL) 118 return pcibios_err_to_errno(ret); 119 120 return pcie_retrain_link(port, use_lt); 121 } 122 123 /** 124 * pcie_set_target_speed - Set downstream Link Speed for PCIe Port 125 * @port: PCIe Port 126 * @speed_req: Requested PCIe Link Speed 127 * @use_lt: Wait for the LT or DLLLA bit to detect the end of link training 128 * 129 * Attempt to set PCIe Port Link Speed to @speed_req. @speed_req may be 130 * adjusted downwards to the best speed supported by both the Port and PCIe 131 * Device underneath it. 132 * 133 * Return: 134 * * 0 - on success 135 * * -EINVAL - @speed_req is not a PCIe Link Speed 136 * * -ENODEV - @port is not controllable 137 * * -ETIMEDOUT - changing Link Speed took too long 138 * * -EAGAIN - Link Speed was changed but @speed_req was not achieved 139 */ 140 int pcie_set_target_speed(struct pci_dev *port, enum pci_bus_speed speed_req, 141 bool use_lt) 142 { 143 struct pci_bus *bus = port->subordinate; 144 u16 target_speed; 145 int ret; 146 147 if (WARN_ON_ONCE(!pcie_valid_speed(speed_req))) 148 return -EINVAL; 149 150 if (bus && bus->cur_bus_speed == speed_req) 151 return 0; 152 153 target_speed = pcie_bwctrl_select_speed(port, speed_req); 154 155 scoped_guard(rwsem_read, &pcie_bwctrl_setspeed_rwsem) { 156 struct pcie_bwctrl_data *data = port->link_bwctrl; 157 158 /* 159 * port->link_bwctrl is NULL during initial scan when called 160 * e.g. from the Target Speed quirk. 161 */ 162 if (data) 163 mutex_lock(&data->set_speed_mutex); 164 165 ret = pcie_bwctrl_change_speed(port, target_speed, use_lt); 166 167 if (data) 168 mutex_unlock(&data->set_speed_mutex); 169 } 170 171 /* 172 * Despite setting higher speed into the Target Link Speed, empty 173 * bus won't train to 5GT+ speeds. 174 */ 175 if (!ret && bus && bus->cur_bus_speed != speed_req && 176 !list_empty(&bus->devices)) 177 ret = -EAGAIN; 178 179 return ret; 180 } 181 182 static void pcie_bwnotif_enable(struct pcie_device *srv) 183 { 184 struct pci_dev *port = srv->port; 185 u16 link_status; 186 int ret; 187 188 /* Note if LBMS has been seen so far */ 189 ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status); 190 if (ret == PCIBIOS_SUCCESSFUL && link_status & PCI_EXP_LNKSTA_LBMS) 191 set_bit(PCI_LINK_LBMS_SEEN, &port->priv_flags); 192 193 pcie_capability_set_word(port, PCI_EXP_LNKCTL, 194 PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE); 195 pcie_capability_write_word(port, PCI_EXP_LNKSTA, 196 PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS); 197 198 /* 199 * Update after enabling notifications & clearing status bits ensures 200 * link speed is up to date. 201 */ 202 pcie_update_link_speed(port->subordinate); 203 } 204 205 static void pcie_bwnotif_disable(struct pci_dev *port) 206 { 207 pcie_capability_clear_word(port, PCI_EXP_LNKCTL, 208 PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE); 209 } 210 211 static irqreturn_t pcie_bwnotif_irq(int irq, void *context) 212 { 213 struct pcie_device *srv = context; 214 struct pci_dev *port = srv->port; 215 u16 link_status, events; 216 int ret; 217 218 ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status); 219 if (ret != PCIBIOS_SUCCESSFUL) 220 return IRQ_NONE; 221 222 events = link_status & (PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS); 223 if (!events) 224 return IRQ_NONE; 225 226 if (events & PCI_EXP_LNKSTA_LBMS) 227 set_bit(PCI_LINK_LBMS_SEEN, &port->priv_flags); 228 229 pcie_capability_write_word(port, PCI_EXP_LNKSTA, events); 230 231 /* 232 * Interrupts will not be triggered from any further Link Speed 233 * change until LBMS is cleared by the write. Therefore, re-read the 234 * speed (inside pcie_update_link_speed()) after LBMS has been 235 * cleared to avoid missing link speed changes. 236 */ 237 pcie_update_link_speed(port->subordinate); 238 239 return IRQ_HANDLED; 240 } 241 242 void pcie_reset_lbms(struct pci_dev *port) 243 { 244 clear_bit(PCI_LINK_LBMS_SEEN, &port->priv_flags); 245 pcie_capability_write_word(port, PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_LBMS); 246 } 247 248 static int pcie_bwnotif_probe(struct pcie_device *srv) 249 { 250 struct pci_dev *port = srv->port; 251 int ret; 252 253 /* Can happen if we run out of bus numbers during enumeration. */ 254 if (!port->subordinate) 255 return -ENODEV; 256 257 struct pcie_bwctrl_data *data = devm_kzalloc(&srv->device, 258 sizeof(*data), GFP_KERNEL); 259 if (!data) 260 return -ENOMEM; 261 262 ret = devm_mutex_init(&srv->device, &data->set_speed_mutex); 263 if (ret) 264 return ret; 265 266 scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) { 267 port->link_bwctrl = data; 268 269 ret = request_irq(srv->irq, pcie_bwnotif_irq, 270 IRQF_SHARED, "PCIe bwctrl", srv); 271 if (ret) { 272 port->link_bwctrl = NULL; 273 return ret; 274 } 275 276 pcie_bwnotif_enable(srv); 277 } 278 279 pci_dbg(port, "enabled with IRQ %d\n", srv->irq); 280 281 /* Don't fail on errors. Don't leave IS_ERR() "pointer" into ->cdev */ 282 port->link_bwctrl->cdev = pcie_cooling_device_register(port); 283 if (IS_ERR(port->link_bwctrl->cdev)) 284 port->link_bwctrl->cdev = NULL; 285 286 return 0; 287 } 288 289 static void pcie_bwnotif_remove(struct pcie_device *srv) 290 { 291 struct pcie_bwctrl_data *data = srv->port->link_bwctrl; 292 293 pcie_cooling_device_unregister(data->cdev); 294 295 scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) { 296 pcie_bwnotif_disable(srv->port); 297 298 free_irq(srv->irq, srv); 299 300 srv->port->link_bwctrl = NULL; 301 } 302 } 303 304 static int pcie_bwnotif_suspend(struct pcie_device *srv) 305 { 306 pcie_bwnotif_disable(srv->port); 307 return 0; 308 } 309 310 static int pcie_bwnotif_resume(struct pcie_device *srv) 311 { 312 pcie_bwnotif_enable(srv); 313 return 0; 314 } 315 316 static struct pcie_port_service_driver pcie_bwctrl_driver = { 317 .name = "pcie_bwctrl", 318 .port_type = PCIE_ANY_PORT, 319 .service = PCIE_PORT_SERVICE_BWCTRL, 320 .probe = pcie_bwnotif_probe, 321 .suspend = pcie_bwnotif_suspend, 322 .resume = pcie_bwnotif_resume, 323 .remove = pcie_bwnotif_remove, 324 }; 325 326 int __init pcie_bwctrl_init(void) 327 { 328 return pcie_port_service_register(&pcie_bwctrl_driver); 329 } 330