1 // SPDX-License-Identifier: GPL-2.0 2 3 #define pr_fmt(fmt) "PCI: " fmt 4 5 #include <linux/pci.h> 6 #include <linux/acpi.h> 7 #include <linux/init.h> 8 #include <linux/irq.h> 9 #include <linux/dmi.h> 10 #include <linux/slab.h> 11 #include <linux/pci-acpi.h> 12 #include <asm/numa.h> 13 #include <asm/pci_x86.h> 14 15 struct pci_root_info { 16 struct acpi_pci_root_info common; 17 struct pci_sysdata sd; 18 #ifdef CONFIG_PCI_MMCONFIG 19 bool mcfg_added; 20 u8 start_bus; 21 u8 end_bus; 22 #endif 23 }; 24 25 bool pci_use_e820 = true; 26 static bool pci_use_crs = true; 27 static bool pci_ignore_seg; 28 29 static int __init set_use_crs(const struct dmi_system_id *id) 30 { 31 pci_use_crs = true; 32 return 0; 33 } 34 35 static int __init set_nouse_crs(const struct dmi_system_id *id) 36 { 37 pci_use_crs = false; 38 return 0; 39 } 40 41 static int __init set_ignore_seg(const struct dmi_system_id *id) 42 { 43 pr_info("%s detected: ignoring ACPI _SEG\n", id->ident); 44 pci_ignore_seg = true; 45 return 0; 46 } 47 48 static int __init set_no_e820(const struct dmi_system_id *id) 49 { 50 pr_info("%s detected: not clipping E820 regions from _CRS\n", 51 id->ident); 52 pci_use_e820 = false; 53 return 0; 54 } 55 56 static const struct dmi_system_id pci_crs_quirks[] __initconst = { 57 /* http://bugzilla.kernel.org/show_bug.cgi?id=14183 */ 58 { 59 .callback = set_use_crs, 60 .ident = "IBM System x3800", 61 .matches = { 62 DMI_MATCH(DMI_SYS_VENDOR, "IBM"), 63 DMI_MATCH(DMI_PRODUCT_NAME, "x3800"), 64 }, 65 }, 66 /* https://bugzilla.kernel.org/show_bug.cgi?id=16007 */ 67 /* 2006 AMD HT/VIA system with two host bridges */ 68 { 69 .callback = set_use_crs, 70 .ident = "ASRock ALiveSATA2-GLAN", 71 .matches = { 72 DMI_MATCH(DMI_PRODUCT_NAME, "ALiveSATA2-GLAN"), 73 }, 74 }, 75 /* https://bugzilla.kernel.org/show_bug.cgi?id=30552 */ 76 /* 2006 AMD HT/VIA system with two host bridges */ 77 { 78 .callback = set_use_crs, 79 .ident = "ASUS M2V-MX SE", 80 .matches = { 81 DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), 82 DMI_MATCH(DMI_BOARD_NAME, "M2V-MX SE"), 83 DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), 84 }, 85 }, 86 /* https://bugzilla.kernel.org/show_bug.cgi?id=42619 */ 87 { 88 .callback = set_use_crs, 89 .ident = "MSI MS-7253", 90 .matches = { 91 DMI_MATCH(DMI_BOARD_VENDOR, "MICRO-STAR INTERNATIONAL CO., LTD"), 92 DMI_MATCH(DMI_BOARD_NAME, "MS-7253"), 93 DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"), 94 }, 95 }, 96 /* https://bugs.launchpad.net/ubuntu/+source/alsa-driver/+bug/931368 */ 97 /* https://bugs.launchpad.net/ubuntu/+source/alsa-driver/+bug/1033299 */ 98 { 99 .callback = set_use_crs, 100 .ident = "Foxconn K8M890-8237A", 101 .matches = { 102 DMI_MATCH(DMI_BOARD_VENDOR, "Foxconn"), 103 DMI_MATCH(DMI_BOARD_NAME, "K8M890-8237A"), 104 DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"), 105 }, 106 }, 107 108 /* Now for the blacklist.. */ 109 110 /* https://bugzilla.redhat.com/show_bug.cgi?id=769657 */ 111 { 112 .callback = set_nouse_crs, 113 .ident = "Dell Studio 1557", 114 .matches = { 115 DMI_MATCH(DMI_BOARD_VENDOR, "Dell Inc."), 116 DMI_MATCH(DMI_PRODUCT_NAME, "Studio 1557"), 117 DMI_MATCH(DMI_BIOS_VERSION, "A09"), 118 }, 119 }, 120 /* https://bugzilla.redhat.com/show_bug.cgi?id=769657 */ 121 { 122 .callback = set_nouse_crs, 123 .ident = "Thinkpad SL510", 124 .matches = { 125 DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), 126 DMI_MATCH(DMI_BOARD_NAME, "2847DFG"), 127 DMI_MATCH(DMI_BIOS_VERSION, "6JET85WW (1.43 )"), 128 }, 129 }, 130 /* https://bugzilla.kernel.org/show_bug.cgi?id=42606 */ 131 { 132 .callback = set_nouse_crs, 133 .ident = "Supermicro X8DTH", 134 .matches = { 135 DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"), 136 DMI_MATCH(DMI_PRODUCT_NAME, "X8DTH-i/6/iF/6F"), 137 DMI_MATCH(DMI_BIOS_VERSION, "2.0a"), 138 }, 139 }, 140 141 /* https://bugzilla.kernel.org/show_bug.cgi?id=15362 */ 142 { 143 .callback = set_ignore_seg, 144 .ident = "HP xw9300", 145 .matches = { 146 DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), 147 DMI_MATCH(DMI_PRODUCT_NAME, "HP xw9300 Workstation"), 148 }, 149 }, 150 151 /* 152 * Many Lenovo models with "IIL" in their DMI_PRODUCT_VERSION have 153 * an E820 reserved region that covers the entire 32-bit host 154 * bridge memory window from _CRS. Using the E820 region to clip 155 * _CRS means no space is available for hot-added or uninitialized 156 * PCI devices. This typically breaks I2C controllers for touchpads 157 * and hot-added Thunderbolt devices. See the commit log for 158 * models known to require this quirk and related bug reports. 159 */ 160 { 161 .callback = set_no_e820, 162 .ident = "Lenovo *IIL* product version", 163 .matches = { 164 DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), 165 DMI_MATCH(DMI_PRODUCT_VERSION, "IIL"), 166 }, 167 }, 168 169 /* 170 * The Acer Spin 5 (SP513-54N) has the same E820 reservation covering 171 * the entire _CRS 32-bit window issue as the Lenovo *IIL* models. 172 * See https://bugs.launchpad.net/bugs/1884232 173 */ 174 { 175 .callback = set_no_e820, 176 .ident = "Acer Spin 5 (SP513-54N)", 177 .matches = { 178 DMI_MATCH(DMI_SYS_VENDOR, "Acer"), 179 DMI_MATCH(DMI_PRODUCT_NAME, "Spin SP513-54N"), 180 }, 181 }, 182 183 /* 184 * Clevo X170KM-G barebones have the same E820 reservation covering 185 * the entire _CRS 32-bit window issue as the Lenovo *IIL* models. 186 * See https://bugzilla.kernel.org/show_bug.cgi?id=214259 187 */ 188 { 189 .callback = set_no_e820, 190 .ident = "Clevo X170KM-G Barebone", 191 .matches = { 192 DMI_MATCH(DMI_BOARD_NAME, "X170KM-G"), 193 }, 194 }, 195 {} 196 }; 197 198 void __init pci_acpi_crs_quirks(void) 199 { 200 int year = dmi_get_bios_year(); 201 202 if (year >= 0 && year < 2008 && iomem_resource.end <= 0xffffffff) 203 pci_use_crs = false; 204 205 /* 206 * Some firmware includes unusable space (host bridge registers, 207 * hidden PCI device BARs, etc) in PCI host bridge _CRS. This is a 208 * firmware defect, and 4dc2287c1805 ("x86: avoid E820 regions when 209 * allocating address space") has clipped out the unusable space in 210 * the past. 211 * 212 * But other firmware supplies E820 reserved regions that cover 213 * entire _CRS windows, so clipping throws away the entire window, 214 * leaving none for hot-added or uninitialized devices. These E820 215 * entries are probably *not* a firmware defect, so disable the 216 * clipping by default for post-2022 machines. 217 * 218 * We already have quirks to disable clipping for pre-2023 219 * machines, and we'll likely need quirks to *enable* clipping for 220 * post-2022 machines that incorrectly include unusable space in 221 * _CRS. 222 */ 223 if (year >= 2023) 224 pci_use_e820 = false; 225 226 dmi_check_system(pci_crs_quirks); 227 228 /* 229 * If the user specifies "pci=use_crs" or "pci=nocrs" explicitly, that 230 * takes precedence over anything we figured out above. 231 */ 232 if (pci_probe & PCI_ROOT_NO_CRS) 233 pci_use_crs = false; 234 else if (pci_probe & PCI_USE__CRS) 235 pci_use_crs = true; 236 237 pr_info("%s host bridge windows from ACPI; if necessary, use \"pci=%s\" and report a bug\n", 238 pci_use_crs ? "Using" : "Ignoring", 239 pci_use_crs ? "nocrs" : "use_crs"); 240 241 /* "pci=use_e820"/"pci=no_e820" on the kernel cmdline takes precedence */ 242 if (pci_probe & PCI_NO_E820) 243 pci_use_e820 = false; 244 else if (pci_probe & PCI_USE_E820) 245 pci_use_e820 = true; 246 247 pr_info("%s E820 reservations for host bridge windows\n", 248 pci_use_e820 ? "Using" : "Ignoring"); 249 if (pci_probe & (PCI_NO_E820 | PCI_USE_E820)) 250 pr_info("Please notify linux-pci@vger.kernel.org so future kernels can do this automatically\n"); 251 } 252 253 /* 254 * Check if pdev is part of a PCIe switch that is directly below the 255 * specified bridge. 256 */ 257 static bool pcie_switch_directly_under(struct pci_dev *bridge, 258 struct pci_dev *pdev) 259 { 260 struct pci_dev *parent = pci_upstream_bridge(pdev); 261 262 /* If the device doesn't have a parent, it's not under anything */ 263 if (!parent) 264 return false; 265 266 /* 267 * If the device has a PCIe type, check if it is below the 268 * corresponding PCIe switch components (if applicable). Then check 269 * if its upstream port is directly beneath the specified bridge. 270 */ 271 switch (pci_pcie_type(pdev)) { 272 case PCI_EXP_TYPE_UPSTREAM: 273 return parent == bridge; 274 275 case PCI_EXP_TYPE_DOWNSTREAM: 276 if (pci_pcie_type(parent) != PCI_EXP_TYPE_UPSTREAM) 277 return false; 278 parent = pci_upstream_bridge(parent); 279 return parent == bridge; 280 281 case PCI_EXP_TYPE_ENDPOINT: 282 if (pci_pcie_type(parent) != PCI_EXP_TYPE_DOWNSTREAM) 283 return false; 284 parent = pci_upstream_bridge(parent); 285 if (!parent || pci_pcie_type(parent) != PCI_EXP_TYPE_UPSTREAM) 286 return false; 287 parent = pci_upstream_bridge(parent); 288 return parent == bridge; 289 } 290 291 return false; 292 } 293 294 static bool pcie_has_usb4_host_interface(struct pci_dev *pdev) 295 { 296 struct fwnode_handle *fwnode; 297 298 /* 299 * For USB4, the tunneled PCIe Root or Downstream Ports are marked 300 * with the "usb4-host-interface" ACPI property, so we look for 301 * that first. This should cover most cases. 302 */ 303 fwnode = fwnode_find_reference(dev_fwnode(&pdev->dev), 304 "usb4-host-interface", 0); 305 if (!IS_ERR(fwnode)) { 306 fwnode_handle_put(fwnode); 307 return true; 308 } 309 310 /* 311 * Any integrated Thunderbolt 3/4 PCIe Root Ports from Intel 312 * before Alder Lake do not have the "usb4-host-interface" 313 * property so we use their PCI IDs instead. All these are 314 * tunneled. This list is not expected to grow. 315 */ 316 if (pdev->vendor == PCI_VENDOR_ID_INTEL) { 317 switch (pdev->device) { 318 /* Ice Lake Thunderbolt 3 PCIe Root Ports */ 319 case 0x8a1d: 320 case 0x8a1f: 321 case 0x8a21: 322 case 0x8a23: 323 /* Tiger Lake-LP Thunderbolt 4 PCIe Root Ports */ 324 case 0x9a23: 325 case 0x9a25: 326 case 0x9a27: 327 case 0x9a29: 328 /* Tiger Lake-H Thunderbolt 4 PCIe Root Ports */ 329 case 0x9a2b: 330 case 0x9a2d: 331 case 0x9a2f: 332 case 0x9a31: 333 return true; 334 } 335 } 336 337 return false; 338 } 339 340 bool arch_pci_dev_is_removable(struct pci_dev *pdev) 341 { 342 struct pci_dev *parent, *root; 343 344 /* pdev without a parent or Root Port is never tunneled */ 345 parent = pci_upstream_bridge(pdev); 346 if (!parent) 347 return false; 348 root = pcie_find_root_port(pdev); 349 if (!root) 350 return false; 351 352 /* Internal PCIe devices are not tunneled */ 353 if (!root->external_facing) 354 return false; 355 356 /* Anything directly behind a "usb4-host-interface" is tunneled */ 357 if (pcie_has_usb4_host_interface(parent)) 358 return true; 359 360 /* 361 * Check if this is a discrete Thunderbolt/USB4 controller that is 362 * directly behind the non-USB4 PCIe Root Port marked as 363 * "ExternalFacingPort". Those are not behind a PCIe tunnel. 364 */ 365 if (pcie_switch_directly_under(root, pdev)) 366 return false; 367 368 /* PCIe devices after the discrete chip are tunneled */ 369 return true; 370 } 371 372 #ifdef CONFIG_PCI_MMCONFIG 373 static int check_segment(u16 seg, struct device *dev, char *estr) 374 { 375 if (seg) { 376 dev_err(dev, "%s can't access configuration space under this host bridge\n", 377 estr); 378 return -EIO; 379 } 380 381 /* 382 * Failure in adding MMCFG information is not fatal, 383 * just can't access extended configuration space of 384 * devices under this host bridge. 385 */ 386 dev_warn(dev, "%s can't access extended configuration space under this bridge\n", 387 estr); 388 389 return 0; 390 } 391 392 static int setup_mcfg_map(struct acpi_pci_root_info *ci) 393 { 394 int result, seg; 395 struct pci_root_info *info; 396 struct acpi_pci_root *root = ci->root; 397 struct device *dev = &ci->bridge->dev; 398 399 info = container_of(ci, struct pci_root_info, common); 400 info->start_bus = (u8)root->secondary.start; 401 info->end_bus = (u8)root->secondary.end; 402 info->mcfg_added = false; 403 seg = info->sd.domain; 404 405 dev_dbg(dev, "%s(%04x %pR ECAM %pa)\n", __func__, seg, 406 &root->secondary, &root->mcfg_addr); 407 408 /* return success if MMCFG is not in use */ 409 if (raw_pci_ext_ops && raw_pci_ext_ops != &pci_mmcfg) 410 return 0; 411 412 if (!(pci_probe & PCI_PROBE_MMCONF)) 413 return check_segment(seg, dev, "MMCONFIG is disabled,"); 414 415 result = pci_mmconfig_insert(dev, seg, info->start_bus, info->end_bus, 416 root->mcfg_addr); 417 if (result == 0) { 418 /* enable MMCFG if it hasn't been enabled yet */ 419 if (raw_pci_ext_ops == NULL) 420 raw_pci_ext_ops = &pci_mmcfg; 421 info->mcfg_added = true; 422 } else if (result != -EEXIST) 423 return check_segment(seg, dev, 424 "fail to add MMCONFIG information,"); 425 426 return 0; 427 } 428 429 static void teardown_mcfg_map(struct acpi_pci_root_info *ci) 430 { 431 struct pci_root_info *info; 432 433 info = container_of(ci, struct pci_root_info, common); 434 if (info->mcfg_added) { 435 pci_mmconfig_delete(info->sd.domain, 436 info->start_bus, info->end_bus); 437 info->mcfg_added = false; 438 } 439 } 440 #else 441 static int setup_mcfg_map(struct acpi_pci_root_info *ci) 442 { 443 return 0; 444 } 445 446 static void teardown_mcfg_map(struct acpi_pci_root_info *ci) 447 { 448 } 449 #endif 450 451 static int pci_acpi_root_get_node(struct acpi_pci_root *root) 452 { 453 int busnum = root->secondary.start; 454 struct acpi_device *device = root->device; 455 int node = acpi_get_node(device->handle); 456 457 if (node == NUMA_NO_NODE) { 458 node = x86_pci_root_bus_node(busnum); 459 if (node != 0 && node != NUMA_NO_NODE) 460 dev_info(&device->dev, FW_BUG "no _PXM; falling back to node %d from hardware (may be inconsistent with ACPI node numbers)\n", 461 node); 462 } 463 if (node != NUMA_NO_NODE && !node_online(node)) 464 node = NUMA_NO_NODE; 465 466 return node; 467 } 468 469 static int pci_acpi_root_init_info(struct acpi_pci_root_info *ci) 470 { 471 return setup_mcfg_map(ci); 472 } 473 474 static void pci_acpi_root_release_info(struct acpi_pci_root_info *ci) 475 { 476 teardown_mcfg_map(ci); 477 kfree(container_of(ci, struct pci_root_info, common)); 478 } 479 480 /* 481 * An IO port or MMIO resource assigned to a PCI host bridge may be 482 * consumed by the host bridge itself or available to its child 483 * bus/devices. The ACPI specification defines a bit (Producer/Consumer) 484 * to tell whether the resource is consumed by the host bridge itself, 485 * but firmware hasn't used that bit consistently, so we can't rely on it. 486 * 487 * On x86 and IA64 platforms, all IO port and MMIO resources are assumed 488 * to be available to child bus/devices except one special case: 489 * IO port [0xCF8-0xCFF] is consumed by the host bridge itself 490 * to access PCI configuration space. 491 * 492 * So explicitly filter out PCI CFG IO ports[0xCF8-0xCFF]. 493 */ 494 static bool resource_is_pcicfg_ioport(struct resource *res) 495 { 496 return (res->flags & IORESOURCE_IO) && 497 res->start == 0xCF8 && res->end == 0xCFF; 498 } 499 500 static int pci_acpi_root_prepare_resources(struct acpi_pci_root_info *ci) 501 { 502 struct acpi_device *device = ci->bridge; 503 int busnum = ci->root->secondary.start; 504 struct resource_entry *entry, *tmp; 505 int status; 506 507 status = acpi_pci_probe_root_resources(ci); 508 509 if (pci_use_crs) { 510 resource_list_for_each_entry_safe(entry, tmp, &ci->resources) 511 if (resource_is_pcicfg_ioport(entry->res)) 512 resource_list_destroy_entry(entry); 513 return status; 514 } 515 516 resource_list_for_each_entry_safe(entry, tmp, &ci->resources) { 517 dev_printk(KERN_DEBUG, &device->dev, 518 "host bridge window %pR (ignored)\n", entry->res); 519 resource_list_destroy_entry(entry); 520 } 521 x86_pci_root_bus_resources(busnum, &ci->resources); 522 523 return 0; 524 } 525 526 static struct acpi_pci_root_ops acpi_pci_root_ops = { 527 .pci_ops = &pci_root_ops, 528 .init_info = pci_acpi_root_init_info, 529 .release_info = pci_acpi_root_release_info, 530 .prepare_resources = pci_acpi_root_prepare_resources, 531 }; 532 533 struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) 534 { 535 int domain = root->segment; 536 int busnum = root->secondary.start; 537 int node = pci_acpi_root_get_node(root); 538 struct pci_bus *bus; 539 540 if (pci_ignore_seg) 541 root->segment = domain = 0; 542 543 if (domain && !pci_domains_supported) { 544 pr_warn("pci_bus %04x:%02x: ignored (multiple domains not supported)\n", 545 domain, busnum); 546 return NULL; 547 } 548 549 bus = pci_find_bus(domain, busnum); 550 if (bus) { 551 /* 552 * If the desired bus has been scanned already, replace 553 * its bus->sysdata. 554 */ 555 struct pci_sysdata sd = { 556 .domain = domain, 557 .node = node, 558 .companion = root->device 559 }; 560 561 memcpy(bus->sysdata, &sd, sizeof(sd)); 562 } else { 563 struct pci_root_info *info; 564 565 info = kzalloc(sizeof(*info), GFP_KERNEL); 566 if (!info) 567 dev_err(&root->device->dev, 568 "pci_bus %04x:%02x: ignored (out of memory)\n", 569 domain, busnum); 570 else { 571 info->sd.domain = domain; 572 info->sd.node = node; 573 info->sd.companion = root->device; 574 bus = acpi_pci_root_create(root, &acpi_pci_root_ops, 575 &info->common, &info->sd); 576 } 577 } 578 579 /* After the PCI-E bus has been walked and all devices discovered, 580 * configure any settings of the fabric that might be necessary. 581 */ 582 if (bus) { 583 struct pci_bus *child; 584 list_for_each_entry(child, &bus->children, node) 585 pcie_bus_configure_settings(child); 586 } 587 588 return bus; 589 } 590 591 int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) 592 { 593 /* 594 * We pass NULL as parent to pci_create_root_bus(), so if it is not NULL 595 * here, pci_create_root_bus() has been called by someone else and 596 * sysdata is likely to be different from what we expect. Let it go in 597 * that case. 598 */ 599 if (!bridge->dev.parent) { 600 struct pci_sysdata *sd = bridge->bus->sysdata; 601 ACPI_COMPANION_SET(&bridge->dev, sd->companion); 602 } 603 return 0; 604 } 605 606 int __init pci_acpi_init(void) 607 { 608 struct pci_dev *dev = NULL; 609 610 if (acpi_noirq) 611 return -ENODEV; 612 613 pr_info("Using ACPI for IRQ routing\n"); 614 acpi_irq_penalty_init(); 615 pcibios_enable_irq = acpi_pci_irq_enable; 616 pcibios_disable_irq = acpi_pci_irq_disable; 617 x86_init.pci.init_irq = x86_init_noop; 618 619 if (pci_routeirq) { 620 /* 621 * PCI IRQ routing is set up by pci_enable_device(), but we 622 * also do it here in case there are still broken drivers that 623 * don't use pci_enable_device(). 624 */ 625 pr_info("Routing PCI interrupts for all devices because \"pci=routeirq\" specified\n"); 626 for_each_pci_dev(dev) 627 acpi_pci_irq_enable(dev); 628 } 629 630 return 0; 631 } 632