1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright 2006 Jake Moilanen <moilanen@austin.ibm.com>, IBM Corp. 4 * Copyright 2006-2007 Michael Ellerman, IBM Corp. 5 */ 6 7 #include <linux/crash_dump.h> 8 #include <linux/device.h> 9 #include <linux/irq.h> 10 #include <linux/irqchip/irq-msi-lib.h> 11 #include <linux/irqdomain.h> 12 #include <linux/msi.h> 13 #include <linux/seq_file.h> 14 15 #include <asm/rtas.h> 16 #include <asm/hw_irq.h> 17 #include <asm/ppc-pci.h> 18 #include <asm/machdep.h> 19 20 #include "pseries.h" 21 22 static int query_token, change_token; 23 24 #define RTAS_QUERY_FN 0 25 #define RTAS_CHANGE_FN 1 26 #define RTAS_RESET_FN 2 27 #define RTAS_CHANGE_MSI_FN 3 28 #define RTAS_CHANGE_MSIX_FN 4 29 #define RTAS_CHANGE_32MSI_FN 5 30 #define RTAS_CHANGE_32MSIX_FN 6 31 32 /* RTAS Helpers */ 33 34 static int rtas_change_msi(struct pci_dn *pdn, u32 func, u32 num_irqs) 35 { 36 u32 addr, seq_num, rtas_ret[3]; 37 unsigned long buid; 38 int rc; 39 40 addr = rtas_config_addr(pdn->busno, pdn->devfn, 0); 41 buid = pdn->phb->buid; 42 43 seq_num = 1; 44 do { 45 if (func == RTAS_CHANGE_MSI_FN || func == RTAS_CHANGE_MSIX_FN || 46 func == RTAS_CHANGE_32MSI_FN || func == RTAS_CHANGE_32MSIX_FN) 47 rc = rtas_call(change_token, 6, 4, rtas_ret, addr, 48 BUID_HI(buid), BUID_LO(buid), 49 func, num_irqs, seq_num); 50 else 51 rc = rtas_call(change_token, 6, 3, rtas_ret, addr, 52 BUID_HI(buid), BUID_LO(buid), 53 func, num_irqs, seq_num); 54 55 seq_num = rtas_ret[1]; 56 } while (rtas_busy_delay(rc)); 57 58 /* 59 * If the RTAS call succeeded, return the number of irqs allocated. 60 * If not, make sure we return a negative error code. 61 */ 62 if (rc == 0) 63 rc = rtas_ret[0]; 64 else if (rc > 0) 65 rc = -rc; 66 67 pr_debug("rtas_msi: ibm,change_msi(func=%d,num=%d), got %d rc = %d\n", 68 func, num_irqs, rtas_ret[0], rc); 69 70 return rc; 71 } 72 73 static void rtas_disable_msi(struct pci_dev *pdev) 74 { 75 struct pci_dn *pdn; 76 77 pdn = pci_get_pdn(pdev); 78 if (!pdn) 79 return; 80 81 /* 82 * disabling MSI with the explicit interface also disables MSI-X 83 */ 84 if (rtas_change_msi(pdn, RTAS_CHANGE_MSI_FN, 0) != 0) { 85 /* 86 * may have failed because explicit interface is not 87 * present 88 */ 89 if (rtas_change_msi(pdn, RTAS_CHANGE_FN, 0) != 0) { 90 pr_debug("rtas_msi: Setting MSIs to 0 failed!\n"); 91 } 92 } 93 } 94 95 static int rtas_query_irq_number(struct pci_dn *pdn, int offset) 96 { 97 u32 addr, rtas_ret[2]; 98 unsigned long buid; 99 int rc; 100 101 addr = rtas_config_addr(pdn->busno, pdn->devfn, 0); 102 buid = pdn->phb->buid; 103 104 do { 105 rc = rtas_call(query_token, 4, 3, rtas_ret, addr, 106 BUID_HI(buid), BUID_LO(buid), offset); 107 } while (rtas_busy_delay(rc)); 108 109 if (rc) { 110 pr_debug("rtas_msi: error (%d) querying source number\n", rc); 111 return rc; 112 } 113 114 return rtas_ret[0]; 115 } 116 117 static int check_req(struct pci_dev *pdev, int nvec, char *prop_name) 118 { 119 struct device_node *dn; 120 const __be32 *p; 121 u32 req_msi; 122 123 dn = pci_device_to_OF_node(pdev); 124 125 p = of_get_property(dn, prop_name, NULL); 126 if (!p) { 127 pr_debug("rtas_msi: No %s on %pOF\n", prop_name, dn); 128 return -ENOENT; 129 } 130 131 req_msi = be32_to_cpup(p); 132 if (req_msi < nvec) { 133 pr_debug("rtas_msi: %s requests < %d MSIs\n", prop_name, nvec); 134 135 if (req_msi == 0) /* Be paranoid */ 136 return -ENOSPC; 137 138 return req_msi; 139 } 140 141 return 0; 142 } 143 144 static int check_req_msi(struct pci_dev *pdev, int nvec) 145 { 146 return check_req(pdev, nvec, "ibm,req#msi"); 147 } 148 149 static int check_req_msix(struct pci_dev *pdev, int nvec) 150 { 151 return check_req(pdev, nvec, "ibm,req#msi-x"); 152 } 153 154 /* Quota calculation */ 155 156 static struct device_node *__find_pe_total_msi(struct device_node *node, int *total) 157 { 158 struct device_node *dn; 159 const __be32 *p; 160 161 dn = of_node_get(node); 162 while (dn) { 163 p = of_get_property(dn, "ibm,pe-total-#msi", NULL); 164 if (p) { 165 pr_debug("rtas_msi: found prop on dn %pOF\n", 166 dn); 167 *total = be32_to_cpup(p); 168 return dn; 169 } 170 171 dn = of_get_next_parent(dn); 172 } 173 174 return NULL; 175 } 176 177 static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total) 178 { 179 return __find_pe_total_msi(pci_device_to_OF_node(dev), total); 180 } 181 182 static struct device_node *find_pe_dn(struct pci_dev *dev, int *total) 183 { 184 struct device_node *dn; 185 struct eeh_dev *edev; 186 187 /* Found our PE and assume 8 at that point. */ 188 189 dn = pci_device_to_OF_node(dev); 190 if (!dn) 191 return NULL; 192 193 /* Get the top level device in the PE */ 194 edev = pdn_to_eeh_dev(PCI_DN(dn)); 195 if (edev->pe) 196 edev = list_first_entry(&edev->pe->edevs, struct eeh_dev, 197 entry); 198 dn = pci_device_to_OF_node(edev->pdev); 199 if (!dn) 200 return NULL; 201 202 /* We actually want the parent */ 203 dn = of_get_parent(dn); 204 if (!dn) 205 return NULL; 206 207 /* Hardcode of 8 for old firmwares */ 208 *total = 8; 209 pr_debug("rtas_msi: using PE dn %pOF\n", dn); 210 211 return dn; 212 } 213 214 struct msi_counts { 215 struct device_node *requestor; 216 int num_devices; 217 int request; 218 int quota; 219 int spare; 220 int over_quota; 221 }; 222 223 static void *count_non_bridge_devices(struct device_node *dn, void *data) 224 { 225 struct msi_counts *counts = data; 226 const __be32 *p; 227 u32 class; 228 229 pr_debug("rtas_msi: counting %pOF\n", dn); 230 231 p = of_get_property(dn, "class-code", NULL); 232 class = p ? be32_to_cpup(p) : 0; 233 234 if ((class >> 8) != PCI_CLASS_BRIDGE_PCI) 235 counts->num_devices++; 236 237 return NULL; 238 } 239 240 static void *count_spare_msis(struct device_node *dn, void *data) 241 { 242 struct msi_counts *counts = data; 243 const __be32 *p; 244 int req; 245 246 if (dn == counts->requestor) 247 req = counts->request; 248 else { 249 /* We don't know if a driver will try to use MSI or MSI-X, 250 * so we just have to punt and use the larger of the two. */ 251 req = 0; 252 p = of_get_property(dn, "ibm,req#msi", NULL); 253 if (p) 254 req = be32_to_cpup(p); 255 256 p = of_get_property(dn, "ibm,req#msi-x", NULL); 257 if (p) 258 req = max(req, (int)be32_to_cpup(p)); 259 } 260 261 if (req < counts->quota) 262 counts->spare += counts->quota - req; 263 else if (req > counts->quota) 264 counts->over_quota++; 265 266 return NULL; 267 } 268 269 static int msi_quota_for_device(struct pci_dev *dev, int request) 270 { 271 struct device_node *pe_dn; 272 struct msi_counts counts; 273 int total; 274 275 pr_debug("rtas_msi: calc quota for %s, request %d\n", pci_name(dev), 276 request); 277 278 pe_dn = find_pe_total_msi(dev, &total); 279 if (!pe_dn) 280 pe_dn = find_pe_dn(dev, &total); 281 282 if (!pe_dn) { 283 pr_err("rtas_msi: couldn't find PE for %s\n", pci_name(dev)); 284 goto out; 285 } 286 287 pr_debug("rtas_msi: found PE %pOF\n", pe_dn); 288 289 memset(&counts, 0, sizeof(struct msi_counts)); 290 291 /* Work out how many devices we have below this PE */ 292 pci_traverse_device_nodes(pe_dn, count_non_bridge_devices, &counts); 293 294 if (counts.num_devices == 0) { 295 pr_err("rtas_msi: found 0 devices under PE for %s\n", 296 pci_name(dev)); 297 goto out; 298 } 299 300 counts.quota = total / counts.num_devices; 301 if (request <= counts.quota) 302 goto out; 303 304 /* else, we have some more calculating to do */ 305 counts.requestor = pci_device_to_OF_node(dev); 306 counts.request = request; 307 pci_traverse_device_nodes(pe_dn, count_spare_msis, &counts); 308 309 /* If the quota isn't an integer multiple of the total, we can 310 * use the remainder as spare MSIs for anyone that wants them. */ 311 counts.spare += total % counts.num_devices; 312 313 /* Divide any spare by the number of over-quota requestors */ 314 if (counts.over_quota) 315 counts.quota += counts.spare / counts.over_quota; 316 317 /* And finally clamp the request to the possibly adjusted quota */ 318 request = min(counts.quota, request); 319 320 pr_debug("rtas_msi: request clamped to quota %d\n", request); 321 out: 322 of_node_put(pe_dn); 323 324 return request; 325 } 326 327 static void rtas_hack_32bit_msi_gen2(struct pci_dev *pdev) 328 { 329 u32 addr_hi, addr_lo; 330 331 /* 332 * We should only get in here for IODA1 configs. This is based on the 333 * fact that we using RTAS for MSIs, we don't have the 32 bit MSI RTAS 334 * support, and we are in a PCIe Gen2 slot. 335 */ 336 dev_info(&pdev->dev, 337 "rtas_msi: No 32 bit MSI firmware support, forcing 32 bit MSI\n"); 338 pci_read_config_dword(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI, &addr_hi); 339 addr_lo = 0xffff0000 | ((addr_hi >> (48 - 32)) << 4); 340 pci_write_config_dword(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_LO, addr_lo); 341 pci_write_config_dword(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI, 0); 342 } 343 344 static int rtas_prepare_msi_irqs(struct pci_dev *pdev, int nvec_in, int type, 345 msi_alloc_info_t *arg) 346 { 347 struct pci_dn *pdn; 348 int quota, rc; 349 int nvec = nvec_in; 350 int use_32bit_msi_hack = 0; 351 352 if (type == PCI_CAP_ID_MSIX) 353 rc = check_req_msix(pdev, nvec); 354 else 355 rc = check_req_msi(pdev, nvec); 356 357 if (rc) 358 return rc; 359 360 quota = msi_quota_for_device(pdev, nvec); 361 362 if (quota && quota < nvec) 363 return quota; 364 365 /* 366 * Firmware currently refuse any non power of two allocation 367 * so we round up if the quota will allow it. 368 */ 369 if (type == PCI_CAP_ID_MSIX) { 370 int m = roundup_pow_of_two(nvec); 371 quota = msi_quota_for_device(pdev, m); 372 373 if (quota >= m) 374 nvec = m; 375 } 376 377 pdn = pci_get_pdn(pdev); 378 379 /* 380 * Try the new more explicit firmware interface, if that fails fall 381 * back to the old interface. The old interface is known to never 382 * return MSI-Xs. 383 */ 384 again: 385 if (type == PCI_CAP_ID_MSI) { 386 if (pdev->no_64bit_msi) { 387 rc = rtas_change_msi(pdn, RTAS_CHANGE_32MSI_FN, nvec); 388 if (rc < 0) { 389 /* 390 * We only want to run the 32 bit MSI hack below if 391 * the max bus speed is Gen2 speed 392 */ 393 if (pdev->bus->max_bus_speed != PCIE_SPEED_5_0GT) 394 return rc; 395 396 use_32bit_msi_hack = 1; 397 } 398 } else 399 rc = -1; 400 401 if (rc < 0) 402 rc = rtas_change_msi(pdn, RTAS_CHANGE_MSI_FN, nvec); 403 404 if (rc < 0) { 405 pr_debug("rtas_msi: trying the old firmware call.\n"); 406 rc = rtas_change_msi(pdn, RTAS_CHANGE_FN, nvec); 407 } 408 409 if (use_32bit_msi_hack && rc > 0) 410 rtas_hack_32bit_msi_gen2(pdev); 411 } else { 412 if (pdev->no_64bit_msi) 413 rc = rtas_change_msi(pdn, RTAS_CHANGE_32MSIX_FN, nvec); 414 else 415 rc = rtas_change_msi(pdn, RTAS_CHANGE_MSIX_FN, nvec); 416 } 417 418 if (rc != nvec) { 419 if (nvec != nvec_in) { 420 nvec = nvec_in; 421 goto again; 422 } 423 pr_debug("rtas_msi: rtas_change_msi() failed\n"); 424 return rc; 425 } 426 427 return 0; 428 } 429 430 static int pseries_msi_ops_prepare(struct irq_domain *domain, struct device *dev, 431 int nvec, msi_alloc_info_t *arg) 432 { 433 struct msi_domain_info *info = domain->host_data; 434 struct pci_dev *pdev = to_pci_dev(dev); 435 int type = (info->flags & MSI_FLAG_PCI_MSIX) ? PCI_CAP_ID_MSIX : PCI_CAP_ID_MSI; 436 437 return rtas_prepare_msi_irqs(pdev, nvec, type, arg); 438 } 439 440 /* 441 * RTAS can not disable one MSI at a time. It's all or nothing. Do it 442 * at the end after all IRQs have been freed. 443 */ 444 static void pseries_msi_ops_teardown(struct irq_domain *domain, msi_alloc_info_t *arg) 445 { 446 struct msi_desc *desc = arg->desc; 447 struct pci_dev *pdev = msi_desc_to_pci_dev(desc); 448 449 rtas_disable_msi(pdev); 450 } 451 452 static void pseries_msi_shutdown(struct irq_data *d) 453 { 454 d = d->parent_data; 455 if (d->chip->irq_shutdown) 456 d->chip->irq_shutdown(d); 457 } 458 459 static void pseries_msi_write_msg(struct irq_data *data, struct msi_msg *msg) 460 { 461 struct msi_desc *entry = irq_data_get_msi_desc(data); 462 463 /* 464 * Do not update the MSIx vector table. It's not strictly necessary 465 * because the table is initialized by the underlying hypervisor, PowerVM 466 * or QEMU/KVM. However, if the MSIx vector entry is cleared, any further 467 * activation will fail. This can happen in some drivers (eg. IPR) which 468 * deactivate an IRQ used for testing MSI support. 469 */ 470 entry->msg = *msg; 471 } 472 473 static bool pseries_init_dev_msi_info(struct device *dev, struct irq_domain *domain, 474 struct irq_domain *real_parent, struct msi_domain_info *info) 475 { 476 struct irq_chip *chip = info->chip; 477 478 if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info)) 479 return false; 480 481 chip->irq_shutdown = pseries_msi_shutdown; 482 chip->irq_write_msi_msg = pseries_msi_write_msg; 483 484 info->ops->msi_prepare = pseries_msi_ops_prepare; 485 info->ops->msi_teardown = pseries_msi_ops_teardown; 486 487 return true; 488 } 489 490 #define PSERIES_PCI_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \ 491 MSI_FLAG_USE_DEF_CHIP_OPS | \ 492 MSI_FLAG_PCI_MSI_MASK_PARENT) 493 #define PSERIES_PCI_MSI_FLAGS_SUPPORTED (MSI_GENERIC_FLAGS_MASK | \ 494 MSI_FLAG_PCI_MSIX | \ 495 MSI_FLAG_MSIX_CONTIGUOUS | \ 496 MSI_FLAG_MULTI_PCI_MSI) 497 498 static const struct msi_parent_ops pseries_msi_parent_ops = { 499 .required_flags = PSERIES_PCI_MSI_FLAGS_REQUIRED, 500 .supported_flags = PSERIES_PCI_MSI_FLAGS_SUPPORTED, 501 .chip_flags = MSI_CHIP_FLAG_SET_EOI, 502 .bus_select_token = DOMAIN_BUS_NEXUS, 503 .bus_select_mask = MATCH_PCI_MSI, 504 .prefix = "pSeries-", 505 .init_dev_msi_info = pseries_init_dev_msi_info, 506 }; 507 508 static void pseries_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) 509 { 510 struct pci_dev *dev = msi_desc_to_pci_dev(irq_data_get_msi_desc(data)); 511 512 if (dev->current_state == PCI_D0) 513 __pci_read_msi_msg(irq_data_get_msi_desc(data), msg); 514 else 515 get_cached_msi_msg(data->irq, msg); 516 } 517 518 static struct irq_chip pseries_msi_irq_chip = { 519 .name = "pSeries-MSI", 520 .irq_shutdown = pseries_msi_shutdown, 521 .irq_mask = irq_chip_mask_parent, 522 .irq_unmask = irq_chip_unmask_parent, 523 .irq_eoi = irq_chip_eoi_parent, 524 .irq_set_affinity = irq_chip_set_affinity_parent, 525 .irq_compose_msi_msg = pseries_msi_compose_msg, 526 }; 527 528 static int pseries_irq_parent_domain_alloc(struct irq_domain *domain, unsigned int virq, 529 irq_hw_number_t hwirq) 530 { 531 struct irq_fwspec parent_fwspec; 532 int ret; 533 534 parent_fwspec.fwnode = domain->parent->fwnode; 535 parent_fwspec.param_count = 2; 536 parent_fwspec.param[0] = hwirq; 537 parent_fwspec.param[1] = IRQ_TYPE_EDGE_RISING; 538 539 ret = irq_domain_alloc_irqs_parent(domain, virq, 1, &parent_fwspec); 540 if (ret) 541 return ret; 542 543 return 0; 544 } 545 546 static int pseries_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, 547 unsigned int nr_irqs, void *arg) 548 { 549 struct pci_controller *phb = domain->host_data; 550 msi_alloc_info_t *info = arg; 551 struct msi_desc *desc = info->desc; 552 struct pci_dev *pdev = msi_desc_to_pci_dev(desc); 553 int hwirq; 554 int i, ret; 555 556 hwirq = rtas_query_irq_number(pci_get_pdn(pdev), desc->msi_index); 557 if (hwirq < 0) { 558 dev_err(&pdev->dev, "Failed to query HW IRQ: %d\n", hwirq); 559 return hwirq; 560 } 561 562 dev_dbg(&pdev->dev, "%s bridge %pOF %d/%x #%d\n", __func__, 563 phb->dn, virq, hwirq, nr_irqs); 564 565 for (i = 0; i < nr_irqs; i++) { 566 ret = pseries_irq_parent_domain_alloc(domain, virq + i, hwirq + i); 567 if (ret) 568 goto out; 569 570 irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i, 571 &pseries_msi_irq_chip, domain->host_data); 572 } 573 574 return 0; 575 576 out: 577 /* TODO: handle RTAS cleanup in ->msi_finish() ? */ 578 irq_domain_free_irqs_parent(domain, virq, i); 579 return ret; 580 } 581 582 static void pseries_irq_domain_free(struct irq_domain *domain, unsigned int virq, 583 unsigned int nr_irqs) 584 { 585 struct irq_data *d = irq_domain_get_irq_data(domain, virq); 586 struct pci_controller *phb = irq_data_get_irq_chip_data(d); 587 588 pr_debug("%s bridge %pOF %d #%d\n", __func__, phb->dn, virq, nr_irqs); 589 irq_domain_free_irqs_parent(domain, virq, nr_irqs); 590 } 591 592 static const struct irq_domain_ops pseries_irq_domain_ops = { 593 .select = msi_lib_irq_domain_select, 594 .alloc = pseries_irq_domain_alloc, 595 .free = pseries_irq_domain_free, 596 }; 597 598 static int __pseries_msi_allocate_domains(struct pci_controller *phb, 599 unsigned int count) 600 { 601 struct irq_domain *parent = irq_get_default_domain(); 602 struct irq_domain_info info = { 603 .fwnode = of_fwnode_handle(phb->dn), 604 .ops = &pseries_irq_domain_ops, 605 .host_data = phb, 606 .size = count, 607 .parent = parent, 608 }; 609 610 phb->dev_domain = msi_create_parent_irq_domain(&info, &pseries_msi_parent_ops); 611 if (!phb->dev_domain) { 612 pr_err("PCI: failed to create MSI IRQ domain bridge %pOF (domain %d)\n", 613 phb->dn, phb->global_number); 614 return -ENOMEM; 615 } 616 617 return 0; 618 } 619 620 int pseries_msi_allocate_domains(struct pci_controller *phb) 621 { 622 int count; 623 624 if (!__find_pe_total_msi(phb->dn, &count)) { 625 pr_err("PCI: failed to find MSIs for bridge %pOF (domain %d)\n", 626 phb->dn, phb->global_number); 627 return -ENOSPC; 628 } 629 630 return __pseries_msi_allocate_domains(phb, count); 631 } 632 633 void pseries_msi_free_domains(struct pci_controller *phb) 634 { 635 if (phb->dev_domain) 636 irq_domain_remove(phb->dev_domain); 637 } 638 639 static void rtas_msi_pci_irq_fixup(struct pci_dev *pdev) 640 { 641 /* No LSI -> leave MSIs (if any) configured */ 642 if (!pdev->irq) { 643 dev_dbg(&pdev->dev, "rtas_msi: no LSI, nothing to do.\n"); 644 return; 645 } 646 647 /* No MSI -> MSIs can't have been assigned by fw, leave LSI */ 648 if (check_req_msi(pdev, 1) && check_req_msix(pdev, 1)) { 649 dev_dbg(&pdev->dev, "rtas_msi: no req#msi/x, nothing to do.\n"); 650 return; 651 } 652 653 dev_dbg(&pdev->dev, "rtas_msi: disabling existing MSI.\n"); 654 rtas_disable_msi(pdev); 655 } 656 657 static int rtas_msi_init(void) 658 { 659 query_token = rtas_function_token(RTAS_FN_IBM_QUERY_INTERRUPT_SOURCE_NUMBER); 660 change_token = rtas_function_token(RTAS_FN_IBM_CHANGE_MSI); 661 662 if ((query_token == RTAS_UNKNOWN_SERVICE) || 663 (change_token == RTAS_UNKNOWN_SERVICE)) { 664 pr_debug("rtas_msi: no RTAS tokens, no MSI support.\n"); 665 return -1; 666 } 667 668 pr_debug("rtas_msi: Registering RTAS MSI callbacks.\n"); 669 670 WARN_ON(ppc_md.pci_irq_fixup); 671 ppc_md.pci_irq_fixup = rtas_msi_pci_irq_fixup; 672 673 return 0; 674 } 675 machine_arch_initcall(pseries, rtas_msi_init); 676