1 /* 2 * Support PCI/PCIe on PowerNV platforms 3 * 4 * Copyright 2011 Benjamin Herrenschmidt, IBM Corp. 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 */ 11 12 #undef DEBUG 13 14 #include <linux/kernel.h> 15 #include <linux/pci.h> 16 #include <linux/crash_dump.h> 17 #include <linux/debugfs.h> 18 #include <linux/delay.h> 19 #include <linux/string.h> 20 #include <linux/init.h> 21 #include <linux/bootmem.h> 22 #include <linux/irq.h> 23 #include <linux/io.h> 24 #include <linux/msi.h> 25 #include <linux/memblock.h> 26 27 #include <asm/sections.h> 28 #include <asm/io.h> 29 #include <asm/prom.h> 30 #include <asm/pci-bridge.h> 31 #include <asm/machdep.h> 32 #include <asm/msi_bitmap.h> 33 #include <asm/ppc-pci.h> 34 #include <asm/opal.h> 35 #include <asm/iommu.h> 36 #include <asm/tce.h> 37 #include <asm/xics.h> 38 #include <asm/debug.h> 39 #include <asm/firmware.h> 40 #include <asm/pnv-pci.h> 41 42 #include <misc/cxl.h> 43 44 #include "powernv.h" 45 #include "pci.h" 46 47 static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, 48 const char *fmt, ...) 49 { 50 struct va_format vaf; 51 va_list args; 52 char pfix[32]; 53 54 va_start(args, fmt); 55 56 vaf.fmt = fmt; 57 vaf.va = &args; 58 59 if (pe->pdev) 60 strlcpy(pfix, dev_name(&pe->pdev->dev), sizeof(pfix)); 61 else 62 sprintf(pfix, "%04x:%02x ", 63 pci_domain_nr(pe->pbus), pe->pbus->number); 64 65 printk("%spci %s: [PE# %.3d] %pV", 66 level, pfix, pe->pe_number, &vaf); 67 68 va_end(args); 69 } 70 71 #define pe_err(pe, fmt, ...) \ 72 pe_level_printk(pe, KERN_ERR, fmt, ##__VA_ARGS__) 73 #define pe_warn(pe, fmt, ...) \ 74 pe_level_printk(pe, KERN_WARNING, fmt, ##__VA_ARGS__) 75 #define pe_info(pe, fmt, ...) 
\ 76 pe_level_printk(pe, KERN_INFO, fmt, ##__VA_ARGS__) 77 78 static bool pnv_iommu_bypass_disabled __read_mostly; 79 80 static int __init iommu_setup(char *str) 81 { 82 if (!str) 83 return -EINVAL; 84 85 while (*str) { 86 if (!strncmp(str, "nobypass", 8)) { 87 pnv_iommu_bypass_disabled = true; 88 pr_info("PowerNV: IOMMU bypass window disabled.\n"); 89 break; 90 } 91 str += strcspn(str, ","); 92 if (*str == ',') 93 str++; 94 } 95 96 return 0; 97 } 98 early_param("iommu", iommu_setup); 99 100 /* 101 * stdcix is only supposed to be used in hypervisor real mode as per 102 * the architecture spec 103 */ 104 static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr) 105 { 106 __asm__ __volatile__("stdcix %0,0,%1" 107 : : "r" (val), "r" (paddr) : "memory"); 108 } 109 110 static inline bool pnv_pci_is_mem_pref_64(unsigned long flags) 111 { 112 return ((flags & (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)) == 113 (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)); 114 } 115 116 static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no) 117 { 118 if (!(pe_no >= 0 && pe_no < phb->ioda.total_pe)) { 119 pr_warn("%s: Invalid PE %d on PHB#%x\n", 120 __func__, pe_no, phb->hose->global_number); 121 return; 122 } 123 124 if (test_and_set_bit(pe_no, phb->ioda.pe_alloc)) { 125 pr_warn("%s: PE %d was assigned on PHB#%x\n", 126 __func__, pe_no, phb->hose->global_number); 127 return; 128 } 129 130 phb->ioda.pe_array[pe_no].phb = phb; 131 phb->ioda.pe_array[pe_no].pe_number = pe_no; 132 } 133 134 static int pnv_ioda_alloc_pe(struct pnv_phb *phb) 135 { 136 unsigned long pe; 137 138 do { 139 pe = find_next_zero_bit(phb->ioda.pe_alloc, 140 phb->ioda.total_pe, 0); 141 if (pe >= phb->ioda.total_pe) 142 return IODA_INVALID_PE; 143 } while(test_and_set_bit(pe, phb->ioda.pe_alloc)); 144 145 phb->ioda.pe_array[pe].phb = phb; 146 phb->ioda.pe_array[pe].pe_number = pe; 147 return pe; 148 } 149 150 static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe) 151 { 152 WARN_ON(phb->ioda.pe_array[pe].pdev); 153 154 memset(&phb->ioda.pe_array[pe], 0, sizeof(struct pnv_ioda_pe)); 155 clear_bit(pe, phb->ioda.pe_alloc); 156 } 157 158 /* The default M64 BAR is shared by all PEs */ 159 static int pnv_ioda2_init_m64(struct pnv_phb *phb) 160 { 161 const char *desc; 162 struct resource *r; 163 s64 rc; 164 165 /* Configure the default M64 BAR */ 166 rc = opal_pci_set_phb_mem_window(phb->opal_id, 167 OPAL_M64_WINDOW_TYPE, 168 phb->ioda.m64_bar_idx, 169 phb->ioda.m64_base, 170 0, /* unused */ 171 phb->ioda.m64_size); 172 if (rc != OPAL_SUCCESS) { 173 desc = "configuring"; 174 goto fail; 175 } 176 177 /* Enable the default M64 BAR */ 178 rc = opal_pci_phb_mmio_enable(phb->opal_id, 179 OPAL_M64_WINDOW_TYPE, 180 phb->ioda.m64_bar_idx, 181 OPAL_ENABLE_M64_SPLIT); 182 if (rc != OPAL_SUCCESS) { 183 desc = "enabling"; 184 goto fail; 185 } 186 187 /* Mark the M64 BAR assigned */ 188 set_bit(phb->ioda.m64_bar_idx, &phb->ioda.m64_bar_alloc); 189 190 /* 191 * Strip off the segment used by the reserved PE, which is 192 * expected to be 0 or last one of PE capabicity. 
193 */ 194 r = &phb->hose->mem_resources[1]; 195 if (phb->ioda.reserved_pe == 0) 196 r->start += phb->ioda.m64_segsize; 197 else if (phb->ioda.reserved_pe == (phb->ioda.total_pe - 1)) 198 r->end -= phb->ioda.m64_segsize; 199 else 200 pr_warn(" Cannot strip M64 segment for reserved PE#%d\n", 201 phb->ioda.reserved_pe); 202 203 return 0; 204 205 fail: 206 pr_warn(" Failure %lld %s M64 BAR#%d\n", 207 rc, desc, phb->ioda.m64_bar_idx); 208 opal_pci_phb_mmio_enable(phb->opal_id, 209 OPAL_M64_WINDOW_TYPE, 210 phb->ioda.m64_bar_idx, 211 OPAL_DISABLE_M64); 212 return -EIO; 213 } 214 215 static void pnv_ioda2_reserve_m64_pe(struct pnv_phb *phb) 216 { 217 resource_size_t sgsz = phb->ioda.m64_segsize; 218 struct pci_dev *pdev; 219 struct resource *r; 220 int base, step, i; 221 222 /* 223 * Root bus always has full M64 range and root port has 224 * M64 range used in reality. So we're checking root port 225 * instead of root bus. 226 */ 227 list_for_each_entry(pdev, &phb->hose->bus->devices, bus_list) { 228 for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) { 229 r = &pdev->resource[PCI_BRIDGE_RESOURCES + i]; 230 if (!r->parent || 231 !pnv_pci_is_mem_pref_64(r->flags)) 232 continue; 233 234 base = (r->start - phb->ioda.m64_base) / sgsz; 235 for (step = 0; step < resource_size(r) / sgsz; step++) 236 pnv_ioda_reserve_pe(phb, base + step); 237 } 238 } 239 } 240 241 static int pnv_ioda2_pick_m64_pe(struct pnv_phb *phb, 242 struct pci_bus *bus, int all) 243 { 244 resource_size_t segsz = phb->ioda.m64_segsize; 245 struct pci_dev *pdev; 246 struct resource *r; 247 struct pnv_ioda_pe *master_pe, *pe; 248 unsigned long size, *pe_alloc; 249 bool found; 250 int start, i, j; 251 252 /* Root bus shouldn't use M64 */ 253 if (pci_is_root_bus(bus)) 254 return IODA_INVALID_PE; 255 256 /* We support only one M64 window on each bus */ 257 found = false; 258 pci_bus_for_each_resource(bus, r, i) { 259 if (r && r->parent && 260 pnv_pci_is_mem_pref_64(r->flags)) { 261 found = true; 262 break; 263 } 264 } 265 266 /* No M64 window found ? */ 267 if (!found) 268 return IODA_INVALID_PE; 269 270 /* Allocate bitmap */ 271 size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long)); 272 pe_alloc = kzalloc(size, GFP_KERNEL); 273 if (!pe_alloc) { 274 pr_warn("%s: Out of memory !\n", 275 __func__); 276 return IODA_INVALID_PE; 277 } 278 279 /* 280 * Figure out reserved PE numbers by the PE 281 * the its child PEs. 282 */ 283 start = (r->start - phb->ioda.m64_base) / segsz; 284 for (i = 0; i < resource_size(r) / segsz; i++) 285 set_bit(start + i, pe_alloc); 286 287 if (all) 288 goto done; 289 290 /* 291 * If the PE doesn't cover all subordinate buses, 292 * we need subtract from reserved PEs for children. 293 */ 294 list_for_each_entry(pdev, &bus->devices, bus_list) { 295 if (!pdev->subordinate) 296 continue; 297 298 pci_bus_for_each_resource(pdev->subordinate, r, i) { 299 if (!r || !r->parent || 300 !pnv_pci_is_mem_pref_64(r->flags)) 301 continue; 302 303 start = (r->start - phb->ioda.m64_base) / segsz; 304 for (j = 0; j < resource_size(r) / segsz ; j++) 305 clear_bit(start + j, pe_alloc); 306 } 307 } 308 309 /* 310 * the current bus might not own M64 window and that's all 311 * contributed by its child buses. For the case, we needn't 312 * pick M64 dependent PE#. 313 */ 314 if (bitmap_empty(pe_alloc, phb->ioda.total_pe)) { 315 kfree(pe_alloc); 316 return IODA_INVALID_PE; 317 } 318 319 /* 320 * Figure out the master PE and put all slave PEs to master 321 * PE's list to form compound PE. 
322 */ 323 done: 324 master_pe = NULL; 325 i = -1; 326 while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe, i + 1)) < 327 phb->ioda.total_pe) { 328 pe = &phb->ioda.pe_array[i]; 329 330 if (!master_pe) { 331 pe->flags |= PNV_IODA_PE_MASTER; 332 INIT_LIST_HEAD(&pe->slaves); 333 master_pe = pe; 334 } else { 335 pe->flags |= PNV_IODA_PE_SLAVE; 336 pe->master = master_pe; 337 list_add_tail(&pe->list, &master_pe->slaves); 338 } 339 } 340 341 kfree(pe_alloc); 342 return master_pe->pe_number; 343 } 344 345 static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) 346 { 347 struct pci_controller *hose = phb->hose; 348 struct device_node *dn = hose->dn; 349 struct resource *res; 350 const u32 *r; 351 u64 pci_addr; 352 353 /* FIXME: Support M64 for P7IOC */ 354 if (phb->type != PNV_PHB_IODA2) { 355 pr_info(" Not support M64 window\n"); 356 return; 357 } 358 359 if (!firmware_has_feature(FW_FEATURE_OPALv3)) { 360 pr_info(" Firmware too old to support M64 window\n"); 361 return; 362 } 363 364 r = of_get_property(dn, "ibm,opal-m64-window", NULL); 365 if (!r) { 366 pr_info(" No <ibm,opal-m64-window> on %s\n", 367 dn->full_name); 368 return; 369 } 370 371 res = &hose->mem_resources[1]; 372 res->start = of_translate_address(dn, r + 2); 373 res->end = res->start + of_read_number(r + 4, 2) - 1; 374 res->flags = (IORESOURCE_MEM | IORESOURCE_MEM_64 | IORESOURCE_PREFETCH); 375 pci_addr = of_read_number(r, 2); 376 hose->mem_offset[1] = res->start - pci_addr; 377 378 phb->ioda.m64_size = resource_size(res); 379 phb->ioda.m64_segsize = phb->ioda.m64_size / phb->ioda.total_pe; 380 phb->ioda.m64_base = pci_addr; 381 382 pr_info(" MEM64 0x%016llx..0x%016llx -> 0x%016llx\n", 383 res->start, res->end, pci_addr); 384 385 /* Use last M64 BAR to cover M64 window */ 386 phb->ioda.m64_bar_idx = 15; 387 phb->init_m64 = pnv_ioda2_init_m64; 388 phb->reserve_m64_pe = pnv_ioda2_reserve_m64_pe; 389 phb->pick_m64_pe = pnv_ioda2_pick_m64_pe; 390 } 391 392 static void pnv_ioda_freeze_pe(struct pnv_phb *phb, int pe_no) 393 { 394 struct pnv_ioda_pe *pe = &phb->ioda.pe_array[pe_no]; 395 struct pnv_ioda_pe *slave; 396 s64 rc; 397 398 /* Fetch master PE */ 399 if (pe->flags & PNV_IODA_PE_SLAVE) { 400 pe = pe->master; 401 if (WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER))) 402 return; 403 404 pe_no = pe->pe_number; 405 } 406 407 /* Freeze master PE */ 408 rc = opal_pci_eeh_freeze_set(phb->opal_id, 409 pe_no, 410 OPAL_EEH_ACTION_SET_FREEZE_ALL); 411 if (rc != OPAL_SUCCESS) { 412 pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n", 413 __func__, rc, phb->hose->global_number, pe_no); 414 return; 415 } 416 417 /* Freeze slave PEs */ 418 if (!(pe->flags & PNV_IODA_PE_MASTER)) 419 return; 420 421 list_for_each_entry(slave, &pe->slaves, list) { 422 rc = opal_pci_eeh_freeze_set(phb->opal_id, 423 slave->pe_number, 424 OPAL_EEH_ACTION_SET_FREEZE_ALL); 425 if (rc != OPAL_SUCCESS) 426 pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n", 427 __func__, rc, phb->hose->global_number, 428 slave->pe_number); 429 } 430 } 431 432 static int pnv_ioda_unfreeze_pe(struct pnv_phb *phb, int pe_no, int opt) 433 { 434 struct pnv_ioda_pe *pe, *slave; 435 s64 rc; 436 437 /* Find master PE */ 438 pe = &phb->ioda.pe_array[pe_no]; 439 if (pe->flags & PNV_IODA_PE_SLAVE) { 440 pe = pe->master; 441 WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER)); 442 pe_no = pe->pe_number; 443 } 444 445 /* Clear frozen state for master PE */ 446 rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, opt); 447 if (rc != OPAL_SUCCESS) { 448 pr_warn("%s: Failure %lld clear %d on 
PHB#%x-PE#%x\n", 449 __func__, rc, opt, phb->hose->global_number, pe_no); 450 return -EIO; 451 } 452 453 if (!(pe->flags & PNV_IODA_PE_MASTER)) 454 return 0; 455 456 /* Clear frozen state for slave PEs */ 457 list_for_each_entry(slave, &pe->slaves, list) { 458 rc = opal_pci_eeh_freeze_clear(phb->opal_id, 459 slave->pe_number, 460 opt); 461 if (rc != OPAL_SUCCESS) { 462 pr_warn("%s: Failure %lld clear %d on PHB#%x-PE#%x\n", 463 __func__, rc, opt, phb->hose->global_number, 464 slave->pe_number); 465 return -EIO; 466 } 467 } 468 469 return 0; 470 } 471 472 static int pnv_ioda_get_pe_state(struct pnv_phb *phb, int pe_no) 473 { 474 struct pnv_ioda_pe *slave, *pe; 475 u8 fstate, state; 476 __be16 pcierr; 477 s64 rc; 478 479 /* Sanity check on PE number */ 480 if (pe_no < 0 || pe_no >= phb->ioda.total_pe) 481 return OPAL_EEH_STOPPED_PERM_UNAVAIL; 482 483 /* 484 * Fetch the master PE and the PE instance might be 485 * not initialized yet. 486 */ 487 pe = &phb->ioda.pe_array[pe_no]; 488 if (pe->flags & PNV_IODA_PE_SLAVE) { 489 pe = pe->master; 490 WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER)); 491 pe_no = pe->pe_number; 492 } 493 494 /* Check the master PE */ 495 rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, 496 &state, &pcierr, NULL); 497 if (rc != OPAL_SUCCESS) { 498 pr_warn("%s: Failure %lld getting " 499 "PHB#%x-PE#%x state\n", 500 __func__, rc, 501 phb->hose->global_number, pe_no); 502 return OPAL_EEH_STOPPED_TEMP_UNAVAIL; 503 } 504 505 /* Check the slave PE */ 506 if (!(pe->flags & PNV_IODA_PE_MASTER)) 507 return state; 508 509 list_for_each_entry(slave, &pe->slaves, list) { 510 rc = opal_pci_eeh_freeze_status(phb->opal_id, 511 slave->pe_number, 512 &fstate, 513 &pcierr, 514 NULL); 515 if (rc != OPAL_SUCCESS) { 516 pr_warn("%s: Failure %lld getting " 517 "PHB#%x-PE#%x state\n", 518 __func__, rc, 519 phb->hose->global_number, slave->pe_number); 520 return OPAL_EEH_STOPPED_TEMP_UNAVAIL; 521 } 522 523 /* 524 * Override the result based on the ascending 525 * priority. 526 */ 527 if (fstate > state) 528 state = fstate; 529 } 530 531 return state; 532 } 533 534 /* Currently those 2 are only used when MSIs are enabled, this will change 535 * but in the meantime, we need to protect them to avoid warnings 536 */ 537 #ifdef CONFIG_PCI_MSI 538 static struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev) 539 { 540 struct pci_controller *hose = pci_bus_to_host(dev->bus); 541 struct pnv_phb *phb = hose->private_data; 542 struct pci_dn *pdn = pci_get_pdn(dev); 543 544 if (!pdn) 545 return NULL; 546 if (pdn->pe_number == IODA_INVALID_PE) 547 return NULL; 548 return &phb->ioda.pe_array[pdn->pe_number]; 549 } 550 #endif /* CONFIG_PCI_MSI */ 551 552 static int pnv_ioda_set_one_peltv(struct pnv_phb *phb, 553 struct pnv_ioda_pe *parent, 554 struct pnv_ioda_pe *child, 555 bool is_add) 556 { 557 const char *desc = is_add ? "adding" : "removing"; 558 uint8_t op = is_add ? 
OPAL_ADD_PE_TO_DOMAIN : 559 OPAL_REMOVE_PE_FROM_DOMAIN; 560 struct pnv_ioda_pe *slave; 561 long rc; 562 563 /* Parent PE affects child PE */ 564 rc = opal_pci_set_peltv(phb->opal_id, parent->pe_number, 565 child->pe_number, op); 566 if (rc != OPAL_SUCCESS) { 567 pe_warn(child, "OPAL error %ld %s to parent PELTV\n", 568 rc, desc); 569 return -ENXIO; 570 } 571 572 if (!(child->flags & PNV_IODA_PE_MASTER)) 573 return 0; 574 575 /* Compound case: parent PE affects slave PEs */ 576 list_for_each_entry(slave, &child->slaves, list) { 577 rc = opal_pci_set_peltv(phb->opal_id, parent->pe_number, 578 slave->pe_number, op); 579 if (rc != OPAL_SUCCESS) { 580 pe_warn(slave, "OPAL error %ld %s to parent PELTV\n", 581 rc, desc); 582 return -ENXIO; 583 } 584 } 585 586 return 0; 587 } 588 589 static int pnv_ioda_set_peltv(struct pnv_phb *phb, 590 struct pnv_ioda_pe *pe, 591 bool is_add) 592 { 593 struct pnv_ioda_pe *slave; 594 struct pci_dev *pdev; 595 int ret; 596 597 /* 598 * Clear PE frozen state. If it's master PE, we need 599 * clear slave PE frozen state as well. 600 */ 601 if (is_add) { 602 opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number, 603 OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); 604 if (pe->flags & PNV_IODA_PE_MASTER) { 605 list_for_each_entry(slave, &pe->slaves, list) 606 opal_pci_eeh_freeze_clear(phb->opal_id, 607 slave->pe_number, 608 OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); 609 } 610 } 611 612 /* 613 * Associate PE in PELT. We need add the PE into the 614 * corresponding PELT-V as well. Otherwise, the error 615 * originated from the PE might contribute to other 616 * PEs. 617 */ 618 ret = pnv_ioda_set_one_peltv(phb, pe, pe, is_add); 619 if (ret) 620 return ret; 621 622 /* For compound PEs, any one affects all of them */ 623 if (pe->flags & PNV_IODA_PE_MASTER) { 624 list_for_each_entry(slave, &pe->slaves, list) { 625 ret = pnv_ioda_set_one_peltv(phb, slave, pe, is_add); 626 if (ret) 627 return ret; 628 } 629 } 630 631 if (pe->flags & (PNV_IODA_PE_BUS_ALL | PNV_IODA_PE_BUS)) 632 pdev = pe->pbus->self; 633 else 634 pdev = pe->pdev->bus->self; 635 while (pdev) { 636 struct pci_dn *pdn = pci_get_pdn(pdev); 637 struct pnv_ioda_pe *parent; 638 639 if (pdn && pdn->pe_number != IODA_INVALID_PE) { 640 parent = &phb->ioda.pe_array[pdn->pe_number]; 641 ret = pnv_ioda_set_one_peltv(phb, parent, pe, is_add); 642 if (ret) 643 return ret; 644 } 645 646 pdev = pdev->bus->self; 647 } 648 649 return 0; 650 } 651 652 static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) 653 { 654 struct pci_dev *parent; 655 uint8_t bcomp, dcomp, fcomp; 656 long rc, rid_end, rid; 657 658 /* Bus validation ? 
 */
	if (pe->pbus) {
		int count;

		dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER;
		fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
		parent = pe->pbus->self;
		if (pe->flags & PNV_IODA_PE_BUS_ALL)
			count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1;
		else
			count = 1;

		switch(count) {
		case  1: bcomp = OpalPciBusAll;		break;
		case  2: bcomp = OpalPciBus7Bits;	break;
		case  4: bcomp = OpalPciBus6Bits;	break;
		case  8: bcomp = OpalPciBus5Bits;	break;
		case 16: bcomp = OpalPciBus4Bits;	break;
		case 32: bcomp = OpalPciBus3Bits;	break;
		default:
			pr_err("%s: Number of subordinate busses %d"
			       " unsupported\n",
			       pci_name(pe->pbus->self), count);
			/* Do an exact match only */
			bcomp = OpalPciBusAll;
		}
		rid_end = pe->rid + (count << 8);
	} else {
		parent = pe->pdev->bus->self;
		bcomp = OpalPciBusAll;
		dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
		fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
		rid_end = pe->rid + 1;
	}

	/*
	 * Associate the PE in the PELT. We need to add the PE into the
	 * corresponding PELT-V as well. Otherwise, errors originating
	 * from the PE might be propagated to other PEs.
	 */
	rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
			     bcomp, dcomp, fcomp, OPAL_MAP_PE);
	if (rc) {
		pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc);
		return -ENXIO;
	}

	/* Configure PELTV */
	pnv_ioda_set_peltv(phb, pe, true);

	/* Setup reverse map */
	for (rid = pe->rid; rid < rid_end; rid++)
		phb->ioda.pe_rmap[rid] = pe->pe_number;

	/* Set up one MVE on IODA1 */
	if (phb->type != PNV_PHB_IODA1) {
		pe->mve_number = 0;
		goto out;
	}

	pe->mve_number = pe->pe_number;
	rc = opal_pci_set_mve(phb->opal_id, pe->mve_number, pe->pe_number);
	if (rc != OPAL_SUCCESS) {
		pe_err(pe, "OPAL error %ld setting up MVE %d\n",
		       rc, pe->mve_number);
		pe->mve_number = -1;
	} else {
		rc = opal_pci_set_mve_enable(phb->opal_id,
					     pe->mve_number, OPAL_ENABLE_MVE);
		if (rc) {
			pe_err(pe, "OPAL error %ld enabling MVE %d\n",
			       rc, pe->mve_number);
			pe->mve_number = -1;
		}
	}

out:
	return 0;
}

static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb,
				       struct pnv_ioda_pe *pe)
{
	struct pnv_ioda_pe *lpe;

	list_for_each_entry(lpe, &phb->ioda.pe_dma_list, dma_link) {
		if (lpe->dma_weight < pe->dma_weight) {
			list_add_tail(&pe->dma_link, &lpe->dma_link);
			return;
		}
	}
	list_add_tail(&pe->dma_link, &phb->ioda.pe_dma_list);
}
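/*
 * Note on DMA weights: the per-device weights assigned below are simple
 * heuristics (bridges get 0, slow USB controllers 3, RAID adapters 15,
 * everything else 10). pnv_ioda_setup_dma() later uses the accumulated
 * per-PE weight to decide how many 32-bit TCE segments a PE receives on
 * IODA1; IODA2 PHBs ignore the weight and give each PE the whole 32-bit
 * DMA window.
 */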
static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
{
	/* This is quite simplistic. The "base" weight of a device
	 * is 10. 0 means no DMA is to be accounted for it.
	 */

	/* If it's a bridge, no DMA */
	if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
		return 0;

	/* Reduce the weight of slow USB controllers */
	if (dev->class == PCI_CLASS_SERIAL_USB_UHCI ||
	    dev->class == PCI_CLASS_SERIAL_USB_OHCI ||
	    dev->class == PCI_CLASS_SERIAL_USB_EHCI)
		return 3;

	/* Increase the weight of RAID (includes Obsidian) */
	if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID)
		return 15;

	/* Default */
	return 10;
}

#if 0
static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct pci_dn *pdn = pci_get_pdn(dev);
	struct pnv_ioda_pe *pe;
	int pe_num;

	if (!pdn) {
		pr_err("%s: Device tree node not associated properly\n",
		       pci_name(dev));
		return NULL;
	}
	if (pdn->pe_number != IODA_INVALID_PE)
		return NULL;

	/* PE#0 has been pre-set */
	if (dev->bus->number == 0)
		pe_num = 0;
	else
		pe_num = pnv_ioda_alloc_pe(phb);
	if (pe_num == IODA_INVALID_PE) {
		pr_warning("%s: Not enough PE# available, disabling device\n",
			   pci_name(dev));
		return NULL;
	}

	/* NOTE: We get only one ref to the pci_dev for the pdn, not for the
	 * pointer in the PE data structure, both should be destroyed at the
	 * same time. However, this needs to be looked at more closely again
	 * once we actually start removing things (Hotplug, SR-IOV, ...)
	 *
	 * At some point we want to remove the PDN completely anyways
	 */
	pe = &phb->ioda.pe_array[pe_num];
	pci_dev_get(dev);
	pdn->pcidev = dev;
	pdn->pe_number = pe_num;
	pe->pdev = dev;
	pe->pbus = NULL;
	pe->tce32_seg = -1;
	pe->mve_number = -1;
	pe->rid = dev->bus->number << 8 | pdn->devfn;

	pe_info(pe, "Associated device to PE\n");

	if (pnv_ioda_configure_pe(phb, pe)) {
		/* XXX What do we do here ? */
		if (pe_num)
			pnv_ioda_free_pe(phb, pe_num);
		pdn->pe_number = IODA_INVALID_PE;
		pe->pdev = NULL;
		pci_dev_put(dev);
		return NULL;
	}

	/* Assign a DMA weight to the device */
	pe->dma_weight = pnv_ioda_dma_weight(dev);
	if (pe->dma_weight != 0) {
		phb->ioda.dma_weight += pe->dma_weight;
		phb->ioda.dma_pe_count++;
	}

	/* Link the PE */
	pnv_ioda_link_pe_by_weight(phb, pe);

	return pe;
}
#endif /* Useful for SRIOV case */

static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		struct pci_dn *pdn = pci_get_pdn(dev);

		if (pdn == NULL) {
			pr_warn("%s: No device node associated with device !\n",
				pci_name(dev));
			continue;
		}
		pdn->pcidev = dev;
		pdn->pe_number = pe->pe_number;
		pe->dma_weight += pnv_ioda_dma_weight(dev);
		if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
			pnv_ioda_setup_same_PE(dev->subordinate, pe);
	}
}

/*
 * There are two types of PCI-bus-sensitive PEs: one comprises a single
 * PCI bus, the other contains a primary PCI bus together with its
 * subordinate PCI devices and buses. The second type of PE is normally
 * created for a PCIe-to-PCI bridge or a PLX switch downstream port.
 */
static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
{
	struct pci_controller *hose = pci_bus_to_host(bus);
	struct pnv_phb *phb = hose->private_data;
	struct pnv_ioda_pe *pe;
	int pe_num = IODA_INVALID_PE;

	/* Check if PE is determined by M64 */
	if (phb->pick_m64_pe)
		pe_num = phb->pick_m64_pe(phb, bus, all);

	/* The PE number isn't pinned by M64 */
	if (pe_num == IODA_INVALID_PE)
		pe_num = pnv_ioda_alloc_pe(phb);

	if (pe_num == IODA_INVALID_PE) {
		pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n",
			   __func__, pci_domain_nr(bus), bus->number);
		return;
	}

	pe = &phb->ioda.pe_array[pe_num];
	pe->flags |= (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
	pe->pbus = bus;
	pe->pdev = NULL;
	pe->tce32_seg = -1;
	pe->mve_number = -1;
	pe->rid = bus->busn_res.start << 8;
	pe->dma_weight = 0;

	if (all)
		pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n",
			bus->busn_res.start, bus->busn_res.end, pe_num);
	else
		pe_info(pe, "Secondary bus %d associated with PE#%d\n",
			bus->busn_res.start, pe_num);

	if (pnv_ioda_configure_pe(phb, pe)) {
		/* XXX What do we do here ? */
		if (pe_num)
			pnv_ioda_free_pe(phb, pe_num);
		pe->pbus = NULL;
		return;
	}

	/* Associate it with all child devices */
	pnv_ioda_setup_same_PE(bus, pe);

	/* Put PE to the list */
	list_add_tail(&pe->list, &phb->ioda.pe_list);

	/* Account for one DMA PE if at least one DMA capable device exists
	 * below the bridge
	 */
	if (pe->dma_weight != 0) {
		phb->ioda.dma_weight += pe->dma_weight;
		phb->ioda.dma_pe_count++;
	}

	/* Link the PE */
	pnv_ioda_link_pe_by_weight(phb, pe);
}

static void pnv_ioda_setup_PEs(struct pci_bus *bus)
{
	struct pci_dev *dev;

	pnv_ioda_setup_bus_PE(bus, 0);

	list_for_each_entry(dev, &bus->devices, bus_list) {
		if (dev->subordinate) {
			if (pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE)
				pnv_ioda_setup_bus_PE(dev->subordinate, 1);
			else
				pnv_ioda_setup_PEs(dev->subordinate);
		}
	}
}

/*
 * Configure PEs so that the downstream PCI buses and devices
 * get their associated PE#. Unfortunately, we haven't yet
 * figured out a way to reliably identify PLX bridges, so we
 * simply put the PCI bus and the subordinate buses behind the
 * root port into one PE here. That policy is expected to change
 * as soon as we can detect PLX bridges correctly.
 */
static void pnv_pci_ioda_setup_PEs(void)
{
	struct pci_controller *hose, *tmp;
	struct pnv_phb *phb;

	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		phb = hose->private_data;

		/* M64 layout might affect PE allocation */
		if (phb->reserve_m64_pe)
			phb->reserve_m64_pe(phb);

		pnv_ioda_setup_PEs(hose->bus);
	}
}
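/*
 * Per-device DMA setup: until a 64-bit bypass window is selected through
 * dma_set_mask(), every device is pointed at its PE's 32-bit TCE table so
 * that DMA goes through the IOMMU.
 */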
static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev)
{
	struct pci_dn *pdn = pci_get_pdn(pdev);
	struct pnv_ioda_pe *pe;

	/*
	 * The function can be called while the PE#
	 * hasn't been assigned. Do nothing in that
	 * case.
	 */
	if (!pdn || pdn->pe_number == IODA_INVALID_PE)
		return;

	pe = &phb->ioda.pe_array[pdn->pe_number];
	WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
	set_iommu_table_base_and_group(&pdev->dev, &pe->tce32_table);
}

static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
				     struct pci_dev *pdev, u64 dma_mask)
{
	struct pci_dn *pdn = pci_get_pdn(pdev);
	struct pnv_ioda_pe *pe;
	uint64_t top;
	bool bypass = false;

	if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
		return -ENODEV;

	pe = &phb->ioda.pe_array[pdn->pe_number];
	if (pe->tce_bypass_enabled) {
		top = pe->tce_bypass_base + memblock_end_of_DRAM() - 1;
		bypass = (dma_mask >= top);
	}

	if (bypass) {
		dev_info(&pdev->dev, "Using 64-bit DMA iommu bypass\n");
		set_dma_ops(&pdev->dev, &dma_direct_ops);
		set_dma_offset(&pdev->dev, pe->tce_bypass_base);
	} else {
		dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
		set_dma_ops(&pdev->dev, &dma_iommu_ops);
		set_iommu_table_base(&pdev->dev, &pe->tce32_table);
	}
	*pdev->dev.dma_mask = dma_mask;
	return 0;
}

static u64 pnv_pci_ioda_dma_get_required_mask(struct pnv_phb *phb,
					      struct pci_dev *pdev)
{
	struct pci_dn *pdn = pci_get_pdn(pdev);
	struct pnv_ioda_pe *pe;
	u64 end, mask;

	if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
		return 0;

	pe = &phb->ioda.pe_array[pdn->pe_number];
	if (!pe->tce_bypass_enabled)
		return __dma_get_required_mask(&pdev->dev);

	end = pe->tce_bypass_base + memblock_end_of_DRAM();
	mask = 1ULL << (fls64(end) - 1);
	mask += mask - 1;

	return mask;
}

static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe,
				   struct pci_bus *bus,
				   bool add_to_iommu_group)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		if (add_to_iommu_group)
			set_iommu_table_base_and_group(&dev->dev,
						       &pe->tce32_table);
		else
			set_iommu_table_base(&dev->dev, &pe->tce32_table);

		if (dev->subordinate)
			pnv_ioda_setup_bus_dma(pe, dev->subordinate,
					       add_to_iommu_group);
	}
}

static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe,
					 struct iommu_table *tbl,
					 __be64 *startp, __be64 *endp, bool rm)
{
	__be64 __iomem *invalidate = rm ?
1071 (__be64 __iomem *)pe->tce_inval_reg_phys : 1072 (__be64 __iomem *)tbl->it_index; 1073 unsigned long start, end, inc; 1074 const unsigned shift = tbl->it_page_shift; 1075 1076 start = __pa(startp); 1077 end = __pa(endp); 1078 1079 /* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */ 1080 if (tbl->it_busno) { 1081 start <<= shift; 1082 end <<= shift; 1083 inc = 128ull << shift; 1084 start |= tbl->it_busno; 1085 end |= tbl->it_busno; 1086 } else if (tbl->it_type & TCE_PCI_SWINV_PAIR) { 1087 /* p7ioc-style invalidation, 2 TCEs per write */ 1088 start |= (1ull << 63); 1089 end |= (1ull << 63); 1090 inc = 16; 1091 } else { 1092 /* Default (older HW) */ 1093 inc = 128; 1094 } 1095 1096 end |= inc - 1; /* round up end to be different than start */ 1097 1098 mb(); /* Ensure above stores are visible */ 1099 while (start <= end) { 1100 if (rm) 1101 __raw_rm_writeq(cpu_to_be64(start), invalidate); 1102 else 1103 __raw_writeq(cpu_to_be64(start), invalidate); 1104 start += inc; 1105 } 1106 1107 /* 1108 * The iommu layer will do another mb() for us on build() 1109 * and we don't care on free() 1110 */ 1111 } 1112 1113 static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe, 1114 struct iommu_table *tbl, 1115 __be64 *startp, __be64 *endp, bool rm) 1116 { 1117 unsigned long start, end, inc; 1118 __be64 __iomem *invalidate = rm ? 1119 (__be64 __iomem *)pe->tce_inval_reg_phys : 1120 (__be64 __iomem *)tbl->it_index; 1121 const unsigned shift = tbl->it_page_shift; 1122 1123 /* We'll invalidate DMA address in PE scope */ 1124 start = 0x2ull << 60; 1125 start |= (pe->pe_number & 0xFF); 1126 end = start; 1127 1128 /* Figure out the start, end and step */ 1129 inc = tbl->it_offset + (((u64)startp - tbl->it_base) / sizeof(u64)); 1130 start |= (inc << shift); 1131 inc = tbl->it_offset + (((u64)endp - tbl->it_base) / sizeof(u64)); 1132 end |= (inc << shift); 1133 inc = (0x1ull << shift); 1134 mb(); 1135 1136 while (start <= end) { 1137 if (rm) 1138 __raw_rm_writeq(cpu_to_be64(start), invalidate); 1139 else 1140 __raw_writeq(cpu_to_be64(start), invalidate); 1141 start += inc; 1142 } 1143 } 1144 1145 void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl, 1146 __be64 *startp, __be64 *endp, bool rm) 1147 { 1148 struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe, 1149 tce32_table); 1150 struct pnv_phb *phb = pe->phb; 1151 1152 if (phb->type == PNV_PHB_IODA1) 1153 pnv_pci_ioda1_tce_invalidate(pe, tbl, startp, endp, rm); 1154 else 1155 pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp, rm); 1156 } 1157 1158 static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, 1159 struct pnv_ioda_pe *pe, unsigned int base, 1160 unsigned int segs) 1161 { 1162 1163 struct page *tce_mem = NULL; 1164 const __be64 *swinvp; 1165 struct iommu_table *tbl; 1166 unsigned int i; 1167 int64_t rc; 1168 void *addr; 1169 1170 /* 256M DMA window, 4K TCE pages, 8 bytes TCE */ 1171 #define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8) 1172 1173 /* XXX FIXME: Handle 64-bit only DMA devices */ 1174 /* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */ 1175 /* XXX FIXME: Allocate multi-level tables on PHB3 */ 1176 1177 /* We shouldn't already have a 32-bit DMA associated */ 1178 if (WARN_ON(pe->tce32_seg >= 0)) 1179 return; 1180 1181 /* Grab a 32-bit TCE table */ 1182 pe->tce32_seg = base; 1183 pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n", 1184 (base << 28), ((base + segs) << 28) - 1); 1185 1186 /* XXX Currently, we allocate one big contiguous table for the 1187 * TCEs. 
We only really need one chunk per 256M of TCE space 1188 * (ie per segment) but that's an optimization for later, it 1189 * requires some added smarts with our get/put_tce implementation 1190 */ 1191 tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL, 1192 get_order(TCE32_TABLE_SIZE * segs)); 1193 if (!tce_mem) { 1194 pe_err(pe, " Failed to allocate a 32-bit TCE memory\n"); 1195 goto fail; 1196 } 1197 addr = page_address(tce_mem); 1198 memset(addr, 0, TCE32_TABLE_SIZE * segs); 1199 1200 /* Configure HW */ 1201 for (i = 0; i < segs; i++) { 1202 rc = opal_pci_map_pe_dma_window(phb->opal_id, 1203 pe->pe_number, 1204 base + i, 1, 1205 __pa(addr) + TCE32_TABLE_SIZE * i, 1206 TCE32_TABLE_SIZE, 0x1000); 1207 if (rc) { 1208 pe_err(pe, " Failed to configure 32-bit TCE table," 1209 " err %ld\n", rc); 1210 goto fail; 1211 } 1212 } 1213 1214 /* Setup linux iommu table */ 1215 tbl = &pe->tce32_table; 1216 pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs, 1217 base << 28, IOMMU_PAGE_SHIFT_4K); 1218 1219 /* OPAL variant of P7IOC SW invalidated TCEs */ 1220 swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL); 1221 if (swinvp) { 1222 /* We need a couple more fields -- an address and a data 1223 * to or. Since the bus is only printed out on table free 1224 * errors, and on the first pass the data will be a relative 1225 * bus number, print that out instead. 1226 */ 1227 pe->tce_inval_reg_phys = be64_to_cpup(swinvp); 1228 tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys, 1229 8); 1230 tbl->it_type |= (TCE_PCI_SWINV_CREATE | 1231 TCE_PCI_SWINV_FREE | 1232 TCE_PCI_SWINV_PAIR); 1233 } 1234 iommu_init_table(tbl, phb->hose->node); 1235 iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); 1236 1237 if (pe->pdev) 1238 set_iommu_table_base_and_group(&pe->pdev->dev, tbl); 1239 else 1240 pnv_ioda_setup_bus_dma(pe, pe->pbus, true); 1241 1242 return; 1243 fail: 1244 /* XXX Failure: Try to fallback to 64-bit only ? */ 1245 if (pe->tce32_seg >= 0) 1246 pe->tce32_seg = -1; 1247 if (tce_mem) 1248 __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs)); 1249 } 1250 1251 static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable) 1252 { 1253 struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe, 1254 tce32_table); 1255 uint16_t window_id = (pe->pe_number << 1 ) + 1; 1256 int64_t rc; 1257 1258 pe_info(pe, "%sabling 64-bit DMA bypass\n", enable ? "En" : "Dis"); 1259 if (enable) { 1260 phys_addr_t top = memblock_end_of_DRAM(); 1261 1262 top = roundup_pow_of_two(top); 1263 rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id, 1264 pe->pe_number, 1265 window_id, 1266 pe->tce_bypass_base, 1267 top); 1268 } else { 1269 rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id, 1270 pe->pe_number, 1271 window_id, 1272 pe->tce_bypass_base, 1273 0); 1274 1275 /* 1276 * EEH needs the mapping between IOMMU table and group 1277 * of those VFIO/KVM pass-through devices. We can postpone 1278 * resetting DMA ops until the DMA mask is configured in 1279 * host side. 
1280 */ 1281 if (pe->pdev) 1282 set_iommu_table_base(&pe->pdev->dev, tbl); 1283 else 1284 pnv_ioda_setup_bus_dma(pe, pe->pbus, false); 1285 } 1286 if (rc) 1287 pe_err(pe, "OPAL error %lld configuring bypass window\n", rc); 1288 else 1289 pe->tce_bypass_enabled = enable; 1290 } 1291 1292 static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb, 1293 struct pnv_ioda_pe *pe) 1294 { 1295 /* TVE #1 is selected by PCI address bit 59 */ 1296 pe->tce_bypass_base = 1ull << 59; 1297 1298 /* Install set_bypass callback for VFIO */ 1299 pe->tce32_table.set_bypass = pnv_pci_ioda2_set_bypass; 1300 1301 /* Enable bypass by default */ 1302 pnv_pci_ioda2_set_bypass(&pe->tce32_table, true); 1303 } 1304 1305 static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, 1306 struct pnv_ioda_pe *pe) 1307 { 1308 struct page *tce_mem = NULL; 1309 void *addr; 1310 const __be64 *swinvp; 1311 struct iommu_table *tbl; 1312 unsigned int tce_table_size, end; 1313 int64_t rc; 1314 1315 /* We shouldn't already have a 32-bit DMA associated */ 1316 if (WARN_ON(pe->tce32_seg >= 0)) 1317 return; 1318 1319 /* The PE will reserve all possible 32-bits space */ 1320 pe->tce32_seg = 0; 1321 end = (1 << ilog2(phb->ioda.m32_pci_base)); 1322 tce_table_size = (end / 0x1000) * 8; 1323 pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n", 1324 end); 1325 1326 /* Allocate TCE table */ 1327 tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL, 1328 get_order(tce_table_size)); 1329 if (!tce_mem) { 1330 pe_err(pe, "Failed to allocate a 32-bit TCE memory\n"); 1331 goto fail; 1332 } 1333 addr = page_address(tce_mem); 1334 memset(addr, 0, tce_table_size); 1335 1336 /* 1337 * Map TCE table through TVT. The TVE index is the PE number 1338 * shifted by 1 bit for 32-bits DMA space. 1339 */ 1340 rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, 1341 pe->pe_number << 1, 1, __pa(addr), 1342 tce_table_size, 0x1000); 1343 if (rc) { 1344 pe_err(pe, "Failed to configure 32-bit TCE table," 1345 " err %ld\n", rc); 1346 goto fail; 1347 } 1348 1349 /* Setup linux iommu table */ 1350 tbl = &pe->tce32_table; 1351 pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0, 1352 IOMMU_PAGE_SHIFT_4K); 1353 1354 /* OPAL variant of PHB3 invalidated TCEs */ 1355 swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL); 1356 if (swinvp) { 1357 /* We need a couple more fields -- an address and a data 1358 * to or. Since the bus is only printed out on table free 1359 * errors, and on the first pass the data will be a relative 1360 * bus number, print that out instead. 
1361 */ 1362 pe->tce_inval_reg_phys = be64_to_cpup(swinvp); 1363 tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys, 1364 8); 1365 tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE); 1366 } 1367 iommu_init_table(tbl, phb->hose->node); 1368 iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); 1369 1370 if (pe->pdev) 1371 set_iommu_table_base_and_group(&pe->pdev->dev, tbl); 1372 else 1373 pnv_ioda_setup_bus_dma(pe, pe->pbus, true); 1374 1375 /* Also create a bypass window */ 1376 if (!pnv_iommu_bypass_disabled) 1377 pnv_pci_ioda2_setup_bypass_pe(phb, pe); 1378 1379 return; 1380 fail: 1381 if (pe->tce32_seg >= 0) 1382 pe->tce32_seg = -1; 1383 if (tce_mem) 1384 __free_pages(tce_mem, get_order(tce_table_size)); 1385 } 1386 1387 static void pnv_ioda_setup_dma(struct pnv_phb *phb) 1388 { 1389 struct pci_controller *hose = phb->hose; 1390 unsigned int residual, remaining, segs, tw, base; 1391 struct pnv_ioda_pe *pe; 1392 1393 /* If we have more PE# than segments available, hand out one 1394 * per PE until we run out and let the rest fail. If not, 1395 * then we assign at least one segment per PE, plus more based 1396 * on the amount of devices under that PE 1397 */ 1398 if (phb->ioda.dma_pe_count > phb->ioda.tce32_count) 1399 residual = 0; 1400 else 1401 residual = phb->ioda.tce32_count - 1402 phb->ioda.dma_pe_count; 1403 1404 pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n", 1405 hose->global_number, phb->ioda.tce32_count); 1406 pr_info("PCI: %d PE# for a total weight of %d\n", 1407 phb->ioda.dma_pe_count, phb->ioda.dma_weight); 1408 1409 /* Walk our PE list and configure their DMA segments, hand them 1410 * out one base segment plus any residual segments based on 1411 * weight 1412 */ 1413 remaining = phb->ioda.tce32_count; 1414 tw = phb->ioda.dma_weight; 1415 base = 0; 1416 list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) { 1417 if (!pe->dma_weight) 1418 continue; 1419 if (!remaining) { 1420 pe_warn(pe, "No DMA32 resources available\n"); 1421 continue; 1422 } 1423 segs = 1; 1424 if (residual) { 1425 segs += ((pe->dma_weight * residual) + (tw / 2)) / tw; 1426 if (segs > remaining) 1427 segs = remaining; 1428 } 1429 1430 /* 1431 * For IODA2 compliant PHB3, we needn't care about the weight. 1432 * The all available 32-bits DMA space will be assigned to 1433 * the specific PE. 1434 */ 1435 if (phb->type == PNV_PHB_IODA1) { 1436 pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n", 1437 pe->dma_weight, segs); 1438 pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs); 1439 } else { 1440 pe_info(pe, "Assign DMA32 space\n"); 1441 segs = 0; 1442 pnv_pci_ioda2_setup_dma_pe(phb, pe); 1443 } 1444 1445 remaining -= segs; 1446 base += segs; 1447 } 1448 } 1449 1450 #ifdef CONFIG_PCI_MSI 1451 static void pnv_ioda2_msi_eoi(struct irq_data *d) 1452 { 1453 unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); 1454 struct irq_chip *chip = irq_data_get_irq_chip(d); 1455 struct pnv_phb *phb = container_of(chip, struct pnv_phb, 1456 ioda.irq_chip); 1457 int64_t rc; 1458 1459 rc = opal_pci_msi_eoi(phb->opal_id, hw_irq); 1460 WARN_ON_ONCE(rc); 1461 1462 icp_native_eoi(d); 1463 } 1464 1465 1466 static void set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq) 1467 { 1468 struct irq_data *idata; 1469 struct irq_chip *ichip; 1470 1471 if (phb->type != PNV_PHB_IODA2) 1472 return; 1473 1474 if (!phb->ioda.irq_chip_init) { 1475 /* 1476 * First time we setup an MSI IRQ, we need to setup the 1477 * corresponding IRQ chip to route correctly. 
1478 */ 1479 idata = irq_get_irq_data(virq); 1480 ichip = irq_data_get_irq_chip(idata); 1481 phb->ioda.irq_chip_init = 1; 1482 phb->ioda.irq_chip = *ichip; 1483 phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi; 1484 } 1485 irq_set_chip(virq, &phb->ioda.irq_chip); 1486 } 1487 1488 #ifdef CONFIG_CXL_BASE 1489 1490 struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev) 1491 { 1492 struct pci_controller *hose = pci_bus_to_host(dev->bus); 1493 1494 return of_node_get(hose->dn); 1495 } 1496 EXPORT_SYMBOL(pnv_pci_get_phb_node); 1497 1498 int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode) 1499 { 1500 struct pci_controller *hose = pci_bus_to_host(dev->bus); 1501 struct pnv_phb *phb = hose->private_data; 1502 struct pnv_ioda_pe *pe; 1503 int rc; 1504 1505 pe = pnv_ioda_get_pe(dev); 1506 if (!pe) 1507 return -ENODEV; 1508 1509 pe_info(pe, "Switching PHB to CXL\n"); 1510 1511 rc = opal_pci_set_phb_cxl_mode(phb->opal_id, mode, pe->pe_number); 1512 if (rc) 1513 dev_err(&dev->dev, "opal_pci_set_phb_cxl_mode failed: %i\n", rc); 1514 1515 return rc; 1516 } 1517 EXPORT_SYMBOL(pnv_phb_to_cxl_mode); 1518 1519 /* Find PHB for cxl dev and allocate MSI hwirqs? 1520 * Returns the absolute hardware IRQ number 1521 */ 1522 int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num) 1523 { 1524 struct pci_controller *hose = pci_bus_to_host(dev->bus); 1525 struct pnv_phb *phb = hose->private_data; 1526 int hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, num); 1527 1528 if (hwirq < 0) { 1529 dev_warn(&dev->dev, "Failed to find a free MSI\n"); 1530 return -ENOSPC; 1531 } 1532 1533 return phb->msi_base + hwirq; 1534 } 1535 EXPORT_SYMBOL(pnv_cxl_alloc_hwirqs); 1536 1537 void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num) 1538 { 1539 struct pci_controller *hose = pci_bus_to_host(dev->bus); 1540 struct pnv_phb *phb = hose->private_data; 1541 1542 msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq - phb->msi_base, num); 1543 } 1544 EXPORT_SYMBOL(pnv_cxl_release_hwirqs); 1545 1546 void pnv_cxl_release_hwirq_ranges(struct cxl_irq_ranges *irqs, 1547 struct pci_dev *dev) 1548 { 1549 struct pci_controller *hose = pci_bus_to_host(dev->bus); 1550 struct pnv_phb *phb = hose->private_data; 1551 int i, hwirq; 1552 1553 for (i = 1; i < CXL_IRQ_RANGES; i++) { 1554 if (!irqs->range[i]) 1555 continue; 1556 pr_devel("cxl release irq range 0x%x: offset: 0x%lx limit: %ld\n", 1557 i, irqs->offset[i], 1558 irqs->range[i]); 1559 hwirq = irqs->offset[i] - phb->msi_base; 1560 msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, 1561 irqs->range[i]); 1562 } 1563 } 1564 EXPORT_SYMBOL(pnv_cxl_release_hwirq_ranges); 1565 1566 int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs, 1567 struct pci_dev *dev, int num) 1568 { 1569 struct pci_controller *hose = pci_bus_to_host(dev->bus); 1570 struct pnv_phb *phb = hose->private_data; 1571 int i, hwirq, try; 1572 1573 memset(irqs, 0, sizeof(struct cxl_irq_ranges)); 1574 1575 /* 0 is reserved for the multiplexed PSL DSI interrupt */ 1576 for (i = 1; i < CXL_IRQ_RANGES && num; i++) { 1577 try = num; 1578 while (try) { 1579 hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, try); 1580 if (hwirq >= 0) 1581 break; 1582 try /= 2; 1583 } 1584 if (!try) 1585 goto fail; 1586 1587 irqs->offset[i] = phb->msi_base + hwirq; 1588 irqs->range[i] = try; 1589 pr_devel("cxl alloc irq range 0x%x: offset: 0x%lx limit: %li\n", 1590 i, irqs->offset[i], irqs->range[i]); 1591 num -= try; 1592 } 1593 if (num) 1594 goto fail; 1595 1596 return 0; 1597 fail: 1598 pnv_cxl_release_hwirq_ranges(irqs, dev); 1599 return 
-ENOSPC; 1600 } 1601 EXPORT_SYMBOL(pnv_cxl_alloc_hwirq_ranges); 1602 1603 int pnv_cxl_get_irq_count(struct pci_dev *dev) 1604 { 1605 struct pci_controller *hose = pci_bus_to_host(dev->bus); 1606 struct pnv_phb *phb = hose->private_data; 1607 1608 return phb->msi_bmp.irq_count; 1609 } 1610 EXPORT_SYMBOL(pnv_cxl_get_irq_count); 1611 1612 int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, 1613 unsigned int virq) 1614 { 1615 struct pci_controller *hose = pci_bus_to_host(dev->bus); 1616 struct pnv_phb *phb = hose->private_data; 1617 unsigned int xive_num = hwirq - phb->msi_base; 1618 struct pnv_ioda_pe *pe; 1619 int rc; 1620 1621 if (!(pe = pnv_ioda_get_pe(dev))) 1622 return -ENODEV; 1623 1624 /* Assign XIVE to PE */ 1625 rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num); 1626 if (rc) { 1627 pe_warn(pe, "%s: OPAL error %d setting msi_base 0x%x " 1628 "hwirq 0x%x XIVE 0x%x PE\n", 1629 pci_name(dev), rc, phb->msi_base, hwirq, xive_num); 1630 return -EIO; 1631 } 1632 set_msi_irq_chip(phb, virq); 1633 1634 return 0; 1635 } 1636 EXPORT_SYMBOL(pnv_cxl_ioda_msi_setup); 1637 #endif 1638 1639 static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, 1640 unsigned int hwirq, unsigned int virq, 1641 unsigned int is_64, struct msi_msg *msg) 1642 { 1643 struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev); 1644 unsigned int xive_num = hwirq - phb->msi_base; 1645 __be32 data; 1646 int rc; 1647 1648 /* No PE assigned ? bail out ... no MSI for you ! */ 1649 if (pe == NULL) 1650 return -ENXIO; 1651 1652 /* Check if we have an MVE */ 1653 if (pe->mve_number < 0) 1654 return -ENXIO; 1655 1656 /* Force 32-bit MSI on some broken devices */ 1657 if (dev->no_64bit_msi) 1658 is_64 = 0; 1659 1660 /* Assign XIVE to PE */ 1661 rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num); 1662 if (rc) { 1663 pr_warn("%s: OPAL error %d setting XIVE %d PE\n", 1664 pci_name(dev), rc, xive_num); 1665 return -EIO; 1666 } 1667 1668 if (is_64) { 1669 __be64 addr64; 1670 1671 rc = opal_get_msi_64(phb->opal_id, pe->mve_number, xive_num, 1, 1672 &addr64, &data); 1673 if (rc) { 1674 pr_warn("%s: OPAL error %d getting 64-bit MSI data\n", 1675 pci_name(dev), rc); 1676 return -EIO; 1677 } 1678 msg->address_hi = be64_to_cpu(addr64) >> 32; 1679 msg->address_lo = be64_to_cpu(addr64) & 0xfffffffful; 1680 } else { 1681 __be32 addr32; 1682 1683 rc = opal_get_msi_32(phb->opal_id, pe->mve_number, xive_num, 1, 1684 &addr32, &data); 1685 if (rc) { 1686 pr_warn("%s: OPAL error %d getting 32-bit MSI data\n", 1687 pci_name(dev), rc); 1688 return -EIO; 1689 } 1690 msg->address_hi = 0; 1691 msg->address_lo = be32_to_cpu(addr32); 1692 } 1693 msg->data = be32_to_cpu(data); 1694 1695 set_msi_irq_chip(phb, virq); 1696 1697 pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d)," 1698 " address=%x_%08x data=%x PE# %d\n", 1699 pci_name(dev), is_64 ? 
"64" : "32", hwirq, xive_num, 1700 msg->address_hi, msg->address_lo, data, pe->pe_number); 1701 1702 return 0; 1703 } 1704 1705 static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) 1706 { 1707 unsigned int count; 1708 const __be32 *prop = of_get_property(phb->hose->dn, 1709 "ibm,opal-msi-ranges", NULL); 1710 if (!prop) { 1711 /* BML Fallback */ 1712 prop = of_get_property(phb->hose->dn, "msi-ranges", NULL); 1713 } 1714 if (!prop) 1715 return; 1716 1717 phb->msi_base = be32_to_cpup(prop); 1718 count = be32_to_cpup(prop + 1); 1719 if (msi_bitmap_alloc(&phb->msi_bmp, count, phb->hose->dn)) { 1720 pr_err("PCI %d: Failed to allocate MSI bitmap !\n", 1721 phb->hose->global_number); 1722 return; 1723 } 1724 1725 phb->msi_setup = pnv_pci_ioda_msi_setup; 1726 phb->msi32_support = 1; 1727 pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n", 1728 count, phb->msi_base); 1729 } 1730 #else 1731 static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { } 1732 #endif /* CONFIG_PCI_MSI */ 1733 1734 /* 1735 * This function is supposed to be called on basis of PE from top 1736 * to bottom style. So the the I/O or MMIO segment assigned to 1737 * parent PE could be overrided by its child PEs if necessary. 1738 */ 1739 static void pnv_ioda_setup_pe_seg(struct pci_controller *hose, 1740 struct pnv_ioda_pe *pe) 1741 { 1742 struct pnv_phb *phb = hose->private_data; 1743 struct pci_bus_region region; 1744 struct resource *res; 1745 int i, index; 1746 int rc; 1747 1748 /* 1749 * NOTE: We only care PCI bus based PE for now. For PCI 1750 * device based PE, for example SRIOV sensitive VF should 1751 * be figured out later. 1752 */ 1753 BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))); 1754 1755 pci_bus_for_each_resource(pe->pbus, res, i) { 1756 if (!res || !res->flags || 1757 res->start > res->end) 1758 continue; 1759 1760 if (res->flags & IORESOURCE_IO) { 1761 region.start = res->start - phb->ioda.io_pci_base; 1762 region.end = res->end - phb->ioda.io_pci_base; 1763 index = region.start / phb->ioda.io_segsize; 1764 1765 while (index < phb->ioda.total_pe && 1766 region.start <= region.end) { 1767 phb->ioda.io_segmap[index] = pe->pe_number; 1768 rc = opal_pci_map_pe_mmio_window(phb->opal_id, 1769 pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index); 1770 if (rc != OPAL_SUCCESS) { 1771 pr_err("%s: OPAL error %d when mapping IO " 1772 "segment #%d to PE#%d\n", 1773 __func__, rc, index, pe->pe_number); 1774 break; 1775 } 1776 1777 region.start += phb->ioda.io_segsize; 1778 index++; 1779 } 1780 } else if (res->flags & IORESOURCE_MEM) { 1781 region.start = res->start - 1782 hose->mem_offset[0] - 1783 phb->ioda.m32_pci_base; 1784 region.end = res->end - 1785 hose->mem_offset[0] - 1786 phb->ioda.m32_pci_base; 1787 index = region.start / phb->ioda.m32_segsize; 1788 1789 while (index < phb->ioda.total_pe && 1790 region.start <= region.end) { 1791 phb->ioda.m32_segmap[index] = pe->pe_number; 1792 rc = opal_pci_map_pe_mmio_window(phb->opal_id, 1793 pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index); 1794 if (rc != OPAL_SUCCESS) { 1795 pr_err("%s: OPAL error %d when mapping M32 " 1796 "segment#%d to PE#%d", 1797 __func__, rc, index, pe->pe_number); 1798 break; 1799 } 1800 1801 region.start += phb->ioda.m32_segsize; 1802 index++; 1803 } 1804 } 1805 } 1806 } 1807 1808 static void pnv_pci_ioda_setup_seg(void) 1809 { 1810 struct pci_controller *tmp, *hose; 1811 struct pnv_phb *phb; 1812 struct pnv_ioda_pe *pe; 1813 1814 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { 1815 phb = hose->private_data; 1816 
static void pnv_pci_ioda_setup_seg(void)
{
	struct pci_controller *tmp, *hose;
	struct pnv_phb *phb;
	struct pnv_ioda_pe *pe;

	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		phb = hose->private_data;
		list_for_each_entry(pe, &phb->ioda.pe_list, list) {
			pnv_ioda_setup_pe_seg(hose, pe);
		}
	}
}

static void pnv_pci_ioda_setup_DMA(void)
{
	struct pci_controller *hose, *tmp;
	struct pnv_phb *phb;

	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		pnv_ioda_setup_dma(hose->private_data);

		/* Mark the PHB initialization done */
		phb = hose->private_data;
		phb->initialized = 1;
	}
}

static void pnv_pci_ioda_create_dbgfs(void)
{
#ifdef CONFIG_DEBUG_FS
	struct pci_controller *hose, *tmp;
	struct pnv_phb *phb;
	char name[16];

	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		phb = hose->private_data;

		sprintf(name, "PCI%04x", hose->global_number);
		phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root);
		if (!phb->dbgfs)
			pr_warning("%s: Error on creating debugfs on PHB#%x\n",
				   __func__, hose->global_number);
	}
#endif /* CONFIG_DEBUG_FS */
}

static void pnv_pci_ioda_fixup(void)
{
	pnv_pci_ioda_setup_PEs();
	pnv_pci_ioda_setup_seg();
	pnv_pci_ioda_setup_DMA();

	pnv_pci_ioda_create_dbgfs();

#ifdef CONFIG_EEH
	eeh_init();
	eeh_addr_cache_build();
#endif
}

/*
 * Returns the alignment for I/O or memory windows for P2P
 * bridges. That actually depends on how PEs are segmented.
 * For now, we return I/O or M32 segment size for PE sensitive
 * P2P bridges. Otherwise, the default values (4KiB for I/O,
 * 1MiB for memory) will be returned.
 *
 * The current PCI bus might be put into one PE, which was
 * created against the parent PCI bridge. In that case, we
 * needn't enlarge the alignment, which saves some resources.
 */
static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
						unsigned long type)
{
	struct pci_dev *bridge;
	struct pci_controller *hose = pci_bus_to_host(bus);
	struct pnv_phb *phb = hose->private_data;
	int num_pci_bridges = 0;

	bridge = bus->self;
	while (bridge) {
		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) {
			num_pci_bridges++;
			if (num_pci_bridges >= 2)
				return 1;
		}

		bridge = bridge->bus->self;
	}

	/* We fall back to M32 if M64 isn't supported */
	if (phb->ioda.m64_segsize &&
	    pnv_pci_is_mem_pref_64(type))
		return phb->ioda.m64_segsize;
	if (type & IORESOURCE_MEM)
		return phb->ioda.m32_segsize;

	return phb->ioda.io_segsize;
}

/* Prevent enabling devices for which we couldn't properly
 * assign a PE
 */
static int pnv_pci_enable_device_hook(struct pci_dev *dev)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct pci_dn *pdn;

	/* The function is probably called while the PEs have
	 * not been created yet, for example during resource
	 * reassignment in the PCI probe period. We just skip
	 * the check if the PEs aren't ready.
1923 */ 1924 if (!phb->initialized) 1925 return 0; 1926 1927 pdn = pci_get_pdn(dev); 1928 if (!pdn || pdn->pe_number == IODA_INVALID_PE) 1929 return -EINVAL; 1930 1931 return 0; 1932 } 1933 1934 static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus, 1935 u32 devfn) 1936 { 1937 return phb->ioda.pe_rmap[(bus->number << 8) | devfn]; 1938 } 1939 1940 static void pnv_pci_ioda_shutdown(struct pnv_phb *phb) 1941 { 1942 opal_pci_reset(phb->opal_id, OPAL_RESET_PCI_IODA_TABLE, 1943 OPAL_ASSERT_RESET); 1944 } 1945 1946 static void __init pnv_pci_init_ioda_phb(struct device_node *np, 1947 u64 hub_id, int ioda_type) 1948 { 1949 struct pci_controller *hose; 1950 struct pnv_phb *phb; 1951 unsigned long size, m32map_off, pemap_off, iomap_off = 0; 1952 const __be64 *prop64; 1953 const __be32 *prop32; 1954 int len; 1955 u64 phb_id; 1956 void *aux; 1957 long rc; 1958 1959 pr_info("Initializing IODA%d OPAL PHB %s\n", ioda_type, np->full_name); 1960 1961 prop64 = of_get_property(np, "ibm,opal-phbid", NULL); 1962 if (!prop64) { 1963 pr_err(" Missing \"ibm,opal-phbid\" property !\n"); 1964 return; 1965 } 1966 phb_id = be64_to_cpup(prop64); 1967 pr_debug(" PHB-ID : 0x%016llx\n", phb_id); 1968 1969 phb = memblock_virt_alloc(sizeof(struct pnv_phb), 0); 1970 1971 /* Allocate PCI controller */ 1972 phb->hose = hose = pcibios_alloc_controller(np); 1973 if (!phb->hose) { 1974 pr_err(" Can't allocate PCI controller for %s\n", 1975 np->full_name); 1976 memblock_free(__pa(phb), sizeof(struct pnv_phb)); 1977 return; 1978 } 1979 1980 spin_lock_init(&phb->lock); 1981 prop32 = of_get_property(np, "bus-range", &len); 1982 if (prop32 && len == 8) { 1983 hose->first_busno = be32_to_cpu(prop32[0]); 1984 hose->last_busno = be32_to_cpu(prop32[1]); 1985 } else { 1986 pr_warn(" Broken <bus-range> on %s\n", np->full_name); 1987 hose->first_busno = 0; 1988 hose->last_busno = 0xff; 1989 } 1990 hose->private_data = phb; 1991 phb->hub_id = hub_id; 1992 phb->opal_id = phb_id; 1993 phb->type = ioda_type; 1994 1995 /* Detect specific models for error handling */ 1996 if (of_device_is_compatible(np, "ibm,p7ioc-pciex")) 1997 phb->model = PNV_PHB_MODEL_P7IOC; 1998 else if (of_device_is_compatible(np, "ibm,power8-pciex")) 1999 phb->model = PNV_PHB_MODEL_PHB3; 2000 else 2001 phb->model = PNV_PHB_MODEL_UNKNOWN; 2002 2003 /* Parse 32-bit and IO ranges (if any) */ 2004 pci_process_bridge_OF_ranges(hose, np, !hose->global_number); 2005 2006 /* Get registers */ 2007 phb->regs = of_iomap(np, 0); 2008 if (phb->regs == NULL) 2009 pr_err(" Failed to map registers !\n"); 2010 2011 /* Initialize more IODA stuff */ 2012 phb->ioda.total_pe = 1; 2013 prop32 = of_get_property(np, "ibm,opal-num-pes", NULL); 2014 if (prop32) 2015 phb->ioda.total_pe = be32_to_cpup(prop32); 2016 prop32 = of_get_property(np, "ibm,opal-reserved-pe", NULL); 2017 if (prop32) 2018 phb->ioda.reserved_pe = be32_to_cpup(prop32); 2019 2020 /* Parse 64-bit MMIO range */ 2021 pnv_ioda_parse_m64_window(phb); 2022 2023 phb->ioda.m32_size = resource_size(&hose->mem_resources[0]); 2024 /* FW Has already off top 64k of M32 space (MSI space) */ 2025 phb->ioda.m32_size += 0x10000; 2026 2027 phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe; 2028 phb->ioda.m32_pci_base = hose->mem_resources[0].start - hose->mem_offset[0]; 2029 phb->ioda.io_size = hose->pci_io_size; 2030 phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe; 2031 phb->ioda.io_pci_base = 0; /* XXX calculate this ? */ 2032 2033 /* Allocate aux data & arrays. 
We don't have IO ports on PHB3 */
	size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
	m32map_off = size;
	size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]);
	if (phb->type == PNV_PHB_IODA1) {
		iomap_off = size;
		size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]);
	}
	pemap_off = size;
	size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe);
	aux = memblock_virt_alloc(size, 0);
	phb->ioda.pe_alloc = aux;
	phb->ioda.m32_segmap = aux + m32map_off;
	if (phb->type == PNV_PHB_IODA1)
		phb->ioda.io_segmap = aux + iomap_off;
	phb->ioda.pe_array = aux + pemap_off;
	set_bit(phb->ioda.reserved_pe, phb->ioda.pe_alloc);

	INIT_LIST_HEAD(&phb->ioda.pe_dma_list);
	INIT_LIST_HEAD(&phb->ioda.pe_list);

	/* Calculate how many 32-bit TCE segments we have */
	phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28;

#if 0 /* We should really do that ... */
	rc = opal_pci_set_phb_mem_window(opal->phb_id,
					 window_type,
					 window_num,
					 starting_real_address,
					 starting_pci_address,
					 segment_size);
#endif

	pr_info("  %03d (%03d) PE's M32: 0x%x [segment=0x%x]\n",
		phb->ioda.total_pe, phb->ioda.reserved_pe,
		phb->ioda.m32_size, phb->ioda.m32_segsize);
	if (phb->ioda.m64_size)
		pr_info("  M64: 0x%lx [segment=0x%lx]\n",
			phb->ioda.m64_size, phb->ioda.m64_segsize);
	if (phb->ioda.io_size)
		pr_info("  IO: 0x%x [segment=0x%x]\n",
			phb->ioda.io_size, phb->ioda.io_segsize);


	phb->hose->ops = &pnv_pci_ops;
	phb->get_pe_state = pnv_ioda_get_pe_state;
	phb->freeze_pe = pnv_ioda_freeze_pe;
	phb->unfreeze_pe = pnv_ioda_unfreeze_pe;
#ifdef CONFIG_EEH
	phb->eeh_ops = &ioda_eeh_ops;
#endif

	/* Setup RID -> PE mapping function */
	phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe;

	/* Setup TCEs */
	phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
	phb->dma_set_mask = pnv_pci_ioda_dma_set_mask;
	phb->dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask;

	/* Setup shutdown function for kexec */
	phb->shutdown = pnv_pci_ioda_shutdown;

	/* Setup MSI support */
	pnv_pci_init_ioda_msis(phb);

	/*
	 * We pass the PCI probe flag PCI_REASSIGN_ALL_RSRC here
	 * to let the PCI core do resource assignment. It's supposed
	 * that the PCI core will do correct I/O and MMIO alignment
	 * for the P2P bridge bars so that each PCI bus (excluding
	 * the child P2P bridges) can form an individual PE.
	 */
	ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
	ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook;
	ppc_md.pcibios_window_alignment = pnv_pci_window_alignment;
	ppc_md.pcibios_reset_secondary_bus = pnv_pci_reset_secondary_bus;
	pci_add_flags(PCI_REASSIGN_ALL_RSRC);

	/* Reset IODA tables to a clean state */
	rc = opal_pci_reset(phb_id, OPAL_RESET_PCI_IODA_TABLE, OPAL_ASSERT_RESET);
	if (rc)
		pr_warning("  OPAL Error %ld performing IODA table reset !\n", rc);

	/* If we're running in a kdump kernel, the previous kernel never
	 * shut down PCI devices correctly. We already got the IODA table
	 * cleaned out above, so we have to issue a PHB reset to stop all
	 * PCI transactions from the previous kernel.
	 */
	if (is_kdump_kernel()) {
		pr_info("  Issue PHB reset ...\n");
		ioda_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL);
		ioda_eeh_phb_reset(hose, EEH_RESET_DEACTIVATE);
	}

	/* Remove M64 resource if we can't configure it successfully */
	if (!phb->init_m64 || phb->init_m64(phb))
		hose->mem_resources[1].flags = 0;
}
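/*
 * The entry points below are invoked from the generic PowerNV PCI setup
 * code (see pci.c in this directory) for each IODA2 PHB or IODA IO hub
 * node found in the device tree.
 */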
void __init pnv_pci_init_ioda2_phb(struct device_node *np)
{
	pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2);
}

void __init pnv_pci_init_ioda_hub(struct device_node *np)
{
	struct device_node *phbn;
	const __be64 *prop64;
	u64 hub_id;

	pr_info("Probing IODA IO-Hub %s\n", np->full_name);

	prop64 = of_get_property(np, "ibm,opal-hubid", NULL);
	if (!prop64) {
		pr_err(" Missing \"ibm,opal-hubid\" property !\n");
		return;
	}
	hub_id = be64_to_cpup(prop64);
	pr_devel(" HUB-ID : 0x%016llx\n", hub_id);

	/* Count child PHBs */
	for_each_child_of_node(np, phbn) {
		/* Look for IODA1 PHBs */
		if (of_device_is_compatible(phbn, "ibm,ioda-phb"))
			pnv_pci_init_ioda_phb(phbn, hub_id, PNV_PHB_IODA1);
	}
}