/*
 * Support PCI/PCIe on PowerNV platforms
 *
 * Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#undef DEBUG

#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/crash_dump.h>
#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/irq.h>
#include <linux/io.h>
#include <linux/msi.h>
#include <linux/memblock.h>

#include <asm/sections.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/msi_bitmap.h>
#include <asm/ppc-pci.h>
#include <asm/opal.h>
#include <asm/iommu.h>
#include <asm/tce.h>
#include <asm/xics.h>
#include <asm/debug.h>
#include <asm/firmware.h>
#include <asm/pnv-pci.h>

#include <misc/cxl.h>

#include "powernv.h"
#include "pci.h"

static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
			    const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;
	char pfix[32];

	va_start(args, fmt);

	vaf.fmt = fmt;
	vaf.va = &args;

	if (pe->pdev)
		strlcpy(pfix, dev_name(&pe->pdev->dev), sizeof(pfix));
	else
		sprintf(pfix, "%04x:%02x ",
			pci_domain_nr(pe->pbus), pe->pbus->number);

	printk("%spci %s: [PE# %.3d] %pV",
	       level, pfix, pe->pe_number, &vaf);

	va_end(args);
}

#define pe_err(pe, fmt, ...)					\
	pe_level_printk(pe, KERN_ERR, fmt, ##__VA_ARGS__)
#define pe_warn(pe, fmt, ...)					\
	pe_level_printk(pe, KERN_WARNING, fmt, ##__VA_ARGS__)
#define pe_info(pe, fmt, ...)					\
	pe_level_printk(pe, KERN_INFO, fmt, ##__VA_ARGS__)

/*
 * stdcix is only supposed to be used in hypervisor real mode as per
 * the architecture spec
 */
static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr)
{
	__asm__ __volatile__("stdcix %0,0,%1"
		: : "r" (val), "r" (paddr) : "memory");
}

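/* Check whether a resource is a 64-bit prefetchable memory BAR/window */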
static inline bool pnv_pci_is_mem_pref_64(unsigned long flags)
{
	return ((flags & (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)) ==
		(IORESOURCE_MEM_64 | IORESOURCE_PREFETCH));
}

static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no)
{
	if (!(pe_no >= 0 && pe_no < phb->ioda.total_pe)) {
		pr_warn("%s: Invalid PE %d on PHB#%x\n",
			__func__, pe_no, phb->hose->global_number);
		return;
	}

	if (test_and_set_bit(pe_no, phb->ioda.pe_alloc)) {
		pr_warn("%s: PE %d was assigned on PHB#%x\n",
			__func__, pe_no, phb->hose->global_number);
		return;
	}

	phb->ioda.pe_array[pe_no].phb = phb;
	phb->ioda.pe_array[pe_no].pe_number = pe_no;
}

static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
{
	unsigned long pe;

	do {
		pe = find_next_zero_bit(phb->ioda.pe_alloc,
					phb->ioda.total_pe, 0);
		if (pe >= phb->ioda.total_pe)
			return IODA_INVALID_PE;
	} while (test_and_set_bit(pe, phb->ioda.pe_alloc));

	phb->ioda.pe_array[pe].phb = phb;
	phb->ioda.pe_array[pe].pe_number = pe;
	return pe;
}

static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
{
	WARN_ON(phb->ioda.pe_array[pe].pdev);

	memset(&phb->ioda.pe_array[pe], 0, sizeof(struct pnv_ioda_pe));
	clear_bit(pe, phb->ioda.pe_alloc);
}

/* The default M64 BAR is shared by all PEs */
static int pnv_ioda2_init_m64(struct pnv_phb *phb)
{
	const char *desc;
	struct resource *r;
	s64 rc;

	/* Configure the default M64 BAR */
	rc = opal_pci_set_phb_mem_window(phb->opal_id,
					 OPAL_M64_WINDOW_TYPE,
					 phb->ioda.m64_bar_idx,
					 phb->ioda.m64_base,
					 0, /* unused */
					 phb->ioda.m64_size);
	if (rc != OPAL_SUCCESS) {
		desc = "configuring";
		goto fail;
	}

	/* Enable the default M64 BAR */
	rc = opal_pci_phb_mmio_enable(phb->opal_id,
				      OPAL_M64_WINDOW_TYPE,
				      phb->ioda.m64_bar_idx,
				      OPAL_ENABLE_M64_SPLIT);
	if (rc != OPAL_SUCCESS) {
		desc = "enabling";
		goto fail;
	}

	/* Mark the M64 BAR assigned */
	set_bit(phb->ioda.m64_bar_idx, &phb->ioda.m64_bar_alloc);

	/*
	 * Strip off the segment used by the reserved PE, which is
	 * expected to be 0 or the last one supported by the PHB.
	 */
	r = &phb->hose->mem_resources[1];
	if (phb->ioda.reserved_pe == 0)
		r->start += phb->ioda.m64_segsize;
	else if (phb->ioda.reserved_pe == (phb->ioda.total_pe - 1))
		r->end -= phb->ioda.m64_segsize;
	else
		pr_warn(" Cannot strip M64 segment for reserved PE#%d\n",
			phb->ioda.reserved_pe);

	return 0;

fail:
	pr_warn(" Failure %lld %s M64 BAR#%d\n",
		rc, desc, phb->ioda.m64_bar_idx);
	opal_pci_phb_mmio_enable(phb->opal_id,
				 OPAL_M64_WINDOW_TYPE,
				 phb->ioda.m64_bar_idx,
				 OPAL_DISABLE_M64);
	return -EIO;
}

static void pnv_ioda2_reserve_m64_pe(struct pnv_phb *phb)
{
	resource_size_t sgsz = phb->ioda.m64_segsize;
	struct pci_dev *pdev;
	struct resource *r;
	int base, step, i;

	/*
	 * The root bus always reports the full M64 range, while the
	 * root port reflects the M64 range actually in use, so check
	 * the root port instead of the root bus.
	 */
	list_for_each_entry(pdev, &phb->hose->bus->devices, bus_list) {
		for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) {
			r = &pdev->resource[PCI_BRIDGE_RESOURCES + i];
			if (!r->parent ||
			    !pnv_pci_is_mem_pref_64(r->flags))
				continue;

			base = (r->start - phb->ioda.m64_base) / sgsz;
			for (step = 0; step < resource_size(r) / sgsz; step++)
				pnv_ioda_reserve_pe(phb, base + step);
		}
	}
}

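/*
 * Pick the PE number for a bus that owns an M64 window: the M64
 * segments covered by the window map 1:1 to PE numbers. The first
 * covered PE becomes the master and the remaining ones are linked
 * to it as slaves, forming a compound PE.
 */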
static int pnv_ioda2_pick_m64_pe(struct pnv_phb *phb,
				 struct pci_bus *bus, int all)
{
	resource_size_t segsz = phb->ioda.m64_segsize;
	struct pci_dev *pdev;
	struct resource *r;
	struct pnv_ioda_pe *master_pe, *pe;
	unsigned long size, *pe_alloc;
	bool found;
	int start, i, j;

	/* Root bus shouldn't use M64 */
	if (pci_is_root_bus(bus))
		return IODA_INVALID_PE;

	/* We support only one M64 window on each bus */
	found = false;
	pci_bus_for_each_resource(bus, r, i) {
		if (r && r->parent &&
		    pnv_pci_is_mem_pref_64(r->flags)) {
			found = true;
			break;
		}
	}

	/* No M64 window found ? */
	if (!found)
		return IODA_INVALID_PE;

	/* Allocate bitmap */
	size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
	pe_alloc = kzalloc(size, GFP_KERNEL);
	if (!pe_alloc) {
		pr_warn("%s: Out of memory !\n",
			__func__);
		return IODA_INVALID_PE;
	}

	/*
	 * Mark the PE numbers covered by the bus's own M64 window;
	 * these are the candidates for the compound PE.
	 */
	start = (r->start - phb->ioda.m64_base) / segsz;
	for (i = 0; i < resource_size(r) / segsz; i++)
		set_bit(start + i, pe_alloc);

	if (all)
		goto done;

	/*
	 * If the PE doesn't cover all subordinate buses, subtract
	 * the segments owned by the children from the candidate set.
	 */
	list_for_each_entry(pdev, &bus->devices, bus_list) {
		if (!pdev->subordinate)
			continue;

		pci_bus_for_each_resource(pdev->subordinate, r, i) {
			if (!r || !r->parent ||
			    !pnv_pci_is_mem_pref_64(r->flags))
				continue;

			start = (r->start - phb->ioda.m64_base) / segsz;
			for (j = 0; j < resource_size(r) / segsz ; j++)
				clear_bit(start + j, pe_alloc);
		}
	}

	/*
	 * The current bus might not own any M64 window itself; it may
	 * all be contributed by its child buses. In that case there is
	 * no M64-dependent PE# to pick.
	 */
	if (bitmap_empty(pe_alloc, phb->ioda.total_pe)) {
		kfree(pe_alloc);
		return IODA_INVALID_PE;
	}

	/*
	 * Figure out the master PE and put all slave PEs on the master
	 * PE's list to form a compound PE.
	 */
done:
	master_pe = NULL;
	i = -1;
	while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe, i + 1)) <
		phb->ioda.total_pe) {
		pe = &phb->ioda.pe_array[i];

		if (!master_pe) {
			pe->flags |= PNV_IODA_PE_MASTER;
			INIT_LIST_HEAD(&pe->slaves);
			master_pe = pe;
		} else {
			pe->flags |= PNV_IODA_PE_SLAVE;
			pe->master = master_pe;
			list_add_tail(&pe->list, &master_pe->slaves);
		}
	}

	kfree(pe_alloc);
	return master_pe->pe_number;
}

static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
{
	struct pci_controller *hose = phb->hose;
	struct device_node *dn = hose->dn;
	struct resource *res;
	const u32 *r;
	u64 pci_addr;

	/* FIXME: Support M64 for P7IOC */
	if (phb->type != PNV_PHB_IODA2) {
		pr_info(" M64 window not supported\n");
		return;
	}

	if (!firmware_has_feature(FW_FEATURE_OPALv3)) {
		pr_info(" Firmware too old to support M64 window\n");
		return;
	}

	r = of_get_property(dn, "ibm,opal-m64-window", NULL);
	if (!r) {
		pr_info(" No <ibm,opal-m64-window> on %s\n",
			dn->full_name);
		return;
	}

	res = &hose->mem_resources[1];
	res->start = of_translate_address(dn, r + 2);
	res->end = res->start + of_read_number(r + 4, 2) - 1;
	res->flags = (IORESOURCE_MEM | IORESOURCE_MEM_64 | IORESOURCE_PREFETCH);
	pci_addr = of_read_number(r, 2);
	hose->mem_offset[1] = res->start - pci_addr;

	phb->ioda.m64_size = resource_size(res);
	phb->ioda.m64_segsize = phb->ioda.m64_size / phb->ioda.total_pe;
	phb->ioda.m64_base = pci_addr;

	/* Use last M64 BAR to cover M64 window */
	phb->ioda.m64_bar_idx = 15;
	phb->init_m64 = pnv_ioda2_init_m64;
	phb->reserve_m64_pe = pnv_ioda2_reserve_m64_pe;
	phb->pick_m64_pe = pnv_ioda2_pick_m64_pe;
}

static void pnv_ioda_freeze_pe(struct pnv_phb *phb, int pe_no)
{
	struct pnv_ioda_pe *pe = &phb->ioda.pe_array[pe_no];
	struct pnv_ioda_pe *slave;
	s64 rc;

	/* Fetch master PE */
	if (pe->flags & PNV_IODA_PE_SLAVE) {
		pe = pe->master;
		if (WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER)))
			return;

		pe_no = pe->pe_number;
	}

	/* Freeze master PE */
	rc = opal_pci_eeh_freeze_set(phb->opal_id,
				     pe_no,
				     OPAL_EEH_ACTION_SET_FREEZE_ALL);
	if (rc != OPAL_SUCCESS) {
		pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n",
			__func__, rc, phb->hose->global_number, pe_no);
		return;
	}

	/* Freeze slave PEs */
	if (!(pe->flags & PNV_IODA_PE_MASTER))
		return;

	list_for_each_entry(slave, &pe->slaves, list) {
		rc = opal_pci_eeh_freeze_set(phb->opal_id,
					     slave->pe_number,
					     OPAL_EEH_ACTION_SET_FREEZE_ALL);
		if (rc != OPAL_SUCCESS)
			pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n",
				__func__, rc, phb->hose->global_number,
				slave->pe_number);
	}
}

static int pnv_ioda_unfreeze_pe(struct pnv_phb *phb, int pe_no, int opt)
{
	struct pnv_ioda_pe *pe, *slave;
	s64 rc;

	/* Find master PE */
	pe = &phb->ioda.pe_array[pe_no];
	if (pe->flags & PNV_IODA_PE_SLAVE) {
		pe = pe->master;
		WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER));
		pe_no = pe->pe_number;
	}

	/* Clear frozen state for master PE */
	rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, opt);
	if (rc != OPAL_SUCCESS) {
		pr_warn("%s: Failure %lld clear %d on PHB#%x-PE#%x\n",
			__func__, rc, opt, phb->hose->global_number, pe_no);
		return -EIO;
	}

	if (!(pe->flags & PNV_IODA_PE_MASTER))
		return 0;

	/* Clear frozen state for slave PEs */
	list_for_each_entry(slave, &pe->slaves, list) {
		rc = opal_pci_eeh_freeze_clear(phb->opal_id,
					       slave->pe_number,
					       opt);
		if (rc != OPAL_SUCCESS) {
			pr_warn("%s: Failure %lld clear %d on PHB#%x-PE#%x\n",
				__func__, rc, opt, phb->hose->global_number,
				slave->pe_number);
			return -EIO;
		}
	}

	return 0;
}

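/*
 * Return the EEH (frozen) state of a PE. For compound PEs the master
 * and all slaves are queried and the most severe state wins.
 */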
static int pnv_ioda_get_pe_state(struct pnv_phb *phb, int pe_no)
{
	struct pnv_ioda_pe *slave, *pe;
	u8 fstate, state;
	__be16 pcierr;
	s64 rc;

	/* Sanity check on PE number */
	if (pe_no < 0 || pe_no >= phb->ioda.total_pe)
		return OPAL_EEH_STOPPED_PERM_UNAVAIL;

	/*
	 * Fetch the master PE; note that the PE instance
	 * might not be initialized yet.
	 */
	pe = &phb->ioda.pe_array[pe_no];
	if (pe->flags & PNV_IODA_PE_SLAVE) {
		pe = pe->master;
		WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER));
		pe_no = pe->pe_number;
	}

	/* Check the master PE */
	rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no,
					&state, &pcierr, NULL);
	if (rc != OPAL_SUCCESS) {
		pr_warn("%s: Failure %lld getting "
			"PHB#%x-PE#%x state\n",
			__func__, rc,
			phb->hose->global_number, pe_no);
		return OPAL_EEH_STOPPED_TEMP_UNAVAIL;
	}

	/* Check the slave PE */
	if (!(pe->flags & PNV_IODA_PE_MASTER))
		return state;

	list_for_each_entry(slave, &pe->slaves, list) {
		rc = opal_pci_eeh_freeze_status(phb->opal_id,
						slave->pe_number,
						&fstate,
						&pcierr,
						NULL);
		if (rc != OPAL_SUCCESS) {
			pr_warn("%s: Failure %lld getting "
				"PHB#%x-PE#%x state\n",
				__func__, rc,
				phb->hose->global_number, slave->pe_number);
			return OPAL_EEH_STOPPED_TEMP_UNAVAIL;
		}

		/*
		 * Override the result based on the ascending
		 * priority.
		 */
		if (fstate > state)
			state = fstate;
	}

	return state;
}

/* Currently those 2 are only used when MSIs are enabled, this will change
 * but in the meantime, we need to protect them to avoid warnings
 */
#ifdef CONFIG_PCI_MSI
static struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct pci_dn *pdn = pci_get_pdn(dev);

	if (!pdn)
		return NULL;
	if (pdn->pe_number == IODA_INVALID_PE)
		return NULL;
	return &phb->ioda.pe_array[pdn->pe_number];
}
#endif /* CONFIG_PCI_MSI */

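/*
 * Add or remove a single parent->child entry in the PELT-V. For a
 * compound child PE the entry is propagated to all of its slaves.
 */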
static int pnv_ioda_set_one_peltv(struct pnv_phb *phb,
				  struct pnv_ioda_pe *parent,
				  struct pnv_ioda_pe *child,
				  bool is_add)
{
	const char *desc = is_add ? "adding" : "removing";
	uint8_t op = is_add ? OPAL_ADD_PE_TO_DOMAIN :
			      OPAL_REMOVE_PE_FROM_DOMAIN;
	struct pnv_ioda_pe *slave;
	long rc;

	/* Parent PE affects child PE */
	rc = opal_pci_set_peltv(phb->opal_id, parent->pe_number,
				child->pe_number, op);
	if (rc != OPAL_SUCCESS) {
		pe_warn(child, "OPAL error %ld %s to parent PELTV\n",
			rc, desc);
		return -ENXIO;
	}

	if (!(child->flags & PNV_IODA_PE_MASTER))
		return 0;

	/* Compound case: parent PE affects slave PEs */
	list_for_each_entry(slave, &child->slaves, list) {
		rc = opal_pci_set_peltv(phb->opal_id, parent->pe_number,
					slave->pe_number, op);
		if (rc != OPAL_SUCCESS) {
			pe_warn(slave, "OPAL error %ld %s to parent PELTV\n",
				rc, desc);
			return -ENXIO;
		}
	}

	return 0;
}

static int pnv_ioda_set_peltv(struct pnv_phb *phb,
			      struct pnv_ioda_pe *pe,
			      bool is_add)
{
	struct pnv_ioda_pe *slave;
	struct pci_dev *pdev;
	int ret;

	/*
	 * Clear the PE frozen state. If it's a master PE, we need
	 * to clear the slave PEs' frozen state as well.
	 */
	if (is_add) {
		opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
					  OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
		if (pe->flags & PNV_IODA_PE_MASTER) {
			list_for_each_entry(slave, &pe->slaves, list)
				opal_pci_eeh_freeze_clear(phb->opal_id,
							  slave->pe_number,
							  OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
		}
	}

	/*
	 * Associate the PE in the PELT. We also need to add the PE
	 * to the corresponding PELT-V; otherwise an error originating
	 * from the PE might propagate to other PEs.
	 */
	ret = pnv_ioda_set_one_peltv(phb, pe, pe, is_add);
	if (ret)
		return ret;

	/* For compound PEs, any one affects all of them */
	if (pe->flags & PNV_IODA_PE_MASTER) {
		list_for_each_entry(slave, &pe->slaves, list) {
			ret = pnv_ioda_set_one_peltv(phb, slave, pe, is_add);
			if (ret)
				return ret;
		}
	}

	if (pe->flags & (PNV_IODA_PE_BUS_ALL | PNV_IODA_PE_BUS))
		pdev = pe->pbus->self;
	else
		pdev = pe->pdev->bus->self;
	while (pdev) {
		struct pci_dn *pdn = pci_get_pdn(pdev);
		struct pnv_ioda_pe *parent;

		if (pdn && pdn->pe_number != IODA_INVALID_PE) {
			parent = &phb->ioda.pe_array[pdn->pe_number];
			ret = pnv_ioda_set_one_peltv(phb, parent, pe, is_add);
			if (ret)
				return ret;
		}

		pdev = pdev->bus->self;
	}

	return 0;
}

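/*
 * Map the PE's RID range through the PELT, hook up its PELT-V and the
 * RID-to-PE reverse map, and (on IODA1) allocate and enable an MVE for
 * MSI routing.
 */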
static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
{
	struct pci_dev *parent;
	uint8_t bcomp, dcomp, fcomp;
	long rc, rid_end, rid;

	/* Bus validation ? */
	if (pe->pbus) {
		int count;

		dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER;
		fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
		parent = pe->pbus->self;
		if (pe->flags & PNV_IODA_PE_BUS_ALL)
			count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1;
		else
			count = 1;

		switch (count) {
		case  1: bcomp = OpalPciBusAll;		break;
		case  2: bcomp = OpalPciBus7Bits;	break;
		case  4: bcomp = OpalPciBus6Bits;	break;
		case  8: bcomp = OpalPciBus5Bits;	break;
		case 16: bcomp = OpalPciBus4Bits;	break;
		case 32: bcomp = OpalPciBus3Bits;	break;
		default:
			pr_err("%s: Number of subordinate busses %d"
			       " unsupported\n",
			       pci_name(pe->pbus->self), count);
			/* Do an exact match only */
			bcomp = OpalPciBusAll;
		}
		rid_end = pe->rid + (count << 8);
	} else {
		parent = pe->pdev->bus->self;
		bcomp = OpalPciBusAll;
		dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
		fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
		rid_end = pe->rid + 1;
	}

	/*
	 * Associate the PE in the PELT. We also need to add the PE
	 * to the corresponding PELT-V; otherwise an error originating
	 * from the PE might propagate to other PEs.
	 */
	rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
			     bcomp, dcomp, fcomp, OPAL_MAP_PE);
	if (rc) {
		pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc);
		return -ENXIO;
	}

	/* Configure PELTV */
	pnv_ioda_set_peltv(phb, pe, true);

	/* Setup reverse map */
	for (rid = pe->rid; rid < rid_end; rid++)
		phb->ioda.pe_rmap[rid] = pe->pe_number;

	/* Setup one MVE on IODA1 */
	if (phb->type != PNV_PHB_IODA1) {
		pe->mve_number = 0;
		goto out;
	}

	pe->mve_number = pe->pe_number;
	rc = opal_pci_set_mve(phb->opal_id, pe->mve_number, pe->pe_number);
	if (rc != OPAL_SUCCESS) {
		pe_err(pe, "OPAL error %ld setting up MVE %d\n",
		       rc, pe->mve_number);
		pe->mve_number = -1;
	} else {
		rc = opal_pci_set_mve_enable(phb->opal_id,
					     pe->mve_number, OPAL_ENABLE_MVE);
		if (rc) {
			pe_err(pe, "OPAL error %ld enabling MVE %d\n",
			       rc, pe->mve_number);
			pe->mve_number = -1;
		}
	}

out:
	return 0;
}

static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb,
				       struct pnv_ioda_pe *pe)
{
	struct pnv_ioda_pe *lpe;

	list_for_each_entry(lpe, &phb->ioda.pe_dma_list, dma_link) {
		if (lpe->dma_weight < pe->dma_weight) {
			list_add_tail(&pe->dma_link, &lpe->dma_link);
			return;
		}
	}
	list_add_tail(&pe->dma_link, &phb->ioda.pe_dma_list);
}

static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
{
	/* This is quite simplistic. The "base" weight of a device
	 * is 10. 0 means no DMA is to be accounted for it.
	 */

	/* If it's a bridge, no DMA */
	if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
		return 0;

	/* Reduce the weight of slow USB controllers */
	if (dev->class == PCI_CLASS_SERIAL_USB_UHCI ||
	    dev->class == PCI_CLASS_SERIAL_USB_OHCI ||
	    dev->class == PCI_CLASS_SERIAL_USB_EHCI)
		return 3;

	/* Increase the weight of RAID (includes Obsidian) */
	if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID)
		return 15;

	/* Default */
	return 10;
}

#if 0
static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct pci_dn *pdn = pci_get_pdn(dev);
	struct pnv_ioda_pe *pe;
	int pe_num;

	if (!pdn) {
		pr_err("%s: Device tree node not associated properly\n",
		       pci_name(dev));
		return NULL;
	}
	if (pdn->pe_number != IODA_INVALID_PE)
		return NULL;

	/* PE#0 has been pre-set */
	if (dev->bus->number == 0)
		pe_num = 0;
	else
		pe_num = pnv_ioda_alloc_pe(phb);
	if (pe_num == IODA_INVALID_PE) {
		pr_warning("%s: Not enough PE# available, disabling device\n",
			   pci_name(dev));
		return NULL;
	}

	/* NOTE: We get only one ref to the pci_dev for the pdn, not for the
	 * pointer in the PE data structure, both should be destroyed at the
	 * same time. However, this needs to be looked at more closely again
	 * once we actually start removing things (Hotplug, SR-IOV, ...)
	 *
	 * At some point we want to remove the PDN completely anyways
	 */
	pe = &phb->ioda.pe_array[pe_num];
	pci_dev_get(dev);
	pdn->pcidev = dev;
	pdn->pe_number = pe_num;
	pe->pdev = dev;
	pe->pbus = NULL;
	pe->tce32_seg = -1;
	pe->mve_number = -1;
	pe->rid = dev->bus->number << 8 | pdn->devfn;

	pe_info(pe, "Associated device to PE\n");

	if (pnv_ioda_configure_pe(phb, pe)) {
		/* XXX What do we do here ? */
		if (pe_num)
			pnv_ioda_free_pe(phb, pe_num);
		pdn->pe_number = IODA_INVALID_PE;
		pe->pdev = NULL;
		pci_dev_put(dev);
		return NULL;
	}

	/* Assign a DMA weight to the device */
	pe->dma_weight = pnv_ioda_dma_weight(dev);
	if (pe->dma_weight != 0) {
		phb->ioda.dma_weight += pe->dma_weight;
		phb->ioda.dma_pe_count++;
	}

	/* Link the PE */
	pnv_ioda_link_pe_by_weight(phb, pe);

	return pe;
}
#endif /* Useful for SRIOV case */

static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		struct pci_dn *pdn = pci_get_pdn(dev);

		if (pdn == NULL) {
			pr_warn("%s: No device node associated with device !\n",
				pci_name(dev));
			continue;
		}
		pdn->pcidev = dev;
		pdn->pe_number = pe->pe_number;
		pe->dma_weight += pnv_ioda_dma_weight(dev);
		if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
			pnv_ioda_setup_same_PE(dev->subordinate, pe);
	}
}

/*
 * There are 2 types of PCI-bus-sensitive PEs: one comprises a single
 * PCI bus; the other contains the primary PCI bus together with its
 * subordinate PCI devices and buses. The second type is normally
 * created for PCIe-to-PCI bridges or PLX switch downstream ports.
 */
static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
{
	struct pci_controller *hose = pci_bus_to_host(bus);
	struct pnv_phb *phb = hose->private_data;
	struct pnv_ioda_pe *pe;
	int pe_num = IODA_INVALID_PE;

	/* Check if PE is determined by M64 */
	if (phb->pick_m64_pe)
		pe_num = phb->pick_m64_pe(phb, bus, all);

	/* The PE number isn't pinned by M64 */
	if (pe_num == IODA_INVALID_PE)
		pe_num = pnv_ioda_alloc_pe(phb);

	if (pe_num == IODA_INVALID_PE) {
		pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n",
			   __func__, pci_domain_nr(bus), bus->number);
		return;
	}

	pe = &phb->ioda.pe_array[pe_num];
	pe->flags |= (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
	pe->pbus = bus;
	pe->pdev = NULL;
	pe->tce32_seg = -1;
	pe->mve_number = -1;
	pe->rid = bus->busn_res.start << 8;
	pe->dma_weight = 0;

	if (all)
		pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n",
			bus->busn_res.start, bus->busn_res.end, pe_num);
	else
		pe_info(pe, "Secondary bus %d associated with PE#%d\n",
			bus->busn_res.start, pe_num);

	if (pnv_ioda_configure_pe(phb, pe)) {
		/* XXX What do we do here ? */
		if (pe_num)
			pnv_ioda_free_pe(phb, pe_num);
		pe->pbus = NULL;
		return;
	}

	/* Associate it with all child devices */
	pnv_ioda_setup_same_PE(bus, pe);

	/* Put PE to the list */
	list_add_tail(&pe->list, &phb->ioda.pe_list);

	/* Account for one DMA PE if at least one DMA capable device exists
	 * below the bridge
	 */
	if (pe->dma_weight != 0) {
		phb->ioda.dma_weight += pe->dma_weight;
		phb->ioda.dma_pe_count++;
	}

	/* Link the PE */
	pnv_ioda_link_pe_by_weight(phb, pe);
}

static void pnv_ioda_setup_PEs(struct pci_bus *bus)
{
	struct pci_dev *dev;

	pnv_ioda_setup_bus_PE(bus, 0);

	list_for_each_entry(dev, &bus->devices, bus_list) {
		if (dev->subordinate) {
			if (pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE)
				pnv_ioda_setup_bus_PE(dev->subordinate, 1);
			else
				pnv_ioda_setup_PEs(dev->subordinate);
		}
	}
}

/*
 * Configure PEs so that the downstream PCI buses and devices
 * could have their associated PE#. Unfortunately, we haven't
 * figured out a way to identify PLX bridges yet, so we simply
 * put the PCI bus and everything subordinate to the root port
 * into one PE for now. This is expected to change as soon as
 * PLX bridges can be detected correctly.
 */
static void pnv_pci_ioda_setup_PEs(void)
{
	struct pci_controller *hose, *tmp;
	struct pnv_phb *phb;

	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		phb = hose->private_data;

		/* M64 layout might affect PE allocation */
		if (phb->reserve_m64_pe)
			phb->reserve_m64_pe(phb);

		pnv_ioda_setup_PEs(hose->bus);
	}
}

static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev)
{
	struct pci_dn *pdn = pci_get_pdn(pdev);
	struct pnv_ioda_pe *pe;

	/*
	 * The function can be called while the PE#
	 * hasn't been assigned. Do nothing in that
	 * case.
	 */
	if (!pdn || pdn->pe_number == IODA_INVALID_PE)
		return;

	pe = &phb->ioda.pe_array[pdn->pe_number];
	WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
	set_iommu_table_base_and_group(&pdev->dev, &pe->tce32_table);
}

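/*
 * Select the DMA configuration for a device: use the 64-bit bypass
 * window when the requested mask covers all of system memory,
 * otherwise fall back to the 32-bit TCE table.
 */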
static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
				     struct pci_dev *pdev, u64 dma_mask)
{
	struct pci_dn *pdn = pci_get_pdn(pdev);
	struct pnv_ioda_pe *pe;
	uint64_t top;
	bool bypass = false;

	if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
		return -ENODEV;

	pe = &phb->ioda.pe_array[pdn->pe_number];
	if (pe->tce_bypass_enabled) {
		top = pe->tce_bypass_base + memblock_end_of_DRAM() - 1;
		bypass = (dma_mask >= top);
	}

	if (bypass) {
		dev_info(&pdev->dev, "Using 64-bit DMA iommu bypass\n");
		set_dma_ops(&pdev->dev, &dma_direct_ops);
		set_dma_offset(&pdev->dev, pe->tce_bypass_base);
	} else {
		dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
		set_dma_ops(&pdev->dev, &dma_iommu_ops);
		set_iommu_table_base(&pdev->dev, &pe->tce32_table);
	}
	*pdev->dev.dma_mask = dma_mask;
	return 0;
}

static u64 pnv_pci_ioda_dma_get_required_mask(struct pnv_phb *phb,
					      struct pci_dev *pdev)
{
	struct pci_dn *pdn = pci_get_pdn(pdev);
	struct pnv_ioda_pe *pe;
	u64 end, mask;

	if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
		return 0;

	pe = &phb->ioda.pe_array[pdn->pe_number];
	if (!pe->tce_bypass_enabled)
		return __dma_get_required_mask(&pdev->dev);

	end = pe->tce_bypass_base + memblock_end_of_DRAM();
	mask = 1ULL << (fls64(end) - 1);
	mask += mask - 1;

	return mask;
}

static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe,
				   struct pci_bus *bus,
				   bool add_to_iommu_group)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		if (add_to_iommu_group)
			set_iommu_table_base_and_group(&dev->dev,
						       &pe->tce32_table);
		else
			set_iommu_table_base(&dev->dev, &pe->tce32_table);

		if (dev->subordinate)
			pnv_ioda_setup_bus_dma(pe, dev->subordinate,
					       add_to_iommu_group);
	}
}

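/*
 * IODA1 (P7IOC-style) TCE invalidation: walk the affected TCE range
 * and poke the invalidate register, either through MMIO or through a
 * real-mode cache-inhibited store when invoked in real mode.
 */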
static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe,
					 struct iommu_table *tbl,
					 __be64 *startp, __be64 *endp, bool rm)
{
	__be64 __iomem *invalidate = rm ?
		(__be64 __iomem *)pe->tce_inval_reg_phys :
		(__be64 __iomem *)tbl->it_index;
	unsigned long start, end, inc;
	const unsigned shift = tbl->it_page_shift;

	start = __pa(startp);
	end = __pa(endp);

	/* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */
	if (tbl->it_busno) {
		start <<= shift;
		end <<= shift;
		inc = 128ull << shift;
		start |= tbl->it_busno;
		end |= tbl->it_busno;
	} else if (tbl->it_type & TCE_PCI_SWINV_PAIR) {
		/* p7ioc-style invalidation, 2 TCEs per write */
		start |= (1ull << 63);
		end |= (1ull << 63);
		inc = 16;
	} else {
		/* Default (older HW) */
		inc = 128;
	}

	end |= inc - 1;	/* round up end to be different than start */

	mb(); /* Ensure above stores are visible */
	while (start <= end) {
		if (rm)
			__raw_rm_writeq(cpu_to_be64(start), invalidate);
		else
			__raw_writeq(cpu_to_be64(start), invalidate);
		start += inc;
	}

	/*
	 * The iommu layer will do another mb() for us on build()
	 * and we don't care on free()
	 */
}

static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
					 struct iommu_table *tbl,
					 __be64 *startp, __be64 *endp, bool rm)
{
	unsigned long start, end, inc;
	__be64 __iomem *invalidate = rm ?
		(__be64 __iomem *)pe->tce_inval_reg_phys :
		(__be64 __iomem *)tbl->it_index;
	const unsigned shift = tbl->it_page_shift;

	/* We'll invalidate DMA addresses in PE scope */
	start = 0x2ull << 60;
	start |= (pe->pe_number & 0xFF);
	end = start;

	/* Figure out the start, end and step */
	inc = tbl->it_offset + (((u64)startp - tbl->it_base) / sizeof(u64));
	start |= (inc << shift);
	inc = tbl->it_offset + (((u64)endp - tbl->it_base) / sizeof(u64));
	end |= (inc << shift);
	inc = (0x1ull << shift);
	mb();

	while (start <= end) {
		if (rm)
			__raw_rm_writeq(cpu_to_be64(start), invalidate);
		else
			__raw_writeq(cpu_to_be64(start), invalidate);
		start += inc;
	}
}

void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
				 __be64 *startp, __be64 *endp, bool rm)
{
	struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
					      tce32_table);
	struct pnv_phb *phb = pe->phb;

	if (phb->type == PNV_PHB_IODA1)
		pnv_pci_ioda1_tce_invalidate(pe, tbl, startp, endp, rm);
	else
		pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp, rm);
}

static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
				      struct pnv_ioda_pe *pe, unsigned int base,
				      unsigned int segs)
{

	struct page *tce_mem = NULL;
	const __be64 *swinvp;
	struct iommu_table *tbl;
	unsigned int i;
	int64_t rc;
	void *addr;

	/* 256M DMA window, 4K TCE pages, 8 bytes TCE */
#define TCE32_TABLE_SIZE	((0x10000000 / 0x1000) * 8)

	/* XXX FIXME: Handle 64-bit only DMA devices */
	/* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */
	/* XXX FIXME: Allocate multi-level tables on PHB3 */

	/* We shouldn't already have a 32-bit DMA associated */
	if (WARN_ON(pe->tce32_seg >= 0))
		return;

	/* Grab a 32-bit TCE table */
	pe->tce32_seg = base;
	pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n",
		(base << 28), ((base + segs) << 28) - 1);

	/* XXX Currently, we allocate one big contiguous table for the
	 * TCEs. We only really need one chunk per 256M of TCE space
	 * (ie per segment) but that's an optimization for later, it
	 * requires some added smarts with our get/put_tce implementation
	 */
	tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
				   get_order(TCE32_TABLE_SIZE * segs));
	if (!tce_mem) {
		pe_err(pe, " Failed to allocate a 32-bit TCE memory\n");
		goto fail;
	}
	addr = page_address(tce_mem);
	memset(addr, 0, TCE32_TABLE_SIZE * segs);

	/* Configure HW */
	for (i = 0; i < segs; i++) {
		rc = opal_pci_map_pe_dma_window(phb->opal_id,
						pe->pe_number,
						base + i, 1,
						__pa(addr) + TCE32_TABLE_SIZE * i,
						TCE32_TABLE_SIZE, 0x1000);
		if (rc) {
			pe_err(pe, " Failed to configure 32-bit TCE table,"
			       " err %ld\n", rc);
			goto fail;
		}
	}

	/* Setup linux iommu table */
	tbl = &pe->tce32_table;
	pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs,
				  base << 28, IOMMU_PAGE_SHIFT_4K);

	/* OPAL variant of P7IOC SW invalidated TCEs */
	swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
	if (swinvp) {
		/* We need a couple more fields -- an address and a data
		 * to or. Since the bus is only printed out on table free
		 * errors, and on the first pass the data will be a relative
		 * bus number, print that out instead.
		 */
		pe->tce_inval_reg_phys = be64_to_cpup(swinvp);
		tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys,
				8);
		tbl->it_type |= (TCE_PCI_SWINV_CREATE |
				 TCE_PCI_SWINV_FREE |
				 TCE_PCI_SWINV_PAIR);
	}
	iommu_init_table(tbl, phb->hose->node);
	iommu_register_group(tbl, phb->hose->global_number, pe->pe_number);

	if (pe->pdev)
		set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
	else
		pnv_ioda_setup_bus_dma(pe, pe->pbus, true);

	return;
fail:
	/* XXX Failure: Try to fallback to 64-bit only ? */
	if (pe->tce32_seg >= 0)
		pe->tce32_seg = -1;
	if (tce_mem)
		__free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
}

static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
{
	struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
					      tce32_table);
	uint16_t window_id = (pe->pe_number << 1) + 1;
	int64_t rc;

	pe_info(pe, "%sabling 64-bit DMA bypass\n", enable ? "En" : "Dis");
	if (enable) {
		phys_addr_t top = memblock_end_of_DRAM();

		top = roundup_pow_of_two(top);
		rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id,
						     pe->pe_number,
						     window_id,
						     pe->tce_bypass_base,
						     top);
	} else {
		rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id,
						     pe->pe_number,
						     window_id,
						     pe->tce_bypass_base,
						     0);

		/*
		 * EEH needs the mapping between IOMMU table and group
		 * of those VFIO/KVM pass-through devices. We can postpone
		 * resetting DMA ops until the DMA mask is configured in
		 * host side.
		 */
		if (pe->pdev)
			set_iommu_table_base(&pe->pdev->dev, tbl);
		else
			pnv_ioda_setup_bus_dma(pe, pe->pbus, false);
	}
	if (rc)
		pe_err(pe, "OPAL error %lld configuring bypass window\n", rc);
	else
		pe->tce_bypass_enabled = enable;
}

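/*
 * Set up TVE#1 as a direct bypass window covering all of system memory
 * (selected by PCI address bit 59) and enable it by default.
 */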
static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb,
					  struct pnv_ioda_pe *pe)
{
	/* TVE #1 is selected by PCI address bit 59 */
	pe->tce_bypass_base = 1ull << 59;

	/* Install set_bypass callback for VFIO */
	pe->tce32_table.set_bypass = pnv_pci_ioda2_set_bypass;

	/* Enable bypass by default */
	pnv_pci_ioda2_set_bypass(&pe->tce32_table, true);
}

static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
				       struct pnv_ioda_pe *pe)
{
	struct page *tce_mem = NULL;
	void *addr;
	const __be64 *swinvp;
	struct iommu_table *tbl;
	unsigned int tce_table_size, end;
	int64_t rc;

	/* We shouldn't already have a 32-bit DMA associated */
	if (WARN_ON(pe->tce32_seg >= 0))
		return;

	/* The PE will reserve all possible 32-bits space */
	pe->tce32_seg = 0;
	end = (1 << ilog2(phb->ioda.m32_pci_base));
	tce_table_size = (end / 0x1000) * 8;
	pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n",
		end);

	/* Allocate TCE table */
	tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
				   get_order(tce_table_size));
	if (!tce_mem) {
		pe_err(pe, "Failed to allocate a 32-bit TCE memory\n");
		goto fail;
	}
	addr = page_address(tce_mem);
	memset(addr, 0, tce_table_size);

	/*
	 * Map TCE table through TVT. The TVE index is the PE number
	 * shifted by 1 bit for 32-bits DMA space.
	 */
	rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
					pe->pe_number << 1, 1, __pa(addr),
					tce_table_size, 0x1000);
	if (rc) {
		pe_err(pe, "Failed to configure 32-bit TCE table,"
		       " err %ld\n", rc);
		goto fail;
	}

	/* Setup linux iommu table */
	tbl = &pe->tce32_table;
	pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0,
				  IOMMU_PAGE_SHIFT_4K);

	/* OPAL variant of PHB3 invalidated TCEs */
	swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
	if (swinvp) {
		/* We need a couple more fields -- an address and a data
		 * to or. Since the bus is only printed out on table free
		 * errors, and on the first pass the data will be a relative
		 * bus number, print that out instead.
		 */
		pe->tce_inval_reg_phys = be64_to_cpup(swinvp);
		tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys,
				8);
		tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE);
	}
	iommu_init_table(tbl, phb->hose->node);
	iommu_register_group(tbl, phb->hose->global_number, pe->pe_number);

	if (pe->pdev)
		set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
	else
		pnv_ioda_setup_bus_dma(pe, pe->pbus, true);

	/* Also create a bypass window */
	pnv_pci_ioda2_setup_bypass_pe(phb, pe);
	return;
fail:
	if (pe->tce32_seg >= 0)
		pe->tce32_seg = -1;
	if (tce_mem)
		__free_pages(tce_mem, get_order(tce_table_size));
}

static void pnv_ioda_setup_dma(struct pnv_phb *phb)
{
	struct pci_controller *hose = phb->hose;
	unsigned int residual, remaining, segs, tw, base;
	struct pnv_ioda_pe *pe;

	/* If we have more PE# than segments available, hand out one
	 * per PE until we run out and let the rest fail. If not,
	 * then we assign at least one segment per PE, plus more based
	 * on the number of devices under that PE
	 */
	if (phb->ioda.dma_pe_count > phb->ioda.tce32_count)
		residual = 0;
	else
		residual = phb->ioda.tce32_count -
			phb->ioda.dma_pe_count;

	pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n",
		hose->global_number, phb->ioda.tce32_count);
	pr_info("PCI: %d PE# for a total weight of %d\n",
		phb->ioda.dma_pe_count, phb->ioda.dma_weight);

	/* Walk our PE list and configure their DMA segments, hand them
	 * out one base segment plus any residual segments based on
	 * weight
	 */
	remaining = phb->ioda.tce32_count;
	tw = phb->ioda.dma_weight;
	base = 0;
	list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) {
		if (!pe->dma_weight)
			continue;
		if (!remaining) {
			pe_warn(pe, "No DMA32 resources available\n");
			continue;
		}
		segs = 1;
		if (residual) {
			segs += ((pe->dma_weight * residual) + (tw / 2)) / tw;
			if (segs > remaining)
				segs = remaining;
		}

		/*
		 * For IODA2-compliant PHB3, we don't care about the weight:
		 * all of the available 32-bit DMA space is assigned to
		 * the PE.
		 */
		if (phb->type == PNV_PHB_IODA1) {
			pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n",
				pe->dma_weight, segs);
			pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
		} else {
			pe_info(pe, "Assign DMA32 space\n");
			segs = 0;
			pnv_pci_ioda2_setup_dma_pe(phb, pe);
		}

		remaining -= segs;
		base += segs;
	}
}

#ifdef CONFIG_PCI_MSI
static void pnv_ioda2_msi_eoi(struct irq_data *d)
{
	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
	struct irq_chip *chip = irq_data_get_irq_chip(d);
	struct pnv_phb *phb = container_of(chip, struct pnv_phb,
					   ioda.irq_chip);
	int64_t rc;

	rc = opal_pci_msi_eoi(phb->opal_id, hw_irq);
	WARN_ON_ONCE(rc);

	icp_native_eoi(d);
}


static void set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq)
{
	struct irq_data *idata;
	struct irq_chip *ichip;

	if (phb->type != PNV_PHB_IODA2)
		return;

	if (!phb->ioda.irq_chip_init) {
		/*
		 * The first time we set up an MSI IRQ, we need to set up
		 * the corresponding IRQ chip to route it correctly.
		 */
		idata = irq_get_irq_data(virq);
		ichip = irq_data_get_irq_chip(idata);
		phb->ioda.irq_chip_init = 1;
		phb->ioda.irq_chip = *ichip;
		phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi;
	}
	irq_set_chip(virq, &phb->ioda.irq_chip);
}

#ifdef CONFIG_CXL_BASE

struct device_node *pnv_pci_to_phb_node(struct pci_dev *dev)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);

	return hose->dn;
}
EXPORT_SYMBOL(pnv_pci_to_phb_node);

int pnv_phb_to_cxl(struct pci_dev *dev)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct pnv_ioda_pe *pe;
	int rc;

	pe = pnv_ioda_get_pe(dev);
	if (!pe)
		return -ENODEV;

	pe_info(pe, "Switching PHB to CXL\n");

	rc = opal_pci_set_phb_cxl_mode(phb->opal_id, 1, pe->pe_number);
	if (rc)
		dev_err(&dev->dev, "opal_pci_set_phb_cxl_mode failed: %i\n", rc);

	return rc;
}
EXPORT_SYMBOL(pnv_phb_to_cxl);

/* Find the PHB for the cxl dev and allocate MSI hwirqs.
 * Returns the absolute hardware IRQ number
 */
int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	int hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, num);

	if (hwirq < 0) {
		dev_warn(&dev->dev, "Failed to find a free MSI\n");
		return -ENOSPC;
	}

	return phb->msi_base + hwirq;
}
EXPORT_SYMBOL(pnv_cxl_alloc_hwirqs);

void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;

	msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq - phb->msi_base, num);
}
EXPORT_SYMBOL(pnv_cxl_release_hwirqs);

void pnv_cxl_release_hwirq_ranges(struct cxl_irq_ranges *irqs,
				  struct pci_dev *dev)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	int i, hwirq;

	for (i = 1; i < CXL_IRQ_RANGES; i++) {
		if (!irqs->range[i])
			continue;
		pr_devel("cxl release irq range 0x%x: offset: 0x%lx limit: %ld\n",
			 i, irqs->offset[i],
			 irqs->range[i]);
		hwirq = irqs->offset[i] - phb->msi_base;
		msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq,
				       irqs->range[i]);
	}
}
EXPORT_SYMBOL(pnv_cxl_release_hwirq_ranges);

int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs,
			       struct pci_dev *dev, int num)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	int i, hwirq, try;

	memset(irqs, 0, sizeof(struct cxl_irq_ranges));

	/* 0 is reserved for the multiplexed PSL DSI interrupt */
	for (i = 1; i < CXL_IRQ_RANGES && num; i++) {
		try = num;
		while (try) {
			hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, try);
			if (hwirq >= 0)
				break;
			try /= 2;
		}
		if (!try)
			goto fail;

		irqs->offset[i] = phb->msi_base + hwirq;
		irqs->range[i] = try;
		pr_devel("cxl alloc irq range 0x%x: offset: 0x%lx limit: %li\n",
			 i, irqs->offset[i], irqs->range[i]);
		num -= try;
	}
	if (num)
		goto fail;

	return 0;
fail:
	pnv_cxl_release_hwirq_ranges(irqs, dev);
	return -ENOSPC;
}
EXPORT_SYMBOL(pnv_cxl_alloc_hwirq_ranges);

int pnv_cxl_get_irq_count(struct pci_dev *dev)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;

	return phb->msi_bmp.irq_count;
}
EXPORT_SYMBOL(pnv_cxl_get_irq_count);

int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq,
			   unsigned int virq)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	unsigned int xive_num = hwirq - phb->msi_base;
	struct pnv_ioda_pe *pe;
	int rc;

	if (!(pe = pnv_ioda_get_pe(dev)))
		return -ENODEV;

	/* Assign XIVE to PE */
	rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num);
	if (rc) {
		pe_warn(pe, "%s: OPAL error %d setting msi_base 0x%x "
			"hwirq 0x%x XIVE 0x%x PE\n",
			pci_name(dev), rc, phb->msi_base, hwirq, xive_num);
		return -EIO;
	}
	set_msi_irq_chip(phb, virq);

	return 0;
}
EXPORT_SYMBOL(pnv_cxl_ioda_msi_setup);
#endif

static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
				  unsigned int hwirq, unsigned int virq,
				  unsigned int is_64, struct msi_msg *msg)
{
	struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev);
	unsigned int xive_num = hwirq - phb->msi_base;
	__be32 data;
	int rc;

	/* No PE assigned ? bail out ... no MSI for you ! */
	if (pe == NULL)
		return -ENXIO;

	/* Check if we have an MVE */
	if (pe->mve_number < 0)
		return -ENXIO;

	/* Force 32-bit MSI on some broken devices */
	if (dev->no_64bit_msi)
		is_64 = 0;

	/* Assign XIVE to PE */
	rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num);
	if (rc) {
		pr_warn("%s: OPAL error %d setting XIVE %d PE\n",
			pci_name(dev), rc, xive_num);
		return -EIO;
	}

	if (is_64) {
		__be64 addr64;

		rc = opal_get_msi_64(phb->opal_id, pe->mve_number, xive_num, 1,
				     &addr64, &data);
		if (rc) {
			pr_warn("%s: OPAL error %d getting 64-bit MSI data\n",
				pci_name(dev), rc);
			return -EIO;
		}
		msg->address_hi = be64_to_cpu(addr64) >> 32;
		msg->address_lo = be64_to_cpu(addr64) & 0xfffffffful;
	} else {
		__be32 addr32;

		rc = opal_get_msi_32(phb->opal_id, pe->mve_number, xive_num, 1,
				     &addr32, &data);
		if (rc) {
			pr_warn("%s: OPAL error %d getting 32-bit MSI data\n",
				pci_name(dev), rc);
			return -EIO;
		}
		msg->address_hi = 0;
		msg->address_lo = be32_to_cpu(addr32);
	}
	msg->data = be32_to_cpu(data);

	set_msi_irq_chip(phb, virq);

	pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d),"
		 " address=%x_%08x data=%x PE# %d\n",
		 pci_name(dev), is_64 ? "64" : "32", hwirq, xive_num,
		 msg->address_hi, msg->address_lo, data, pe->pe_number);

	return 0;
}

static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
{
	unsigned int count;
	const __be32 *prop = of_get_property(phb->hose->dn,
					     "ibm,opal-msi-ranges", NULL);
	if (!prop) {
		/* BML Fallback */
		prop = of_get_property(phb->hose->dn, "msi-ranges", NULL);
	}
	if (!prop)
		return;

	phb->msi_base = be32_to_cpup(prop);
	count = be32_to_cpup(prop + 1);
	if (msi_bitmap_alloc(&phb->msi_bmp, count, phb->hose->dn)) {
		pr_err("PCI %d: Failed to allocate MSI bitmap !\n",
		       phb->hose->global_number);
		return;
	}

	phb->msi_setup = pnv_pci_ioda_msi_setup;
	phb->msi32_support = 1;
	pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
		count, phb->msi_base);
}
#else
static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { }
#endif /* CONFIG_PCI_MSI */

/*
 * This function is supposed to be called on a per-PE basis, from top
 * to bottom, so that the I/O or MMIO segments assigned to a parent PE
 * can be overridden by its child PEs if necessary.
 */
static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
				  struct pnv_ioda_pe *pe)
{
	struct pnv_phb *phb = hose->private_data;
	struct pci_bus_region region;
	struct resource *res;
	int i, index;
	int rc;

	/*
	 * NOTE: We only care about PCI-bus-based PEs for now.
	 * PCI-device-based PEs, for example SR-IOV VFs, will be
	 * handled later.
	 */
	BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));

	pci_bus_for_each_resource(pe->pbus, res, i) {
		if (!res || !res->flags ||
		    res->start > res->end)
			continue;

		if (res->flags & IORESOURCE_IO) {
			region.start = res->start - phb->ioda.io_pci_base;
			region.end = res->end - phb->ioda.io_pci_base;
			index = region.start / phb->ioda.io_segsize;

			while (index < phb->ioda.total_pe &&
			       region.start <= region.end) {
				phb->ioda.io_segmap[index] = pe->pe_number;
				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
					pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index);
				if (rc != OPAL_SUCCESS) {
					pr_err("%s: OPAL error %d when mapping IO "
					       "segment #%d to PE#%d\n",
					       __func__, rc, index, pe->pe_number);
					break;
				}

				region.start += phb->ioda.io_segsize;
				index++;
			}
		} else if (res->flags & IORESOURCE_MEM) {
			region.start = res->start -
				       hose->mem_offset[0] -
				       phb->ioda.m32_pci_base;
			region.end = res->end -
				     hose->mem_offset[0] -
				     phb->ioda.m32_pci_base;
			index = region.start / phb->ioda.m32_segsize;

			while (index < phb->ioda.total_pe &&
			       region.start <= region.end) {
				phb->ioda.m32_segmap[index] = pe->pe_number;
				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
					pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index);
				if (rc != OPAL_SUCCESS) {
					pr_err("%s: OPAL error %d when mapping M32 "
					       "segment#%d to PE#%d",
					       __func__, rc, index, pe->pe_number);
					break;
				}

				region.start += phb->ioda.m32_segsize;
				index++;
			}
		}
	}
}

static void pnv_pci_ioda_setup_seg(void)
{
	struct pci_controller *tmp, *hose;
	struct pnv_phb *phb;
	struct pnv_ioda_pe *pe;

	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		phb = hose->private_data;
		list_for_each_entry(pe, &phb->ioda.pe_list, list) {
			pnv_ioda_setup_pe_seg(hose, pe);
		}
	}
}

static void pnv_pci_ioda_setup_DMA(void)
{
	struct pci_controller *hose, *tmp;
	struct pnv_phb *phb;

	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		pnv_ioda_setup_dma(hose->private_data);

		/* Mark the PHB initialization done */
		phb = hose->private_data;
		phb->initialized = 1;
	}
}

static void pnv_pci_ioda_create_dbgfs(void)
{
#ifdef CONFIG_DEBUG_FS
	struct pci_controller *hose, *tmp;
	struct pnv_phb *phb;
	char name[16];

	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		phb = hose->private_data;

		sprintf(name, "PCI%04x", hose->global_number);
		phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root);
		if (!phb->dbgfs)
			pr_warning("%s: Error on creating debugfs on PHB#%x\n",
				   __func__, hose->global_number);
	}
#endif /* CONFIG_DEBUG_FS */
}

static void pnv_pci_ioda_fixup(void)
{
	pnv_pci_ioda_setup_PEs();
	pnv_pci_ioda_setup_seg();
	pnv_pci_ioda_setup_DMA();

	pnv_pci_ioda_create_dbgfs();

#ifdef CONFIG_EEH
	eeh_init();
	eeh_addr_cache_build();
#endif
}

/*
 * Returns the alignment for I/O or memory windows for P2P
 * bridges. That actually depends on how PEs are segmented.
 * For now, we return I/O or M32 segment size for PE sensitive
 * P2P bridges. Otherwise, the default values (4KiB for I/O,
 * 1MiB for memory) will be returned.
 *
 * The current PCI bus might be put into one PE, which was
 * created against the parent PCI bridge. For that case, we
 * needn't enlarge the alignment so that we can save some
 * resources.
 */
static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
						unsigned long type)
{
	struct pci_dev *bridge;
	struct pci_controller *hose = pci_bus_to_host(bus);
	struct pnv_phb *phb = hose->private_data;
	int num_pci_bridges = 0;

	bridge = bus->self;
	while (bridge) {
		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) {
			num_pci_bridges++;
			if (num_pci_bridges >= 2)
				return 1;
		}

		bridge = bridge->bus->self;
	}

	/* We fall back to M32 if M64 isn't supported */
	if (phb->ioda.m64_segsize &&
	    pnv_pci_is_mem_pref_64(type))
		return phb->ioda.m64_segsize;
	if (type & IORESOURCE_MEM)
		return phb->ioda.m32_segsize;

	return phb->ioda.io_segsize;
}

/* Prevent enabling devices for which we couldn't properly
 * assign a PE
 */
static int pnv_pci_enable_device_hook(struct pci_dev *dev)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct pci_dn *pdn;

	/* This function is probably called while the PEs have not
	 * been created yet, for example during resource reassignment
	 * at PCI probe time. Just skip the check if the PEs aren't
	 * ready.
	 */
	if (!phb->initialized)
		return 0;

	pdn = pci_get_pdn(dev);
	if (!pdn || pdn->pe_number == IODA_INVALID_PE)
		return -EINVAL;

	return 0;
}

static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
			       u32 devfn)
{
	return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
}

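/* Shutdown hook used by kexec: reset the IODA tables so the next kernel
 * starts from a clean state.
 */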
static void pnv_pci_ioda_shutdown(struct pnv_phb *phb)
{
	opal_pci_reset(phb->opal_id, OPAL_RESET_PCI_IODA_TABLE,
		       OPAL_ASSERT_RESET);
}

static void __init pnv_pci_init_ioda_phb(struct device_node *np,
					 u64 hub_id, int ioda_type)
{
	struct pci_controller *hose;
	struct pnv_phb *phb;
	unsigned long size, m32map_off, pemap_off, iomap_off = 0;
	const __be64 *prop64;
	const __be32 *prop32;
	int len;
	u64 phb_id;
	void *aux;
	long rc;

	pr_info("Initializing IODA%d OPAL PHB %s\n", ioda_type, np->full_name);

	prop64 = of_get_property(np, "ibm,opal-phbid", NULL);
	if (!prop64) {
		pr_err(" Missing \"ibm,opal-phbid\" property !\n");
		return;
	}
	phb_id = be64_to_cpup(prop64);
	pr_debug(" PHB-ID : 0x%016llx\n", phb_id);

	phb = memblock_virt_alloc(sizeof(struct pnv_phb), 0);

	/* Allocate PCI controller */
	phb->hose = hose = pcibios_alloc_controller(np);
	if (!phb->hose) {
		pr_err(" Can't allocate PCI controller for %s\n",
		       np->full_name);
		memblock_free(__pa(phb), sizeof(struct pnv_phb));
		return;
	}

	spin_lock_init(&phb->lock);
	prop32 = of_get_property(np, "bus-range", &len);
	if (prop32 && len == 8) {
		hose->first_busno = be32_to_cpu(prop32[0]);
		hose->last_busno = be32_to_cpu(prop32[1]);
	} else {
		pr_warn(" Broken <bus-range> on %s\n", np->full_name);
		hose->first_busno = 0;
		hose->last_busno = 0xff;
	}
	hose->private_data = phb;
	phb->hub_id = hub_id;
	phb->opal_id = phb_id;
	phb->type = ioda_type;

	/* Detect specific models for error handling */
	if (of_device_is_compatible(np, "ibm,p7ioc-pciex"))
		phb->model = PNV_PHB_MODEL_P7IOC;
	else if (of_device_is_compatible(np, "ibm,power8-pciex"))
		phb->model = PNV_PHB_MODEL_PHB3;
	else
		phb->model = PNV_PHB_MODEL_UNKNOWN;

	/* Parse 32-bit and IO ranges (if any) */
	pci_process_bridge_OF_ranges(hose, np, !hose->global_number);

	/* Get registers */
	phb->regs = of_iomap(np, 0);
	if (phb->regs == NULL)
		pr_err(" Failed to map registers !\n");

	/* Initialize more IODA stuff */
	phb->ioda.total_pe = 1;
	prop32 = of_get_property(np, "ibm,opal-num-pes", NULL);
	if (prop32)
		phb->ioda.total_pe = be32_to_cpup(prop32);
	prop32 = of_get_property(np, "ibm,opal-reserved-pe", NULL);
	if (prop32)
		phb->ioda.reserved_pe = be32_to_cpup(prop32);

	/* Parse 64-bit MMIO range */
	pnv_ioda_parse_m64_window(phb);

	phb->ioda.m32_size = resource_size(&hose->mem_resources[0]);
	/* FW has already chopped off the top 64k of M32 space (MSI space) */
	phb->ioda.m32_size += 0x10000;

	phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe;
	phb->ioda.m32_pci_base = hose->mem_resources[0].start - hose->mem_offset[0];
	phb->ioda.io_size = hose->pci_io_size;
	phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe;
	phb->ioda.io_pci_base = 0; /* XXX calculate this ? */

	/* Allocate aux data & arrays. We don't have IO ports on PHB3 */
	size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
	m32map_off = size;
	size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]);
	if (phb->type == PNV_PHB_IODA1) {
		iomap_off = size;
		size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]);
	}
	pemap_off = size;
	size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe);
	aux = memblock_virt_alloc(size, 0);
	phb->ioda.pe_alloc = aux;
	phb->ioda.m32_segmap = aux + m32map_off;
	if (phb->type == PNV_PHB_IODA1)
		phb->ioda.io_segmap = aux + iomap_off;
	phb->ioda.pe_array = aux + pemap_off;
	set_bit(phb->ioda.reserved_pe, phb->ioda.pe_alloc);

	INIT_LIST_HEAD(&phb->ioda.pe_dma_list);
	INIT_LIST_HEAD(&phb->ioda.pe_list);

	/* Calculate how many 32-bit TCE segments we have */
	phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28;

#if 0 /* We should really do that ... */
	rc = opal_pci_set_phb_mem_window(opal->phb_id,
					 window_type,
					 window_num,
					 starting_real_address,
					 starting_pci_address,
					 segment_size);
#endif

	pr_info(" %03d (%03d) PE's M32: 0x%x [segment=0x%x]\n",
		phb->ioda.total_pe, phb->ioda.reserved_pe,
		phb->ioda.m32_size, phb->ioda.m32_segsize);
	if (phb->ioda.m64_size)
		pr_info(" M64: 0x%lx [segment=0x%lx]\n",
			phb->ioda.m64_size, phb->ioda.m64_segsize);
	if (phb->ioda.io_size)
		pr_info(" IO: 0x%x [segment=0x%x]\n",
			phb->ioda.io_size, phb->ioda.io_segsize);


	phb->hose->ops = &pnv_pci_ops;
	phb->get_pe_state = pnv_ioda_get_pe_state;
	phb->freeze_pe = pnv_ioda_freeze_pe;
	phb->unfreeze_pe = pnv_ioda_unfreeze_pe;
#ifdef CONFIG_EEH
	phb->eeh_ops = &ioda_eeh_ops;
#endif

	/* Setup RID -> PE mapping function */
	phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe;

	/* Setup TCEs */
	phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
	phb->dma_set_mask = pnv_pci_ioda_dma_set_mask;
	phb->dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask;

	/* Setup shutdown function for kexec */
	phb->shutdown = pnv_pci_ioda_shutdown;

	/* Setup MSI support */
	pnv_pci_init_ioda_msis(phb);

	/*
	 * We pass the PCI probe flag PCI_REASSIGN_ALL_RSRC here
	 * to let the PCI core do resource assignment. It's expected
	 * that the PCI core will do correct I/O and MMIO alignment
	 * for the P2P bridge BARs so that each PCI bus (excluding
	 * the child P2P bridges) can form an individual PE.
	 */
	ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
	ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook;
	ppc_md.pcibios_window_alignment = pnv_pci_window_alignment;
	ppc_md.pcibios_reset_secondary_bus = pnv_pci_reset_secondary_bus;
	pci_add_flags(PCI_REASSIGN_ALL_RSRC);

	/* Reset IODA tables to a clean state */
	rc = opal_pci_reset(phb_id, OPAL_RESET_PCI_IODA_TABLE, OPAL_ASSERT_RESET);
	if (rc)
		pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc);

	/* If we're running in a kdump kernel, the previous kernel never
	 * shut down PCI devices correctly. We already got the IODA table
	 * cleaned out, so we have to issue a PHB reset to stop all PCI
	 * transactions from the previous kernel.
	 */
	if (is_kdump_kernel()) {
		pr_info(" Issue PHB reset ...\n");
		ioda_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL);
		ioda_eeh_phb_reset(hose, EEH_RESET_DEACTIVATE);
	}

	/* Remove M64 resource if we can't configure it successfully */
	if (!phb->init_m64 || phb->init_m64(phb))
		hose->mem_resources[1].flags = 0;
}

void __init pnv_pci_init_ioda2_phb(struct device_node *np)
{
	pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2);
}

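/* Probe an IODA IO hub and initialise each of its child (IODA1) PHBs */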
void __init pnv_pci_init_ioda_hub(struct device_node *np)
{
	struct device_node *phbn;
	const __be64 *prop64;
	u64 hub_id;

	pr_info("Probing IODA IO-Hub %s\n", np->full_name);

	prop64 = of_get_property(np, "ibm,opal-hubid", NULL);
	if (!prop64) {
		pr_err(" Missing \"ibm,opal-hubid\" property !\n");
		return;
	}
	hub_id = be64_to_cpup(prop64);
	pr_devel(" HUB-ID : 0x%016llx\n", hub_id);

	/* Count child PHBs */
	for_each_child_of_node(np, phbn) {
		/* Look for IODA1 PHBs */
		if (of_device_is_compatible(phbn, "ibm,ioda-phb"))
			pnv_pci_init_ioda_phb(phbn, hub_id, PNV_PHB_IODA1);
	}
}