1 /* 2 * Support PCI/PCIe on PowerNV platforms 3 * 4 * Copyright 2011 Benjamin Herrenschmidt, IBM Corp. 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 */ 11 12 #undef DEBUG 13 14 #include <linux/kernel.h> 15 #include <linux/pci.h> 16 #include <linux/delay.h> 17 #include <linux/string.h> 18 #include <linux/init.h> 19 #include <linux/bootmem.h> 20 #include <linux/irq.h> 21 #include <linux/io.h> 22 #include <linux/msi.h> 23 24 #include <asm/sections.h> 25 #include <asm/io.h> 26 #include <asm/prom.h> 27 #include <asm/pci-bridge.h> 28 #include <asm/machdep.h> 29 #include <asm/ppc-pci.h> 30 #include <asm/opal.h> 31 #include <asm/iommu.h> 32 #include <asm/tce.h> 33 34 #include "powernv.h" 35 #include "pci.h" 36 37 #define define_pe_printk_level(func, kern_level) \ 38 static int func(const struct pnv_ioda_pe *pe, const char *fmt, ...) \ 39 { \ 40 struct va_format vaf; \ 41 va_list args; \ 42 char pfix[32]; \ 43 int r; \ 44 \ 45 va_start(args, fmt); \ 46 \ 47 vaf.fmt = fmt; \ 48 vaf.va = &args; \ 49 \ 50 if (pe->pdev) \ 51 strlcpy(pfix, dev_name(&pe->pdev->dev), \ 52 sizeof(pfix)); \ 53 else \ 54 sprintf(pfix, "%04x:%02x ", \ 55 pci_domain_nr(pe->pbus), \ 56 pe->pbus->number); \ 57 r = printk(kern_level "pci %s: [PE# %.3d] %pV", \ 58 pfix, pe->pe_number, &vaf); \ 59 \ 60 va_end(args); \ 61 \ 62 return r; \ 63 } \ 64 65 define_pe_printk_level(pe_err, KERN_ERR); 66 define_pe_printk_level(pe_warn, KERN_WARNING); 67 define_pe_printk_level(pe_info, KERN_INFO); 68 69 static struct pci_dn *pnv_ioda_get_pdn(struct pci_dev *dev) 70 { 71 struct device_node *np; 72 73 np = pci_device_to_OF_node(dev); 74 if (!np) 75 return NULL; 76 return PCI_DN(np); 77 } 78 79 static int __devinit pnv_ioda_alloc_pe(struct pnv_phb *phb) 80 { 81 unsigned long pe; 82 83 do { 84 pe = find_next_zero_bit(phb->ioda.pe_alloc, 85 phb->ioda.total_pe, 0); 86 if (pe >= phb->ioda.total_pe) 87 return IODA_INVALID_PE; 88 } while(test_and_set_bit(pe, phb->ioda.pe_alloc)); 89 90 phb->ioda.pe_array[pe].pe_number = pe; 91 return pe; 92 } 93 94 static void __devinit pnv_ioda_free_pe(struct pnv_phb *phb, int pe) 95 { 96 WARN_ON(phb->ioda.pe_array[pe].pdev); 97 98 memset(&phb->ioda.pe_array[pe], 0, sizeof(struct pnv_ioda_pe)); 99 clear_bit(pe, phb->ioda.pe_alloc); 100 } 101 102 /* Currently those 2 are only used when MSIs are enabled, this will change 103 * but in the meantime, we need to protect them to avoid warnings 104 */ 105 #ifdef CONFIG_PCI_MSI 106 static struct pnv_ioda_pe * __devinit pnv_ioda_get_pe(struct pci_dev *dev) 107 { 108 struct pci_controller *hose = pci_bus_to_host(dev->bus); 109 struct pnv_phb *phb = hose->private_data; 110 struct pci_dn *pdn = pnv_ioda_get_pdn(dev); 111 112 if (!pdn) 113 return NULL; 114 if (pdn->pe_number == IODA_INVALID_PE) 115 return NULL; 116 return &phb->ioda.pe_array[pdn->pe_number]; 117 } 118 #endif /* CONFIG_PCI_MSI */ 119 120 static int __devinit pnv_ioda_configure_pe(struct pnv_phb *phb, 121 struct pnv_ioda_pe *pe) 122 { 123 struct pci_dev *parent; 124 uint8_t bcomp, dcomp, fcomp; 125 long rc, rid_end, rid; 126 127 /* Bus validation ? */ 128 if (pe->pbus) { 129 int count; 130 131 dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER; 132 fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER; 133 parent = pe->pbus->self; 134 if (pe->flags & PNV_IODA_PE_BUS_ALL) 135 count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1; 136 else 137 count = 1; 138 139 switch(count) { 140 case 1: bcomp = OpalPciBusAll; break; 141 case 2: bcomp = OpalPciBus7Bits; break; 142 case 4: bcomp = OpalPciBus6Bits; break; 143 case 8: bcomp = OpalPciBus5Bits; break; 144 case 16: bcomp = OpalPciBus4Bits; break; 145 case 32: bcomp = OpalPciBus3Bits; break; 146 default: 147 pr_err("%s: Number of subordinate busses %d" 148 " unsupported\n", 149 pci_name(pe->pbus->self), count); 150 /* Do an exact match only */ 151 bcomp = OpalPciBusAll; 152 } 153 rid_end = pe->rid + (count << 8); 154 } else { 155 parent = pe->pdev->bus->self; 156 bcomp = OpalPciBusAll; 157 dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER; 158 fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER; 159 rid_end = pe->rid + 1; 160 } 161 162 /* Associate PE in PELT */ 163 rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid, 164 bcomp, dcomp, fcomp, OPAL_MAP_PE); 165 if (rc) { 166 pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc); 167 return -ENXIO; 168 } 169 opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number, 170 OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); 171 172 /* Add to all parents PELT-V */ 173 while (parent) { 174 struct pci_dn *pdn = pnv_ioda_get_pdn(parent); 175 if (pdn && pdn->pe_number != IODA_INVALID_PE) { 176 rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number, 177 pe->pe_number, OPAL_ADD_PE_TO_DOMAIN); 178 /* XXX What to do in case of error ? */ 179 } 180 parent = parent->bus->self; 181 } 182 /* Setup reverse map */ 183 for (rid = pe->rid; rid < rid_end; rid++) 184 phb->ioda.pe_rmap[rid] = pe->pe_number; 185 186 /* Setup one MVTs on IODA1 */ 187 if (phb->type == PNV_PHB_IODA1) { 188 pe->mve_number = pe->pe_number; 189 rc = opal_pci_set_mve(phb->opal_id, pe->mve_number, 190 pe->pe_number); 191 if (rc) { 192 pe_err(pe, "OPAL error %ld setting up MVE %d\n", 193 rc, pe->mve_number); 194 pe->mve_number = -1; 195 } else { 196 rc = opal_pci_set_mve_enable(phb->opal_id, 197 pe->mve_number, OPAL_ENABLE_MVE); 198 if (rc) { 199 pe_err(pe, "OPAL error %ld enabling MVE %d\n", 200 rc, pe->mve_number); 201 pe->mve_number = -1; 202 } 203 } 204 } else if (phb->type == PNV_PHB_IODA2) 205 pe->mve_number = 0; 206 207 return 0; 208 } 209 210 static void __devinit pnv_ioda_link_pe_by_weight(struct pnv_phb *phb, 211 struct pnv_ioda_pe *pe) 212 { 213 struct pnv_ioda_pe *lpe; 214 215 list_for_each_entry(lpe, &phb->ioda.pe_dma_list, dma_link) { 216 if (lpe->dma_weight < pe->dma_weight) { 217 list_add_tail(&pe->dma_link, &lpe->dma_link); 218 return; 219 } 220 } 221 list_add_tail(&pe->dma_link, &phb->ioda.pe_dma_list); 222 } 223 224 static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev) 225 { 226 /* This is quite simplistic. The "base" weight of a device 227 * is 10. 0 means no DMA is to be accounted for it. 228 */ 229 230 /* If it's a bridge, no DMA */ 231 if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) 232 return 0; 233 234 /* Reduce the weight of slow USB controllers */ 235 if (dev->class == PCI_CLASS_SERIAL_USB_UHCI || 236 dev->class == PCI_CLASS_SERIAL_USB_OHCI || 237 dev->class == PCI_CLASS_SERIAL_USB_EHCI) 238 return 3; 239 240 /* Increase the weight of RAID (includes Obsidian) */ 241 if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID) 242 return 15; 243 244 /* Default */ 245 return 10; 246 } 247 248 #if 0 249 static struct pnv_ioda_pe * __devinit pnv_ioda_setup_dev_PE(struct pci_dev *dev) 250 { 251 struct pci_controller *hose = pci_bus_to_host(dev->bus); 252 struct pnv_phb *phb = hose->private_data; 253 struct pci_dn *pdn = pnv_ioda_get_pdn(dev); 254 struct pnv_ioda_pe *pe; 255 int pe_num; 256 257 if (!pdn) { 258 pr_err("%s: Device tree node not associated properly\n", 259 pci_name(dev)); 260 return NULL; 261 } 262 if (pdn->pe_number != IODA_INVALID_PE) 263 return NULL; 264 265 /* PE#0 has been pre-set */ 266 if (dev->bus->number == 0) 267 pe_num = 0; 268 else 269 pe_num = pnv_ioda_alloc_pe(phb); 270 if (pe_num == IODA_INVALID_PE) { 271 pr_warning("%s: Not enough PE# available, disabling device\n", 272 pci_name(dev)); 273 return NULL; 274 } 275 276 /* NOTE: We get only one ref to the pci_dev for the pdn, not for the 277 * pointer in the PE data structure, both should be destroyed at the 278 * same time. However, this needs to be looked at more closely again 279 * once we actually start removing things (Hotplug, SR-IOV, ...) 280 * 281 * At some point we want to remove the PDN completely anyways 282 */ 283 pe = &phb->ioda.pe_array[pe_num]; 284 pci_dev_get(dev); 285 pdn->pcidev = dev; 286 pdn->pe_number = pe_num; 287 pe->pdev = dev; 288 pe->pbus = NULL; 289 pe->tce32_seg = -1; 290 pe->mve_number = -1; 291 pe->rid = dev->bus->number << 8 | pdn->devfn; 292 293 pe_info(pe, "Associated device to PE\n"); 294 295 if (pnv_ioda_configure_pe(phb, pe)) { 296 /* XXX What do we do here ? */ 297 if (pe_num) 298 pnv_ioda_free_pe(phb, pe_num); 299 pdn->pe_number = IODA_INVALID_PE; 300 pe->pdev = NULL; 301 pci_dev_put(dev); 302 return NULL; 303 } 304 305 /* Assign a DMA weight to the device */ 306 pe->dma_weight = pnv_ioda_dma_weight(dev); 307 if (pe->dma_weight != 0) { 308 phb->ioda.dma_weight += pe->dma_weight; 309 phb->ioda.dma_pe_count++; 310 } 311 312 /* Link the PE */ 313 pnv_ioda_link_pe_by_weight(phb, pe); 314 315 return pe; 316 } 317 #endif /* Useful for SRIOV case */ 318 319 static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe) 320 { 321 struct pci_dev *dev; 322 323 list_for_each_entry(dev, &bus->devices, bus_list) { 324 struct pci_dn *pdn = pnv_ioda_get_pdn(dev); 325 326 if (pdn == NULL) { 327 pr_warn("%s: No device node associated with device !\n", 328 pci_name(dev)); 329 continue; 330 } 331 pci_dev_get(dev); 332 pdn->pcidev = dev; 333 pdn->pe_number = pe->pe_number; 334 pe->dma_weight += pnv_ioda_dma_weight(dev); 335 if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) 336 pnv_ioda_setup_same_PE(dev->subordinate, pe); 337 } 338 } 339 340 /* 341 * There're 2 types of PCI bus sensitive PEs: One that is compromised of 342 * single PCI bus. Another one that contains the primary PCI bus and its 343 * subordinate PCI devices and buses. The second type of PE is normally 344 * orgiriated by PCIe-to-PCI bridge or PLX switch downstream ports. 345 */ 346 static void __devinit pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all) 347 { 348 struct pci_controller *hose = pci_bus_to_host(bus); 349 struct pnv_phb *phb = hose->private_data; 350 struct pnv_ioda_pe *pe; 351 int pe_num; 352 353 pe_num = pnv_ioda_alloc_pe(phb); 354 if (pe_num == IODA_INVALID_PE) { 355 pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n", 356 __func__, pci_domain_nr(bus), bus->number); 357 return; 358 } 359 360 pe = &phb->ioda.pe_array[pe_num]; 361 pe->flags = (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS); 362 pe->pbus = bus; 363 pe->pdev = NULL; 364 pe->tce32_seg = -1; 365 pe->mve_number = -1; 366 pe->rid = bus->busn_res.start << 8; 367 pe->dma_weight = 0; 368 369 if (all) 370 pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n", 371 bus->busn_res.start, bus->busn_res.end, pe_num); 372 else 373 pe_info(pe, "Secondary bus %d associated with PE#%d\n", 374 bus->busn_res.start, pe_num); 375 376 if (pnv_ioda_configure_pe(phb, pe)) { 377 /* XXX What do we do here ? */ 378 if (pe_num) 379 pnv_ioda_free_pe(phb, pe_num); 380 pe->pbus = NULL; 381 return; 382 } 383 384 /* Associate it with all child devices */ 385 pnv_ioda_setup_same_PE(bus, pe); 386 387 /* Put PE to the list */ 388 list_add_tail(&pe->list, &phb->ioda.pe_list); 389 390 /* Account for one DMA PE if at least one DMA capable device exist 391 * below the bridge 392 */ 393 if (pe->dma_weight != 0) { 394 phb->ioda.dma_weight += pe->dma_weight; 395 phb->ioda.dma_pe_count++; 396 } 397 398 /* Link the PE */ 399 pnv_ioda_link_pe_by_weight(phb, pe); 400 } 401 402 static void __devinit pnv_ioda_setup_PEs(struct pci_bus *bus) 403 { 404 struct pci_dev *dev; 405 406 pnv_ioda_setup_bus_PE(bus, 0); 407 408 list_for_each_entry(dev, &bus->devices, bus_list) { 409 if (dev->subordinate) { 410 if (pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE) 411 pnv_ioda_setup_bus_PE(dev->subordinate, 1); 412 else 413 pnv_ioda_setup_PEs(dev->subordinate); 414 } 415 } 416 } 417 418 /* 419 * Configure PEs so that the downstream PCI buses and devices 420 * could have their associated PE#. Unfortunately, we didn't 421 * figure out the way to identify the PLX bridge yet. So we 422 * simply put the PCI bus and the subordinate behind the root 423 * port to PE# here. The game rule here is expected to be changed 424 * as soon as we can detected PLX bridge correctly. 425 */ 426 static void __devinit pnv_pci_ioda_setup_PEs(void) 427 { 428 struct pci_controller *hose, *tmp; 429 430 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { 431 pnv_ioda_setup_PEs(hose->bus); 432 } 433 } 434 435 static void __devinit pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, 436 struct pci_dev *dev) 437 { 438 /* We delay DMA setup after we have assigned all PE# */ 439 } 440 441 static void __devinit pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, 442 struct pci_bus *bus) 443 { 444 struct pci_dev *dev; 445 446 list_for_each_entry(dev, &bus->devices, bus_list) { 447 set_iommu_table_base(&dev->dev, &pe->tce32_table); 448 if (dev->subordinate) 449 pnv_ioda_setup_bus_dma(pe, dev->subordinate); 450 } 451 } 452 453 static void __devinit pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, 454 struct pnv_ioda_pe *pe, 455 unsigned int base, 456 unsigned int segs) 457 { 458 459 struct page *tce_mem = NULL; 460 const __be64 *swinvp; 461 struct iommu_table *tbl; 462 unsigned int i; 463 int64_t rc; 464 void *addr; 465 466 /* 256M DMA window, 4K TCE pages, 8 bytes TCE */ 467 #define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8) 468 469 /* XXX FIXME: Handle 64-bit only DMA devices */ 470 /* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */ 471 /* XXX FIXME: Allocate multi-level tables on PHB3 */ 472 473 /* We shouldn't already have a 32-bit DMA associated */ 474 if (WARN_ON(pe->tce32_seg >= 0)) 475 return; 476 477 /* Grab a 32-bit TCE table */ 478 pe->tce32_seg = base; 479 pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n", 480 (base << 28), ((base + segs) << 28) - 1); 481 482 /* XXX Currently, we allocate one big contiguous table for the 483 * TCEs. We only really need one chunk per 256M of TCE space 484 * (ie per segment) but that's an optimization for later, it 485 * requires some added smarts with our get/put_tce implementation 486 */ 487 tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL, 488 get_order(TCE32_TABLE_SIZE * segs)); 489 if (!tce_mem) { 490 pe_err(pe, " Failed to allocate a 32-bit TCE memory\n"); 491 goto fail; 492 } 493 addr = page_address(tce_mem); 494 memset(addr, 0, TCE32_TABLE_SIZE * segs); 495 496 /* Configure HW */ 497 for (i = 0; i < segs; i++) { 498 rc = opal_pci_map_pe_dma_window(phb->opal_id, 499 pe->pe_number, 500 base + i, 1, 501 __pa(addr) + TCE32_TABLE_SIZE * i, 502 TCE32_TABLE_SIZE, 0x1000); 503 if (rc) { 504 pe_err(pe, " Failed to configure 32-bit TCE table," 505 " err %ld\n", rc); 506 goto fail; 507 } 508 } 509 510 /* Setup linux iommu table */ 511 tbl = &pe->tce32_table; 512 pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs, 513 base << 28); 514 515 /* OPAL variant of P7IOC SW invalidated TCEs */ 516 swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL); 517 if (swinvp) { 518 /* We need a couple more fields -- an address and a data 519 * to or. Since the bus is only printed out on table free 520 * errors, and on the first pass the data will be a relative 521 * bus number, print that out instead. 522 */ 523 tbl->it_busno = 0; 524 tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8); 525 tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE 526 | TCE_PCI_SWINV_PAIR; 527 } 528 iommu_init_table(tbl, phb->hose->node); 529 530 if (pe->pdev) 531 set_iommu_table_base(&pe->pdev->dev, tbl); 532 else 533 pnv_ioda_setup_bus_dma(pe, pe->pbus); 534 535 return; 536 fail: 537 /* XXX Failure: Try to fallback to 64-bit only ? */ 538 if (pe->tce32_seg >= 0) 539 pe->tce32_seg = -1; 540 if (tce_mem) 541 __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs)); 542 } 543 544 static void __devinit pnv_ioda_setup_dma(struct pnv_phb *phb) 545 { 546 struct pci_controller *hose = phb->hose; 547 unsigned int residual, remaining, segs, tw, base; 548 struct pnv_ioda_pe *pe; 549 550 /* If we have more PE# than segments available, hand out one 551 * per PE until we run out and let the rest fail. If not, 552 * then we assign at least one segment per PE, plus more based 553 * on the amount of devices under that PE 554 */ 555 if (phb->ioda.dma_pe_count > phb->ioda.tce32_count) 556 residual = 0; 557 else 558 residual = phb->ioda.tce32_count - 559 phb->ioda.dma_pe_count; 560 561 pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n", 562 hose->global_number, phb->ioda.tce32_count); 563 pr_info("PCI: %d PE# for a total weight of %d\n", 564 phb->ioda.dma_pe_count, phb->ioda.dma_weight); 565 566 /* Walk our PE list and configure their DMA segments, hand them 567 * out one base segment plus any residual segments based on 568 * weight 569 */ 570 remaining = phb->ioda.tce32_count; 571 tw = phb->ioda.dma_weight; 572 base = 0; 573 list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) { 574 if (!pe->dma_weight) 575 continue; 576 if (!remaining) { 577 pe_warn(pe, "No DMA32 resources available\n"); 578 continue; 579 } 580 segs = 1; 581 if (residual) { 582 segs += ((pe->dma_weight * residual) + (tw / 2)) / tw; 583 if (segs > remaining) 584 segs = remaining; 585 } 586 pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n", 587 pe->dma_weight, segs); 588 pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs); 589 remaining -= segs; 590 base += segs; 591 } 592 } 593 594 #ifdef CONFIG_PCI_MSI 595 static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, 596 unsigned int hwirq, unsigned int is_64, 597 struct msi_msg *msg) 598 { 599 struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev); 600 unsigned int xive_num = hwirq - phb->msi_base; 601 uint64_t addr64; 602 uint32_t addr32, data; 603 int rc; 604 605 /* No PE assigned ? bail out ... no MSI for you ! */ 606 if (pe == NULL) 607 return -ENXIO; 608 609 /* Check if we have an MVE */ 610 if (pe->mve_number < 0) 611 return -ENXIO; 612 613 /* Assign XIVE to PE */ 614 rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num); 615 if (rc) { 616 pr_warn("%s: OPAL error %d setting XIVE %d PE\n", 617 pci_name(dev), rc, xive_num); 618 return -EIO; 619 } 620 621 if (is_64) { 622 rc = opal_get_msi_64(phb->opal_id, pe->mve_number, xive_num, 1, 623 &addr64, &data); 624 if (rc) { 625 pr_warn("%s: OPAL error %d getting 64-bit MSI data\n", 626 pci_name(dev), rc); 627 return -EIO; 628 } 629 msg->address_hi = addr64 >> 32; 630 msg->address_lo = addr64 & 0xfffffffful; 631 } else { 632 rc = opal_get_msi_32(phb->opal_id, pe->mve_number, xive_num, 1, 633 &addr32, &data); 634 if (rc) { 635 pr_warn("%s: OPAL error %d getting 32-bit MSI data\n", 636 pci_name(dev), rc); 637 return -EIO; 638 } 639 msg->address_hi = 0; 640 msg->address_lo = addr32; 641 } 642 msg->data = data; 643 644 pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d)," 645 " address=%x_%08x data=%x PE# %d\n", 646 pci_name(dev), is_64 ? "64" : "32", hwirq, xive_num, 647 msg->address_hi, msg->address_lo, data, pe->pe_number); 648 649 return 0; 650 } 651 652 static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) 653 { 654 unsigned int bmap_size; 655 const __be32 *prop = of_get_property(phb->hose->dn, 656 "ibm,opal-msi-ranges", NULL); 657 if (!prop) { 658 /* BML Fallback */ 659 prop = of_get_property(phb->hose->dn, "msi-ranges", NULL); 660 } 661 if (!prop) 662 return; 663 664 phb->msi_base = be32_to_cpup(prop); 665 phb->msi_count = be32_to_cpup(prop + 1); 666 bmap_size = BITS_TO_LONGS(phb->msi_count) * sizeof(unsigned long); 667 phb->msi_map = zalloc_maybe_bootmem(bmap_size, GFP_KERNEL); 668 if (!phb->msi_map) { 669 pr_err("PCI %d: Failed to allocate MSI bitmap !\n", 670 phb->hose->global_number); 671 return; 672 } 673 phb->msi_setup = pnv_pci_ioda_msi_setup; 674 phb->msi32_support = 1; 675 pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n", 676 phb->msi_count, phb->msi_base); 677 } 678 #else 679 static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { } 680 #endif /* CONFIG_PCI_MSI */ 681 682 /* 683 * This function is supposed to be called on basis of PE from top 684 * to bottom style. So the the I/O or MMIO segment assigned to 685 * parent PE could be overrided by its child PEs if necessary. 686 */ 687 static void __devinit pnv_ioda_setup_pe_seg(struct pci_controller *hose, 688 struct pnv_ioda_pe *pe) 689 { 690 struct pnv_phb *phb = hose->private_data; 691 struct pci_bus_region region; 692 struct resource *res; 693 int i, index; 694 int rc; 695 696 /* 697 * NOTE: We only care PCI bus based PE for now. For PCI 698 * device based PE, for example SRIOV sensitive VF should 699 * be figured out later. 700 */ 701 BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))); 702 703 pci_bus_for_each_resource(pe->pbus, res, i) { 704 if (!res || !res->flags || 705 res->start > res->end) 706 continue; 707 708 if (res->flags & IORESOURCE_IO) { 709 region.start = res->start - phb->ioda.io_pci_base; 710 region.end = res->end - phb->ioda.io_pci_base; 711 index = region.start / phb->ioda.io_segsize; 712 713 while (index < phb->ioda.total_pe && 714 region.start <= region.end) { 715 phb->ioda.io_segmap[index] = pe->pe_number; 716 rc = opal_pci_map_pe_mmio_window(phb->opal_id, 717 pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index); 718 if (rc != OPAL_SUCCESS) { 719 pr_err("%s: OPAL error %d when mapping IO " 720 "segment #%d to PE#%d\n", 721 __func__, rc, index, pe->pe_number); 722 break; 723 } 724 725 region.start += phb->ioda.io_segsize; 726 index++; 727 } 728 } else if (res->flags & IORESOURCE_MEM) { 729 region.start = res->start - 730 hose->pci_mem_offset - 731 phb->ioda.m32_pci_base; 732 region.end = res->end - 733 hose->pci_mem_offset - 734 phb->ioda.m32_pci_base; 735 index = region.start / phb->ioda.m32_segsize; 736 737 while (index < phb->ioda.total_pe && 738 region.start <= region.end) { 739 phb->ioda.m32_segmap[index] = pe->pe_number; 740 rc = opal_pci_map_pe_mmio_window(phb->opal_id, 741 pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index); 742 if (rc != OPAL_SUCCESS) { 743 pr_err("%s: OPAL error %d when mapping M32 " 744 "segment#%d to PE#%d", 745 __func__, rc, index, pe->pe_number); 746 break; 747 } 748 749 region.start += phb->ioda.m32_segsize; 750 index++; 751 } 752 } 753 } 754 } 755 756 static void __devinit pnv_pci_ioda_setup_seg(void) 757 { 758 struct pci_controller *tmp, *hose; 759 struct pnv_phb *phb; 760 struct pnv_ioda_pe *pe; 761 762 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { 763 phb = hose->private_data; 764 list_for_each_entry(pe, &phb->ioda.pe_list, list) { 765 pnv_ioda_setup_pe_seg(hose, pe); 766 } 767 } 768 } 769 770 static void __devinit pnv_pci_ioda_setup_DMA(void) 771 { 772 struct pci_controller *hose, *tmp; 773 struct pnv_phb *phb; 774 775 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { 776 pnv_ioda_setup_dma(hose->private_data); 777 778 /* Mark the PHB initialization done */ 779 phb = hose->private_data; 780 phb->initialized = 1; 781 } 782 } 783 784 static void __devinit pnv_pci_ioda_fixup(void) 785 { 786 pnv_pci_ioda_setup_PEs(); 787 pnv_pci_ioda_setup_seg(); 788 pnv_pci_ioda_setup_DMA(); 789 } 790 791 /* 792 * Returns the alignment for I/O or memory windows for P2P 793 * bridges. That actually depends on how PEs are segmented. 794 * For now, we return I/O or M32 segment size for PE sensitive 795 * P2P bridges. Otherwise, the default values (4KiB for I/O, 796 * 1MiB for memory) will be returned. 797 * 798 * The current PCI bus might be put into one PE, which was 799 * create against the parent PCI bridge. For that case, we 800 * needn't enlarge the alignment so that we can save some 801 * resources. 802 */ 803 static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus, 804 unsigned long type) 805 { 806 struct pci_dev *bridge; 807 struct pci_controller *hose = pci_bus_to_host(bus); 808 struct pnv_phb *phb = hose->private_data; 809 int num_pci_bridges = 0; 810 811 bridge = bus->self; 812 while (bridge) { 813 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) { 814 num_pci_bridges++; 815 if (num_pci_bridges >= 2) 816 return 1; 817 } 818 819 bridge = bridge->bus->self; 820 } 821 822 /* We need support prefetchable memory window later */ 823 if (type & IORESOURCE_MEM) 824 return phb->ioda.m32_segsize; 825 826 return phb->ioda.io_segsize; 827 } 828 829 /* Prevent enabling devices for which we couldn't properly 830 * assign a PE 831 */ 832 static int __devinit pnv_pci_enable_device_hook(struct pci_dev *dev) 833 { 834 struct pci_controller *hose = pci_bus_to_host(dev->bus); 835 struct pnv_phb *phb = hose->private_data; 836 struct pci_dn *pdn; 837 838 /* The function is probably called while the PEs have 839 * not be created yet. For example, resource reassignment 840 * during PCI probe period. We just skip the check if 841 * PEs isn't ready. 842 */ 843 if (!phb->initialized) 844 return 0; 845 846 pdn = pnv_ioda_get_pdn(dev); 847 if (!pdn || pdn->pe_number == IODA_INVALID_PE) 848 return -EINVAL; 849 850 return 0; 851 } 852 853 static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus, 854 u32 devfn) 855 { 856 return phb->ioda.pe_rmap[(bus->number << 8) | devfn]; 857 } 858 859 void __init pnv_pci_init_ioda1_phb(struct device_node *np) 860 { 861 struct pci_controller *hose; 862 static int primary = 1; 863 struct pnv_phb *phb; 864 unsigned long size, m32map_off, iomap_off, pemap_off; 865 const u64 *prop64; 866 u64 phb_id; 867 void *aux; 868 long rc; 869 870 pr_info(" Initializing IODA OPAL PHB %s\n", np->full_name); 871 872 prop64 = of_get_property(np, "ibm,opal-phbid", NULL); 873 if (!prop64) { 874 pr_err(" Missing \"ibm,opal-phbid\" property !\n"); 875 return; 876 } 877 phb_id = be64_to_cpup(prop64); 878 pr_debug(" PHB-ID : 0x%016llx\n", phb_id); 879 880 phb = alloc_bootmem(sizeof(struct pnv_phb)); 881 if (phb) { 882 memset(phb, 0, sizeof(struct pnv_phb)); 883 phb->hose = hose = pcibios_alloc_controller(np); 884 } 885 if (!phb || !phb->hose) { 886 pr_err("PCI: Failed to allocate PCI controller for %s\n", 887 np->full_name); 888 return; 889 } 890 891 spin_lock_init(&phb->lock); 892 /* XXX Use device-tree */ 893 hose->first_busno = 0; 894 hose->last_busno = 0xff; 895 hose->private_data = phb; 896 phb->opal_id = phb_id; 897 phb->type = PNV_PHB_IODA1; 898 899 /* Detect specific models for error handling */ 900 if (of_device_is_compatible(np, "ibm,p7ioc-pciex")) 901 phb->model = PNV_PHB_MODEL_P7IOC; 902 else 903 phb->model = PNV_PHB_MODEL_UNKNOWN; 904 905 /* We parse "ranges" now since we need to deduce the register base 906 * from the IO base 907 */ 908 pci_process_bridge_OF_ranges(phb->hose, np, primary); 909 primary = 0; 910 911 /* Magic formula from Milton */ 912 phb->regs = of_iomap(np, 0); 913 if (phb->regs == NULL) 914 pr_err(" Failed to map registers !\n"); 915 916 917 /* XXX This is hack-a-thon. This needs to be changed so that: 918 * - we obtain stuff like PE# etc... from device-tree 919 * - we properly re-allocate M32 ourselves 920 * (the OFW one isn't very good) 921 */ 922 923 /* Initialize more IODA stuff */ 924 phb->ioda.total_pe = 128; 925 926 phb->ioda.m32_size = resource_size(&hose->mem_resources[0]); 927 /* OFW Has already off top 64k of M32 space (MSI space) */ 928 phb->ioda.m32_size += 0x10000; 929 930 phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe; 931 phb->ioda.m32_pci_base = hose->mem_resources[0].start - 932 hose->pci_mem_offset; 933 phb->ioda.io_size = hose->pci_io_size; 934 phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe; 935 phb->ioda.io_pci_base = 0; /* XXX calculate this ? */ 936 937 /* Allocate aux data & arrays */ 938 size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long)); 939 m32map_off = size; 940 size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]); 941 iomap_off = size; 942 size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]); 943 pemap_off = size; 944 size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe); 945 aux = alloc_bootmem(size); 946 memset(aux, 0, size); 947 phb->ioda.pe_alloc = aux; 948 phb->ioda.m32_segmap = aux + m32map_off; 949 phb->ioda.io_segmap = aux + iomap_off; 950 phb->ioda.pe_array = aux + pemap_off; 951 set_bit(0, phb->ioda.pe_alloc); 952 953 INIT_LIST_HEAD(&phb->ioda.pe_dma_list); 954 INIT_LIST_HEAD(&phb->ioda.pe_list); 955 956 /* Calculate how many 32-bit TCE segments we have */ 957 phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28; 958 959 /* Clear unusable m64 */ 960 hose->mem_resources[1].flags = 0; 961 hose->mem_resources[1].start = 0; 962 hose->mem_resources[1].end = 0; 963 hose->mem_resources[2].flags = 0; 964 hose->mem_resources[2].start = 0; 965 hose->mem_resources[2].end = 0; 966 967 #if 0 968 rc = opal_pci_set_phb_mem_window(opal->phb_id, 969 window_type, 970 window_num, 971 starting_real_address, 972 starting_pci_address, 973 segment_size); 974 #endif 975 976 pr_info(" %d PE's M32: 0x%x [segment=0x%x] IO: 0x%x [segment=0x%x]\n", 977 phb->ioda.total_pe, 978 phb->ioda.m32_size, phb->ioda.m32_segsize, 979 phb->ioda.io_size, phb->ioda.io_segsize); 980 981 if (phb->regs) { 982 pr_devel(" BUID = 0x%016llx\n", in_be64(phb->regs + 0x100)); 983 pr_devel(" PHB2_CR = 0x%016llx\n", in_be64(phb->regs + 0x160)); 984 pr_devel(" IO_BAR = 0x%016llx\n", in_be64(phb->regs + 0x170)); 985 pr_devel(" IO_BAMR = 0x%016llx\n", in_be64(phb->regs + 0x178)); 986 pr_devel(" IO_SAR = 0x%016llx\n", in_be64(phb->regs + 0x180)); 987 pr_devel(" M32_BAR = 0x%016llx\n", in_be64(phb->regs + 0x190)); 988 pr_devel(" M32_BAMR = 0x%016llx\n", in_be64(phb->regs + 0x198)); 989 pr_devel(" M32_SAR = 0x%016llx\n", in_be64(phb->regs + 0x1a0)); 990 } 991 phb->hose->ops = &pnv_pci_ops; 992 993 /* Setup RID -> PE mapping function */ 994 phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe; 995 996 /* Setup TCEs */ 997 phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup; 998 999 /* Setup MSI support */ 1000 pnv_pci_init_ioda_msis(phb); 1001 1002 /* 1003 * We pass the PCI probe flag PCI_REASSIGN_ALL_RSRC here 1004 * to let the PCI core do resource assignment. It's supposed 1005 * that the PCI core will do correct I/O and MMIO alignment 1006 * for the P2P bridge bars so that each PCI bus (excluding 1007 * the child P2P bridges) can form individual PE. 1008 */ 1009 ppc_md.pcibios_fixup = pnv_pci_ioda_fixup; 1010 ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook; 1011 ppc_md.pcibios_window_alignment = pnv_pci_window_alignment; 1012 pci_add_flags(PCI_REASSIGN_ALL_RSRC); 1013 1014 /* Reset IODA tables to a clean state */ 1015 rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET); 1016 if (rc) 1017 pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc); 1018 opal_pci_set_pe(phb_id, 0, 0, 7, 1, 1 , OPAL_MAP_PE); 1019 } 1020 1021 void __init pnv_pci_init_ioda_hub(struct device_node *np) 1022 { 1023 struct device_node *phbn; 1024 const u64 *prop64; 1025 u64 hub_id; 1026 1027 pr_info("Probing IODA IO-Hub %s\n", np->full_name); 1028 1029 prop64 = of_get_property(np, "ibm,opal-hubid", NULL); 1030 if (!prop64) { 1031 pr_err(" Missing \"ibm,opal-hubid\" property !\n"); 1032 return; 1033 } 1034 hub_id = be64_to_cpup(prop64); 1035 pr_devel(" HUB-ID : 0x%016llx\n", hub_id); 1036 1037 /* Count child PHBs */ 1038 for_each_child_of_node(np, phbn) { 1039 /* Look for IODA1 PHBs */ 1040 if (of_device_is_compatible(phbn, "ibm,ioda-phb")) 1041 pnv_pci_init_ioda1_phb(phbn); 1042 } 1043 } 1044