1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Low-Level PCI Express Support for the SH7786 4 * 5 * Copyright (C) 2009 - 2011 Paul Mundt 6 */ 7 #define pr_fmt(fmt) "PCI: " fmt 8 9 #include <linux/pci.h> 10 #include <linux/init.h> 11 #include <linux/kernel.h> 12 #include <linux/io.h> 13 #include <linux/async.h> 14 #include <linux/delay.h> 15 #include <linux/slab.h> 16 #include <linux/clk.h> 17 #include <linux/sh_clk.h> 18 #include <linux/sh_intc.h> 19 #include <cpu/sh7786.h> 20 #include "pcie-sh7786.h" 21 #include <asm/sizes.h> 22 23 struct sh7786_pcie_port { 24 struct pci_channel *hose; 25 struct clk *fclk, phy_clk; 26 unsigned int index; 27 int endpoint; 28 int link; 29 }; 30 31 static struct sh7786_pcie_port *sh7786_pcie_ports; 32 static unsigned int nr_ports; 33 static unsigned long dma_pfn_offset; 34 35 static struct sh7786_pcie_hwops { 36 int (*core_init)(void); 37 async_func_t port_init_hw; 38 } *sh7786_pcie_hwops; 39 40 static struct resource sh7786_pci0_resources[] = { 41 { 42 .name = "PCIe0 MEM 0", 43 .start = 0xfd000000, 44 .end = 0xfd000000 + SZ_8M - 1, 45 .flags = IORESOURCE_MEM, 46 }, { 47 .name = "PCIe0 MEM 1", 48 .start = 0xc0000000, 49 .end = 0xc0000000 + SZ_512M - 1, 50 .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT, 51 }, { 52 .name = "PCIe0 MEM 2", 53 .start = 0x10000000, 54 .end = 0x10000000 + SZ_64M - 1, 55 .flags = IORESOURCE_MEM, 56 }, { 57 .name = "PCIe0 IO", 58 .start = 0xfe100000, 59 .end = 0xfe100000 + SZ_1M - 1, 60 .flags = IORESOURCE_IO, 61 }, 62 }; 63 64 static struct resource sh7786_pci1_resources[] = { 65 { 66 .name = "PCIe1 MEM 0", 67 .start = 0xfd800000, 68 .end = 0xfd800000 + SZ_8M - 1, 69 .flags = IORESOURCE_MEM, 70 }, { 71 .name = "PCIe1 MEM 1", 72 .start = 0xa0000000, 73 .end = 0xa0000000 + SZ_512M - 1, 74 .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT, 75 }, { 76 .name = "PCIe1 MEM 2", 77 .start = 0x30000000, 78 .end = 0x30000000 + SZ_256M - 1, 79 .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT, 80 }, { 81 .name = "PCIe1 IO", 82 .start = 0xfe300000, 83 .end = 0xfe300000 + SZ_1M - 1, 84 .flags = IORESOURCE_IO, 85 }, 86 }; 87 88 static struct resource sh7786_pci2_resources[] = { 89 { 90 .name = "PCIe2 MEM 0", 91 .start = 0xfc800000, 92 .end = 0xfc800000 + SZ_4M - 1, 93 .flags = IORESOURCE_MEM, 94 }, { 95 .name = "PCIe2 MEM 1", 96 .start = 0x80000000, 97 .end = 0x80000000 + SZ_512M - 1, 98 .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT, 99 }, { 100 .name = "PCIe2 MEM 2", 101 .start = 0x20000000, 102 .end = 0x20000000 + SZ_256M - 1, 103 .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT, 104 }, { 105 .name = "PCIe2 IO", 106 .start = 0xfcd00000, 107 .end = 0xfcd00000 + SZ_1M - 1, 108 .flags = IORESOURCE_IO, 109 }, 110 }; 111 112 extern struct pci_ops sh7786_pci_ops; 113 114 #define DEFINE_CONTROLLER(start, idx) \ 115 { \ 116 .pci_ops = &sh7786_pci_ops, \ 117 .resources = sh7786_pci##idx##_resources, \ 118 .nr_resources = ARRAY_SIZE(sh7786_pci##idx##_resources), \ 119 .reg_base = start, \ 120 .mem_offset = 0, \ 121 .io_offset = 0, \ 122 } 123 124 static struct pci_channel sh7786_pci_channels[] = { 125 DEFINE_CONTROLLER(0xfe000000, 0), 126 DEFINE_CONTROLLER(0xfe200000, 1), 127 DEFINE_CONTROLLER(0xfcc00000, 2), 128 }; 129 130 static struct clk fixed_pciexclkp = { 131 .rate = 100000000, /* 100 MHz reference clock */ 132 }; 133 134 static void sh7786_pci_fixup(struct pci_dev *dev) 135 { 136 /* 137 * Prevent enumeration of root complex resources. 138 */ 139 if (pci_is_root_bus(dev->bus) && dev->devfn == 0) { 140 int i; 141 142 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { 143 dev->resource[i].start = 0; 144 dev->resource[i].end = 0; 145 dev->resource[i].flags = 0; 146 } 147 } 148 } 149 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_RENESAS, PCI_DEVICE_ID_RENESAS_SH7786, 150 sh7786_pci_fixup); 151 152 static int __init phy_wait_for_ack(struct pci_channel *chan) 153 { 154 unsigned int timeout = 100; 155 156 while (timeout--) { 157 if (pci_read_reg(chan, SH4A_PCIEPHYADRR) & (1 << BITS_ACK)) 158 return 0; 159 160 udelay(100); 161 } 162 163 return -ETIMEDOUT; 164 } 165 166 static int __init pci_wait_for_irq(struct pci_channel *chan, unsigned int mask) 167 { 168 unsigned int timeout = 100; 169 170 while (timeout--) { 171 if ((pci_read_reg(chan, SH4A_PCIEINTR) & mask) == mask) 172 return 0; 173 174 udelay(100); 175 } 176 177 return -ETIMEDOUT; 178 } 179 180 static void __init phy_write_reg(struct pci_channel *chan, unsigned int addr, 181 unsigned int lane, unsigned int data) 182 { 183 unsigned long phyaddr; 184 185 phyaddr = (1 << BITS_CMD) + ((lane & 0xf) << BITS_LANE) + 186 ((addr & 0xff) << BITS_ADR); 187 188 /* Set write data */ 189 pci_write_reg(chan, data, SH4A_PCIEPHYDOUTR); 190 pci_write_reg(chan, phyaddr, SH4A_PCIEPHYADRR); 191 192 phy_wait_for_ack(chan); 193 194 /* Clear command */ 195 pci_write_reg(chan, 0, SH4A_PCIEPHYDOUTR); 196 pci_write_reg(chan, 0, SH4A_PCIEPHYADRR); 197 198 phy_wait_for_ack(chan); 199 } 200 201 static int __init pcie_clk_init(struct sh7786_pcie_port *port) 202 { 203 struct pci_channel *chan = port->hose; 204 struct clk *clk; 205 char fclk_name[16]; 206 int ret; 207 208 /* 209 * First register the fixed clock 210 */ 211 ret = clk_register(&fixed_pciexclkp); 212 if (unlikely(ret != 0)) 213 return ret; 214 215 /* 216 * Grab the port's function clock, which the PHY clock depends 217 * on. clock lookups don't help us much at this point, since no 218 * dev_id is available this early. Lame. 219 */ 220 snprintf(fclk_name, sizeof(fclk_name), "pcie%d_fck", port->index); 221 222 port->fclk = clk_get(NULL, fclk_name); 223 if (IS_ERR(port->fclk)) { 224 ret = PTR_ERR(port->fclk); 225 goto err_fclk; 226 } 227 228 clk_enable(port->fclk); 229 230 /* 231 * And now, set up the PHY clock 232 */ 233 clk = &port->phy_clk; 234 235 memset(clk, 0, sizeof(struct clk)); 236 237 clk->parent = &fixed_pciexclkp; 238 clk->enable_reg = (void __iomem *)(chan->reg_base + SH4A_PCIEPHYCTLR); 239 clk->enable_bit = BITS_CKE; 240 241 ret = sh_clk_mstp_register(clk, 1); 242 if (unlikely(ret < 0)) 243 goto err_phy; 244 245 return 0; 246 247 err_phy: 248 clk_disable(port->fclk); 249 clk_put(port->fclk); 250 err_fclk: 251 clk_unregister(&fixed_pciexclkp); 252 253 return ret; 254 } 255 256 static int __init phy_init(struct sh7786_pcie_port *port) 257 { 258 struct pci_channel *chan = port->hose; 259 unsigned int timeout = 100; 260 261 clk_enable(&port->phy_clk); 262 263 /* Initialize the phy */ 264 phy_write_reg(chan, 0x60, 0xf, 0x004b008b); 265 phy_write_reg(chan, 0x61, 0xf, 0x00007b41); 266 phy_write_reg(chan, 0x64, 0xf, 0x00ff4f00); 267 phy_write_reg(chan, 0x65, 0xf, 0x09070907); 268 phy_write_reg(chan, 0x66, 0xf, 0x00000010); 269 phy_write_reg(chan, 0x74, 0xf, 0x0007001c); 270 phy_write_reg(chan, 0x79, 0xf, 0x01fc000d); 271 phy_write_reg(chan, 0xb0, 0xf, 0x00000610); 272 273 /* Deassert Standby */ 274 phy_write_reg(chan, 0x67, 0x1, 0x00000400); 275 276 /* Disable clock */ 277 clk_disable(&port->phy_clk); 278 279 while (timeout--) { 280 if (pci_read_reg(chan, SH4A_PCIEPHYSR)) 281 return 0; 282 283 udelay(100); 284 } 285 286 return -ETIMEDOUT; 287 } 288 289 static void __init pcie_reset(struct sh7786_pcie_port *port) 290 { 291 struct pci_channel *chan = port->hose; 292 293 pci_write_reg(chan, 1, SH4A_PCIESRSTR); 294 pci_write_reg(chan, 0, SH4A_PCIETCTLR); 295 pci_write_reg(chan, 0, SH4A_PCIESRSTR); 296 pci_write_reg(chan, 0, SH4A_PCIETXVC0SR); 297 } 298 299 static int __init pcie_init(struct sh7786_pcie_port *port) 300 { 301 struct pci_channel *chan = port->hose; 302 unsigned int data; 303 phys_addr_t memstart, memend; 304 size_t memsize; 305 int ret, i, win; 306 307 /* Begin initialization */ 308 pcie_reset(port); 309 310 /* 311 * Initial header for port config space is type 1, set the device 312 * class to match. Hardware takes care of propagating the IDSETR 313 * settings, so there is no need to bother with a quirk. 314 */ 315 pci_write_reg(chan, PCI_CLASS_BRIDGE_PCI << 16, SH4A_PCIEIDSETR1); 316 317 /* Initialize default capabilities. */ 318 data = pci_read_reg(chan, SH4A_PCIEEXPCAP0); 319 data &= ~(PCI_EXP_FLAGS_TYPE << 16); 320 321 if (port->endpoint) 322 data |= PCI_EXP_TYPE_ENDPOINT << 20; 323 else 324 data |= PCI_EXP_TYPE_ROOT_PORT << 20; 325 326 data |= PCI_CAP_ID_EXP; 327 pci_write_reg(chan, data, SH4A_PCIEEXPCAP0); 328 329 /* Enable data link layer active state reporting */ 330 pci_write_reg(chan, PCI_EXP_LNKCAP_DLLLARC, SH4A_PCIEEXPCAP3); 331 332 /* Enable extended sync and ASPM L0s support */ 333 data = pci_read_reg(chan, SH4A_PCIEEXPCAP4); 334 data &= ~PCI_EXP_LNKCTL_ASPMC; 335 data |= PCI_EXP_LNKCTL_ES | 1; 336 pci_write_reg(chan, data, SH4A_PCIEEXPCAP4); 337 338 /* Write out the physical slot number */ 339 data = pci_read_reg(chan, SH4A_PCIEEXPCAP5); 340 data &= ~PCI_EXP_SLTCAP_PSN; 341 data |= (port->index + 1) << 19; 342 pci_write_reg(chan, data, SH4A_PCIEEXPCAP5); 343 344 /* Set the completion timer timeout to the maximum 32ms. */ 345 data = pci_read_reg(chan, SH4A_PCIETLCTLR); 346 data &= ~0x3f00; 347 data |= 0x32 << 8; 348 pci_write_reg(chan, data, SH4A_PCIETLCTLR); 349 350 /* 351 * Set fast training sequences to the maximum 255, 352 * and enable MAC data scrambling. 353 */ 354 data = pci_read_reg(chan, SH4A_PCIEMACCTLR); 355 data &= ~PCIEMACCTLR_SCR_DIS; 356 data |= (0xff << 16); 357 pci_write_reg(chan, data, SH4A_PCIEMACCTLR); 358 359 memstart = __pa(memory_start); 360 memend = __pa(memory_end); 361 memsize = roundup_pow_of_two(memend - memstart); 362 363 /* 364 * The start address must be aligned on its size. So we round 365 * it down, and then recalculate the size so that it covers 366 * the entire memory. 367 */ 368 memstart = ALIGN_DOWN(memstart, memsize); 369 memsize = roundup_pow_of_two(memend - memstart); 370 371 dma_pfn_offset = memstart >> PAGE_SHIFT; 372 373 /* 374 * If there's more than 512MB of memory, we need to roll over to 375 * LAR1/LAMR1. 376 */ 377 if (memsize > SZ_512M) { 378 pci_write_reg(chan, memstart + SZ_512M, SH4A_PCIELAR1); 379 pci_write_reg(chan, ((memsize - SZ_512M) - SZ_256) | 1, 380 SH4A_PCIELAMR1); 381 memsize = SZ_512M; 382 } else { 383 /* 384 * Otherwise just zero it out and disable it. 385 */ 386 pci_write_reg(chan, 0, SH4A_PCIELAR1); 387 pci_write_reg(chan, 0, SH4A_PCIELAMR1); 388 } 389 390 /* 391 * LAR0/LAMR0 covers up to the first 512MB, which is enough to 392 * cover all of lowmem on most platforms. 393 */ 394 pci_write_reg(chan, memstart, SH4A_PCIELAR0); 395 pci_write_reg(chan, (memsize - SZ_256) | 1, SH4A_PCIELAMR0); 396 397 /* Finish initialization */ 398 data = pci_read_reg(chan, SH4A_PCIETCTLR); 399 data |= 0x1; 400 pci_write_reg(chan, data, SH4A_PCIETCTLR); 401 402 /* Let things settle down a bit.. */ 403 mdelay(100); 404 405 /* Enable DL_Active Interrupt generation */ 406 data = pci_read_reg(chan, SH4A_PCIEDLINTENR); 407 data |= PCIEDLINTENR_DLL_ACT_ENABLE; 408 pci_write_reg(chan, data, SH4A_PCIEDLINTENR); 409 410 /* Disable MAC data scrambling. */ 411 data = pci_read_reg(chan, SH4A_PCIEMACCTLR); 412 data |= PCIEMACCTLR_SCR_DIS | (0xff << 16); 413 pci_write_reg(chan, data, SH4A_PCIEMACCTLR); 414 415 /* 416 * This will timeout if we don't have a link, but we permit the 417 * port to register anyways in order to support hotplug on future 418 * hardware. 419 */ 420 ret = pci_wait_for_irq(chan, MASK_INT_TX_CTRL); 421 422 data = pci_read_reg(chan, SH4A_PCIEPCICONF1); 423 data &= ~(PCI_STATUS_DEVSEL_MASK << 16); 424 data |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER | 425 (PCI_STATUS_CAP_LIST | PCI_STATUS_DEVSEL_FAST) << 16; 426 pci_write_reg(chan, data, SH4A_PCIEPCICONF1); 427 428 pci_write_reg(chan, 0x80888000, SH4A_PCIETXVC0DCTLR); 429 pci_write_reg(chan, 0x00222000, SH4A_PCIERXVC0DCTLR); 430 431 wmb(); 432 433 if (ret == 0) { 434 data = pci_read_reg(chan, SH4A_PCIEMACSR); 435 printk(KERN_NOTICE "PCI: PCIe#%d x%d link detected\n", 436 port->index, (data >> 20) & 0x3f); 437 } else 438 printk(KERN_NOTICE "PCI: PCIe#%d link down\n", 439 port->index); 440 441 for (i = win = 0; i < chan->nr_resources; i++) { 442 struct resource *res = chan->resources + i; 443 resource_size_t size; 444 u32 mask; 445 446 /* 447 * We can't use the 32-bit mode windows in legacy 29-bit 448 * mode, so just skip them entirely. 449 */ 450 if ((res->flags & IORESOURCE_MEM_32BIT) && __in_29bit_mode()) 451 res->flags |= IORESOURCE_DISABLED; 452 453 if (res->flags & IORESOURCE_DISABLED) 454 continue; 455 456 pci_write_reg(chan, 0x00000000, SH4A_PCIEPTCTLR(win)); 457 458 /* 459 * The PAMR mask is calculated in units of 256kB, which 460 * keeps things pretty simple. 461 */ 462 size = resource_size(res); 463 mask = (roundup_pow_of_two(size) / SZ_256K) - 1; 464 pci_write_reg(chan, mask << 18, SH4A_PCIEPAMR(win)); 465 466 pci_write_reg(chan, upper_32_bits(res->start), 467 SH4A_PCIEPARH(win)); 468 pci_write_reg(chan, lower_32_bits(res->start), 469 SH4A_PCIEPARL(win)); 470 471 mask = MASK_PARE; 472 if (res->flags & IORESOURCE_IO) 473 mask |= MASK_SPC; 474 475 pci_write_reg(chan, mask, SH4A_PCIEPTCTLR(win)); 476 477 win++; 478 } 479 480 return 0; 481 } 482 483 int pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin) 484 { 485 return evt2irq(0xae0); 486 } 487 488 void pcibios_bus_add_device(struct pci_dev *pdev) 489 { 490 pdev->dev.dma_pfn_offset = dma_pfn_offset; 491 } 492 493 static int __init sh7786_pcie_core_init(void) 494 { 495 /* Return the number of ports */ 496 return test_mode_pin(MODE_PIN12) ? 3 : 2; 497 } 498 499 static void __init sh7786_pcie_init_hw(void *data, async_cookie_t cookie) 500 { 501 struct sh7786_pcie_port *port = data; 502 int ret; 503 504 /* 505 * Check if we are configured in endpoint or root complex mode, 506 * this is a fixed pin setting that applies to all PCIe ports. 507 */ 508 port->endpoint = test_mode_pin(MODE_PIN11); 509 510 /* 511 * Setup clocks, needed both for PHY and PCIe registers. 512 */ 513 ret = pcie_clk_init(port); 514 if (unlikely(ret < 0)) { 515 pr_err("clock initialization failed for port#%d\n", 516 port->index); 517 return; 518 } 519 520 ret = phy_init(port); 521 if (unlikely(ret < 0)) { 522 pr_err("phy initialization failed for port#%d\n", 523 port->index); 524 return; 525 } 526 527 ret = pcie_init(port); 528 if (unlikely(ret < 0)) { 529 pr_err("core initialization failed for port#%d\n", 530 port->index); 531 return; 532 } 533 534 /* In the interest of preserving device ordering, synchronize */ 535 async_synchronize_cookie(cookie); 536 537 register_pci_controller(port->hose); 538 } 539 540 static struct sh7786_pcie_hwops sh7786_65nm_pcie_hwops __initdata = { 541 .core_init = sh7786_pcie_core_init, 542 .port_init_hw = sh7786_pcie_init_hw, 543 }; 544 545 static int __init sh7786_pcie_init(void) 546 { 547 struct clk *platclk; 548 u32 mm_sel; 549 int i; 550 551 printk(KERN_NOTICE "PCI: Starting initialization.\n"); 552 553 sh7786_pcie_hwops = &sh7786_65nm_pcie_hwops; 554 555 nr_ports = sh7786_pcie_hwops->core_init(); 556 BUG_ON(nr_ports > ARRAY_SIZE(sh7786_pci_channels)); 557 558 if (unlikely(nr_ports == 0)) 559 return -ENODEV; 560 561 sh7786_pcie_ports = kcalloc(nr_ports, sizeof(struct sh7786_pcie_port), 562 GFP_KERNEL); 563 if (unlikely(!sh7786_pcie_ports)) 564 return -ENOMEM; 565 566 /* 567 * Fetch any optional platform clock associated with this block. 568 * 569 * This is a rather nasty hack for boards with spec-mocking FPGAs 570 * that have a secondary set of clocks outside of the on-chip 571 * ones that need to be accounted for before there is any chance 572 * of touching the existing MSTP bits or CPG clocks. 573 */ 574 platclk = clk_get(NULL, "pcie_plat_clk"); 575 if (IS_ERR(platclk)) { 576 /* Sane hardware should probably get a WARN_ON.. */ 577 platclk = NULL; 578 } 579 580 clk_enable(platclk); 581 582 mm_sel = sh7786_mm_sel(); 583 584 /* 585 * Depending on the MMSELR register value, the PCIe0 MEM 1 586 * area may not be available. See Table 13.11 of the SH7786 587 * datasheet. 588 */ 589 if (mm_sel != 1 && mm_sel != 2 && mm_sel != 5 && mm_sel != 6) 590 sh7786_pci0_resources[2].flags |= IORESOURCE_DISABLED; 591 592 printk(KERN_NOTICE "PCI: probing %d ports.\n", nr_ports); 593 594 for (i = 0; i < nr_ports; i++) { 595 struct sh7786_pcie_port *port = sh7786_pcie_ports + i; 596 597 port->index = i; 598 port->hose = sh7786_pci_channels + i; 599 port->hose->io_map_base = port->hose->resources[0].start; 600 601 async_schedule(sh7786_pcie_hwops->port_init_hw, port); 602 } 603 604 async_synchronize_full(); 605 606 return 0; 607 } 608 arch_initcall(sh7786_pcie_init); 609