1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Low-Level PCI Express Support for the SH7786 4 * 5 * Copyright (C) 2009 - 2011 Paul Mundt 6 */ 7 #define pr_fmt(fmt) "PCI: " fmt 8 9 #include <linux/pci.h> 10 #include <linux/init.h> 11 #include <linux/kernel.h> 12 #include <linux/io.h> 13 #include <linux/async.h> 14 #include <linux/delay.h> 15 #include <linux/dma-map-ops.h> 16 #include <linux/slab.h> 17 #include <linux/clk.h> 18 #include <linux/sh_clk.h> 19 #include <linux/sh_intc.h> 20 #include <cpu/sh7786.h> 21 #include "pcie-sh7786.h" 22 #include <linux/sizes.h> 23 24 struct sh7786_pcie_port { 25 struct pci_channel *hose; 26 struct clk *fclk, phy_clk; 27 unsigned int index; 28 int endpoint; 29 int link; 30 }; 31 32 static struct sh7786_pcie_port *sh7786_pcie_ports; 33 static unsigned int nr_ports; 34 size_t memsize; 35 u64 memstart; 36 37 static struct sh7786_pcie_hwops { 38 int (*core_init)(void); 39 async_func_t port_init_hw; 40 } *sh7786_pcie_hwops; 41 42 static struct resource sh7786_pci0_resources[] = { 43 { 44 .name = "PCIe0 MEM 0", 45 .start = 0xfd000000, 46 .end = 0xfd000000 + SZ_8M - 1, 47 .flags = IORESOURCE_MEM, 48 }, { 49 .name = "PCIe0 MEM 1", 50 .start = 0xc0000000, 51 .end = 0xc0000000 + SZ_512M - 1, 52 .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT, 53 }, { 54 .name = "PCIe0 MEM 2", 55 .start = 0x10000000, 56 .end = 0x10000000 + SZ_64M - 1, 57 .flags = IORESOURCE_MEM, 58 }, { 59 .name = "PCIe0 IO", 60 .start = 0xfe100000, 61 .end = 0xfe100000 + SZ_1M - 1, 62 .flags = IORESOURCE_IO, 63 }, 64 }; 65 66 static struct resource sh7786_pci1_resources[] = { 67 { 68 .name = "PCIe1 MEM 0", 69 .start = 0xfd800000, 70 .end = 0xfd800000 + SZ_8M - 1, 71 .flags = IORESOURCE_MEM, 72 }, { 73 .name = "PCIe1 MEM 1", 74 .start = 0xa0000000, 75 .end = 0xa0000000 + SZ_512M - 1, 76 .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT, 77 }, { 78 .name = "PCIe1 MEM 2", 79 .start = 0x30000000, 80 .end = 0x30000000 + SZ_256M - 1, 81 .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT, 82 }, { 83 .name = "PCIe1 IO", 84 .start = 0xfe300000, 85 .end = 0xfe300000 + SZ_1M - 1, 86 .flags = IORESOURCE_IO, 87 }, 88 }; 89 90 static struct resource sh7786_pci2_resources[] = { 91 { 92 .name = "PCIe2 MEM 0", 93 .start = 0xfc800000, 94 .end = 0xfc800000 + SZ_4M - 1, 95 .flags = IORESOURCE_MEM, 96 }, { 97 .name = "PCIe2 MEM 1", 98 .start = 0x80000000, 99 .end = 0x80000000 + SZ_512M - 1, 100 .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT, 101 }, { 102 .name = "PCIe2 MEM 2", 103 .start = 0x20000000, 104 .end = 0x20000000 + SZ_256M - 1, 105 .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT, 106 }, { 107 .name = "PCIe2 IO", 108 .start = 0xfcd00000, 109 .end = 0xfcd00000 + SZ_1M - 1, 110 .flags = IORESOURCE_IO, 111 }, 112 }; 113 114 extern struct pci_ops sh7786_pci_ops; 115 116 #define DEFINE_CONTROLLER(start, idx) \ 117 { \ 118 .pci_ops = &sh7786_pci_ops, \ 119 .resources = sh7786_pci##idx##_resources, \ 120 .nr_resources = ARRAY_SIZE(sh7786_pci##idx##_resources), \ 121 .reg_base = start, \ 122 .mem_offset = 0, \ 123 .io_offset = 0, \ 124 } 125 126 static struct pci_channel sh7786_pci_channels[] = { 127 DEFINE_CONTROLLER(0xfe000000, 0), 128 DEFINE_CONTROLLER(0xfe200000, 1), 129 DEFINE_CONTROLLER(0xfcc00000, 2), 130 }; 131 132 static struct clk fixed_pciexclkp = { 133 .rate = 100000000, /* 100 MHz reference clock */ 134 }; 135 136 static void sh7786_pci_fixup(struct pci_dev *dev) 137 { 138 /* 139 * Prevent enumeration of root complex resources. 140 */ 141 if (pci_is_root_bus(dev->bus) && dev->devfn == 0) { 142 struct resource *r; 143 144 pci_dev_for_each_resource(dev, r) { 145 r->start = 0; 146 r->end = 0; 147 r->flags = 0; 148 } 149 } 150 } 151 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_RENESAS, PCI_DEVICE_ID_RENESAS_SH7786, 152 sh7786_pci_fixup); 153 154 static int __init phy_wait_for_ack(struct pci_channel *chan) 155 { 156 unsigned int timeout = 100; 157 158 while (timeout--) { 159 if (pci_read_reg(chan, SH4A_PCIEPHYADRR) & (1 << BITS_ACK)) 160 return 0; 161 162 udelay(100); 163 } 164 165 return -ETIMEDOUT; 166 } 167 168 static int __init pci_wait_for_irq(struct pci_channel *chan, unsigned int mask) 169 { 170 unsigned int timeout = 100; 171 172 while (timeout--) { 173 if ((pci_read_reg(chan, SH4A_PCIEINTR) & mask) == mask) 174 return 0; 175 176 udelay(100); 177 } 178 179 return -ETIMEDOUT; 180 } 181 182 static void __init phy_write_reg(struct pci_channel *chan, unsigned int addr, 183 unsigned int lane, unsigned int data) 184 { 185 unsigned long phyaddr; 186 187 phyaddr = (1 << BITS_CMD) + ((lane & 0xf) << BITS_LANE) + 188 ((addr & 0xff) << BITS_ADR); 189 190 /* Set write data */ 191 pci_write_reg(chan, data, SH4A_PCIEPHYDOUTR); 192 pci_write_reg(chan, phyaddr, SH4A_PCIEPHYADRR); 193 194 phy_wait_for_ack(chan); 195 196 /* Clear command */ 197 pci_write_reg(chan, 0, SH4A_PCIEPHYDOUTR); 198 pci_write_reg(chan, 0, SH4A_PCIEPHYADRR); 199 200 phy_wait_for_ack(chan); 201 } 202 203 static int __init pcie_clk_init(struct sh7786_pcie_port *port) 204 { 205 struct pci_channel *chan = port->hose; 206 struct clk *clk; 207 char fclk_name[16]; 208 int ret; 209 210 /* 211 * First register the fixed clock 212 */ 213 ret = clk_register(&fixed_pciexclkp); 214 if (unlikely(ret != 0)) 215 return ret; 216 217 /* 218 * Grab the port's function clock, which the PHY clock depends 219 * on. clock lookups don't help us much at this point, since no 220 * dev_id is available this early. Lame. 221 */ 222 snprintf(fclk_name, sizeof(fclk_name), "pcie%d_fck", port->index); 223 224 port->fclk = clk_get(NULL, fclk_name); 225 if (IS_ERR(port->fclk)) { 226 ret = PTR_ERR(port->fclk); 227 goto err_fclk; 228 } 229 230 clk_enable(port->fclk); 231 232 /* 233 * And now, set up the PHY clock 234 */ 235 clk = &port->phy_clk; 236 237 memset(clk, 0, sizeof(struct clk)); 238 239 clk->parent = &fixed_pciexclkp; 240 clk->enable_reg = (void __iomem *)(chan->reg_base + SH4A_PCIEPHYCTLR); 241 clk->enable_bit = BITS_CKE; 242 243 ret = sh_clk_mstp_register(clk, 1); 244 if (unlikely(ret < 0)) 245 goto err_phy; 246 247 return 0; 248 249 err_phy: 250 clk_disable(port->fclk); 251 clk_put(port->fclk); 252 err_fclk: 253 clk_unregister(&fixed_pciexclkp); 254 255 return ret; 256 } 257 258 static int __init phy_init(struct sh7786_pcie_port *port) 259 { 260 struct pci_channel *chan = port->hose; 261 unsigned int timeout = 100; 262 263 clk_enable(&port->phy_clk); 264 265 /* Initialize the phy */ 266 phy_write_reg(chan, 0x60, 0xf, 0x004b008b); 267 phy_write_reg(chan, 0x61, 0xf, 0x00007b41); 268 phy_write_reg(chan, 0x64, 0xf, 0x00ff4f00); 269 phy_write_reg(chan, 0x65, 0xf, 0x09070907); 270 phy_write_reg(chan, 0x66, 0xf, 0x00000010); 271 phy_write_reg(chan, 0x74, 0xf, 0x0007001c); 272 phy_write_reg(chan, 0x79, 0xf, 0x01fc000d); 273 phy_write_reg(chan, 0xb0, 0xf, 0x00000610); 274 275 /* Deassert Standby */ 276 phy_write_reg(chan, 0x67, 0x1, 0x00000400); 277 278 /* Disable clock */ 279 clk_disable(&port->phy_clk); 280 281 while (timeout--) { 282 if (pci_read_reg(chan, SH4A_PCIEPHYSR)) 283 return 0; 284 285 udelay(100); 286 } 287 288 return -ETIMEDOUT; 289 } 290 291 static void __init pcie_reset(struct sh7786_pcie_port *port) 292 { 293 struct pci_channel *chan = port->hose; 294 295 pci_write_reg(chan, 1, SH4A_PCIESRSTR); 296 pci_write_reg(chan, 0, SH4A_PCIETCTLR); 297 pci_write_reg(chan, 0, SH4A_PCIESRSTR); 298 pci_write_reg(chan, 0, SH4A_PCIETXVC0SR); 299 } 300 301 static int __init pcie_init(struct sh7786_pcie_port *port) 302 { 303 struct pci_channel *chan = port->hose; 304 unsigned int data; 305 phys_addr_t memstart, memend; 306 int ret, i, win; 307 308 /* Begin initialization */ 309 pcie_reset(port); 310 311 /* 312 * Initial header for port config space is type 1, set the device 313 * class to match. Hardware takes care of propagating the IDSETR 314 * settings, so there is no need to bother with a quirk. 315 */ 316 pci_write_reg(chan, PCI_CLASS_BRIDGE_PCI_NORMAL << 8, SH4A_PCIEIDSETR1); 317 318 /* Initialize default capabilities. */ 319 data = pci_read_reg(chan, SH4A_PCIEEXPCAP0); 320 data &= ~(PCI_EXP_FLAGS_TYPE << 16); 321 322 if (port->endpoint) 323 data |= PCI_EXP_TYPE_ENDPOINT << 20; 324 else 325 data |= PCI_EXP_TYPE_ROOT_PORT << 20; 326 327 data |= PCI_CAP_ID_EXP; 328 pci_write_reg(chan, data, SH4A_PCIEEXPCAP0); 329 330 /* Enable data link layer active state reporting */ 331 pci_write_reg(chan, PCI_EXP_LNKCAP_DLLLARC, SH4A_PCIEEXPCAP3); 332 333 /* Enable extended sync and ASPM L0s support */ 334 data = pci_read_reg(chan, SH4A_PCIEEXPCAP4); 335 data &= ~PCI_EXP_LNKCTL_ASPMC; 336 data |= PCI_EXP_LNKCTL_ES | 1; 337 pci_write_reg(chan, data, SH4A_PCIEEXPCAP4); 338 339 /* Write out the physical slot number */ 340 data = pci_read_reg(chan, SH4A_PCIEEXPCAP5); 341 data &= ~PCI_EXP_SLTCAP_PSN; 342 data |= (port->index + 1) << 19; 343 pci_write_reg(chan, data, SH4A_PCIEEXPCAP5); 344 345 /* Set the completion timer timeout to the maximum 32ms. */ 346 data = pci_read_reg(chan, SH4A_PCIETLCTLR); 347 data &= ~0x3f00; 348 data |= 0x32 << 8; 349 pci_write_reg(chan, data, SH4A_PCIETLCTLR); 350 351 /* 352 * Set fast training sequences to the maximum 255, 353 * and enable MAC data scrambling. 354 */ 355 data = pci_read_reg(chan, SH4A_PCIEMACCTLR); 356 data &= ~PCIEMACCTLR_SCR_DIS; 357 data |= (0xff << 16); 358 pci_write_reg(chan, data, SH4A_PCIEMACCTLR); 359 360 memstart = __pa(memory_start); 361 memend = __pa(memory_end); 362 memsize = roundup_pow_of_two(memend - memstart); 363 364 /* 365 * The start address must be aligned on its size. So we round 366 * it down, and then recalculate the size so that it covers 367 * the entire memory. 368 */ 369 memstart = ALIGN_DOWN(memstart, memsize); 370 memsize = roundup_pow_of_two(memend - memstart); 371 372 /* 373 * If there's more than 512MB of memory, we need to roll over to 374 * LAR1/LAMR1. 375 */ 376 if (memsize > SZ_512M) { 377 pci_write_reg(chan, memstart + SZ_512M, SH4A_PCIELAR1); 378 pci_write_reg(chan, ((memsize - SZ_512M) - SZ_256) | 1, 379 SH4A_PCIELAMR1); 380 memsize = SZ_512M; 381 } else { 382 /* 383 * Otherwise just zero it out and disable it. 384 */ 385 pci_write_reg(chan, 0, SH4A_PCIELAR1); 386 pci_write_reg(chan, 0, SH4A_PCIELAMR1); 387 } 388 389 /* 390 * LAR0/LAMR0 covers up to the first 512MB, which is enough to 391 * cover all of lowmem on most platforms. 392 */ 393 pci_write_reg(chan, memstart, SH4A_PCIELAR0); 394 pci_write_reg(chan, (memsize - SZ_256) | 1, SH4A_PCIELAMR0); 395 396 /* Finish initialization */ 397 data = pci_read_reg(chan, SH4A_PCIETCTLR); 398 data |= 0x1; 399 pci_write_reg(chan, data, SH4A_PCIETCTLR); 400 401 /* Let things settle down a bit.. */ 402 mdelay(100); 403 404 /* Enable DL_Active Interrupt generation */ 405 data = pci_read_reg(chan, SH4A_PCIEDLINTENR); 406 data |= PCIEDLINTENR_DLL_ACT_ENABLE; 407 pci_write_reg(chan, data, SH4A_PCIEDLINTENR); 408 409 /* Disable MAC data scrambling. */ 410 data = pci_read_reg(chan, SH4A_PCIEMACCTLR); 411 data |= PCIEMACCTLR_SCR_DIS | (0xff << 16); 412 pci_write_reg(chan, data, SH4A_PCIEMACCTLR); 413 414 /* 415 * This will timeout if we don't have a link, but we permit the 416 * port to register anyways in order to support hotplug on future 417 * hardware. 418 */ 419 ret = pci_wait_for_irq(chan, MASK_INT_TX_CTRL); 420 421 data = pci_read_reg(chan, SH4A_PCIEPCICONF1); 422 data &= ~(PCI_STATUS_DEVSEL_MASK << 16); 423 data |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER | 424 (PCI_STATUS_CAP_LIST | PCI_STATUS_DEVSEL_FAST) << 16; 425 pci_write_reg(chan, data, SH4A_PCIEPCICONF1); 426 427 pci_write_reg(chan, 0x80888000, SH4A_PCIETXVC0DCTLR); 428 pci_write_reg(chan, 0x00222000, SH4A_PCIERXVC0DCTLR); 429 430 wmb(); 431 432 if (ret == 0) { 433 data = pci_read_reg(chan, SH4A_PCIEMACSR); 434 printk(KERN_NOTICE "PCI: PCIe#%d x%d link detected\n", 435 port->index, (data >> 20) & 0x3f); 436 } else 437 printk(KERN_NOTICE "PCI: PCIe#%d link down\n", 438 port->index); 439 440 for (i = win = 0; i < chan->nr_resources; i++) { 441 struct resource *res = chan->resources + i; 442 resource_size_t size; 443 u32 mask; 444 445 /* 446 * We can't use the 32-bit mode windows in legacy 29-bit 447 * mode, so just skip them entirely. 448 */ 449 if ((res->flags & IORESOURCE_MEM_32BIT) && __in_29bit_mode()) 450 res->flags |= IORESOURCE_DISABLED; 451 452 if (res->flags & IORESOURCE_DISABLED) 453 continue; 454 455 pci_write_reg(chan, 0x00000000, SH4A_PCIEPTCTLR(win)); 456 457 /* 458 * The PAMR mask is calculated in units of 256kB, which 459 * keeps things pretty simple. 460 */ 461 size = resource_size(res); 462 mask = (roundup_pow_of_two(size) / SZ_256K) - 1; 463 pci_write_reg(chan, mask << 18, SH4A_PCIEPAMR(win)); 464 465 pci_write_reg(chan, upper_32_bits(res->start), 466 SH4A_PCIEPARH(win)); 467 pci_write_reg(chan, lower_32_bits(res->start), 468 SH4A_PCIEPARL(win)); 469 470 mask = MASK_PARE; 471 if (res->flags & IORESOURCE_IO) 472 mask |= MASK_SPC; 473 474 pci_write_reg(chan, mask, SH4A_PCIEPTCTLR(win)); 475 476 win++; 477 } 478 479 return 0; 480 } 481 482 int pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin) 483 { 484 return evt2irq(0xae0); 485 } 486 487 void pcibios_bus_add_device(struct pci_dev *pdev) 488 { 489 dma_direct_set_offset(&pdev->dev, __pa(memory_start), 490 __pa(memory_start) - memstart, memsize); 491 } 492 493 static int __init sh7786_pcie_core_init(void) 494 { 495 /* Return the number of ports */ 496 return test_mode_pin(MODE_PIN12) ? 3 : 2; 497 } 498 499 static void __init sh7786_pcie_init_hw(void *data, async_cookie_t cookie) 500 { 501 struct sh7786_pcie_port *port = data; 502 int ret; 503 504 /* 505 * Check if we are configured in endpoint or root complex mode, 506 * this is a fixed pin setting that applies to all PCIe ports. 507 */ 508 port->endpoint = test_mode_pin(MODE_PIN11); 509 510 /* 511 * Setup clocks, needed both for PHY and PCIe registers. 512 */ 513 ret = pcie_clk_init(port); 514 if (unlikely(ret < 0)) { 515 pr_err("clock initialization failed for port#%d\n", 516 port->index); 517 return; 518 } 519 520 ret = phy_init(port); 521 if (unlikely(ret < 0)) { 522 pr_err("phy initialization failed for port#%d\n", 523 port->index); 524 return; 525 } 526 527 ret = pcie_init(port); 528 if (unlikely(ret < 0)) { 529 pr_err("core initialization failed for port#%d\n", 530 port->index); 531 return; 532 } 533 534 /* In the interest of preserving device ordering, synchronize */ 535 async_synchronize_cookie(cookie); 536 537 register_pci_controller(port->hose); 538 } 539 540 static struct sh7786_pcie_hwops sh7786_65nm_pcie_hwops __initdata = { 541 .core_init = sh7786_pcie_core_init, 542 .port_init_hw = sh7786_pcie_init_hw, 543 }; 544 545 static int __init sh7786_pcie_init(void) 546 { 547 struct clk *platclk; 548 u32 mm_sel; 549 int i; 550 551 printk(KERN_NOTICE "PCI: Starting initialization.\n"); 552 553 sh7786_pcie_hwops = &sh7786_65nm_pcie_hwops; 554 555 nr_ports = sh7786_pcie_hwops->core_init(); 556 BUG_ON(nr_ports > ARRAY_SIZE(sh7786_pci_channels)); 557 558 if (unlikely(nr_ports == 0)) 559 return -ENODEV; 560 561 sh7786_pcie_ports = kcalloc(nr_ports, sizeof(struct sh7786_pcie_port), 562 GFP_KERNEL); 563 if (unlikely(!sh7786_pcie_ports)) 564 return -ENOMEM; 565 566 /* 567 * Fetch any optional platform clock associated with this block. 568 * 569 * This is a rather nasty hack for boards with spec-mocking FPGAs 570 * that have a secondary set of clocks outside of the on-chip 571 * ones that need to be accounted for before there is any chance 572 * of touching the existing MSTP bits or CPG clocks. 573 */ 574 platclk = clk_get(NULL, "pcie_plat_clk"); 575 if (IS_ERR(platclk)) { 576 /* Sane hardware should probably get a WARN_ON.. */ 577 platclk = NULL; 578 } 579 580 clk_enable(platclk); 581 582 mm_sel = sh7786_mm_sel(); 583 584 /* 585 * Depending on the MMSELR register value, the PCIe0 MEM 1 586 * area may not be available. See Table 13.11 of the SH7786 587 * datasheet. 588 */ 589 if (mm_sel != 1 && mm_sel != 2 && mm_sel != 5 && mm_sel != 6) 590 sh7786_pci0_resources[2].flags |= IORESOURCE_DISABLED; 591 592 printk(KERN_NOTICE "PCI: probing %d ports.\n", nr_ports); 593 594 for (i = 0; i < nr_ports; i++) { 595 struct sh7786_pcie_port *port = sh7786_pcie_ports + i; 596 597 port->index = i; 598 port->hose = sh7786_pci_channels + i; 599 port->hose->io_map_base = port->hose->resources[0].start; 600 601 async_schedule(sh7786_pcie_hwops->port_init_hw, port); 602 } 603 604 async_synchronize_full(); 605 606 return 0; 607 } 608 arch_initcall(sh7786_pcie_init); 609