1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * arch/powerpc/sysdev/dart_iommu.c 4 * 5 * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation 6 * Copyright (C) 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org>, 7 * IBM Corporation 8 * 9 * Based on pSeries_iommu.c: 10 * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation 11 * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation 12 * 13 * Dynamic DMA mapping support, Apple U3, U4 & IBM CPC925 "DART" iommu. 14 */ 15 16 #include <linux/init.h> 17 #include <linux/types.h> 18 #include <linux/mm.h> 19 #include <linux/spinlock.h> 20 #include <linux/string.h> 21 #include <linux/pci.h> 22 #include <linux/dma-mapping.h> 23 #include <linux/vmalloc.h> 24 #include <linux/suspend.h> 25 #include <linux/memblock.h> 26 #include <linux/gfp.h> 27 #include <linux/of_address.h> 28 #include <asm/io.h> 29 #include <asm/iommu.h> 30 #include <asm/pci-bridge.h> 31 #include <asm/machdep.h> 32 #include <asm/cacheflush.h> 33 #include <asm/ppc-pci.h> 34 35 #include "dart.h" 36 37 /* DART table address and size */ 38 static u32 *dart_tablebase; 39 static unsigned long dart_tablesize; 40 41 /* Mapped base address for the dart */ 42 static unsigned int __iomem *dart; 43 44 /* Dummy val that entries are set to when unused */ 45 static unsigned int dart_emptyval; 46 47 static struct iommu_table iommu_table_dart; 48 static int iommu_table_dart_inited; 49 static int dart_dirty; 50 static int dart_is_u4; 51 52 #define DART_U4_BYPASS_BASE 0x8000000000ull 53 54 #define DBG(...) 55 56 static DEFINE_SPINLOCK(invalidate_lock); 57 58 static inline void dart_tlb_invalidate_all(void) 59 { 60 unsigned long l = 0; 61 unsigned int reg, inv_bit; 62 unsigned long limit; 63 unsigned long flags; 64 65 spin_lock_irqsave(&invalidate_lock, flags); 66 67 DBG("dart: flush\n"); 68 69 /* To invalidate the DART, set the DARTCNTL_FLUSHTLB bit in the 70 * control register and wait for it to clear. 71 * 72 * Gotcha: Sometimes, the DART won't detect that the bit gets 73 * set. If so, clear it and set it again. 74 */ 75 76 limit = 0; 77 78 inv_bit = dart_is_u4 ? DART_CNTL_U4_FLUSHTLB : DART_CNTL_U3_FLUSHTLB; 79 retry: 80 l = 0; 81 reg = DART_IN(DART_CNTL); 82 reg |= inv_bit; 83 DART_OUT(DART_CNTL, reg); 84 85 while ((DART_IN(DART_CNTL) & inv_bit) && l < (1L << limit)) 86 l++; 87 if (l == (1L << limit)) { 88 if (limit < 4) { 89 limit++; 90 reg = DART_IN(DART_CNTL); 91 reg &= ~inv_bit; 92 DART_OUT(DART_CNTL, reg); 93 goto retry; 94 } else 95 panic("DART: TLB did not flush after waiting a long " 96 "time. Buggy U3 ?"); 97 } 98 99 spin_unlock_irqrestore(&invalidate_lock, flags); 100 } 101 102 static inline void dart_tlb_invalidate_one(unsigned long bus_rpn) 103 { 104 unsigned int reg; 105 unsigned int l, limit; 106 unsigned long flags; 107 108 spin_lock_irqsave(&invalidate_lock, flags); 109 110 reg = DART_CNTL_U4_ENABLE | DART_CNTL_U4_IONE | 111 (bus_rpn & DART_CNTL_U4_IONE_MASK); 112 DART_OUT(DART_CNTL, reg); 113 114 limit = 0; 115 wait_more: 116 l = 0; 117 while ((DART_IN(DART_CNTL) & DART_CNTL_U4_IONE) && l < (1L << limit)) { 118 rmb(); 119 l++; 120 } 121 122 if (l == (1L << limit)) { 123 if (limit < 4) { 124 limit++; 125 goto wait_more; 126 } else 127 panic("DART: TLB did not flush after waiting a long " 128 "time. Buggy U4 ?"); 129 } 130 131 spin_unlock_irqrestore(&invalidate_lock, flags); 132 } 133 134 static void dart_cache_sync(unsigned int *base, unsigned int count) 135 { 136 /* 137 * We add 1 to the number of entries to flush, following a 138 * comment in Darwin indicating that the memory controller 139 * can prefetch unmapped memory under some circumstances. 140 */ 141 unsigned long start = (unsigned long)base; 142 unsigned long end = start + (count + 1) * sizeof(unsigned int); 143 unsigned int tmp; 144 145 /* Perform a standard cache flush */ 146 flush_dcache_range(start, end); 147 148 /* 149 * Perform the sequence described in the CPC925 manual to 150 * ensure all the data gets to a point the cache incoherent 151 * DART hardware will see. 152 */ 153 asm volatile(" sync;" 154 " isync;" 155 " dcbf 0,%1;" 156 " sync;" 157 " isync;" 158 " lwz %0,0(%1);" 159 " isync" : "=r" (tmp) : "r" (end) : "memory"); 160 } 161 162 static void dart_flush(struct iommu_table *tbl) 163 { 164 mb(); 165 if (dart_dirty) { 166 dart_tlb_invalidate_all(); 167 dart_dirty = 0; 168 } 169 } 170 171 static int dart_build(struct iommu_table *tbl, long index, 172 long npages, unsigned long uaddr, 173 enum dma_data_direction direction, 174 unsigned long attrs) 175 { 176 unsigned int *dp, *orig_dp; 177 unsigned int rpn; 178 long l; 179 180 DBG("dart: build at: %lx, %lx, addr: %x\n", index, npages, uaddr); 181 182 orig_dp = dp = ((unsigned int*)tbl->it_base) + index; 183 184 /* On U3, all memory is contiguous, so we can move this 185 * out of the loop. 186 */ 187 l = npages; 188 while (l--) { 189 rpn = __pa(uaddr) >> DART_PAGE_SHIFT; 190 191 *(dp++) = DARTMAP_VALID | (rpn & DARTMAP_RPNMASK); 192 193 uaddr += DART_PAGE_SIZE; 194 } 195 dart_cache_sync(orig_dp, npages); 196 197 if (dart_is_u4) { 198 rpn = index; 199 while (npages--) 200 dart_tlb_invalidate_one(rpn++); 201 } else { 202 dart_dirty = 1; 203 } 204 return 0; 205 } 206 207 208 static void dart_free(struct iommu_table *tbl, long index, long npages) 209 { 210 unsigned int *dp, *orig_dp; 211 long orig_npages = npages; 212 213 /* We don't worry about flushing the TLB cache. The only drawback of 214 * not doing it is that we won't catch buggy device drivers doing 215 * bad DMAs, but then no 32-bit architecture ever does either. 216 */ 217 218 DBG("dart: free at: %lx, %lx\n", index, npages); 219 220 orig_dp = dp = ((unsigned int *)tbl->it_base) + index; 221 222 while (npages--) 223 *(dp++) = dart_emptyval; 224 225 dart_cache_sync(orig_dp, orig_npages); 226 } 227 228 static void __init allocate_dart(void) 229 { 230 unsigned long tmp; 231 232 /* 512 pages (2MB) is max DART tablesize. */ 233 dart_tablesize = 1UL << 21; 234 235 /* 236 * 16MB (1 << 24) alignment. We allocate a full 16Mb chuck since we 237 * will blow up an entire large page anyway in the kernel mapping. 238 */ 239 dart_tablebase = memblock_alloc_try_nid_raw(SZ_16M, SZ_16M, 240 MEMBLOCK_LOW_LIMIT, SZ_2G, 241 NUMA_NO_NODE); 242 if (!dart_tablebase) 243 panic("Failed to allocate 16MB below 2GB for DART table\n"); 244 245 /* Allocate a spare page to map all invalid DART pages. We need to do 246 * that to work around what looks like a problem with the HT bridge 247 * prefetching into invalid pages and corrupting data 248 */ 249 tmp = memblock_phys_alloc(DART_PAGE_SIZE, DART_PAGE_SIZE); 250 if (!tmp) 251 panic("DART: table allocation failed\n"); 252 253 dart_emptyval = DARTMAP_VALID | ((tmp >> DART_PAGE_SHIFT) & 254 DARTMAP_RPNMASK); 255 256 printk(KERN_INFO "DART table allocated at: %p\n", dart_tablebase); 257 } 258 259 static int __init dart_init(struct device_node *dart_node) 260 { 261 unsigned int i; 262 unsigned long base, size; 263 struct resource r; 264 265 /* IOMMU disabled by the user ? bail out */ 266 if (iommu_is_off) 267 return -ENODEV; 268 269 /* 270 * Only use the DART if the machine has more than 1GB of RAM 271 * or if requested with iommu=on on cmdline. 272 * 273 * 1GB of RAM is picked as limit because some default devices 274 * (i.e. Airport Extreme) have 30 bit address range limits. 275 */ 276 277 if (!iommu_force_on && memblock_end_of_DRAM() <= 0x40000000ull) 278 return -ENODEV; 279 280 /* Get DART registers */ 281 if (of_address_to_resource(dart_node, 0, &r)) 282 panic("DART: can't get register base ! "); 283 284 /* Map in DART registers */ 285 dart = ioremap(r.start, resource_size(&r)); 286 if (dart == NULL) 287 panic("DART: Cannot map registers!"); 288 289 /* Allocate the DART and dummy page */ 290 allocate_dart(); 291 292 /* Fill initial table */ 293 for (i = 0; i < dart_tablesize/4; i++) 294 dart_tablebase[i] = dart_emptyval; 295 296 /* Push to memory */ 297 dart_cache_sync(dart_tablebase, dart_tablesize / sizeof(u32)); 298 299 /* Initialize DART with table base and enable it. */ 300 base = ((unsigned long)dart_tablebase) >> DART_PAGE_SHIFT; 301 size = dart_tablesize >> DART_PAGE_SHIFT; 302 if (dart_is_u4) { 303 size &= DART_SIZE_U4_SIZE_MASK; 304 DART_OUT(DART_BASE_U4, base); 305 DART_OUT(DART_SIZE_U4, size); 306 DART_OUT(DART_CNTL, DART_CNTL_U4_ENABLE); 307 } else { 308 size &= DART_CNTL_U3_SIZE_MASK; 309 DART_OUT(DART_CNTL, 310 DART_CNTL_U3_ENABLE | 311 (base << DART_CNTL_U3_BASE_SHIFT) | 312 (size << DART_CNTL_U3_SIZE_SHIFT)); 313 } 314 315 /* Invalidate DART to get rid of possible stale TLBs */ 316 dart_tlb_invalidate_all(); 317 318 printk(KERN_INFO "DART IOMMU initialized for %s type chipset\n", 319 dart_is_u4 ? "U4" : "U3"); 320 321 return 0; 322 } 323 324 static struct iommu_table_ops iommu_dart_ops = { 325 .set = dart_build, 326 .clear = dart_free, 327 .flush = dart_flush, 328 }; 329 330 static void iommu_table_dart_setup(void) 331 { 332 iommu_table_dart.it_busno = 0; 333 iommu_table_dart.it_offset = 0; 334 /* it_size is in number of entries */ 335 iommu_table_dart.it_size = dart_tablesize / sizeof(u32); 336 iommu_table_dart.it_page_shift = IOMMU_PAGE_SHIFT_4K; 337 338 /* Initialize the common IOMMU code */ 339 iommu_table_dart.it_base = (unsigned long)dart_tablebase; 340 iommu_table_dart.it_index = 0; 341 iommu_table_dart.it_blocksize = 1; 342 iommu_table_dart.it_ops = &iommu_dart_ops; 343 if (!iommu_init_table(&iommu_table_dart, -1, 0, 0)) 344 panic("Failed to initialize iommu table"); 345 346 /* Reserve the last page of the DART to avoid possible prefetch 347 * past the DART mapped area 348 */ 349 set_bit(iommu_table_dart.it_size - 1, iommu_table_dart.it_map); 350 } 351 352 static void pci_dma_bus_setup_dart(struct pci_bus *bus) 353 { 354 if (!iommu_table_dart_inited) { 355 iommu_table_dart_inited = 1; 356 iommu_table_dart_setup(); 357 } 358 } 359 360 static bool dart_device_on_pcie(struct device *dev) 361 { 362 struct device_node *np = of_node_get(dev->of_node); 363 364 while(np) { 365 if (of_device_is_compatible(np, "U4-pcie") || 366 of_device_is_compatible(np, "u4-pcie")) { 367 of_node_put(np); 368 return true; 369 } 370 np = of_get_next_parent(np); 371 } 372 return false; 373 } 374 375 static void pci_dma_dev_setup_dart(struct pci_dev *dev) 376 { 377 if (dart_is_u4 && dart_device_on_pcie(&dev->dev)) 378 dev->dev.archdata.dma_offset = DART_U4_BYPASS_BASE; 379 set_iommu_table_base(&dev->dev, &iommu_table_dart); 380 } 381 382 static bool iommu_bypass_supported_dart(struct pci_dev *dev, u64 mask) 383 { 384 return dart_is_u4 && 385 dart_device_on_pcie(&dev->dev) && 386 mask >= DMA_BIT_MASK(40); 387 } 388 389 void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops) 390 { 391 struct device_node *dn; 392 393 /* Find the DART in the device-tree */ 394 dn = of_find_compatible_node(NULL, "dart", "u3-dart"); 395 if (dn == NULL) { 396 dn = of_find_compatible_node(NULL, "dart", "u4-dart"); 397 if (dn == NULL) 398 return; /* use default direct_dma_ops */ 399 dart_is_u4 = 1; 400 } 401 402 /* Initialize the DART HW */ 403 if (dart_init(dn) != 0) { 404 of_node_put(dn); 405 return; 406 } 407 /* 408 * U4 supports a DART bypass, we use it for 64-bit capable devices to 409 * improve performance. However, that only works for devices connected 410 * to the U4 own PCIe interface, not bridged through hypertransport. 411 * We need the device to support at least 40 bits of addresses. 412 */ 413 controller_ops->dma_dev_setup = pci_dma_dev_setup_dart; 414 controller_ops->dma_bus_setup = pci_dma_bus_setup_dart; 415 controller_ops->iommu_bypass_supported = iommu_bypass_supported_dart; 416 417 /* Setup pci_dma ops */ 418 set_pci_dma_ops(&dma_iommu_ops); 419 of_node_put(dn); 420 } 421 422 #ifdef CONFIG_PM 423 static void iommu_dart_restore(void) 424 { 425 dart_cache_sync(dart_tablebase, dart_tablesize / sizeof(u32)); 426 dart_tlb_invalidate_all(); 427 } 428 429 static int __init iommu_init_late_dart(void) 430 { 431 if (!dart_tablebase) 432 return 0; 433 434 ppc_md.iommu_restore = iommu_dart_restore; 435 436 return 0; 437 } 438 439 late_initcall(iommu_init_late_dart); 440 #endif /* CONFIG_PM */ 441