/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <asm/cacheflush.h>
#include <asm/pgtable.h>
#include <linux/compiler.h>
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/iommu.h>
#include <linux/jiffies.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

/** MMU register offsets */
#define RK_MMU_DTE_ADDR		0x00	/* Directory table address */
#define RK_MMU_STATUS		0x04
#define RK_MMU_COMMAND		0x08
#define RK_MMU_PAGE_FAULT_ADDR	0x0C	/* IOVA of last page fault */
#define RK_MMU_ZAP_ONE_LINE	0x10	/* Shootdown one IOTLB entry */
#define RK_MMU_INT_RAWSTAT	0x14	/* IRQ status ignoring mask */
#define RK_MMU_INT_CLEAR	0x18	/* Acknowledge and re-arm irq */
#define RK_MMU_INT_MASK		0x1C	/* IRQ enable */
#define RK_MMU_INT_STATUS	0x20	/* IRQ status after masking */
#define RK_MMU_AUTO_GATING	0x24

#define DTE_ADDR_DUMMY		0xCAFEBABE
#define FORCE_RESET_TIMEOUT	100	/* ms */

/* RK_MMU_STATUS fields */
#define RK_MMU_STATUS_PAGING_ENABLED       BIT(0)
#define RK_MMU_STATUS_PAGE_FAULT_ACTIVE    BIT(1)
#define RK_MMU_STATUS_STALL_ACTIVE         BIT(2)
#define RK_MMU_STATUS_IDLE                 BIT(3)
#define RK_MMU_STATUS_REPLAY_BUFFER_EMPTY  BIT(4)
#define RK_MMU_STATUS_PAGE_FAULT_IS_WRITE  BIT(5)
#define RK_MMU_STATUS_STALL_NOT_ACTIVE     BIT(31)

/* RK_MMU_COMMAND command values */
#define RK_MMU_CMD_ENABLE_PAGING    0  /* Enable memory translation */
#define RK_MMU_CMD_DISABLE_PAGING   1  /* Disable memory translation */
#define RK_MMU_CMD_ENABLE_STALL     2  /* Stall paging to allow other cmds */
#define RK_MMU_CMD_DISABLE_STALL    3  /* Stop stall re-enables paging */
#define RK_MMU_CMD_ZAP_CACHE        4  /* Shoot down entire IOTLB */
#define RK_MMU_CMD_PAGE_FAULT_DONE  5  /* Clear page fault */
#define RK_MMU_CMD_FORCE_RESET      6  /* Reset all registers */

/* RK_MMU_INT_* register fields */
#define RK_MMU_IRQ_PAGE_FAULT    0x01  /* page fault */
#define RK_MMU_IRQ_BUS_ERROR     0x02  /* bus read error */
#define RK_MMU_IRQ_MASK          (RK_MMU_IRQ_PAGE_FAULT | RK_MMU_IRQ_BUS_ERROR)

#define NUM_DT_ENTRIES 1024
#define NUM_PT_ENTRIES 1024

#define SPAGE_ORDER 12
#define SPAGE_SIZE (1 << SPAGE_ORDER)

/*
 * Support mapping any size that fits in one page table:
 *   4 KiB to 4 MiB
 */
#define RK_IOMMU_PGSIZE_BITMAP 0x007ff000

#define IOMMU_REG_POLL_COUNT_FAST 1000

struct rk_iommu_domain {
	struct list_head iommus;
	u32 *dt; /* page directory table */
	spinlock_t iommus_lock; /* lock for iommus list */
	spinlock_t dt_lock; /* lock for modifying page directory table */

	struct iommu_domain domain;
};

struct rk_iommu {
	struct device *dev;
	void __iomem **bases;
	int num_mmu;
	int irq;
	struct list_head node; /* entry in rk_iommu_domain.iommus */
	struct iommu_domain *domain; /* domain to which iommu is attached */
};

static inline void rk_table_flush(u32 *va, unsigned int count)
{
	phys_addr_t pa_start = virt_to_phys(va);
	phys_addr_t pa_end = virt_to_phys(va + count);
	size_t size = pa_end - pa_start;

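	/*
	 * The MMU walks page tables straight from physical memory, so the
	 * updated entries must be flushed out of the CPU caches before the
	 * hardware can see them: clean the inner (L1) dcache by virtual
	 * range, then the outer (L2) cache by physical range.
	 */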
	__cpuc_flush_dcache_area(va, size);
	outer_flush_range(pa_start, pa_end);
}

static struct rk_iommu_domain *to_rk_domain(struct iommu_domain *dom)
{
	return container_of(dom, struct rk_iommu_domain, domain);
}

/**
 * Inspired by _wait_for in intel_drv.h
 * This is NOT safe for use in interrupt context.
 *
 * Note that it's important that we check the condition again after having
 * timed out, since the timeout could be due to preemption or similar and
 * we've never had a chance to check the condition before the timeout.
 */
#define rk_wait_for(COND, MS) ({ \
	unsigned long timeout__ = jiffies + msecs_to_jiffies(MS) + 1;	\
	int ret__ = 0;							\
	while (!(COND)) {						\
		if (time_after(jiffies, timeout__)) {			\
			ret__ = (COND) ? 0 : -ETIMEDOUT;		\
			break;						\
		}							\
		usleep_range(50, 100);					\
	}								\
	ret__;								\
})

/*
 * The Rockchip rk3288 iommu uses a 2-level page table.
 * The first level is the "Directory Table" (DT).
 * The DT consists of 1024 4-byte Directory Table Entries (DTEs), each pointing
 * to a "Page Table".
 * The second level is the 1024 Page Tables (PT).
 * Each PT consists of 1024 4-byte Page Table Entries (PTEs), each pointing to
 * a 4 KB page of physical memory.
 *
 * The DT and each PT fits in a single 4 KB page (4-bytes * 1024 entries).
 * Each iommu device has a MMU_DTE_ADDR register that contains the physical
 * address of the start of the DT page.
 *
 * The structure of the page table is as follows:
 *
 *                   DT
 * MMU_DTE_ADDR -> +-----+
 *                 |     |
 *                 +-----+     PT
 *                 | DTE | -> +-----+
 *                 +-----+    |     |     Memory
 *                 |     |    +-----+     Page
 *                 |     |    | PTE | -> +-----+
 *                 +-----+    +-----+    |     |
 *                            |     |    |     |
 *                            |     |    |     |
 *                            +-----+    |     |
 *                                       |     |
 *                                       |     |
 *                                       +-----+
 */

/*
 * Each DTE has a PT address and a valid bit:
 * +---------------------+-----------+-+
 * | PT address          | Reserved  |V|
 * +---------------------+-----------+-+
 *  31:12 - PT address (PTs always start on a 4 KB boundary)
 *  11: 1 - Reserved
 *      0 - 1 if PT @ PT address is valid
 */
#define RK_DTE_PT_ADDRESS_MASK    0xfffff000
#define RK_DTE_PT_VALID           BIT(0)

static inline phys_addr_t rk_dte_pt_address(u32 dte)
{
	return (phys_addr_t)dte & RK_DTE_PT_ADDRESS_MASK;
}

static inline bool rk_dte_is_pt_valid(u32 dte)
{
	return dte & RK_DTE_PT_VALID;
}

static u32 rk_mk_dte(u32 *pt)
{
	phys_addr_t pt_phys = virt_to_phys(pt);
	return (pt_phys & RK_DTE_PT_ADDRESS_MASK) | RK_DTE_PT_VALID;
}

/*
 * Each PTE has a Page address, some flags and a valid bit:
 * +---------------------+---+-------+-+
 * | Page address        |Rsv| Flags |V|
 * +---------------------+---+-------+-+
 *  31:12 - Page address (Pages always start on a 4 KB boundary)
 *  11: 9 - Reserved
 *   8: 1 - Flags
 *      8 - Read allocate - allocate cache space on read misses
 *      7 - Read cache - enable cache & prefetch of data
 *      6 - Write buffer - enable delaying writes on their way to memory
 *      5 - Write allocate - allocate cache space on write misses
 *      4 - Write cache - different writes can be merged together
 *      3 - Override cache attributes
 *          if 1, bits 4-8 control cache attributes
 *          if 0, the system bus defaults are used
 *      2 - Writable
 *      1 - Readable
 *      0 - 1 if Page @ Page address is valid
 */
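/*
 * For example (values illustrative), rk_mk_pte() below encodes a readable and
 * writable mapping of physical page 0x0001f000 as 0x0001f007: the page
 * address, the Writable and Readable flags, and the valid bit, with the
 * cache-attribute flags left at the bus defaults.
 */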
#define RK_PTE_PAGE_ADDRESS_MASK  0xfffff000
#define RK_PTE_PAGE_FLAGS_MASK    0x000001fe
#define RK_PTE_PAGE_WRITABLE      BIT(2)
#define RK_PTE_PAGE_READABLE      BIT(1)
#define RK_PTE_PAGE_VALID         BIT(0)

static inline phys_addr_t rk_pte_page_address(u32 pte)
{
	return (phys_addr_t)pte & RK_PTE_PAGE_ADDRESS_MASK;
}

static inline bool rk_pte_is_page_valid(u32 pte)
{
	return pte & RK_PTE_PAGE_VALID;
}

/* TODO: set cache flags per prot IOMMU_CACHE */
static u32 rk_mk_pte(phys_addr_t page, int prot)
{
	u32 flags = 0;
	flags |= (prot & IOMMU_READ) ? RK_PTE_PAGE_READABLE : 0;
	flags |= (prot & IOMMU_WRITE) ? RK_PTE_PAGE_WRITABLE : 0;
	page &= RK_PTE_PAGE_ADDRESS_MASK;
	return page | flags | RK_PTE_PAGE_VALID;
}

static u32 rk_mk_pte_invalid(u32 pte)
{
	return pte & ~RK_PTE_PAGE_VALID;
}

/*
 * rk3288 iova (IOMMU Virtual Address) format
 *  31       22.21       12.11          0
 * +-----------+-----------+-------------+
 * | DTE index | PTE index | Page offset |
 * +-----------+-----------+-------------+
 *  31:22 - DTE index   - index of DTE in DT
 *  21:12 - PTE index   - index of PTE in PT @ DTE.pt_address
 *  11: 0 - Page offset - offset into page @ PTE.page_address
 */
#define RK_IOVA_DTE_MASK    0xffc00000
#define RK_IOVA_DTE_SHIFT   22
#define RK_IOVA_PTE_MASK    0x003ff000
#define RK_IOVA_PTE_SHIFT   12
#define RK_IOVA_PAGE_MASK   0x00000fff
#define RK_IOVA_PAGE_SHIFT  0

static u32 rk_iova_dte_index(dma_addr_t iova)
{
	return (u32)(iova & RK_IOVA_DTE_MASK) >> RK_IOVA_DTE_SHIFT;
}

static u32 rk_iova_pte_index(dma_addr_t iova)
{
	return (u32)(iova & RK_IOVA_PTE_MASK) >> RK_IOVA_PTE_SHIFT;
}

static u32 rk_iova_page_offset(dma_addr_t iova)
{
	return (u32)(iova & RK_IOVA_PAGE_MASK) >> RK_IOVA_PAGE_SHIFT;
}

static u32 rk_iommu_read(void __iomem *base, u32 offset)
{
	return readl(base + offset);
}

static void rk_iommu_write(void __iomem *base, u32 offset, u32 value)
{
	writel(value, base + offset);
}

static void rk_iommu_command(struct rk_iommu *iommu, u32 command)
{
	int i;

	for (i = 0; i < iommu->num_mmu; i++)
		writel(command, iommu->bases[i] + RK_MMU_COMMAND);
}

static void rk_iommu_base_command(void __iomem *base, u32 command)
{
	writel(command, base + RK_MMU_COMMAND);
}

static void rk_iommu_zap_lines(struct rk_iommu *iommu, dma_addr_t iova,
			       size_t size)
{
	int i;

	dma_addr_t iova_end = iova + size;
	/*
	 * TODO(djkurtz): Figure out when it is more efficient to shootdown the
	 * entire iotlb rather than iterate over individual iovas.
	 */
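	/* Zap each 4 KiB page in the range on every MMU of this master. */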
	for (i = 0; i < iommu->num_mmu; i++) {
		dma_addr_t pos;

		/* Restart from the first iova for each MMU instance */
		for (pos = iova; pos < iova_end; pos += SPAGE_SIZE)
			rk_iommu_write(iommu->bases[i], RK_MMU_ZAP_ONE_LINE,
				       pos);
	}
}

static bool rk_iommu_is_stall_active(struct rk_iommu *iommu)
{
	bool active = true;
	int i;

	for (i = 0; i < iommu->num_mmu; i++)
		active &= !!(rk_iommu_read(iommu->bases[i], RK_MMU_STATUS) &
			     RK_MMU_STATUS_STALL_ACTIVE);

	return active;
}

static bool rk_iommu_is_paging_enabled(struct rk_iommu *iommu)
{
	bool enable = true;
	int i;

	for (i = 0; i < iommu->num_mmu; i++)
		enable &= !!(rk_iommu_read(iommu->bases[i], RK_MMU_STATUS) &
			     RK_MMU_STATUS_PAGING_ENABLED);

	return enable;
}

static int rk_iommu_enable_stall(struct rk_iommu *iommu)
{
	int ret, i;

	if (rk_iommu_is_stall_active(iommu))
		return 0;

	/* Stall can only be enabled if paging is enabled */
	if (!rk_iommu_is_paging_enabled(iommu))
		return 0;

	rk_iommu_command(iommu, RK_MMU_CMD_ENABLE_STALL);

	ret = rk_wait_for(rk_iommu_is_stall_active(iommu), 1);
	if (ret)
		for (i = 0; i < iommu->num_mmu; i++)
			dev_err(iommu->dev, "Enable stall request timed out, status: %#08x\n",
				rk_iommu_read(iommu->bases[i], RK_MMU_STATUS));

	return ret;
}

static int rk_iommu_disable_stall(struct rk_iommu *iommu)
{
	int ret, i;

	if (!rk_iommu_is_stall_active(iommu))
		return 0;

	rk_iommu_command(iommu, RK_MMU_CMD_DISABLE_STALL);

	ret = rk_wait_for(!rk_iommu_is_stall_active(iommu), 1);
	if (ret)
		for (i = 0; i < iommu->num_mmu; i++)
			dev_err(iommu->dev, "Disable stall request timed out, status: %#08x\n",
				rk_iommu_read(iommu->bases[i], RK_MMU_STATUS));

	return ret;
}

static int rk_iommu_enable_paging(struct rk_iommu *iommu)
{
	int ret, i;

	if (rk_iommu_is_paging_enabled(iommu))
		return 0;

	rk_iommu_command(iommu, RK_MMU_CMD_ENABLE_PAGING);

	ret = rk_wait_for(rk_iommu_is_paging_enabled(iommu), 1);
	if (ret)
		for (i = 0; i < iommu->num_mmu; i++)
			dev_err(iommu->dev, "Enable paging request timed out, status: %#08x\n",
				rk_iommu_read(iommu->bases[i], RK_MMU_STATUS));

	return ret;
}

static int rk_iommu_disable_paging(struct rk_iommu *iommu)
{
	int ret, i;

	if (!rk_iommu_is_paging_enabled(iommu))
		return 0;

	rk_iommu_command(iommu, RK_MMU_CMD_DISABLE_PAGING);

	ret = rk_wait_for(!rk_iommu_is_paging_enabled(iommu), 1);
	if (ret)
		for (i = 0; i < iommu->num_mmu; i++)
			dev_err(iommu->dev, "Disable paging request timed out, status: %#08x\n",
				rk_iommu_read(iommu->bases[i], RK_MMU_STATUS));

	return ret;
}
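/*
 * Reset every MMU instance to a known state. The dummy write to DTE_ADDR
 * below checks that the MMU's register file is actually responding before
 * the FORCE_RESET command is issued.
 */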
static int rk_iommu_force_reset(struct rk_iommu *iommu)
{
	int ret, i;
	u32 dte_addr;

	/*
	 * Check if register DTE_ADDR is working by writing DTE_ADDR_DUMMY
	 * and verifying that upper 5 nybbles are read back.
	 */
	for (i = 0; i < iommu->num_mmu; i++) {
		rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, DTE_ADDR_DUMMY);

		dte_addr = rk_iommu_read(iommu->bases[i], RK_MMU_DTE_ADDR);
		if (dte_addr != (DTE_ADDR_DUMMY & RK_DTE_PT_ADDRESS_MASK)) {
			dev_err(iommu->dev, "Error during raw reset. MMU_DTE_ADDR is not functioning\n");
			return -EFAULT;
		}
	}

	rk_iommu_command(iommu, RK_MMU_CMD_FORCE_RESET);

	for (i = 0; i < iommu->num_mmu; i++) {
		ret = rk_wait_for(rk_iommu_read(iommu->bases[i], RK_MMU_DTE_ADDR) == 0x00000000,
				  FORCE_RESET_TIMEOUT);
		if (ret) {
			dev_err(iommu->dev, "FORCE_RESET command timed out\n");
			return ret;
		}
	}

	return 0;
}

static void log_iova(struct rk_iommu *iommu, int index, dma_addr_t iova)
{
	void __iomem *base = iommu->bases[index];
	u32 dte_index, pte_index, page_offset;
	u32 mmu_dte_addr;
	phys_addr_t mmu_dte_addr_phys, dte_addr_phys;
	u32 *dte_addr;
	u32 dte;
	phys_addr_t pte_addr_phys = 0;
	u32 *pte_addr = NULL;
	u32 pte = 0;
	phys_addr_t page_addr_phys = 0;
	u32 page_flags = 0;

	dte_index = rk_iova_dte_index(iova);
	pte_index = rk_iova_pte_index(iova);
	page_offset = rk_iova_page_offset(iova);

	mmu_dte_addr = rk_iommu_read(base, RK_MMU_DTE_ADDR);
	mmu_dte_addr_phys = (phys_addr_t)mmu_dte_addr;

	dte_addr_phys = mmu_dte_addr_phys + (4 * dte_index);
	dte_addr = phys_to_virt(dte_addr_phys);
	dte = *dte_addr;

	if (!rk_dte_is_pt_valid(dte))
		goto print_it;

	pte_addr_phys = rk_dte_pt_address(dte) + (pte_index * 4);
	pte_addr = phys_to_virt(pte_addr_phys);
	pte = *pte_addr;

	if (!rk_pte_is_page_valid(pte))
		goto print_it;

	page_addr_phys = rk_pte_page_address(pte) + page_offset;
	page_flags = pte & RK_PTE_PAGE_FLAGS_MASK;

print_it:
	dev_err(iommu->dev, "iova = %pad: dte_index: %#03x pte_index: %#03x page_offset: %#03x\n",
		&iova, dte_index, pte_index, page_offset);
	dev_err(iommu->dev, "mmu_dte_addr: %pa dte@%pa: %#08x valid: %u pte@%pa: %#08x valid: %u page@%pa flags: %#03x\n",
		&mmu_dte_addr_phys, &dte_addr_phys, dte,
		rk_dte_is_pt_valid(dte), &pte_addr_phys, pte,
		rk_pte_is_page_valid(pte), &page_addr_phys, page_flags);
}

static irqreturn_t rk_iommu_irq(int irq, void *dev_id)
{
	struct rk_iommu *iommu = dev_id;
	u32 status;
	u32 int_status;
	dma_addr_t iova;
	irqreturn_t ret = IRQ_NONE;
	int i;

	for (i = 0; i < iommu->num_mmu; i++) {
		int_status = rk_iommu_read(iommu->bases[i], RK_MMU_INT_STATUS);
		if (int_status == 0)
			continue;

		ret = IRQ_HANDLED;
		iova = rk_iommu_read(iommu->bases[i], RK_MMU_PAGE_FAULT_ADDR);

		if (int_status & RK_MMU_IRQ_PAGE_FAULT) {
			int flags;

			status = rk_iommu_read(iommu->bases[i], RK_MMU_STATUS);
			flags = (status & RK_MMU_STATUS_PAGE_FAULT_IS_WRITE) ?
					IOMMU_FAULT_WRITE : IOMMU_FAULT_READ;

			dev_err(iommu->dev, "Page fault at %pad of type %s\n",
				&iova,
				(flags == IOMMU_FAULT_WRITE) ? "write" : "read");

			log_iova(iommu, i, iova);

			/*
			 * Report page fault to any installed handlers.
			 * Ignore the return code, though, since we always zap cache
			 * and clear the page fault anyway.
			 */
			if (iommu->domain)
				report_iommu_fault(iommu->domain, iommu->dev, iova,
						   flags);
			else
				dev_err(iommu->dev, "Page fault while iommu not attached to domain?\n");

			rk_iommu_base_command(iommu->bases[i], RK_MMU_CMD_ZAP_CACHE);
			rk_iommu_base_command(iommu->bases[i], RK_MMU_CMD_PAGE_FAULT_DONE);
		}

		if (int_status & RK_MMU_IRQ_BUS_ERROR)
			dev_err(iommu->dev, "BUS_ERROR occurred at %pad\n", &iova);

		if (int_status & ~RK_MMU_IRQ_MASK)
			dev_err(iommu->dev, "unexpected int_status: %#08x\n",
				int_status);

		rk_iommu_write(iommu->bases[i], RK_MMU_INT_CLEAR, int_status);
	}

	return ret;
}

static phys_addr_t rk_iommu_iova_to_phys(struct iommu_domain *domain,
					 dma_addr_t iova)
{
	struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
	unsigned long flags;
	phys_addr_t pt_phys, phys = 0;
	u32 dte, pte;
	u32 *page_table;

	spin_lock_irqsave(&rk_domain->dt_lock, flags);

	dte = rk_domain->dt[rk_iova_dte_index(iova)];
	if (!rk_dte_is_pt_valid(dte))
		goto out;

	pt_phys = rk_dte_pt_address(dte);
	page_table = (u32 *)phys_to_virt(pt_phys);
	pte = page_table[rk_iova_pte_index(iova)];
	if (!rk_pte_is_page_valid(pte))
		goto out;

	phys = rk_pte_page_address(pte) + rk_iova_page_offset(iova);
out:
	spin_unlock_irqrestore(&rk_domain->dt_lock, flags);

	return phys;
}

static void rk_iommu_zap_iova(struct rk_iommu_domain *rk_domain,
			      dma_addr_t iova, size_t size)
{
	struct list_head *pos;
	unsigned long flags;

	/* shootdown these iova from all iommus using this domain */
	spin_lock_irqsave(&rk_domain->iommus_lock, flags);
	list_for_each(pos, &rk_domain->iommus) {
		struct rk_iommu *iommu;
		iommu = list_entry(pos, struct rk_iommu, node);
		rk_iommu_zap_lines(iommu, iova, size);
	}
	spin_unlock_irqrestore(&rk_domain->iommus_lock, flags);
}

static void rk_iommu_zap_iova_first_last(struct rk_iommu_domain *rk_domain,
					 dma_addr_t iova, size_t size)
{
	rk_iommu_zap_iova(rk_domain, iova, SPAGE_SIZE);
	if (size > SPAGE_SIZE)
		rk_iommu_zap_iova(rk_domain, iova + size - SPAGE_SIZE,
				  SPAGE_SIZE);
}

static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain,
				  dma_addr_t iova)
{
	u32 *page_table, *dte_addr;
	u32 dte;
	phys_addr_t pt_phys;

	assert_spin_locked(&rk_domain->dt_lock);

	dte_addr = &rk_domain->dt[rk_iova_dte_index(iova)];
	dte = *dte_addr;
	if (rk_dte_is_pt_valid(dte))
		goto done;

	page_table = (u32 *)get_zeroed_page(GFP_ATOMIC | GFP_DMA32);
	if (!page_table)
		return ERR_PTR(-ENOMEM);

	dte = rk_mk_dte(page_table);
	*dte_addr = dte;

	rk_table_flush(page_table, NUM_PT_ENTRIES);
	rk_table_flush(dte_addr, 1);

done:
	pt_phys = rk_dte_pt_address(dte);
	return (u32 *)phys_to_virt(pt_phys);
}

static size_t rk_iommu_unmap_iova(struct rk_iommu_domain *rk_domain,
				  u32 *pte_addr, dma_addr_t iova, size_t size)
{
	unsigned int pte_count;
	unsigned int pte_total = size / SPAGE_SIZE;

	assert_spin_locked(&rk_domain->dt_lock);

	for (pte_count = 0; pte_count < pte_total; pte_count++) {
		u32 pte = pte_addr[pte_count];
		if (!rk_pte_is_page_valid(pte))
			break;

		pte_addr[pte_count] = rk_mk_pte_invalid(pte);
	}

	rk_table_flush(pte_addr, pte_count);

	return pte_count * SPAGE_SIZE;
}
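/*
 * Install PTEs for a physically contiguous range. The caller holds dt_lock
 * and has already looked up (or allocated) the page table covering iova, so
 * pte_addr points at the first PTE to fill in.
 */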
static int rk_iommu_map_iova(struct rk_iommu_domain *rk_domain, u32 *pte_addr,
			     dma_addr_t iova, phys_addr_t paddr, size_t size,
			     int prot)
{
	unsigned int pte_count;
	unsigned int pte_total = size / SPAGE_SIZE;
	phys_addr_t page_phys;

	assert_spin_locked(&rk_domain->dt_lock);

	for (pte_count = 0; pte_count < pte_total; pte_count++) {
		u32 pte = pte_addr[pte_count];

		if (rk_pte_is_page_valid(pte))
			goto unwind;

		pte_addr[pte_count] = rk_mk_pte(paddr, prot);

		paddr += SPAGE_SIZE;
	}

	rk_table_flush(pte_addr, pte_count);

	/*
	 * Zap the first and last iova to evict from iotlb any previously
	 * mapped cachelines holding stale values for its dte and pte.
	 * We only zap the first and last iova, since only they could have
	 * dte or pte shared with an existing mapping.
	 */
	rk_iommu_zap_iova_first_last(rk_domain, iova, size);

	return 0;
unwind:
	/* Unmap the range of iovas that we just mapped */
	rk_iommu_unmap_iova(rk_domain, pte_addr, iova, pte_count * SPAGE_SIZE);

	iova += pte_count * SPAGE_SIZE;
	page_phys = rk_pte_page_address(pte_addr[pte_count]);
	pr_err("iova: %pad already mapped to %pa cannot remap to phys: %pa prot: %#x\n",
	       &iova, &page_phys, &paddr, prot);

	return -EADDRINUSE;
}

static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova,
			phys_addr_t paddr, size_t size, int prot)
{
	struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
	unsigned long flags;
	dma_addr_t iova = (dma_addr_t)_iova;
	u32 *page_table, *pte_addr;
	int ret;

	spin_lock_irqsave(&rk_domain->dt_lock, flags);

	/*
	 * pgsize_bitmap specifies iova sizes that fit in one page table
	 * (1024 4-KiB pages = 4 MiB).
	 * So, size will always be 4096 <= size <= 4194304.
	 * Since iommu_map() guarantees that both iova and size will be
	 * aligned, we will always only be mapping from a single dte here.
	 */
	page_table = rk_dte_get_page_table(rk_domain, iova);
	if (IS_ERR(page_table)) {
		spin_unlock_irqrestore(&rk_domain->dt_lock, flags);
		return PTR_ERR(page_table);
	}

	pte_addr = &page_table[rk_iova_pte_index(iova)];
	ret = rk_iommu_map_iova(rk_domain, pte_addr, iova, paddr, size, prot);
	spin_unlock_irqrestore(&rk_domain->dt_lock, flags);

	return ret;
}

static size_t rk_iommu_unmap(struct iommu_domain *domain, unsigned long _iova,
			     size_t size)
{
	struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
	unsigned long flags;
	dma_addr_t iova = (dma_addr_t)_iova;
	phys_addr_t pt_phys;
	u32 dte;
	u32 *pte_addr;
	size_t unmap_size;

	spin_lock_irqsave(&rk_domain->dt_lock, flags);

	/*
	 * pgsize_bitmap specifies iova sizes that fit in one page table
	 * (1024 4-KiB pages = 4 MiB).
	 * So, size will always be 4096 <= size <= 4194304.
	 * Since iommu_unmap() guarantees that both iova and size will be
	 * aligned, we will always only be unmapping from a single dte here.
	 */
	dte = rk_domain->dt[rk_iova_dte_index(iova)];
	/* Just return 0 if iova is unmapped */
	if (!rk_dte_is_pt_valid(dte)) {
		spin_unlock_irqrestore(&rk_domain->dt_lock, flags);
		return 0;
	}

	pt_phys = rk_dte_pt_address(dte);
	pte_addr = (u32 *)phys_to_virt(pt_phys) + rk_iova_pte_index(iova);
	unmap_size = rk_iommu_unmap_iova(rk_domain, pte_addr, iova, size);

	spin_unlock_irqrestore(&rk_domain->dt_lock, flags);

	/* Shootdown iotlb entries for iova range that was just unmapped */
	rk_iommu_zap_iova(rk_domain, iova, unmap_size);

	return unmap_size;
}

static struct rk_iommu *rk_iommu_from_dev(struct device *dev)
{
	struct iommu_group *group;
	struct device *iommu_dev;
	struct rk_iommu *rk_iommu;

	group = iommu_group_get(dev);
	if (!group)
		return NULL;
	iommu_dev = iommu_group_get_iommudata(group);
	rk_iommu = dev_get_drvdata(iommu_dev);
	iommu_group_put(group);

	return rk_iommu;
}

static int rk_iommu_attach_device(struct iommu_domain *domain,
				  struct device *dev)
{
	struct rk_iommu *iommu;
	struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
	unsigned long flags;
	int ret, i;
	phys_addr_t dte_addr;

	/*
	 * Allow 'virtual devices' (e.g., drm) to attach to domain.
	 * Such a device does not belong to an iommu group.
	 */
	iommu = rk_iommu_from_dev(dev);
	if (!iommu)
		return 0;

	ret = rk_iommu_enable_stall(iommu);
	if (ret)
		return ret;

	ret = rk_iommu_force_reset(iommu);
	if (ret)
		return ret;

	iommu->domain = domain;

	ret = devm_request_irq(dev, iommu->irq, rk_iommu_irq,
			       IRQF_SHARED, dev_name(dev), iommu);
	if (ret)
		return ret;

	dte_addr = virt_to_phys(rk_domain->dt);
	for (i = 0; i < iommu->num_mmu; i++) {
		rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, dte_addr);
		rk_iommu_base_command(iommu->bases[i], RK_MMU_CMD_ZAP_CACHE);
		rk_iommu_write(iommu->bases[i], RK_MMU_INT_MASK, RK_MMU_IRQ_MASK);
	}

	ret = rk_iommu_enable_paging(iommu);
	if (ret)
		return ret;

	spin_lock_irqsave(&rk_domain->iommus_lock, flags);
	list_add_tail(&iommu->node, &rk_domain->iommus);
	spin_unlock_irqrestore(&rk_domain->iommus_lock, flags);

	dev_dbg(dev, "Attached to iommu domain\n");

	rk_iommu_disable_stall(iommu);

	return 0;
}

static void rk_iommu_detach_device(struct iommu_domain *domain,
				   struct device *dev)
{
	struct rk_iommu *iommu;
	struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
	unsigned long flags;
	int i;

	/* Allow 'virtual devices' (eg drm) to detach from domain */
	iommu = rk_iommu_from_dev(dev);
	if (!iommu)
		return;

	spin_lock_irqsave(&rk_domain->iommus_lock, flags);
	list_del_init(&iommu->node);
	spin_unlock_irqrestore(&rk_domain->iommus_lock, flags);

	/* Ignore error while disabling, just keep going */
	rk_iommu_enable_stall(iommu);
	rk_iommu_disable_paging(iommu);
	for (i = 0; i < iommu->num_mmu; i++) {
		rk_iommu_write(iommu->bases[i], RK_MMU_INT_MASK, 0);
		rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, 0);
	}
	rk_iommu_disable_stall(iommu);

	devm_free_irq(dev, iommu->irq, iommu);

	iommu->domain = NULL;

	dev_dbg(dev, "Detached from iommu domain\n");
}
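/*
 * Only unmanaged (driver-owned) domains are supported; this version of the
 * driver does not implement DMA-API (IOMMU_DOMAIN_DMA) domains.
 */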
static struct iommu_domain *rk_iommu_domain_alloc(unsigned type)
{
	struct rk_iommu_domain *rk_domain;

	if (type != IOMMU_DOMAIN_UNMANAGED)
		return NULL;

	rk_domain = kzalloc(sizeof(*rk_domain), GFP_KERNEL);
	if (!rk_domain)
		return NULL;

	/*
	 * rk32xx iommus use a 2 level pagetable.
	 * Each level1 (dt) and level2 (pt) table has 1024 4-byte entries.
	 * Allocate one 4 KiB page for each table.
	 */
	rk_domain->dt = (u32 *)get_zeroed_page(GFP_KERNEL | GFP_DMA32);
	if (!rk_domain->dt)
		goto err_dt;

	rk_table_flush(rk_domain->dt, NUM_DT_ENTRIES);

	spin_lock_init(&rk_domain->iommus_lock);
	spin_lock_init(&rk_domain->dt_lock);
	INIT_LIST_HEAD(&rk_domain->iommus);

	return &rk_domain->domain;

err_dt:
	kfree(rk_domain);
	return NULL;
}

static void rk_iommu_domain_free(struct iommu_domain *domain)
{
	struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
	int i;

	WARN_ON(!list_empty(&rk_domain->iommus));

	for (i = 0; i < NUM_DT_ENTRIES; i++) {
		u32 dte = rk_domain->dt[i];
		if (rk_dte_is_pt_valid(dte)) {
			phys_addr_t pt_phys = rk_dte_pt_address(dte);
			u32 *page_table = phys_to_virt(pt_phys);
			free_page((unsigned long)page_table);
		}
	}

	free_page((unsigned long)rk_domain->dt);
	kfree(rk_domain);
}

static bool rk_iommu_is_dev_iommu_master(struct device *dev)
{
	struct device_node *np = dev->of_node;
	int ret;

	/*
	 * An iommu master has an iommus property containing a list of phandles
	 * to iommu nodes, each with an #iommu-cells property with value 0.
	 */
	ret = of_count_phandle_with_args(np, "iommus", "#iommu-cells");
	return (ret > 0);
}

static int rk_iommu_group_set_iommudata(struct iommu_group *group,
					struct device *dev)
{
	struct device_node *np = dev->of_node;
	struct platform_device *pd;
	int ret;
	struct of_phandle_args args;

	/*
	 * An iommu master has an iommus property containing a list of phandles
	 * to iommu nodes, each with an #iommu-cells property with value 0.
	 */
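	/*
	 * For example (node names illustrative), a master with
	 * "iommus = <&vop_mmu>;" whose vop_mmu node declares
	 * "#iommu-cells = <0>;" parses to args.np == vop_mmu and
	 * args.args_count == 0.
	 */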
	ret = of_parse_phandle_with_args(np, "iommus", "#iommu-cells", 0,
					 &args);
	if (ret) {
		dev_err(dev, "of_parse_phandle_with_args(%s) => %d\n",
			np->full_name, ret);
		return ret;
	}
	if (args.args_count != 0) {
		dev_err(dev, "incorrect number of iommu params found for %s (found %d, expected 0)\n",
			args.np->full_name, args.args_count);
		return -EINVAL;
	}

	pd = of_find_device_by_node(args.np);
	of_node_put(args.np);
	if (!pd) {
		dev_err(dev, "iommu %s not found\n", args.np->full_name);
		return -EPROBE_DEFER;
	}

	/* TODO(djkurtz): handle multiple slave iommus for a single master */
	iommu_group_set_iommudata(group, &pd->dev, NULL);

	return 0;
}

static int rk_iommu_add_device(struct device *dev)
{
	struct iommu_group *group;
	int ret;

	if (!rk_iommu_is_dev_iommu_master(dev))
		return -ENODEV;

	group = iommu_group_get(dev);
	if (!group) {
		group = iommu_group_alloc();
		if (IS_ERR(group)) {
			dev_err(dev, "Failed to allocate IOMMU group\n");
			return PTR_ERR(group);
		}
	}

	ret = iommu_group_add_device(group, dev);
	if (ret)
		goto err_put_group;

	ret = rk_iommu_group_set_iommudata(group, dev);
	if (ret)
		goto err_remove_device;

	iommu_group_put(group);

	return 0;

err_remove_device:
	iommu_group_remove_device(dev);
err_put_group:
	iommu_group_put(group);
	return ret;
}

static void rk_iommu_remove_device(struct device *dev)
{
	if (!rk_iommu_is_dev_iommu_master(dev))
		return;

	iommu_group_remove_device(dev);
}

static const struct iommu_ops rk_iommu_ops = {
	.domain_alloc = rk_iommu_domain_alloc,
	.domain_free = rk_iommu_domain_free,
	.attach_dev = rk_iommu_attach_device,
	.detach_dev = rk_iommu_detach_device,
	.map = rk_iommu_map,
	.unmap = rk_iommu_unmap,
	.add_device = rk_iommu_add_device,
	.remove_device = rk_iommu_remove_device,
	.iova_to_phys = rk_iommu_iova_to_phys,
	.pgsize_bitmap = RK_IOMMU_PGSIZE_BITMAP,
};

static int rk_iommu_probe(struct platform_device *pdev)
{
	struct device *dev = &pdev->dev;
	struct rk_iommu *iommu;
	struct resource *res;
	int i;

	iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL);
	if (!iommu)
		return -ENOMEM;

	platform_set_drvdata(pdev, iommu);
	iommu->dev = dev;
	iommu->num_mmu = 0;
	/*
	 * Size the register-base array by the number of resources;
	 * num_mmu is only incremented below as each MMU is mapped.
	 */
	iommu->bases = devm_kzalloc(dev,
				    sizeof(*iommu->bases) * pdev->num_resources,
				    GFP_KERNEL);
	if (!iommu->bases)
		return -ENOMEM;

	for (i = 0; i < pdev->num_resources; i++) {
		res = platform_get_resource(pdev, IORESOURCE_MEM, i);
		if (!res)
			continue;
		iommu->bases[i] = devm_ioremap_resource(&pdev->dev, res);
		if (IS_ERR(iommu->bases[i]))
			continue;
		iommu->num_mmu++;
	}
	if (iommu->num_mmu == 0)
		return PTR_ERR(iommu->bases[0]);

	iommu->irq = platform_get_irq(pdev, 0);
	if (iommu->irq < 0) {
		dev_err(dev, "Failed to get IRQ, %d\n", iommu->irq);
		return -ENXIO;
	}

	return 0;
}

static int rk_iommu_remove(struct platform_device *pdev)
{
	return 0;
}

static const struct of_device_id rk_iommu_dt_ids[] = {
	{ .compatible = "rockchip,iommu" },
	{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, rk_iommu_dt_ids);
static struct platform_driver rk_iommu_driver = {
	.probe = rk_iommu_probe,
	.remove = rk_iommu_remove,
	.driver = {
		   .name = "rk_iommu",
		   .of_match_table = rk_iommu_dt_ids,
	},
};

static int __init rk_iommu_init(void)
{
	struct device_node *np;
	int ret;

	np = of_find_matching_node(NULL, rk_iommu_dt_ids);
	if (!np)
		return 0;

	of_node_put(np);

	ret = bus_set_iommu(&platform_bus_type, &rk_iommu_ops);
	if (ret)
		return ret;

	return platform_driver_register(&rk_iommu_driver);
}

static void __exit rk_iommu_exit(void)
{
	platform_driver_unregister(&rk_iommu_driver);
}

subsys_initcall(rk_iommu_init);
module_exit(rk_iommu_exit);

MODULE_DESCRIPTION("IOMMU API for Rockchip");
MODULE_AUTHOR("Simon Xue <xxm@rock-chips.com> and Daniel Kurtz <djkurtz@chromium.org>");
MODULE_ALIAS("platform:rockchip-iommu");
MODULE_LICENSE("GPL v2");