/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2020-2021 Ruslan Bukin <br@bsdpad.com>
 * Copyright (c) 2014-2021 Andrew Turner
 * Copyright (c) 2014-2016 The FreeBSD Foundation
 * All rights reserved.
 *
 * This work was supported by Innovate UK project 105694, "Digital Security
 * by Design (DSbD) Technology Platform Prototype".
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/*
 * Manages physical address maps for ARM SMMUv3 and ARM Mali GPU.
 */

#include "opt_vm.h"

#include <sys/param.h>
#include <sys/ktr.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_pageout.h>
#include <vm/vm_radix.h>

#include <machine/machdep.h>

#include <arm64/iommu/iommu_pmap.h>
#include <arm64/iommu/iommu_pte.h>

#define	IOMMU_PAGE_SIZE		4096

#define	NL0PG		(IOMMU_PAGE_SIZE/(sizeof (pd_entry_t)))
#define	NL1PG		(IOMMU_PAGE_SIZE/(sizeof (pd_entry_t)))
#define	NL2PG		(IOMMU_PAGE_SIZE/(sizeof (pd_entry_t)))
#define	NL3PG		(IOMMU_PAGE_SIZE/(sizeof (pt_entry_t)))

#define	NUL0E		IOMMU_L0_ENTRIES
#define	NUL1E		(NUL0E * NL1PG)
#define	NUL2E		(NUL1E * NL2PG)

#define	iommu_l0_pindex(v)	(NUL2E + NUL1E + ((v) >> IOMMU_L0_SHIFT))
#define	iommu_l1_pindex(v)	(NUL2E + ((v) >> IOMMU_L1_SHIFT))
#define	iommu_l2_pindex(v)	((v) >> IOMMU_L2_SHIFT)

/* This code assumes all L1 DMAP entries will be used */
CTASSERT((DMAP_MIN_ADDRESS & ~IOMMU_L0_OFFSET) == DMAP_MIN_ADDRESS);
CTASSERT((DMAP_MAX_ADDRESS & ~IOMMU_L0_OFFSET) == DMAP_MAX_ADDRESS);

static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex);
static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m,
    struct spglist *free);

/*
 * These load the old table data and store the new value.
 * They need to be atomic as the System MMU may write to the table at
 * the same time as the CPU.
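 *
 * pmap_load() is a plain 64-bit read; pmap_clear() and pmap_store() use
 * atomic 64-bit stores so that the SMMU table walker never observes a
 * torn table entry.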
 */
#define	pmap_load(table)		(*table)
#define	pmap_clear(table)		atomic_store_64(table, 0)
#define	pmap_store(table, entry)	atomic_store_64(table, entry)

/********************/
/* Inline functions */
/********************/

static __inline pd_entry_t *
pmap_l0(pmap_t pmap, vm_offset_t va)
{

	return (&pmap->pm_l0[iommu_l0_index(va)]);
}

static __inline pd_entry_t *
pmap_l0_to_l1(pd_entry_t *l0, vm_offset_t va)
{
	pd_entry_t *l1;

	l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK);
	return (&l1[iommu_l1_index(va)]);
}

static __inline pd_entry_t *
pmap_l1(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t *l0;

	l0 = pmap_l0(pmap, va);
	if ((pmap_load(l0) & ATTR_DESCR_MASK) != IOMMU_L0_TABLE)
		return (NULL);

	return (pmap_l0_to_l1(l0, va));
}

static __inline pd_entry_t *
pmap_l1_to_l2(pd_entry_t *l1p, vm_offset_t va)
{
	pd_entry_t l1, *l2p;

	l1 = pmap_load(l1p);

	/*
	 * The valid bit may be clear if pmap_update_entry() is concurrently
	 * modifying the entry, so for KVA only the entry type may be checked.
	 */
	KASSERT(va >= VM_MAX_USER_ADDRESS || (l1 & ATTR_DESCR_VALID) != 0,
	    ("%s: L1 entry %#lx for %#lx is invalid", __func__, l1, va));
	KASSERT((l1 & ATTR_DESCR_TYPE_MASK) == ATTR_DESCR_TYPE_TABLE,
	    ("%s: L1 entry %#lx for %#lx is a leaf", __func__, l1, va));
	l2p = (pd_entry_t *)PHYS_TO_DMAP(l1 & ~ATTR_MASK);
	return (&l2p[iommu_l2_index(va)]);
}

static __inline pd_entry_t *
pmap_l2(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t *l1;

	l1 = pmap_l1(pmap, va);
	if ((pmap_load(l1) & ATTR_DESCR_MASK) != IOMMU_L1_TABLE)
		return (NULL);

	return (pmap_l1_to_l2(l1, va));
}

static __inline pt_entry_t *
pmap_l2_to_l3(pd_entry_t *l2p, vm_offset_t va)
{
	pd_entry_t l2;
	pt_entry_t *l3p;

	l2 = pmap_load(l2p);

	/*
	 * The valid bit may be clear if pmap_update_entry() is concurrently
	 * modifying the entry, so for KVA only the entry type may be checked.
	 */
	KASSERT(va >= VM_MAX_USER_ADDRESS || (l2 & ATTR_DESCR_VALID) != 0,
	    ("%s: L2 entry %#lx for %#lx is invalid", __func__, l2, va));
	KASSERT((l2 & ATTR_DESCR_TYPE_MASK) == ATTR_DESCR_TYPE_TABLE,
	    ("%s: L2 entry %#lx for %#lx is a leaf", __func__, l2, va));
	l3p = (pt_entry_t *)PHYS_TO_DMAP(l2 & ~ATTR_MASK);
	return (&l3p[iommu_l3_index(va)]);
}

/*
 * Returns the lowest valid pde for a given virtual address.
 * The next level may or may not point to a valid page or block.
 */
static __inline pd_entry_t *
pmap_pde(pmap_t pmap, vm_offset_t va, int *level)
{
	pd_entry_t *l0, *l1, *l2, desc;

	l0 = pmap_l0(pmap, va);
	desc = pmap_load(l0) & ATTR_DESCR_MASK;
	if (desc != IOMMU_L0_TABLE) {
		*level = -1;
		return (NULL);
	}

	l1 = pmap_l0_to_l1(l0, va);
	desc = pmap_load(l1) & ATTR_DESCR_MASK;
	if (desc != IOMMU_L1_TABLE) {
		*level = 0;
		return (l0);
	}

	l2 = pmap_l1_to_l2(l1, va);
	desc = pmap_load(l2) & ATTR_DESCR_MASK;
	if (desc != IOMMU_L2_TABLE) {
		*level = 1;
		return (l1);
	}

	*level = 2;
	return (l2);
}

/*
 * Returns the lowest valid pte block or table entry for a given virtual
 * address. If there are no valid entries return NULL and set the level to
 * the first invalid level.
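 *
 * On success, *level is set to the level of the entry that was found:
 * 1 or 2 for a block entry, 3 for a page entry.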
 */
static __inline pt_entry_t *
pmap_pte(pmap_t pmap, vm_offset_t va, int *level)
{
	pd_entry_t *l1, *l2, desc;
	pt_entry_t *l3;

	l1 = pmap_l1(pmap, va);
	if (l1 == NULL) {
		*level = 0;
		return (NULL);
	}
	desc = pmap_load(l1) & ATTR_DESCR_MASK;
	if (desc == IOMMU_L1_BLOCK) {
		*level = 1;
		return (l1);
	}

	if (desc != IOMMU_L1_TABLE) {
		*level = 1;
		return (NULL);
	}

	l2 = pmap_l1_to_l2(l1, va);
	desc = pmap_load(l2) & ATTR_DESCR_MASK;
	if (desc == IOMMU_L2_BLOCK) {
		*level = 2;
		return (l2);
	}

	if (desc != IOMMU_L2_TABLE) {
		*level = 2;
		return (NULL);
	}

	*level = 3;
	l3 = pmap_l2_to_l3(l2, va);
	if ((pmap_load(l3) & ATTR_DESCR_MASK) != IOMMU_L3_PAGE)
		return (NULL);

	return (l3);
}

static __inline int
pmap_l3_valid(pt_entry_t l3)
{

	return ((l3 & ATTR_DESCR_MASK) == IOMMU_L3_PAGE);
}

CTASSERT(IOMMU_L1_BLOCK == IOMMU_L2_BLOCK);

static __inline void
pmap_resident_count_inc(pmap_t pmap, int count)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	pmap->pm_stats.resident_count += count;
}

static __inline void
pmap_resident_count_dec(pmap_t pmap, int count)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	KASSERT(pmap->pm_stats.resident_count >= count,
	    ("pmap %p resident count underflow %ld %d", pmap,
	    pmap->pm_stats.resident_count, count));
	pmap->pm_stats.resident_count -= count;
}

/***************************************************
 * Page table page management routines.....
 ***************************************************/
/*
 * Schedule the specified unused page table page to be freed. Specifically,
 * add the page to the specified list of pages that will be released to the
 * physical memory manager after the TLB has been updated.
 */
static __inline void
pmap_add_delayed_free_list(vm_page_t m, struct spglist *free,
    boolean_t set_PG_ZERO)
{

	if (set_PG_ZERO)
		m->flags |= PG_ZERO;
	else
		m->flags &= ~PG_ZERO;
	SLIST_INSERT_HEAD(free, m, plinks.s.ss);
}

/***************************************************
 * Low level mapping routines.....
 ***************************************************/

/*
 * Decrements a page table page's reference count, which is used to record the
 * number of valid page table entries within the page.  If the reference count
 * drops to zero, then the page table page is unmapped.  Returns TRUE if the
 * page table page was unmapped and FALSE otherwise.
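 *
 * Unmapping a page table page also drops the reference held on the page
 * table page above it, so an empty branch of the table is torn down
 * recursively.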
 */
static inline boolean_t
pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
{

	--m->ref_count;
	if (m->ref_count == 0) {
		_pmap_unwire_l3(pmap, va, m, free);
		return (TRUE);
	} else
		return (FALSE);
}

static void
_pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	/*
	 * unmap the page table page
	 */
	if (m->pindex >= (NUL2E + NUL1E)) {
		/* l1 page */
		pd_entry_t *l0;

		l0 = pmap_l0(pmap, va);
		pmap_clear(l0);
	} else if (m->pindex >= NUL2E) {
		/* l2 page */
		pd_entry_t *l1;

		l1 = pmap_l1(pmap, va);
		pmap_clear(l1);
	} else {
		/* l3 page */
		pd_entry_t *l2;

		l2 = pmap_l2(pmap, va);
		pmap_clear(l2);
	}
	pmap_resident_count_dec(pmap, 1);
	if (m->pindex < NUL2E) {
		/* We just released an l3, unhold the matching l2 */
		pd_entry_t *l1, tl1;
		vm_page_t l2pg;

		l1 = pmap_l1(pmap, va);
		tl1 = pmap_load(l1);
		l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK);
		pmap_unwire_l3(pmap, va, l2pg, free);
	} else if (m->pindex < (NUL2E + NUL1E)) {
		/* We just released an l2, unhold the matching l1 */
		pd_entry_t *l0, tl0;
		vm_page_t l1pg;

		l0 = pmap_l0(pmap, va);
		tl0 = pmap_load(l0);
		l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
		pmap_unwire_l3(pmap, va, l1pg, free);
	}

	/*
	 * Put page on a list so that it is released after
	 * *ALL* TLB shootdown is done
	 */
	pmap_add_delayed_free_list(m, free, TRUE);
}

static int
iommu_pmap_pinit_levels(pmap_t pmap, int levels)
{
	vm_page_t m;

	/*
	 * allocate the l0 page
	 */
	while ((m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
		vm_wait(NULL);

	pmap->pm_l0_paddr = VM_PAGE_TO_PHYS(m);
	pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(pmap->pm_l0_paddr);

	if ((m->flags & PG_ZERO) == 0)
		pagezero(pmap->pm_l0);

	pmap->pm_root.rt_root = 0;
	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));

	MPASS(levels == 3 || levels == 4);
	pmap->pm_levels = levels;

	/*
	 * Allocate the level 1 entry to use as the root. This will increase
	 * the refcount on the level 1 page so it won't be removed until
	 * pmap_release() is called.
	 */
	if (pmap->pm_levels == 3) {
		PMAP_LOCK(pmap);
		m = _pmap_alloc_l3(pmap, NUL2E + NUL1E);
		PMAP_UNLOCK(pmap);
	}
	pmap->pm_ttbr = VM_PAGE_TO_PHYS(m);

	return (1);
}

int
iommu_pmap_pinit(pmap_t pmap)
{

	return (iommu_pmap_pinit_levels(pmap, 4));
}

/*
 * This routine is called if the desired page table page does not exist.
 *
 * If page table page allocation fails, this routine does not sleep; it
 * simply returns NULL, and the caller is expected to retry.
 *
 * Note: If a page allocation fails at page table level two or three,
 * one or two pages may be held briefly, only to be released afterwards.
 * This conservative approach is easily argued to avoid race conditions.
 */
static vm_page_t
_pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex)
{
	vm_page_t m, l1pg, l2pg;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);

	/*
	 * Allocate a page table page.
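	 * VM_ALLOC_ZERO is requested so that a page that is already zeroed
	 * can skip the explicit pmap_zero_page() below.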
	 */
	if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
	    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
		/*
		 * Indicate the need to retry.  While waiting, the page table
		 * page may have been allocated.
		 */
		return (NULL);
	}
	if ((m->flags & PG_ZERO) == 0)
		pmap_zero_page(m);

	/*
	 * Because of AArch64's weak memory consistency model, we must have a
	 * barrier here to ensure that the stores for zeroing "m", whether by
	 * pmap_zero_page() or an earlier function, are visible before adding
	 * "m" to the page table.  Otherwise, a page table walk by another
	 * processor's MMU could see the mapping to "m" and a stale, non-zero
	 * PTE within "m".
	 */
	dmb(ishst);

	/*
	 * Map the pagetable page into the process address space, if
	 * it isn't already there.
	 */

	if (ptepindex >= (NUL2E + NUL1E)) {
		pd_entry_t *l0;
		vm_pindex_t l0index;

		l0index = ptepindex - (NUL2E + NUL1E);
		l0 = &pmap->pm_l0[l0index];
		pmap_store(l0, VM_PAGE_TO_PHYS(m) | IOMMU_L0_TABLE);
	} else if (ptepindex >= NUL2E) {
		vm_pindex_t l0index, l1index;
		pd_entry_t *l0, *l1;
		pd_entry_t tl0;

		l1index = ptepindex - NUL2E;
		l0index = l1index >> IOMMU_L0_ENTRIES_SHIFT;

		l0 = &pmap->pm_l0[l0index];
		tl0 = pmap_load(l0);
		if (tl0 == 0) {
			/* recurse for allocating page dir */
			if (_pmap_alloc_l3(pmap, NUL2E + NUL1E + l0index)
			    == NULL) {
				vm_page_unwire_noq(m);
				vm_page_free_zero(m);
				return (NULL);
			}
		} else {
			l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
			l1pg->ref_count++;
		}

		l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK);
		l1 = &l1[ptepindex & Ln_ADDR_MASK];
		pmap_store(l1, VM_PAGE_TO_PHYS(m) | IOMMU_L1_TABLE);
	} else {
		vm_pindex_t l0index, l1index;
		pd_entry_t *l0, *l1, *l2;
		pd_entry_t tl0, tl1;

		l1index = ptepindex >> Ln_ENTRIES_SHIFT;
		l0index = l1index >> IOMMU_L0_ENTRIES_SHIFT;

		l0 = &pmap->pm_l0[l0index];
		tl0 = pmap_load(l0);
		if (tl0 == 0) {
			/* recurse for allocating page dir */
			if (_pmap_alloc_l3(pmap, NUL2E + l1index) == NULL) {
				vm_page_unwire_noq(m);
				vm_page_free_zero(m);
				return (NULL);
			}
			tl0 = pmap_load(l0);
			l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK);
			l1 = &l1[l1index & Ln_ADDR_MASK];
		} else {
			l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK);
			l1 = &l1[l1index & Ln_ADDR_MASK];
			tl1 = pmap_load(l1);
			if (tl1 == 0) {
				/* recurse for allocating page dir */
				if (_pmap_alloc_l3(pmap, NUL2E + l1index)
				    == NULL) {
					vm_page_unwire_noq(m);
					vm_page_free_zero(m);
					return (NULL);
				}
			} else {
				l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK);
				l2pg->ref_count++;
			}
		}

		l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK);
		l2 = &l2[ptepindex & Ln_ADDR_MASK];
		pmap_store(l2, VM_PAGE_TO_PHYS(m) | IOMMU_L2_TABLE);
	}

	pmap_resident_count_inc(pmap, 1);

	return (m);
}

/***************************************************
 * Pmap allocation/deallocation routines.
 ***************************************************/

/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 * Should only be called if the map contains no valid mappings.
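 *
 * For a three-level pmap, the extra root page table page allocated by
 * iommu_pmap_pinit_levels() is unwired here before the l0 page is freed.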
 */
void
iommu_pmap_release(pmap_t pmap)
{
	boolean_t rv;
	struct spglist free;
	vm_page_t m;

	if (pmap->pm_levels != 4) {
		KASSERT(pmap->pm_stats.resident_count == 1,
		    ("pmap_release: pmap resident count %ld != 1",
		    pmap->pm_stats.resident_count));
		KASSERT((pmap->pm_l0[0] & ATTR_DESCR_VALID) == ATTR_DESCR_VALID,
		    ("pmap_release: Invalid l0 entry: %lx", pmap->pm_l0[0]));

		SLIST_INIT(&free);
		m = PHYS_TO_VM_PAGE(pmap->pm_ttbr);
		PMAP_LOCK(pmap);
		rv = pmap_unwire_l3(pmap, 0, m, &free);
		PMAP_UNLOCK(pmap);
		MPASS(rv == TRUE);
		vm_page_free_pages_toq(&free, true);
	}

	KASSERT(pmap->pm_stats.resident_count == 0,
	    ("pmap_release: pmap resident count %ld != 0",
	    pmap->pm_stats.resident_count));
	KASSERT(vm_radix_is_empty(&pmap->pm_root),
	    ("pmap_release: pmap has reserved page table page(s)"));

	m = PHYS_TO_VM_PAGE(pmap->pm_l0_paddr);
	vm_page_unwire_noq(m);
	vm_page_free_zero(m);
}

/***************************************************
 * page management routines.
 ***************************************************/

/*
 * Add a single Mali GPU entry. This function does not sleep.
 */
int
pmap_gpu_enter(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
    vm_prot_t prot, u_int flags)
{
	pd_entry_t *pde;
	pt_entry_t new_l3, orig_l3;
	pt_entry_t *l3;
	vm_page_t mpte;
	pd_entry_t *l1p;
	pd_entry_t *l2p;
	int lvl;
	int rv;

	KASSERT(pmap != kernel_pmap, ("kernel pmap used for GPU"));
	KASSERT(va < VM_MAXUSER_ADDRESS, ("wrong address space"));
	KASSERT((va & PAGE_MASK) == 0, ("va is misaligned"));
	KASSERT((pa & PAGE_MASK) == 0, ("pa is misaligned"));

	new_l3 = (pt_entry_t)(pa | ATTR_SH(ATTR_SH_IS) | IOMMU_L3_BLOCK);

	if ((prot & VM_PROT_WRITE) != 0)
		new_l3 |= ATTR_S2_S2AP(ATTR_S2_S2AP_WRITE);
	if ((prot & VM_PROT_READ) != 0)
		new_l3 |= ATTR_S2_S2AP(ATTR_S2_S2AP_READ);
	if ((prot & VM_PROT_EXECUTE) == 0)
		new_l3 |= ATTR_S2_XN(ATTR_S2_XN_ALL);

	CTR2(KTR_PMAP, "pmap_gpu_enter: %.16lx -> %.16lx", va, pa);

	PMAP_LOCK(pmap);

	/*
	 * In the case that a page table page is not
	 * resident, we are creating it here.
	 */
retry:
	pde = pmap_pde(pmap, va, &lvl);
	if (pde != NULL && lvl == 2) {
		l3 = pmap_l2_to_l3(pde, va);
	} else {
		mpte = _pmap_alloc_l3(pmap, iommu_l2_pindex(va));
		if (mpte == NULL) {
			CTR0(KTR_PMAP, "pmap_gpu_enter: mpte == NULL");
			rv = KERN_RESOURCE_SHORTAGE;
			goto out;
		}

		/*
		 * Ensure the newly created l1 and l2 tables are visible to
		 * the GPU.  The l0 table is already visible due to a similar
		 * call in the panfrost driver.  The cache entry for l3 is
		 * handled below.
		 */

		l1p = pmap_l1(pmap, va);
		l2p = pmap_l2(pmap, va);
		cpu_dcache_wb_range((vm_offset_t)l1p, sizeof(pd_entry_t));
		cpu_dcache_wb_range((vm_offset_t)l2p, sizeof(pd_entry_t));

		goto retry;
	}

	orig_l3 = pmap_load(l3);
	KASSERT(!pmap_l3_valid(orig_l3), ("l3 is valid"));

	/* New mapping */
	pmap_store(l3, new_l3);

	cpu_dcache_wb_range((vm_offset_t)l3, sizeof(pt_entry_t));

	pmap_resident_count_inc(pmap, 1);
	dsb(ishst);

	rv = KERN_SUCCESS;
out:
	PMAP_UNLOCK(pmap);

	return (rv);
}

/*
 * Remove a single Mali GPU entry.
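 * The cleared L3 entry is written back to memory so that the update is
 * visible to the GPU.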
 */
int
pmap_gpu_remove(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t *pde;
	pt_entry_t *pte;
	int lvl;
	int rc;

	KASSERT((va & PAGE_MASK) == 0, ("va is misaligned"));
	KASSERT(pmap != kernel_pmap, ("kernel pmap used for GPU"));

	PMAP_LOCK(pmap);

	pde = pmap_pde(pmap, va, &lvl);
	if (pde == NULL || lvl != 2) {
		rc = KERN_FAILURE;
		goto out;
	}

	pte = pmap_l2_to_l3(pde, va);

	pmap_resident_count_dec(pmap, 1);
	pmap_clear(pte);
	cpu_dcache_wb_range((vm_offset_t)pte, sizeof(pt_entry_t));
	rc = KERN_SUCCESS;

out:
	PMAP_UNLOCK(pmap);

	return (rc);
}

/*
 * Add a single SMMU entry. This function does not sleep.
 */
int
pmap_smmu_enter(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
    vm_prot_t prot, u_int flags)
{
	pd_entry_t *pde;
	pt_entry_t new_l3, orig_l3;
	pt_entry_t *l3;
	vm_page_t mpte;
	int lvl;
	int rv;

	KASSERT(va < VM_MAXUSER_ADDRESS, ("wrong address space"));

	va = trunc_page(va);
	new_l3 = (pt_entry_t)(pa | ATTR_DEFAULT |
	    ATTR_S1_IDX(VM_MEMATTR_DEVICE) | IOMMU_L3_PAGE);
	if ((prot & VM_PROT_WRITE) == 0)
		new_l3 |= ATTR_S1_AP(ATTR_S1_AP_RO);
	new_l3 |= ATTR_S1_XN;	/* Execute never. */
	new_l3 |= ATTR_S1_AP(ATTR_S1_AP_USER);
	new_l3 |= ATTR_S1_nG;	/* Non global. */

	CTR2(KTR_PMAP, "pmap_smmu_enter: %.16lx -> %.16lx", va, pa);

	PMAP_LOCK(pmap);

	/*
	 * In the case that a page table page is not
	 * resident, we are creating it here.
	 */
retry:
	pde = pmap_pde(pmap, va, &lvl);
	if (pde != NULL && lvl == 2) {
		l3 = pmap_l2_to_l3(pde, va);
	} else {
		mpte = _pmap_alloc_l3(pmap, iommu_l2_pindex(va));
		if (mpte == NULL) {
			CTR0(KTR_PMAP, "pmap_smmu_enter: mpte == NULL");
			rv = KERN_RESOURCE_SHORTAGE;
			goto out;
		}
		goto retry;
	}

	orig_l3 = pmap_load(l3);
	KASSERT(!pmap_l3_valid(orig_l3), ("l3 is valid"));

	/* New mapping */
	pmap_store(l3, new_l3);
	pmap_resident_count_inc(pmap, 1);
	dsb(ishst);

	rv = KERN_SUCCESS;
out:
	PMAP_UNLOCK(pmap);

	return (rv);
}

/*
 * Remove a single SMMU entry.
 */
int
pmap_smmu_remove(pmap_t pmap, vm_offset_t va)
{
	pt_entry_t *pte;
	int lvl;
	int rc;

	PMAP_LOCK(pmap);

	pte = pmap_pte(pmap, va, &lvl);
	KASSERT(lvl == 3,
	    ("Invalid SMMU pagetable level: %d != 3", lvl));

	if (pte != NULL) {
		pmap_resident_count_dec(pmap, 1);
		pmap_clear(pte);
		rc = KERN_SUCCESS;
	} else
		rc = KERN_FAILURE;

	PMAP_UNLOCK(pmap);

	return (rc);
}

/*
 * Remove all the allocated L1, L2 pages from SMMU pmap.
 * All the L3 entries must be cleared in advance, otherwise
 * this function panics.
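 *
 * The L3 page table pages themselves are freed here, together with the
 * L1 and L2 pages that map them.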
 */
void
iommu_pmap_remove_pages(pmap_t pmap)
{
	pd_entry_t l0e, *l1, l1e, *l2, l2e;
	pt_entry_t *l3, l3e;
	vm_page_t m, m0, m1;
	vm_offset_t sva;
	vm_paddr_t pa;
	vm_paddr_t pa0;
	vm_paddr_t pa1;
	int i, j, k, l;

	PMAP_LOCK(pmap);

	for (sva = VM_MINUSER_ADDRESS, i = iommu_l0_index(sva);
	    (i < Ln_ENTRIES && sva < VM_MAXUSER_ADDRESS); i++) {
		l0e = pmap->pm_l0[i];
		if ((l0e & ATTR_DESCR_VALID) == 0) {
			sva += IOMMU_L0_SIZE;
			continue;
		}
		pa0 = l0e & ~ATTR_MASK;
		m0 = PHYS_TO_VM_PAGE(pa0);
		l1 = (pd_entry_t *)PHYS_TO_DMAP(pa0);

		for (j = iommu_l1_index(sva); j < Ln_ENTRIES; j++) {
			l1e = l1[j];
			if ((l1e & ATTR_DESCR_VALID) == 0) {
				sva += IOMMU_L1_SIZE;
				continue;
			}
			if ((l1e & ATTR_DESCR_MASK) == IOMMU_L1_BLOCK) {
				sva += IOMMU_L1_SIZE;
				continue;
			}
			pa1 = l1e & ~ATTR_MASK;
			m1 = PHYS_TO_VM_PAGE(pa1);
			l2 = (pd_entry_t *)PHYS_TO_DMAP(pa1);

			for (k = iommu_l2_index(sva); k < Ln_ENTRIES; k++) {
				l2e = l2[k];
				if ((l2e & ATTR_DESCR_VALID) == 0) {
					sva += IOMMU_L2_SIZE;
					continue;
				}
				pa = l2e & ~ATTR_MASK;
				m = PHYS_TO_VM_PAGE(pa);
				l3 = (pt_entry_t *)PHYS_TO_DMAP(pa);

				for (l = iommu_l3_index(sva); l < Ln_ENTRIES;
				    l++, sva += IOMMU_L3_SIZE) {
					l3e = l3[l];
					if ((l3e & ATTR_DESCR_VALID) == 0)
						continue;
					panic("%s: l3e found for va %jx\n",
					    __func__, sva);
				}

				vm_page_unwire_noq(m1);
				vm_page_unwire_noq(m);
				pmap_resident_count_dec(pmap, 1);
				vm_page_free(m);
				pmap_clear(&l2[k]);
			}

			vm_page_unwire_noq(m0);
			pmap_resident_count_dec(pmap, 1);
			vm_page_free(m1);
			pmap_clear(&l1[j]);
		}

		pmap_resident_count_dec(pmap, 1);
		vm_page_free(m0);
		pmap_clear(&pmap->pm_l0[i]);
	}

	KASSERT(pmap->pm_stats.resident_count == 0,
	    ("Invalid resident count %jd", pmap->pm_stats.resident_count));

	PMAP_UNLOCK(pmap);
}