/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2024 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/domainset.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/rman.h>
#include <sys/sf_buf.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_radix.h>
#include <vm/vm_map.h>
#include <dev/pci/pcireg.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/cpu.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <x86/include/busdma_impl.h>
#include <dev/iommu/busdma_iommu.h>
#include <x86/iommu/amd_reg.h>
#include <x86/iommu/x86_iommu.h>
#include <x86/iommu/amd_iommu.h>

static void amdiommu_unmap_clear_pte(struct amdiommu_domain *domain,
    iommu_gaddr_t base, int lvl, int flags, iommu_pte_t *pte,
    struct sf_buf **sf, struct iommu_map_entry *entry, bool free_sf);
static int amdiommu_unmap_buf_locked(struct amdiommu_domain *domain,
    iommu_gaddr_t base, iommu_gaddr_t size, int flags,
    struct iommu_map_entry *entry);
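
/*
 * Allocate the backing VM object for the domain page table and
 * install the root (top-level) page table page.  The root page is
 * wired explicitly so that the unmap path never frees it implicitly.
 */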
int
amdiommu_domain_alloc_pgtbl(struct amdiommu_domain *domain)
{
	vm_page_t m;
	int dom;

	KASSERT(domain->pgtbl_obj == NULL,
	    ("already initialized %p", domain));

	domain->pgtbl_obj = vm_pager_allocate(OBJT_PHYS, NULL,
	    IDX_TO_OFF(pglvl_max_pages(domain->pglvl)), 0, 0, NULL);
	if (bus_get_domain(domain->iodom.iommu->dev, &dom) == 0)
		domain->pgtbl_obj->domain.dr_policy = DOMAINSET_PREF(dom);
	AMDIOMMU_DOMAIN_PGLOCK(domain);
	m = iommu_pgalloc(domain->pgtbl_obj, 0, IOMMU_PGF_WAITOK |
	    IOMMU_PGF_ZERO | IOMMU_PGF_OBJL);
	/* No implicit free of the top level page table page. */
	vm_page_wire(m);
	domain->pgtblr = m;
	AMDIOMMU_DOMAIN_PGUNLOCK(domain);
	AMDIOMMU_LOCK(domain->unit);
	domain->iodom.flags |= IOMMU_DOMAIN_PGTBL_INITED;
	AMDIOMMU_UNLOCK(domain->unit);
	return (0);
}

/*
 * Free the domain page table object.  The object is absent only for
 * identity-mapped domains, which never allocated one.
 */
void
amdiommu_domain_free_pgtbl(struct amdiommu_domain *domain)
{
	struct pctrie_iter pages;
	vm_object_t obj;
	vm_page_t m;

	obj = domain->pgtbl_obj;
	if (obj == NULL) {
		KASSERT((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) != 0,
		    ("lost pagetable object domain %p", domain));
		return;
	}
	AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(domain);
	domain->pgtbl_obj = NULL;
	domain->pgtblr = NULL;

	/* Obliterate ref_counts */
	VM_OBJECT_ASSERT_WLOCKED(obj);
	vm_page_iter_init(&pages, obj);
	VM_RADIX_FORALL(m, &pages)
		vm_page_clearref(m);
	VM_OBJECT_WUNLOCK(obj);
	vm_object_deallocate(obj);
}

/*
 * Map and return a pointer to the PTE for 'base' at paging level
 * 'lvl', allocating missing intermediate page table pages on the way.
 * *sf and *idxp cache the sf_buf mapping and the page index of the
 * page table page across consecutive calls.
 */
static iommu_pte_t *
amdiommu_pgtbl_map_pte(struct amdiommu_domain *domain, iommu_gaddr_t base,
    int lvl, int flags, vm_pindex_t *idxp, struct sf_buf **sf)
{
	iommu_pte_t *pte, *ptep;
	struct sf_buf *sfp;
	vm_page_t m;
	vm_pindex_t idx, idx1;

	idx = pglvl_pgtbl_get_pindex(domain->pglvl, base, lvl);
	if (*sf != NULL && idx == *idxp) {
		pte = (iommu_pte_t *)sf_buf_kva(*sf);
	} else {
		if (*sf != NULL)
			iommu_unmap_pgtbl(*sf);
		*idxp = idx;
retry:
		pte = iommu_map_pgtbl(domain->pgtbl_obj, idx, flags, sf);
		if (pte == NULL) {
			KASSERT(lvl > 0,
			    ("lost root page table page %p", domain));
			/*
			 * Page table page does not exist, allocate
			 * it and create a pte in the preceding page level
			 * to reference the allocated page table page.
			 */
			m = iommu_pgalloc(domain->pgtbl_obj, idx, flags |
			    IOMMU_PGF_ZERO);
			if (m == NULL)
				return (NULL);

			vm_page_wire(m);

			sfp = NULL;
			ptep = amdiommu_pgtbl_map_pte(domain, base, lvl - 1,
			    flags, &idx1, &sfp);
			if (ptep == NULL) {
				KASSERT(m->pindex != 0,
				    ("losing root page %p", domain));
				vm_page_unwire_noq(m);
				iommu_pgfree(domain->pgtbl_obj, m->pindex,
				    flags, NULL);
				return (NULL);
			}
			ptep->pte = VM_PAGE_TO_PHYS(m) | AMDIOMMU_PTE_IR |
			    AMDIOMMU_PTE_IW | AMDIOMMU_PTE_PR |
			    ((domain->pglvl - lvl) << AMDIOMMU_PTE_NLVL_SHIFT);
			vm_page_wire(sf_buf_page(sfp));
			vm_page_unwire_noq(m);
			iommu_unmap_pgtbl(sfp);
			/* Only executed once. */
			goto retry;
		}
	}
	pte += pglvl_pgtbl_pte_off(domain->pglvl, base, lvl);
	return (pte);
}
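
/*
 * Populate leaf PTEs for the range [base, base + size) with the pages
 * in ma[], one IOMMU_PAGE_SIZE page at a time (no superpages).  On
 * allocation failure the partially created mapping is torn down, so
 * the caller sees all-or-nothing behavior.
 */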
static int
amdiommu_map_buf_locked(struct amdiommu_domain *domain, iommu_gaddr_t base,
    iommu_gaddr_t size, vm_page_t *ma, uint64_t pflags, int flags,
    struct iommu_map_entry *entry)
{
	iommu_pte_t *pte;
	struct sf_buf *sf;
	iommu_gaddr_t base1;
	vm_pindex_t pi, idx;

	AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(domain);

	base1 = base;
	flags |= IOMMU_PGF_OBJL;
	idx = -1;
	pte = NULL;
	sf = NULL;

	for (pi = 0; size > 0; base += IOMMU_PAGE_SIZE,
	    size -= IOMMU_PAGE_SIZE, pi++) {
		KASSERT(size >= IOMMU_PAGE_SIZE,
		    ("mapping loop overflow %p %jx %jx %jx", domain,
		    (uintmax_t)base, (uintmax_t)size,
		    (uintmax_t)IOMMU_PAGE_SIZE));
		pte = amdiommu_pgtbl_map_pte(domain, base, domain->pglvl - 1,
		    flags, &idx, &sf);
		if (pte == NULL) {
			KASSERT((flags & IOMMU_PGF_WAITOK) == 0,
			    ("failed waitable pte alloc %p", domain));
			if (sf != NULL)
				iommu_unmap_pgtbl(sf);
			amdiommu_unmap_buf_locked(domain, base1, base - base1,
			    flags, entry);
			return (ENOMEM);
		}
		/* Next level 0: leaf PTE, no superpages. */
		pte->pte = VM_PAGE_TO_PHYS(ma[pi]) | pflags | AMDIOMMU_PTE_PR;
		vm_page_wire(sf_buf_page(sf));
	}
	if (sf != NULL)
		iommu_unmap_pgtbl(sf);
	return (0);
}

/*
 * The iommu_domain_map_ops map callback: translate map entry flags
 * into PTE bits, validate the request, install the mapping under the
 * domain page table lock, then invalidate the IOTLB for the range.
 */
static int
amdiommu_map_buf(struct iommu_domain *iodom, struct iommu_map_entry *entry,
    vm_page_t *ma, uint64_t eflags, int flags)
{
	struct amdiommu_domain *domain;
	uint64_t pflags;
	iommu_gaddr_t base, size;
	int error;

	base = entry->start;
	size = entry->end - entry->start;
	pflags = ((eflags & IOMMU_MAP_ENTRY_READ) != 0 ? AMDIOMMU_PTE_IR : 0) |
	    ((eflags & IOMMU_MAP_ENTRY_WRITE) != 0 ? AMDIOMMU_PTE_IW : 0) |
	    ((eflags & IOMMU_MAP_ENTRY_SNOOP) != 0 ? AMDIOMMU_PTE_FC : 0);
	/* IOMMU_MAP_ENTRY_TM ignored */

	domain = IODOM2DOM(iodom);

	KASSERT((iodom->flags & IOMMU_DOMAIN_IDMAP) == 0,
	    ("modifying idmap pagetable domain %p", domain));
	KASSERT((base & IOMMU_PAGE_MASK) == 0,
	    ("non-aligned base %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT((size & IOMMU_PAGE_MASK) == 0,
	    ("non-aligned size %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT(size > 0, ("zero size %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT(base < iodom->end,
	    ("base too high %p %jx %jx end %jx", domain, (uintmax_t)base,
	    (uintmax_t)size, (uintmax_t)iodom->end));
	KASSERT(base + size < iodom->end,
	    ("end too high %p %jx %jx end %jx", domain, (uintmax_t)base,
	    (uintmax_t)size, (uintmax_t)iodom->end));
	KASSERT(base + size > base,
	    ("size overflow %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT((pflags & (AMDIOMMU_PTE_IR | AMDIOMMU_PTE_IW)) != 0,
	    ("neither read nor write %jx", (uintmax_t)pflags));
	KASSERT((pflags & ~(AMDIOMMU_PTE_IR | AMDIOMMU_PTE_IW |
	    AMDIOMMU_PTE_FC)) == 0,
	    ("invalid pte flags %jx", (uintmax_t)pflags));
	KASSERT((flags & ~IOMMU_PGF_WAITOK) == 0, ("invalid flags %x", flags));

	AMDIOMMU_DOMAIN_PGLOCK(domain);
	error = amdiommu_map_buf_locked(domain, base, size, ma, pflags,
	    flags, entry);
	AMDIOMMU_DOMAIN_PGUNLOCK(domain);

	/*
	 * XXXKIB invalidation seems to be needed even for non-valid->valid
	 * updates.  Recheck.
	 */
	iommu_qi_invalidate_sync(iodom, base, size,
	    (flags & IOMMU_PGF_WAITOK) != 0);
	return (error);
}
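
/*
 * Clear the PDE at (base, lvl) that referenced a just-freed page
 * table page.  Through amdiommu_unmap_clear_pte() this may drop the
 * last wiring of the PDE's own page table page, freeing it too and
 * propagating the cleanup toward the root.
 */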
static void
amdiommu_free_pgtbl_pde(struct amdiommu_domain *domain, iommu_gaddr_t base,
    int lvl, int flags, struct iommu_map_entry *entry)
{
	struct sf_buf *sf;
	iommu_pte_t *pde;
	vm_pindex_t idx;

	sf = NULL;
	pde = amdiommu_pgtbl_map_pte(domain, base, lvl, flags, &idx, &sf);
	amdiommu_unmap_clear_pte(domain, base, lvl, flags, pde, &sf, entry,
	    true);
}

/*
 * Clear a PTE and drop the wiring it held on its page table page.
 * If that was the last wiring, free the page and clear the parent
 * PDE one level closer to the root.
 */
static void
amdiommu_unmap_clear_pte(struct amdiommu_domain *domain, iommu_gaddr_t base,
    int lvl, int flags, iommu_pte_t *pte, struct sf_buf **sf,
    struct iommu_map_entry *entry, bool free_sf)
{
	vm_page_t m;

	pte->pte = 0;
	m = sf_buf_page(*sf);
	if (free_sf) {
		iommu_unmap_pgtbl(*sf);
		*sf = NULL;
	}
	if (!vm_page_unwire_noq(m))
		return;
	KASSERT(lvl != 0,
	    ("lost reference (lvl) on root pg domain %p base %jx lvl %d",
	    domain, (uintmax_t)base, lvl));
	KASSERT(m->pindex != 0,
	    ("lost reference (idx) on root pg domain %p base %jx lvl %d",
	    domain, (uintmax_t)base, lvl));
	iommu_pgfree(domain->pgtbl_obj, m->pindex, flags, entry);
	amdiommu_free_pgtbl_pde(domain, base, lvl - 1, flags, entry);
}

/*
 * Remove the leaf PTEs covering [base, base + size).  Page table
 * pages that lose their last wiring are freed along the way.
 */
static int
amdiommu_unmap_buf_locked(struct amdiommu_domain *domain, iommu_gaddr_t base,
    iommu_gaddr_t size, int flags, struct iommu_map_entry *entry)
{
	iommu_pte_t *pte;
	struct sf_buf *sf;
	vm_pindex_t idx;
	iommu_gaddr_t pg_sz;

	AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(domain);
	if (size == 0)
		return (0);

	KASSERT((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) == 0,
	    ("modifying idmap pagetable domain %p", domain));
	KASSERT((base & IOMMU_PAGE_MASK) == 0,
	    ("non-aligned base %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT((size & IOMMU_PAGE_MASK) == 0,
	    ("non-aligned size %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT(base < DOM2IODOM(domain)->end,
	    ("base too high %p %jx %jx end %jx", domain, (uintmax_t)base,
	    (uintmax_t)size, (uintmax_t)DOM2IODOM(domain)->end));
	KASSERT(base + size < DOM2IODOM(domain)->end,
	    ("end too high %p %jx %jx end %jx", domain, (uintmax_t)base,
	    (uintmax_t)size, (uintmax_t)DOM2IODOM(domain)->end));
	KASSERT(base + size > base,
	    ("size overflow %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT((flags & ~IOMMU_PGF_WAITOK) == 0, ("invalid flags %x", flags));

	pg_sz = IOMMU_PAGE_SIZE;
	flags |= IOMMU_PGF_OBJL;

	for (sf = NULL; size > 0; base += pg_sz, size -= pg_sz) {
		pte = amdiommu_pgtbl_map_pte(domain, base,
		    domain->pglvl - 1, flags, &idx, &sf);
		KASSERT(pte != NULL,
		    ("sleeping or page missed %p %jx %d 0x%x",
		    domain, (uintmax_t)base, domain->pglvl - 1, flags));
		amdiommu_unmap_clear_pte(domain, base, domain->pglvl - 1,
		    flags, pte, &sf, entry, false);
		KASSERT(size >= pg_sz,
		    ("unmapping loop overflow %p %jx %jx %jx", domain,
		    (uintmax_t)base, (uintmax_t)size, (uintmax_t)pg_sz));
	}
	if (sf != NULL)
		iommu_unmap_pgtbl(sf);
	return (0);
}
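
/*
 * The iommu_domain_map_ops unmap callback.  Unlike the map path, no
 * IOTLB invalidation is issued here; flushing the unmapped range is
 * expected to be handled by the callers of the unmap op.
 */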
static int
amdiommu_unmap_buf(struct iommu_domain *iodom, struct iommu_map_entry *entry,
    int flags)
{
	struct amdiommu_domain *domain;
	int error;

	domain = IODOM2DOM(iodom);

	AMDIOMMU_DOMAIN_PGLOCK(domain);
	error = amdiommu_unmap_buf_locked(domain, entry->start,
	    entry->end - entry->start, flags, entry);
	AMDIOMMU_DOMAIN_PGUNLOCK(domain);
	return (error);
}

const struct iommu_domain_map_ops amdiommu_domain_map_ops = {
	.map = amdiommu_map_buf,
	.unmap = amdiommu_unmap_buf,
};