/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2024 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/domainset.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/rman.h>
#include <sys/sf_buf.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_map.h>
#include <dev/pci/pcireg.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/cpu.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <x86/include/busdma_impl.h>
#include <dev/iommu/busdma_iommu.h>
#include <x86/iommu/amd_reg.h>
#include <x86/iommu/x86_iommu.h>
#include <x86/iommu/amd_iommu.h>

static void amdiommu_unmap_clear_pte(struct amdiommu_domain *domain,
    iommu_gaddr_t base, int lvl, int flags, iommu_pte_t *pte,
    struct sf_buf **sf, struct iommu_map_entry *entry, bool free_sf);
static int amdiommu_unmap_buf_locked(struct amdiommu_domain *domain,
    iommu_gaddr_t base, iommu_gaddr_t size, int flags,
    struct iommu_map_entry *entry);

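/*
 * Allocate the VM object backing the domain page table and install
 * the top-level page table page.  The backing pages are
 * preferentially allocated from the NUMA domain of the IOMMU unit.
 * The root page is wired explicitly since no pte from an upper
 * level references it.
 */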
int
amdiommu_domain_alloc_pgtbl(struct amdiommu_domain *domain)
{
	vm_page_t m;
	int dom;

	KASSERT(domain->pgtbl_obj == NULL,
	    ("already initialized %p", domain));

	domain->pgtbl_obj = vm_pager_allocate(OBJT_PHYS, NULL,
	    IDX_TO_OFF(pglvl_max_pages(domain->pglvl)), 0, 0, NULL);
	if (bus_get_domain(domain->iodom.iommu->dev, &dom) == 0)
		domain->pgtbl_obj->domain.dr_policy = DOMAINSET_PREF(dom);
	AMDIOMMU_DOMAIN_PGLOCK(domain);
	m = iommu_pgalloc(domain->pgtbl_obj, 0, IOMMU_PGF_WAITOK |
	    IOMMU_PGF_ZERO | IOMMU_PGF_OBJL);
	/* No implicit free of the top level page table page. */
	vm_page_wire(m);
	domain->pgtblr = m;
	AMDIOMMU_DOMAIN_PGUNLOCK(domain);
	AMDIOMMU_LOCK(domain->unit);
	domain->iodom.flags |= IOMMU_DOMAIN_PGTBL_INITED;
	AMDIOMMU_UNLOCK(domain->unit);
	return (0);
}

void
amdiommu_domain_free_pgtbl(struct amdiommu_domain *domain)
{
	vm_object_t obj;
	vm_page_t m;

	obj = domain->pgtbl_obj;
	if (obj == NULL) {
		KASSERT((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) != 0,
		    ("lost pagetable object domain %p", domain));
		return;
	}
	AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(domain);
	domain->pgtbl_obj = NULL;
	domain->pgtblr = NULL;

	/* Obliterate ref_counts */
	VM_OBJECT_ASSERT_WLOCKED(obj);
	for (m = vm_page_lookup(obj, 0); m != NULL; m = vm_page_next(m))
		vm_page_clearref(m);
	VM_OBJECT_WUNLOCK(obj);
	vm_object_deallocate(obj);
}

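/*
 * Map and return a pointer to the page table entry for address 'base'
 * at paging level 'lvl', allocating the page table page on demand and
 * linking it into the previous (closer to root) level if it does not
 * yet exist.  The sf_buf mapping of the page table page is cached in
 * *sf/*idxp and reused across consecutive calls; the caller finally
 * releases it with iommu_unmap_pgtbl().
 */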
static iommu_pte_t *
amdiommu_pgtbl_map_pte(struct amdiommu_domain *domain, iommu_gaddr_t base,
    int lvl, int flags, vm_pindex_t *idxp, struct sf_buf **sf)
{
	iommu_pte_t *pte, *ptep;
	struct sf_buf *sfp;
	vm_page_t m;
	vm_pindex_t idx, idx1;

	idx = pglvl_pgtbl_get_pindex(domain->pglvl, base, lvl);
	if (*sf != NULL && idx == *idxp) {
		pte = (iommu_pte_t *)sf_buf_kva(*sf);
	} else {
		if (*sf != NULL)
			iommu_unmap_pgtbl(*sf);
		*idxp = idx;
retry:
		pte = iommu_map_pgtbl(domain->pgtbl_obj, idx, flags, sf);
		if (pte == NULL) {
			KASSERT(lvl > 0,
			    ("lost root page table page %p", domain));
			/*
			 * Page table page does not exist, allocate
			 * it and create a pte in the preceding page level
			 * to reference the allocated page table page.
			 */
			m = iommu_pgalloc(domain->pgtbl_obj, idx, flags |
			    IOMMU_PGF_ZERO);
			if (m == NULL)
				return (NULL);

			vm_page_wire(m);

			sfp = NULL;
			ptep = amdiommu_pgtbl_map_pte(domain, base, lvl - 1,
			    flags, &idx1, &sfp);
			if (ptep == NULL) {
				KASSERT(m->pindex != 0,
				    ("losing root page %p", domain));
				vm_page_unwire_noq(m);
				iommu_pgfree(domain->pgtbl_obj, m->pindex,
				    flags, NULL);
				return (NULL);
			}
			ptep->pte = VM_PAGE_TO_PHYS(m) | AMDIOMMU_PTE_IR |
			    AMDIOMMU_PTE_IW | AMDIOMMU_PTE_PR |
			    ((domain->pglvl - lvl) << AMDIOMMU_PTE_NLVL_SHIFT);
			vm_page_wire(sf_buf_page(sfp));
			vm_page_unwire_noq(m);
			iommu_unmap_pgtbl(sfp);
			/* Only executed once. */
			goto retry;
		}
	}
	pte += pglvl_pgtbl_pte_off(domain->pglvl, base, lvl);
	return (pte);
}

static int
amdiommu_map_buf_locked(struct amdiommu_domain *domain, iommu_gaddr_t base,
    iommu_gaddr_t size, vm_page_t *ma, uint64_t pflags, int flags,
    struct iommu_map_entry *entry)
{
	iommu_pte_t *pte;
	struct sf_buf *sf;
	iommu_gaddr_t base1;
	vm_pindex_t pi, idx;

	AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(domain);

	base1 = base;
	flags |= IOMMU_PGF_OBJL;
	idx = -1;
	pte = NULL;
	sf = NULL;

	for (pi = 0; size > 0; base += IOMMU_PAGE_SIZE,
	    size -= IOMMU_PAGE_SIZE, pi++) {
		KASSERT(size >= IOMMU_PAGE_SIZE,
		    ("mapping loop overflow %p %jx %jx %jx", domain,
		    (uintmax_t)base, (uintmax_t)size,
		    (uintmax_t)IOMMU_PAGE_SIZE));
		pte = amdiommu_pgtbl_map_pte(domain, base, domain->pglvl - 1,
		    flags, &idx, &sf);
		if (pte == NULL) {
			KASSERT((flags & IOMMU_PGF_WAITOK) == 0,
			    ("failed waitable pte alloc %p", domain));
			if (sf != NULL)
				iommu_unmap_pgtbl(sf);
			amdiommu_unmap_buf_locked(domain, base1, base - base1,
			    flags, entry);
			return (ENOMEM);
		}
		/* next level 0, no superpages */
		pte->pte = VM_PAGE_TO_PHYS(ma[pi]) | pflags | AMDIOMMU_PTE_PR;
		vm_page_wire(sf_buf_page(sf));
	}
	if (sf != NULL)
		iommu_unmap_pgtbl(sf);
	return (0);
}

static int
amdiommu_map_buf(struct iommu_domain *iodom, struct iommu_map_entry *entry,
    vm_page_t *ma, uint64_t eflags, int flags)
{
	struct amdiommu_domain *domain;
	uint64_t pflags;
	iommu_gaddr_t base, size;
	int error;

	base = entry->start;
	size = entry->end - entry->start;
	pflags = ((eflags & IOMMU_MAP_ENTRY_READ) != 0 ? AMDIOMMU_PTE_IR : 0) |
	    ((eflags & IOMMU_MAP_ENTRY_WRITE) != 0 ? AMDIOMMU_PTE_IW : 0) |
	    ((eflags & IOMMU_MAP_ENTRY_SNOOP) != 0 ? AMDIOMMU_PTE_FC : 0);
	/* IOMMU_MAP_ENTRY_TM ignored */

	domain = IODOM2DOM(iodom);

	KASSERT((iodom->flags & IOMMU_DOMAIN_IDMAP) == 0,
	    ("modifying idmap pagetable domain %p", domain));
	KASSERT((base & IOMMU_PAGE_MASK) == 0,
	    ("non-aligned base %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT((size & IOMMU_PAGE_MASK) == 0,
	    ("non-aligned size %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT(size > 0, ("zero size %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT(base < iodom->end,
	    ("base too high %p %jx %jx end %jx", domain, (uintmax_t)base,
	    (uintmax_t)size, (uintmax_t)iodom->end));
	KASSERT(base + size < iodom->end,
	    ("end too high %p %jx %jx end %jx", domain, (uintmax_t)base,
	    (uintmax_t)size, (uintmax_t)iodom->end));
	KASSERT(base + size > base,
	    ("size overflow %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT((pflags & (AMDIOMMU_PTE_IR | AMDIOMMU_PTE_IW)) != 0,
	    ("neither read nor write %jx", (uintmax_t)pflags));
	KASSERT((pflags & ~(AMDIOMMU_PTE_IR | AMDIOMMU_PTE_IW |
	    AMDIOMMU_PTE_FC)) == 0,
	    ("invalid pte flags %jx", (uintmax_t)pflags));
	KASSERT((flags & ~IOMMU_PGF_WAITOK) == 0, ("invalid flags %x", flags));

	AMDIOMMU_DOMAIN_PGLOCK(domain);
	error = amdiommu_map_buf_locked(domain, base, size, ma, pflags,
	    flags, entry);
	AMDIOMMU_DOMAIN_PGUNLOCK(domain);

	/*
	 * XXXKIB invalidation seems to be needed even for non-valid->valid
	 * updates.  Recheck.
	 */
	iommu_qi_invalidate_sync(iodom, base, size,
	    (flags & IOMMU_PGF_WAITOK) != 0);
	return (error);
}

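/*
 * Helpers for unmapping.  amdiommu_unmap_clear_pte() zeroes a pte and
 * drops the wiring it held on its page table page; when the last
 * wiring goes away, the page is freed and the pde referencing it in
 * the previous level is cleared in turn, recursing toward the root
 * through amdiommu_free_pgtbl_pde().
 */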
static void
amdiommu_free_pgtbl_pde(struct amdiommu_domain *domain, iommu_gaddr_t base,
    int lvl, int flags, struct iommu_map_entry *entry)
{
	struct sf_buf *sf;
	iommu_pte_t *pde;
	vm_pindex_t idx;

	sf = NULL;
	pde = amdiommu_pgtbl_map_pte(domain, base, lvl, flags, &idx, &sf);
	amdiommu_unmap_clear_pte(domain, base, lvl, flags, pde, &sf, entry,
	    true);
}

static void
amdiommu_unmap_clear_pte(struct amdiommu_domain *domain, iommu_gaddr_t base,
    int lvl, int flags, iommu_pte_t *pte, struct sf_buf **sf,
    struct iommu_map_entry *entry, bool free_sf)
{
	vm_page_t m;

	pte->pte = 0;
	m = sf_buf_page(*sf);
	if (free_sf) {
		iommu_unmap_pgtbl(*sf);
		*sf = NULL;
	}
	if (!vm_page_unwire_noq(m))
		return;
	KASSERT(lvl != 0,
	    ("lost reference (lvl) on root pg domain %p base %jx lvl %d",
	    domain, (uintmax_t)base, lvl));
	KASSERT(m->pindex != 0,
	    ("lost reference (idx) on root pg domain %p base %jx lvl %d",
	    domain, (uintmax_t)base, lvl));
	iommu_pgfree(domain->pgtbl_obj, m->pindex, flags, entry);
	amdiommu_free_pgtbl_pde(domain, base, lvl - 1, flags, entry);
}

static int
amdiommu_unmap_buf_locked(struct amdiommu_domain *domain, iommu_gaddr_t base,
    iommu_gaddr_t size, int flags, struct iommu_map_entry *entry)
{
	iommu_pte_t *pte;
	struct sf_buf *sf;
	vm_pindex_t idx;
	iommu_gaddr_t pg_sz;

	AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(domain);
	if (size == 0)
		return (0);

	KASSERT((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) == 0,
	    ("modifying idmap pagetable domain %p", domain));
	KASSERT((base & IOMMU_PAGE_MASK) == 0,
	    ("non-aligned base %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT((size & IOMMU_PAGE_MASK) == 0,
	    ("non-aligned size %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT(base < DOM2IODOM(domain)->end,
	    ("base too high %p %jx %jx end %jx", domain, (uintmax_t)base,
	    (uintmax_t)size, (uintmax_t)DOM2IODOM(domain)->end));
	KASSERT(base + size < DOM2IODOM(domain)->end,
	    ("end too high %p %jx %jx end %jx", domain, (uintmax_t)base,
	    (uintmax_t)size, (uintmax_t)DOM2IODOM(domain)->end));
	KASSERT(base + size > base,
	    ("size overflow %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT((flags & ~IOMMU_PGF_WAITOK) == 0, ("invalid flags %x", flags));

	pg_sz = IOMMU_PAGE_SIZE;
	flags |= IOMMU_PGF_OBJL;

	for (sf = NULL; size > 0; base += pg_sz, size -= pg_sz) {
		pte = amdiommu_pgtbl_map_pte(domain, base,
		    domain->pglvl - 1, flags, &idx, &sf);
		KASSERT(pte != NULL,
		    ("sleeping or page missed %p %jx %d 0x%x",
		    domain, (uintmax_t)base, domain->pglvl - 1, flags));
		amdiommu_unmap_clear_pte(domain, base, domain->pglvl - 1,
		    flags, pte, &sf, entry, false);
		KASSERT(size >= pg_sz,
		    ("unmapping loop overflow %p %jx %jx %jx", domain,
		    (uintmax_t)base, (uintmax_t)size, (uintmax_t)pg_sz));
	}
	if (sf != NULL)
		iommu_unmap_pgtbl(sf);
	return (0);
}

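/*
 * Unmap the range covered by 'entry'.  Unlike amdiommu_map_buf(),
 * this path does not issue an IOTLB invalidation itself; callers are
 * expected to invalidate as needed.
 */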
static int
amdiommu_unmap_buf(struct iommu_domain *iodom, struct iommu_map_entry *entry,
    int flags)
{
	struct amdiommu_domain *domain;
	int error;

	domain = IODOM2DOM(iodom);

	AMDIOMMU_DOMAIN_PGLOCK(domain);
	error = amdiommu_unmap_buf_locked(domain, entry->start,
	    entry->end - entry->start, flags, entry);
	AMDIOMMU_DOMAIN_PGUNLOCK(domain);
	return (error);
}

const struct iommu_domain_map_ops amdiommu_domain_map_ops = {
	.map = amdiommu_map_buf,
	.unmap = amdiommu_unmap_buf,
};