/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2024 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/domainset.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/rman.h>
#include <sys/sf_buf.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_map.h>
#include <dev/pci/pcireg.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/cpu.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <x86/include/busdma_impl.h>
#include <dev/iommu/busdma_iommu.h>
#include <x86/iommu/amd_reg.h>
#include <x86/iommu/x86_iommu.h>
#include <x86/iommu/amd_iommu.h>

static void amdiommu_unmap_clear_pte(struct amdiommu_domain *domain,
    iommu_gaddr_t base, int lvl, int flags, iommu_pte_t *pte,
    struct sf_buf **sf, struct iommu_map_entry *entry, bool free_sf);
static int amdiommu_unmap_buf_locked(struct amdiommu_domain *domain,
    iommu_gaddr_t base, iommu_gaddr_t size, int flags,
    struct iommu_map_entry *entry);

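/*
 * Allocate the VM object backing the domain page table and wire the
 * root page table page, which is never implicitly freed.  The object
 * prefers memory from the NUMA domain of the IOMMU unit when that
 * domain can be determined.
 */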
int
amdiommu_domain_alloc_pgtbl(struct amdiommu_domain *domain)
{
	vm_page_t m;
	int dom;

	KASSERT(domain->pgtbl_obj == NULL,
	    ("already initialized %p", domain));

	domain->pgtbl_obj = vm_pager_allocate(OBJT_PHYS, NULL,
	    IDX_TO_OFF(pglvl_max_pages(domain->pglvl)), 0, 0, NULL);
	if (bus_get_domain(domain->iodom.iommu->dev, &dom) == 0)
		domain->pgtbl_obj->domain.dr_policy = DOMAINSET_PREF(dom);
	AMDIOMMU_DOMAIN_PGLOCK(domain);
	m = iommu_pgalloc(domain->pgtbl_obj, 0, IOMMU_PGF_WAITOK |
	    IOMMU_PGF_ZERO | IOMMU_PGF_OBJL);
	/* No implicit free of the top level page table page. */
	vm_page_wire(m);
	domain->pgtblr = m;
	AMDIOMMU_DOMAIN_PGUNLOCK(domain);
	AMDIOMMU_LOCK(domain->unit);
	domain->iodom.flags |= IOMMU_DOMAIN_PGTBL_INITED;
	AMDIOMMU_UNLOCK(domain->unit);
	return (0);
}

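/*
 * Release the domain page table object.  A NULL object is tolerated
 * only for identity-mapped domains, which own no page table.  The
 * wirings taken by the mapping code are cleared in bulk before the
 * object is deallocated.
 */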
void
amdiommu_domain_free_pgtbl(struct amdiommu_domain *domain)
{
	vm_object_t obj;
	vm_page_t m;

	obj = domain->pgtbl_obj;
	if (obj == NULL) {
		KASSERT((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) != 0,
		    ("lost pagetable object domain %p", domain));
		return;
	}
	AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(domain);
	domain->pgtbl_obj = NULL;
	domain->pgtblr = NULL;

	/* Obliterate ref_counts */
	VM_OBJECT_ASSERT_WLOCKED(obj);
	for (m = vm_page_lookup(obj, 0); m != NULL; m = vm_page_next(m))
		vm_page_clearref(m);
	VM_OBJECT_WUNLOCK(obj);
	vm_object_deallocate(obj);
}

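/*
 * Return a pointer to the pte at page table level lvl translating the
 * address base, mapping the containing page table page into KVA
 * through *sf.  *idxp caches the page index so consecutive calls can
 * reuse the existing mapping.  Missing page table pages are allocated
 * on demand; NULL is returned only when a non-waitable allocation
 * fails.
 */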
static iommu_pte_t *
amdiommu_pgtbl_map_pte(struct amdiommu_domain *domain, iommu_gaddr_t base,
    int lvl, int flags, vm_pindex_t *idxp, struct sf_buf **sf)
{
	iommu_pte_t *pte, *ptep;
	struct sf_buf *sfp;
	vm_page_t m;
	vm_pindex_t idx, idx1;

	idx = pglvl_pgtbl_get_pindex(domain->pglvl, base, lvl);
	if (*sf != NULL && idx == *idxp) {
		pte = (iommu_pte_t *)sf_buf_kva(*sf);
	} else {
		if (*sf != NULL)
			iommu_unmap_pgtbl(*sf);
		*idxp = idx;
retry:
		pte = iommu_map_pgtbl(domain->pgtbl_obj, idx, flags, sf);
		if (pte == NULL) {
			KASSERT(lvl > 0,
			    ("lost root page table page %p", domain));
			/*
			 * Page table page does not exist, allocate
			 * it and create a pte in the preceding page level
			 * to reference the allocated page table page.
			 */
			m = iommu_pgalloc(domain->pgtbl_obj, idx, flags |
			    IOMMU_PGF_ZERO);
			if (m == NULL)
				return (NULL);

			vm_page_wire(m);

			sfp = NULL;
			ptep = amdiommu_pgtbl_map_pte(domain, base, lvl - 1,
			    flags, &idx1, &sfp);
			if (ptep == NULL) {
				KASSERT(m->pindex != 0,
				    ("losing root page %p", domain));
				vm_page_unwire_noq(m);
				iommu_pgfree(domain->pgtbl_obj, m->pindex,
				    flags, NULL);
				return (NULL);
			}
			ptep->pte = VM_PAGE_TO_PHYS(m) | AMDIOMMU_PTE_IR |
			    AMDIOMMU_PTE_IW | AMDIOMMU_PTE_PR |
			    ((domain->pglvl - lvl) << AMDIOMMU_PTE_NLVL_SHIFT);
			vm_page_wire(sf_buf_page(sfp));
			vm_page_unwire_noq(m);
			iommu_unmap_pgtbl(sfp);
			/* Only executed once. */
			goto retry;
		}
	}
	pte += pglvl_pgtbl_pte_off(domain->pglvl, base, lvl);
	return (pte);
}

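/*
 * Fill the leaf-level ptes covering [base, base + size) with the
 * physical addresses of the pages in ma, OR-ed with the pte bits in
 * pflags.  On allocation failure the partially constructed mapping is
 * torn down and ENOMEM is returned.
 */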
static int
amdiommu_map_buf_locked(struct amdiommu_domain *domain, iommu_gaddr_t base,
    iommu_gaddr_t size, vm_page_t *ma, uint64_t pflags, int flags,
    struct iommu_map_entry *entry)
{
	iommu_pte_t *pte;
	struct sf_buf *sf;
	iommu_gaddr_t base1;
	vm_pindex_t pi, idx;

	AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(domain);

	base1 = base;
	flags |= IOMMU_PGF_OBJL;
	idx = -1;
	pte = NULL;
	sf = NULL;

	for (pi = 0; size > 0; base += IOMMU_PAGE_SIZE,
	    size -= IOMMU_PAGE_SIZE, pi++) {
		KASSERT(size >= IOMMU_PAGE_SIZE,
		    ("mapping loop overflow %p %jx %jx %jx", domain,
		    (uintmax_t)base, (uintmax_t)size,
		    (uintmax_t)IOMMU_PAGE_SIZE));
		pte = amdiommu_pgtbl_map_pte(domain, base, domain->pglvl - 1,
		    flags, &idx, &sf);
		if (pte == NULL) {
			KASSERT((flags & IOMMU_PGF_WAITOK) == 0,
			    ("failed waitable pte alloc %p", domain));
			if (sf != NULL)
				iommu_unmap_pgtbl(sf);
			amdiommu_unmap_buf_locked(domain, base1, base - base1,
			    flags, entry);
			return (ENOMEM);
		}
		/* next level 0, no superpages */
		pte->pte = VM_PAGE_TO_PHYS(ma[pi]) | pflags | AMDIOMMU_PTE_PR;
		vm_page_wire(sf_buf_page(sf));
	}
	if (sf != NULL)
		iommu_unmap_pgtbl(sf);
	return (0);
}

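/*
 * Map method for the generic IOMMU layer: convert map entry flags into
 * AMD pte permission bits, validate the request, install the mapping,
 * and invalidate the IOTLB for the covered range.
 */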
static int
amdiommu_map_buf(struct iommu_domain *iodom, struct iommu_map_entry *entry,
    vm_page_t *ma, uint64_t eflags, int flags)
{
	struct amdiommu_domain *domain;
	uint64_t pflags;
	iommu_gaddr_t base, size;
	int error;

	base = entry->start;
	size = entry->end - entry->start;
	pflags = ((eflags & IOMMU_MAP_ENTRY_READ) != 0 ? AMDIOMMU_PTE_IR : 0) |
	    ((eflags & IOMMU_MAP_ENTRY_WRITE) != 0 ? AMDIOMMU_PTE_IW : 0) |
	    ((eflags & IOMMU_MAP_ENTRY_SNOOP) != 0 ? AMDIOMMU_PTE_FC : 0);
	/* IOMMU_MAP_ENTRY_TM ignored */

	domain = IODOM2DOM(iodom);

	KASSERT((iodom->flags & IOMMU_DOMAIN_IDMAP) == 0,
	    ("modifying idmap pagetable domain %p", domain));
	KASSERT((base & IOMMU_PAGE_MASK) == 0,
	    ("non-aligned base %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT((size & IOMMU_PAGE_MASK) == 0,
	    ("non-aligned size %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT(size > 0, ("zero size %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT(base < iodom->end,
	    ("base too high %p %jx %jx end %jx", domain, (uintmax_t)base,
	    (uintmax_t)size, (uintmax_t)iodom->end));
	KASSERT(base + size < iodom->end,
	    ("end too high %p %jx %jx end %jx", domain, (uintmax_t)base,
	    (uintmax_t)size, (uintmax_t)iodom->end));
	KASSERT(base + size > base,
	    ("size overflow %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT((pflags & (AMDIOMMU_PTE_IR | AMDIOMMU_PTE_IW)) != 0,
	    ("neither read nor write %jx", (uintmax_t)pflags));
	KASSERT((pflags & ~(AMDIOMMU_PTE_IR | AMDIOMMU_PTE_IW |
	    AMDIOMMU_PTE_FC)) == 0,
	    ("invalid pte flags %jx", (uintmax_t)pflags));
	KASSERT((flags & ~IOMMU_PGF_WAITOK) == 0, ("invalid flags %x", flags));

	AMDIOMMU_DOMAIN_PGLOCK(domain);
	error = amdiommu_map_buf_locked(domain, base, size, ma, pflags,
	    flags, entry);
	AMDIOMMU_DOMAIN_PGUNLOCK(domain);

	/*
	 * XXXKIB invalidation seems to be needed even for non-valid->valid
	 * updates.  Recheck.
	 */
	iommu_qi_invalidate_sync(iodom, base, size,
	    (flags & IOMMU_PGF_WAITOK) != 0);
	return (error);
}

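/*
 * Clear the pde referencing the page table page for (base, lvl),
 * releasing empty upper-level page table pages along the way.
 */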
static void
amdiommu_free_pgtbl_pde(struct amdiommu_domain *domain, iommu_gaddr_t base,
    int lvl, int flags, struct iommu_map_entry *entry)
{
	struct sf_buf *sf;
	iommu_pte_t *pde;
	vm_pindex_t idx;

	sf = NULL;
	pde = amdiommu_pgtbl_map_pte(domain, base, lvl, flags, &idx, &sf);
	amdiommu_unmap_clear_pte(domain, base, lvl, flags, pde, &sf, entry,
	    true);
}

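/*
 * Clear a pte and drop the wiring it contributed to its page table
 * page.  If this was the last reference, free the page and recurse
 * upward to clear the pde in the parent level.
 */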
static void
amdiommu_unmap_clear_pte(struct amdiommu_domain *domain, iommu_gaddr_t base,
    int lvl, int flags, iommu_pte_t *pte, struct sf_buf **sf,
    struct iommu_map_entry *entry, bool free_sf)
{
	vm_page_t m;

	pte->pte = 0;
	m = sf_buf_page(*sf);
	if (free_sf) {
		iommu_unmap_pgtbl(*sf);
		*sf = NULL;
	}
	if (!vm_page_unwire_noq(m))
		return;
	KASSERT(lvl != 0,
	    ("lost reference (lvl) on root pg domain %p base %jx lvl %d",
	    domain, (uintmax_t)base, lvl));
	KASSERT(m->pindex != 0,
	    ("lost reference (idx) on root pg domain %p base %jx lvl %d",
	    domain, (uintmax_t)base, lvl));
	iommu_pgfree(domain->pgtbl_obj, m->pindex, flags, entry);
	amdiommu_free_pgtbl_pde(domain, base, lvl - 1, flags, entry);
}

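/*
 * Remove the leaf ptes covering the page-aligned range
 * [base, base + size), freeing page table pages that lose their last
 * reference.
 */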
static int
amdiommu_unmap_buf_locked(struct amdiommu_domain *domain, iommu_gaddr_t base,
    iommu_gaddr_t size, int flags, struct iommu_map_entry *entry)
{
	iommu_pte_t *pte;
	struct sf_buf *sf;
	vm_pindex_t idx;
	iommu_gaddr_t pg_sz;

	AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(domain);
	if (size == 0)
		return (0);

	KASSERT((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) == 0,
	    ("modifying idmap pagetable domain %p", domain));
	KASSERT((base & IOMMU_PAGE_MASK) == 0,
	    ("non-aligned base %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT((size & IOMMU_PAGE_MASK) == 0,
	    ("non-aligned size %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT(base < DOM2IODOM(domain)->end,
	    ("base too high %p %jx %jx end %jx", domain, (uintmax_t)base,
	    (uintmax_t)size, (uintmax_t)DOM2IODOM(domain)->end));
	KASSERT(base + size < DOM2IODOM(domain)->end,
	    ("end too high %p %jx %jx end %jx", domain, (uintmax_t)base,
	    (uintmax_t)size, (uintmax_t)DOM2IODOM(domain)->end));
	KASSERT(base + size > base,
	    ("size overflow %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT((flags & ~IOMMU_PGF_WAITOK) == 0, ("invalid flags %x", flags));

	pg_sz = IOMMU_PAGE_SIZE;
	flags |= IOMMU_PGF_OBJL;

	for (sf = NULL; size > 0; base += pg_sz, size -= pg_sz) {
		pte = amdiommu_pgtbl_map_pte(domain, base,
		    domain->pglvl - 1, flags, &idx, &sf);
		KASSERT(pte != NULL,
		    ("sleeping or page missed %p %jx %d 0x%x",
		    domain, (uintmax_t)base, domain->pglvl - 1, flags));
		amdiommu_unmap_clear_pte(domain, base, domain->pglvl - 1,
		    flags, pte, &sf, entry, false);
		KASSERT(size >= pg_sz,
		    ("unmapping loop overflow %p %jx %jx %jx", domain,
		    (uintmax_t)base, (uintmax_t)size, (uintmax_t)pg_sz));
	}
	if (sf != NULL)
		iommu_unmap_pgtbl(sf);
	return (0);
}

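/*
 * Unmap method for the generic IOMMU layer.  No IOTLB invalidation is
 * performed here; it is left to the caller.
 */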
static int
amdiommu_unmap_buf(struct iommu_domain *iodom, struct iommu_map_entry *entry,
    int flags)
{
	struct amdiommu_domain *domain;
	int error;

	domain = IODOM2DOM(iodom);

	AMDIOMMU_DOMAIN_PGLOCK(domain);
	error = amdiommu_unmap_buf_locked(domain, entry->start,
	    entry->end - entry->start, flags, entry);
	AMDIOMMU_DOMAIN_PGUNLOCK(domain);
	return (error);
}

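/*
 * Page table map/unmap methods exported to the generic IOMMU code.
 */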
const struct iommu_domain_map_ops amdiommu_domain_map_ops = {
	.map = amdiommu_map_buf,
	.unmap = amdiommu_unmap_buf,
};