/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2024 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/domainset.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/rman.h>
#include <sys/sf_buf.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_radix.h>
#include <vm/vm_map.h>
#include <dev/pci/pcireg.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/cpu.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <x86/include/busdma_impl.h>
#include <dev/iommu/busdma_iommu.h>
#include <x86/iommu/amd_reg.h>
#include <x86/iommu/x86_iommu.h>
#include <x86/iommu/amd_iommu.h>

static void amdiommu_unmap_clear_pte(struct amdiommu_domain *domain,
    iommu_gaddr_t base, int lvl, int flags, iommu_pte_t *pte,
    struct sf_buf **sf, struct iommu_map_entry *entry, bool free_sf);
static int amdiommu_unmap_buf_locked(struct amdiommu_domain *domain,
    iommu_gaddr_t base, iommu_gaddr_t size, int flags,
    struct iommu_map_entry *entry);

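/*
 * Create the page table for the domain: allocate the backing
 * OBJT_PHYS object sized to cover the whole paging hierarchy,
 * preferring pages from the NUMA domain of the IOMMU unit, then
 * allocate, zero, and permanently wire the root page table page.
 * The extra wiring ensures the root page is never implicitly freed
 * by the unmap path.
 */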
int
amdiommu_domain_alloc_pgtbl(struct amdiommu_domain *domain)
{
	vm_page_t m;
	int dom;

	KASSERT(domain->pgtbl_obj == NULL,
	    ("already initialized %p", domain));

	domain->pgtbl_obj = vm_pager_allocate(OBJT_PHYS, NULL,
	    IDX_TO_OFF(pglvl_max_pages(domain->pglvl)), 0, 0, NULL);
	if (bus_get_domain(domain->iodom.iommu->dev, &dom) == 0)
		domain->pgtbl_obj->domain.dr_policy = DOMAINSET_PREF(dom);
	AMDIOMMU_DOMAIN_PGLOCK(domain);
	m = iommu_pgalloc(domain->pgtbl_obj, 0, IOMMU_PGF_WAITOK |
	    IOMMU_PGF_ZERO | IOMMU_PGF_OBJL);
	/* No implicit free of the top level page table page. */
	vm_page_wire(m);
	domain->pgtblr = m;
	AMDIOMMU_DOMAIN_PGUNLOCK(domain);
	AMDIOMMU_LOCK(domain->unit);
	domain->iodom.flags |= IOMMU_DOMAIN_PGTBL_INITED;
	AMDIOMMU_UNLOCK(domain->unit);
	return (0);
}

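/*
 * Destroy the domain page table.  The wire counts accumulated on the
 * page table pages no longer carry meaning at this point, so they are
 * cleared wholesale before the backing object is deallocated.
 * Identity-mapped domains own no page table object, which is the only
 * legitimate way to arrive here with pgtbl_obj == NULL.
 */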
void
amdiommu_domain_free_pgtbl(struct amdiommu_domain *domain)
{
	struct pctrie_iter pages;
	vm_object_t obj;
	vm_page_t m;

	obj = domain->pgtbl_obj;
	if (obj == NULL) {
		KASSERT((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) != 0,
		    ("lost pagetable object domain %p", domain));
		return;
	}
	AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(domain);
	domain->pgtbl_obj = NULL;
	domain->pgtblr = NULL;

	/* Obliterate ref_counts */
	VM_OBJECT_ASSERT_WLOCKED(obj);
	vm_page_iter_init(&pages, obj);
	VM_RADIX_FORALL(m, &pages)
		vm_page_clearref(m);
	VM_OBJECT_WUNLOCK(obj);
	vm_object_deallocate(obj);
}

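/*
 * Map the page table page containing the pte for the guest address
 * base at paging level lvl, and return a pointer to the pte itself.
 * Levels count down from the root at level zero to the leaves at
 * domain->pglvl - 1.  Missing page table pages are allocated on
 * demand: the new page is installed into the parent level by a
 * recursive call, which also wires both the new page and the parent
 * page.  *sf and *idxp cache the currently mapped page table page
 * between calls, amortizing the sf_buf map/unmap cost over runs of
 * ptes that share a page.  Returns NULL only on a non-sleepable
 * allocation failure.
 */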
static iommu_pte_t *
amdiommu_pgtbl_map_pte(struct amdiommu_domain *domain, iommu_gaddr_t base,
    int lvl, int flags, vm_pindex_t *idxp, struct sf_buf **sf)
{
	iommu_pte_t *pte, *ptep;
	struct sf_buf *sfp;
	vm_page_t m;
	vm_pindex_t idx, idx1;

	idx = pglvl_pgtbl_get_pindex(domain->pglvl, base, lvl);
	if (*sf != NULL && idx == *idxp) {
		pte = (iommu_pte_t *)sf_buf_kva(*sf);
	} else {
		if (*sf != NULL)
			iommu_unmap_pgtbl(*sf);
		*idxp = idx;
retry:
		pte = iommu_map_pgtbl(domain->pgtbl_obj, idx, flags, sf);
		if (pte == NULL) {
			KASSERT(lvl > 0,
			    ("lost root page table page %p", domain));
			/*
			 * Page table page does not exist, allocate
			 * it and create a pte in the preceding page level
			 * to reference the allocated page table page.
			 */
			m = iommu_pgalloc(domain->pgtbl_obj, idx, flags |
			    IOMMU_PGF_ZERO);
			if (m == NULL)
				return (NULL);

			vm_page_wire(m);

			sfp = NULL;
			ptep = amdiommu_pgtbl_map_pte(domain, base, lvl - 1,
			    flags, &idx1, &sfp);
			if (ptep == NULL) {
				KASSERT(m->pindex != 0,
				    ("losing root page %p", domain));
				vm_page_unwire_noq(m);
				iommu_pgfree(domain->pgtbl_obj, m->pindex,
				    flags, NULL);
				return (NULL);
			}
			ptep->pte = VM_PAGE_TO_PHYS(m) | AMDIOMMU_PTE_IR |
			    AMDIOMMU_PTE_IW | AMDIOMMU_PTE_PR |
			    ((domain->pglvl - lvl) << AMDIOMMU_PTE_NLVL_SHIFT);
			vm_page_wire(sf_buf_page(sfp));
			vm_page_unwire_noq(m);
			iommu_unmap_pgtbl(sfp);
			/* Only executed once. */
			goto retry;
		}
	}
	pte += pglvl_pgtbl_pte_off(domain->pglvl, base, lvl);
	return (pte);
}

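/*
 * Install leaf ptes mapping the run of pages ma onto the guest
 * address range [base, base + size).  Each installed pte adds a
 * wiring to the page table page that holds it.  On a pte allocation
 * failure, possible only for non-sleepable requests, the partially
 * constructed mapping is torn down and ENOMEM is returned.
 */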
static int
amdiommu_map_buf_locked(struct amdiommu_domain *domain, iommu_gaddr_t base,
    iommu_gaddr_t size, vm_page_t *ma, uint64_t pflags, int flags,
    struct iommu_map_entry *entry)
{
	iommu_pte_t *pte;
	struct sf_buf *sf;
	iommu_gaddr_t base1;
	vm_pindex_t pi, idx;

	AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(domain);

	base1 = base;
	flags |= IOMMU_PGF_OBJL;
	idx = -1;
	pte = NULL;
	sf = NULL;

	for (pi = 0; size > 0; base += IOMMU_PAGE_SIZE,
	    size -= IOMMU_PAGE_SIZE, pi++) {
		KASSERT(size >= IOMMU_PAGE_SIZE,
		    ("mapping loop overflow %p %jx %jx %jx", domain,
		    (uintmax_t)base, (uintmax_t)size,
		    (uintmax_t)IOMMU_PAGE_SIZE));
		pte = amdiommu_pgtbl_map_pte(domain, base, domain->pglvl - 1,
		    flags, &idx, &sf);
		if (pte == NULL) {
			KASSERT((flags & IOMMU_PGF_WAITOK) == 0,
			    ("failed waitable pte alloc %p", domain));
			if (sf != NULL)
				iommu_unmap_pgtbl(sf);
			amdiommu_unmap_buf_locked(domain, base1, base - base1,
			    flags, entry);
			return (ENOMEM);
		}
		/* next level 0, no superpages */
		pte->pte = VM_PAGE_TO_PHYS(ma[pi]) | pflags | AMDIOMMU_PTE_PR;
		vm_page_wire(sf_buf_page(sf));
	}
	if (sf != NULL)
		iommu_unmap_pgtbl(sf);
	return (0);
}

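/*
 * The .map method for the domain: translate the map entry access
 * flags into pte permission bits, sanity-check the range, create the
 * mapping under the domain page table lock, and then invalidate the
 * IOTLB for the range through the command queue.
 */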
static int
amdiommu_map_buf(struct iommu_domain *iodom, struct iommu_map_entry *entry,
    vm_page_t *ma, uint64_t eflags, int flags)
{
	struct amdiommu_domain *domain;
	uint64_t pflags;
	iommu_gaddr_t base, size;
	int error;

	base = entry->start;
	size = entry->end - entry->start;
	pflags = ((eflags & IOMMU_MAP_ENTRY_READ) != 0 ? AMDIOMMU_PTE_IR : 0) |
	    ((eflags & IOMMU_MAP_ENTRY_WRITE) != 0 ? AMDIOMMU_PTE_IW : 0) |
	    ((eflags & IOMMU_MAP_ENTRY_SNOOP) != 0 ? AMDIOMMU_PTE_FC : 0);
	/* IOMMU_MAP_ENTRY_TM ignored */

	domain = IODOM2DOM(iodom);

	KASSERT((iodom->flags & IOMMU_DOMAIN_IDMAP) == 0,
	    ("modifying idmap pagetable domain %p", domain));
	KASSERT((base & IOMMU_PAGE_MASK) == 0,
	    ("non-aligned base %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT((size & IOMMU_PAGE_MASK) == 0,
	    ("non-aligned size %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT(size > 0, ("zero size %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT(base < iodom->end,
	    ("base too high %p %jx %jx end %jx", domain, (uintmax_t)base,
	    (uintmax_t)size, (uintmax_t)iodom->end));
	KASSERT(base + size < iodom->end,
	    ("end too high %p %jx %jx end %jx", domain, (uintmax_t)base,
	    (uintmax_t)size, (uintmax_t)iodom->end));
	KASSERT(base + size > base,
	    ("size overflow %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT((pflags & (AMDIOMMU_PTE_IR | AMDIOMMU_PTE_IW)) != 0,
	    ("neither read nor write %jx", (uintmax_t)pflags));
	KASSERT((pflags & ~(AMDIOMMU_PTE_IR | AMDIOMMU_PTE_IW |
	    AMDIOMMU_PTE_FC)) == 0,
	    ("invalid pte flags %jx", (uintmax_t)pflags));
	KASSERT((flags & ~IOMMU_PGF_WAITOK) == 0, ("invalid flags %x", flags));

	AMDIOMMU_DOMAIN_PGLOCK(domain);
	error = amdiommu_map_buf_locked(domain, base, size, ma, pflags,
	    flags, entry);
	AMDIOMMU_DOMAIN_PGUNLOCK(domain);

	/*
	 * XXXKIB invalidation seems to be needed even for non-valid->valid
	 * updates.  Recheck.
	 */
	iommu_qi_invalidate_sync(iodom, base, size,
	    (flags & IOMMU_PGF_WAITOK) != 0);
	return (error);
}

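/*
 * Helper for the unmap path: look up the pde at level lvl that
 * references a just-emptied page table page and clear it, continuing
 * the teardown toward the root through amdiommu_unmap_clear_pte().
 */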
static void
amdiommu_free_pgtbl_pde(struct amdiommu_domain *domain, iommu_gaddr_t base,
    int lvl, int flags, struct iommu_map_entry *entry)
{
	struct sf_buf *sf;
	iommu_pte_t *pde;
	vm_pindex_t idx;

	sf = NULL;
	pde = amdiommu_pgtbl_map_pte(domain, base, lvl, flags, &idx, &sf);
	amdiommu_unmap_clear_pte(domain, base, lvl, flags, pde, &sf, entry,
	    true);
}

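/*
 * Clear the pte and drop the wiring it contributed to its page table
 * page.  If that was the last wiring, free the page table page and
 * recurse to clear the pde referencing it at the parent level,
 * lvl - 1.  The permanently wired root page is never freed here.
 */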
static void
amdiommu_unmap_clear_pte(struct amdiommu_domain *domain, iommu_gaddr_t base,
    int lvl, int flags, iommu_pte_t *pte, struct sf_buf **sf,
    struct iommu_map_entry *entry, bool free_sf)
{
	vm_page_t m;

	pte->pte = 0;
	m = sf_buf_page(*sf);
	if (free_sf) {
		iommu_unmap_pgtbl(*sf);
		*sf = NULL;
	}
	if (!vm_page_unwire_noq(m))
		return;
	KASSERT(lvl != 0,
	    ("lost reference (lvl) on root pg domain %p base %jx lvl %d",
	    domain, (uintmax_t)base, lvl));
	KASSERT(m->pindex != 0,
	    ("lost reference (idx) on root pg domain %p base %jx lvl %d",
	    domain, (uintmax_t)base, lvl));
	iommu_pgfree(domain->pgtbl_obj, m->pindex, flags, entry);
	amdiommu_free_pgtbl_pde(domain, base, lvl - 1, flags, entry);
}

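/*
 * Remove the leaf ptes covering the range [base, base + size),
 * releasing page table pages up the hierarchy as they become empty.
 * Only IOMMU_PAGE_SIZE mappings are torn down, matching the mapping
 * side, which never creates superpages.
 */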
static int
amdiommu_unmap_buf_locked(struct amdiommu_domain *domain, iommu_gaddr_t base,
    iommu_gaddr_t size, int flags, struct iommu_map_entry *entry)
{
	iommu_pte_t *pte;
	struct sf_buf *sf;
	vm_pindex_t idx;
	iommu_gaddr_t pg_sz;

	AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(domain);
	if (size == 0)
		return (0);

	KASSERT((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) == 0,
	    ("modifying idmap pagetable domain %p", domain));
	KASSERT((base & IOMMU_PAGE_MASK) == 0,
	    ("non-aligned base %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT((size & IOMMU_PAGE_MASK) == 0,
	    ("non-aligned size %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT(base < DOM2IODOM(domain)->end,
	    ("base too high %p %jx %jx end %jx", domain, (uintmax_t)base,
	    (uintmax_t)size, (uintmax_t)DOM2IODOM(domain)->end));
	KASSERT(base + size < DOM2IODOM(domain)->end,
	    ("end too high %p %jx %jx end %jx", domain, (uintmax_t)base,
	    (uintmax_t)size, (uintmax_t)DOM2IODOM(domain)->end));
	KASSERT(base + size > base,
	    ("size overflow %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT((flags & ~IOMMU_PGF_WAITOK) == 0, ("invalid flags %x", flags));

	pg_sz = IOMMU_PAGE_SIZE;
	flags |= IOMMU_PGF_OBJL;

	for (sf = NULL; size > 0; base += pg_sz, size -= pg_sz) {
		pte = amdiommu_pgtbl_map_pte(domain, base,
		    domain->pglvl - 1, flags, &idx, &sf);
		KASSERT(pte != NULL,
		    ("sleeping or page missed %p %jx %d 0x%x",
		    domain, (uintmax_t)base, domain->pglvl - 1, flags));
		amdiommu_unmap_clear_pte(domain, base, domain->pglvl - 1,
		    flags, pte, &sf, entry, false);
		KASSERT(size >= pg_sz,
		    ("unmapping loop overflow %p %jx %jx %jx", domain,
		    (uintmax_t)base, (uintmax_t)size, (uintmax_t)pg_sz));
	}
	if (sf != NULL)
		iommu_unmap_pgtbl(sf);
	return (0);
}

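/*
 * The .unmap method for the domain: tear down the ptes covering the
 * entry range under the domain page table lock.  Unlike the map path,
 * no IOTLB invalidation is issued here; it is performed by the
 * callers as part of entry unload.
 */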
static int
amdiommu_unmap_buf(struct iommu_domain *iodom, struct iommu_map_entry *entry,
    int flags)
{
	struct amdiommu_domain *domain;
	int error;

	domain = IODOM2DOM(iodom);

	AMDIOMMU_DOMAIN_PGLOCK(domain);
	error = amdiommu_unmap_buf_locked(domain, entry->start,
	    entry->end - entry->start, flags, entry);
	AMDIOMMU_DOMAIN_PGUNLOCK(domain);
	return (error);
}

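/*
 * Page table manipulation methods called by the generic dev/iommu
 * busdma code on behalf of AMD IOMMU domains.
 */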
const struct iommu_domain_map_ops amdiommu_domain_map_ops = {
	.map = amdiommu_map_buf,
	.unmap = amdiommu_unmap_buf,
};