xref: /freebsd/sys/x86/iommu/amd_idpgtbl.c (revision b01455592c22515429a030f47173acd40eb3e218)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2024 The FreeBSD Foundation
5  *
6  * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
7  * under sponsorship from the FreeBSD Foundation.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/malloc.h>
34 #include <sys/bus.h>
35 #include <sys/domainset.h>
36 #include <sys/interrupt.h>
37 #include <sys/kernel.h>
38 #include <sys/ktr.h>
39 #include <sys/lock.h>
40 #include <sys/memdesc.h>
41 #include <sys/mutex.h>
42 #include <sys/proc.h>
43 #include <sys/rwlock.h>
44 #include <sys/rman.h>
45 #include <sys/sf_buf.h>
46 #include <sys/sysctl.h>
47 #include <sys/taskqueue.h>
48 #include <sys/tree.h>
49 #include <sys/uio.h>
50 #include <sys/vmem.h>
51 #include <vm/vm.h>
52 #include <vm/vm_extern.h>
53 #include <vm/vm_kern.h>
54 #include <vm/vm_object.h>
55 #include <vm/vm_page.h>
56 #include <vm/vm_pager.h>
57 #include <vm/vm_radix.h>
58 #include <vm/vm_map.h>
59 #include <dev/pci/pcireg.h>
60 #include <machine/atomic.h>
61 #include <machine/bus.h>
62 #include <machine/cpu.h>
63 #include <machine/md_var.h>
64 #include <machine/specialreg.h>
65 #include <x86/include/busdma_impl.h>
66 #include <dev/iommu/busdma_iommu.h>
67 #include <x86/iommu/amd_reg.h>
68 #include <x86/iommu/x86_iommu.h>
69 #include <x86/iommu/amd_iommu.h>
70 
71 static void amdiommu_unmap_clear_pte(struct amdiommu_domain *domain,
72     iommu_gaddr_t base, int lvl, int flags, iommu_pte_t *pte,
73     struct sf_buf **sf, struct iommu_map_entry *entry, bool free_sf);
74 static int amdiommu_unmap_buf_locked(struct amdiommu_domain *domain,
75     iommu_gaddr_t base, iommu_gaddr_t size, int flags,
76     struct iommu_map_entry *entry);
77 
/*
 * Allocate the backing VM object and the root (top-level) page table
 * page for the domain.  Always succeeds (allocation is IOMMU_PGF_WAITOK)
 * and returns 0.  Must be called once per domain; asserts that no page
 * table object exists yet.
 */
int
amdiommu_domain_alloc_pgtbl(struct amdiommu_domain *domain)
{
	vm_page_t m;
	int dom;

	KASSERT(domain->pgtbl_obj == NULL,
	    ("already initialized %p", domain));

	/*
	 * The object is sized for the maximum number of page table pages
	 * a table with domain->pglvl levels may ever need.
	 */
	domain->pgtbl_obj = vm_pager_allocate(OBJT_PHYS, NULL,
	    IDX_TO_OFF(pglvl_max_pages(domain->pglvl)), 0, 0, NULL);
	/* Prefer memory from the NUMA domain of the IOMMU unit, if known. */
	if (bus_get_domain(domain->iodom.iommu->dev, &dom) == 0)
		domain->pgtbl_obj->domain.dr_policy = DOMAINSET_PREF(dom);
	AMDIOMMU_DOMAIN_PGLOCK(domain);
	/* Root page lives at pindex 0; zeroed so all PTEs start !PR. */
	m = iommu_pgalloc(domain->pgtbl_obj, 0, IOMMU_PGF_WAITOK |
	    IOMMU_PGF_ZERO | IOMMU_PGF_OBJL);
	/* No implicit free of the top level page table page. */
	vm_page_wire(m);
	domain->pgtblr = m;
	AMDIOMMU_DOMAIN_PGUNLOCK(domain);
	/* Publish the "page table initialized" flag under the unit lock. */
	AMDIOMMU_LOCK(domain->unit);
	domain->iodom.flags |= IOMMU_DOMAIN_PGTBL_INITED;
	AMDIOMMU_UNLOCK(domain->unit);
	return (0);
}
103 
/*
 * Release the domain's page table object and all pages it holds.
 * Called with the domain page table lock (the object write lock) held;
 * the lock is dropped before the object is deallocated.  An identity-map
 * domain legitimately has no page table object, which is asserted.
 */
void
amdiommu_domain_free_pgtbl(struct amdiommu_domain *domain)
{
	struct pctrie_iter pages;
	vm_object_t obj;
	vm_page_t m;

	obj = domain->pgtbl_obj;
	if (obj == NULL) {
		KASSERT((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) != 0,
		    ("lost pagetable object domain %p", domain));
		return;
	}
	AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(domain);
	domain->pgtbl_obj = NULL;
	domain->pgtblr = NULL;

	/*
	 * Obliterate ref_counts: page table pages carry wirings used as
	 * PTE reference counts, which would otherwise prevent the pages
	 * from being freed when the object is deallocated below.
	 */
	VM_OBJECT_ASSERT_WLOCKED(obj);
	vm_page_iter_init(&pages, obj);
	VM_RADIX_FORALL(m, &pages)
		vm_page_clearref(m);
	VM_OBJECT_WUNLOCK(obj);
	vm_object_deallocate(obj);
}
129 
/*
 * Map the page table page containing the PTE for address 'base' at page
 * table level 'lvl' and return a pointer to that PTE.  *sf / *idxp cache
 * the currently mapped page table page and its pindex between calls, so
 * consecutive lookups on the same page avoid remapping; the caller must
 * initialize *sf to NULL before the first call and unmap the final *sf.
 *
 * If the needed page table page does not exist and IOMMU_PGF_WAITOK is
 * not set in 'flags', intermediate pages are allocated on demand,
 * recursing toward the root to install the parent PTE; returns NULL only
 * when a non-waitable allocation fails.
 */
static iommu_pte_t *
amdiommu_pgtbl_map_pte(struct amdiommu_domain *domain, iommu_gaddr_t base,
    int lvl, int flags, vm_pindex_t *idxp, struct sf_buf **sf)
{
	iommu_pte_t *pte, *ptep;
	struct sf_buf *sfp;
	vm_page_t m;
	vm_pindex_t idx, idx1;

	idx = pglvl_pgtbl_get_pindex(domain->pglvl, base, lvl);
	if (*sf != NULL && idx == *idxp) {
		/* Cache hit: the right page table page is already mapped. */
		pte = (iommu_pte_t *)sf_buf_kva(*sf);
	} else {
		if (*sf != NULL)
			iommu_unmap_pgtbl(*sf);
		*idxp = idx;
retry:
		pte = iommu_map_pgtbl(domain->pgtbl_obj, idx, flags, sf);
		if (pte == NULL) {
			/* The root page (pindex 0) always exists. */
			KASSERT(lvl > 0,
			    ("lost root page table page %p", domain));
			/*
			 * Page table page does not exist, allocate
			 * it and create a pte in the preceding page level
			 * to reference the allocated page table page.
			 */
			m = iommu_pgalloc(domain->pgtbl_obj, idx, flags |
			    IOMMU_PGF_ZERO);
			if (m == NULL)
				return (NULL);

			/*
			 * Temporary wire keeps the new page alive while
			 * the parent PTE is being installed.
			 */
			vm_page_wire(m);

			sfp = NULL;
			/* Recurse to the parent level to install our PDE. */
			ptep = amdiommu_pgtbl_map_pte(domain, base, lvl - 1,
			    flags, &idx1, &sfp);
			if (ptep == NULL) {
				KASSERT(m->pindex != 0,
				    ("loosing root page %p", domain));
				vm_page_unwire_noq(m);
				iommu_pgfree(domain->pgtbl_obj, m->pindex,
				    flags, NULL);
				return (NULL);
			}
			/*
			 * Parent PTE: physical address of the new page
			 * table page, read/write, present, with the AMD
			 * next-level field derived from the level depth.
			 */
			ptep->pte = VM_PAGE_TO_PHYS(m) | AMDIOMMU_PTE_IR |
			    AMDIOMMU_PTE_IW | AMDIOMMU_PTE_PR |
			    ((domain->pglvl - lvl) << AMDIOMMU_PTE_NLVL_SHIFT);
			/*
			 * The parent page gains a reference for the new
			 * child; the child's temporary wire is dropped
			 * (it is now reachable through the parent PTE).
			 */
			vm_page_wire(sf_buf_page(sfp));
			vm_page_unwire_noq(m);
			iommu_unmap_pgtbl(sfp);
			/* Only executed once. */
			goto retry;
		}
	}
	pte += pglvl_pgtbl_pte_off(domain->pglvl, base, lvl);
	return (pte);
}
187 
/*
 * Install leaf PTEs mapping the guest-address range [base, base + size)
 * to the physical pages in 'ma', with permission bits 'pflags'.  Called
 * with the domain page table lock held.  On a failed (non-waitable)
 * page table page allocation, already-installed PTEs for this range are
 * torn down and ENOMEM is returned.
 */
static int
amdiommu_map_buf_locked(struct amdiommu_domain *domain, iommu_gaddr_t base,
    iommu_gaddr_t size, vm_page_t *ma, uint64_t pflags, int flags,
    struct iommu_map_entry *entry)
{
	iommu_pte_t *pte;
	struct sf_buf *sf;
	iommu_gaddr_t base1;
	vm_pindex_t pi, idx;

	AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(domain);

	base1 = base;		/* Remember start for unwind on failure. */
	flags |= IOMMU_PGF_OBJL;	/* Object lock is already held. */
	idx = -1;
	pte = NULL;
	sf = NULL;

	for (pi = 0; size > 0; base += IOMMU_PAGE_SIZE, size -= IOMMU_PAGE_SIZE,
	    pi++) {
		KASSERT(size >= IOMMU_PAGE_SIZE,
		    ("mapping loop overflow %p %jx %jx %jx", domain,
		    (uintmax_t)base, (uintmax_t)size, (uintmax_t)IOMMU_PAGE_SIZE));
		/* Leaf PTEs live at the deepest level, pglvl - 1. */
		pte = amdiommu_pgtbl_map_pte(domain, base, domain->pglvl - 1,
		    flags, &idx, &sf);
		if (pte == NULL) {
			/* A waitable allocation must not fail. */
			KASSERT((flags & IOMMU_PGF_WAITOK) == 0,
			    ("failed waitable pte alloc %p", domain));
			if (sf != NULL)
				iommu_unmap_pgtbl(sf);
			/* Unwind the part of the range mapped so far. */
			amdiommu_unmap_buf_locked(domain, base1, base - base1,
			    flags, entry);
			return (ENOMEM);
		}
		/* next level 0, no superpages */
		pte->pte = VM_PAGE_TO_PHYS(ma[pi]) | pflags | AMDIOMMU_PTE_PR;
		/* Each valid PTE counts as a reference on its pgtbl page. */
		vm_page_wire(sf_buf_page(sf));
	}
	if (sf != NULL)
		iommu_unmap_pgtbl(sf);
	return (0);
}
230 
/*
 * iommu_domain_map_ops map method: translate the map-entry permission
 * flags into AMD PTE bits, validate the request, perform the mapping
 * under the domain page table lock, and issue the IOTLB invalidation.
 * Returns 0 on success or ENOMEM from the locked helper.
 */
static int
amdiommu_map_buf(struct iommu_domain *iodom, struct iommu_map_entry *entry,
    vm_page_t *ma, uint64_t eflags, int flags)
{
	struct amdiommu_domain *domain;
	uint64_t pflags;
	iommu_gaddr_t base, size;
	int error;

	base = entry->start;
	size = entry->end - entry->start;
	/* Map generic entry permissions to AMD PTE read/write/coherency. */
	pflags = ((eflags & IOMMU_MAP_ENTRY_READ) != 0 ? AMDIOMMU_PTE_IR : 0) |
	    ((eflags & IOMMU_MAP_ENTRY_WRITE) != 0 ? AMDIOMMU_PTE_IW : 0) |
	    ((eflags & IOMMU_MAP_ENTRY_SNOOP) != 0 ? AMDIOMMU_PTE_FC : 0);
	/* IOMMU_MAP_ENTRY_TM ignored */

	domain = IODOM2DOM(iodom);

	/* Sanity: aligned, non-empty, in-range, non-overflowing request. */
	KASSERT((iodom->flags & IOMMU_DOMAIN_IDMAP) == 0,
	    ("modifying idmap pagetable domain %p", domain));
	KASSERT((base & IOMMU_PAGE_MASK) == 0,
	    ("non-aligned base %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT((size & IOMMU_PAGE_MASK) == 0,
	    ("non-aligned size %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT(size > 0, ("zero size %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT(base < iodom->end,
	    ("base too high %p %jx %jx end %jx", domain, (uintmax_t)base,
	    (uintmax_t)size, (uintmax_t)iodom->end));
	KASSERT(base + size < iodom->end,
	    ("end too high %p %jx %jx end %jx", domain, (uintmax_t)base,
	    (uintmax_t)size, (uintmax_t)iodom->end));
	KASSERT(base + size > base,
	    ("size overflow %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT((pflags & (AMDIOMMU_PTE_IR | AMDIOMMU_PTE_IW)) != 0,
	    ("neither read nor write %jx", (uintmax_t)pflags));
	KASSERT((pflags & ~(AMDIOMMU_PTE_IR | AMDIOMMU_PTE_IW | AMDIOMMU_PTE_FC
	    )) == 0,
	    ("invalid pte flags %jx", (uintmax_t)pflags));
	KASSERT((flags & ~IOMMU_PGF_WAITOK) == 0, ("invalid flags %x", flags));

	AMDIOMMU_DOMAIN_PGLOCK(domain);
	error = amdiommu_map_buf_locked(domain, base, size, ma, pflags,
	    flags, entry);
	AMDIOMMU_DOMAIN_PGUNLOCK(domain);

	/*
	 * XXXKIB invalidation seems to be needed even for non-valid->valid
	 * updates.  Recheck.
	 */
	iommu_qi_invalidate_sync(iodom, base, size,
	    (flags & IOMMU_PGF_WAITOK) != 0);
	return (error);
}
288 
289 static void
amdiommu_free_pgtbl_pde(struct amdiommu_domain * domain,iommu_gaddr_t base,int lvl,int flags,struct iommu_map_entry * entry)290 amdiommu_free_pgtbl_pde(struct amdiommu_domain *domain, iommu_gaddr_t base,
291     int lvl, int flags, struct iommu_map_entry *entry)
292 {
293 	struct sf_buf *sf;
294 	iommu_pte_t *pde;
295 	vm_pindex_t idx;
296 
297 	sf = NULL;
298 	pde = amdiommu_pgtbl_map_pte(domain, base, lvl, flags, &idx, &sf);
299 	amdiommu_unmap_clear_pte(domain, base, lvl, flags, pde, &sf, entry,
300 	    true);
301 }
302 
/*
 * Zero the given PTE and drop the reference it held on its page table
 * page.  If that was the last reference, free the now-empty page table
 * page and recurse (via amdiommu_free_pgtbl_pde) to clear the parent
 * entry one level up.  'free_sf' tells whether this function consumes
 * *sf; when false the caller keeps its mapping cache.
 */
static void
amdiommu_unmap_clear_pte(struct amdiommu_domain *domain, iommu_gaddr_t base,
    int lvl, int flags, iommu_pte_t *pte, struct sf_buf **sf,
    struct iommu_map_entry *entry, bool free_sf)
{
	vm_page_t m;

	pte->pte = 0;
	m = sf_buf_page(*sf);
	if (free_sf) {
		iommu_unmap_pgtbl(*sf);
		*sf = NULL;
	}
	/* Still-referenced page table page: nothing more to do. */
	if (!vm_page_unwire_noq(m))
		return;
	/* The root page is explicitly wired and must never get here. */
	KASSERT(lvl != 0,
	    ("lost reference (lvl) on root pg domain %p base %jx lvl %d",
	    domain, (uintmax_t)base, lvl));
	KASSERT(m->pindex != 0,
	    ("lost reference (idx) on root pg domain %p base %jx lvl %d",
	    domain, (uintmax_t)base, lvl));
	iommu_pgfree(domain->pgtbl_obj, m->pindex, flags, entry);
	/* Clear the parent's reference to the freed page. */
	amdiommu_free_pgtbl_pde(domain, base, lvl - 1, flags, entry);
}
327 
/*
 * Remove the leaf PTEs covering [base, base + size), freeing page table
 * pages that become empty.  Called with the domain page table lock held;
 * the range must already be mapped (asserted).  Always returns 0.
 */
static int
amdiommu_unmap_buf_locked(struct amdiommu_domain *domain, iommu_gaddr_t base,
    iommu_gaddr_t size, int flags, struct iommu_map_entry *entry)
{
	iommu_pte_t *pte;
	struct sf_buf *sf;
	vm_pindex_t idx;
	iommu_gaddr_t pg_sz;

	AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(domain);
	if (size == 0)
		return (0);

	/* Sanity: aligned, in-range, non-overflowing request. */
	KASSERT((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) == 0,
	    ("modifying idmap pagetable domain %p", domain));
	KASSERT((base & IOMMU_PAGE_MASK) == 0,
	    ("non-aligned base %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT((size & IOMMU_PAGE_MASK) == 0,
	    ("non-aligned size %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT(base < DOM2IODOM(domain)->end,
	    ("base too high %p %jx %jx end %jx", domain, (uintmax_t)base,
	    (uintmax_t)size, (uintmax_t)DOM2IODOM(domain)->end));
	KASSERT(base + size < DOM2IODOM(domain)->end,
	    ("end too high %p %jx %jx end %jx", domain, (uintmax_t)base,
	    (uintmax_t)size, (uintmax_t)DOM2IODOM(domain)->end));
	KASSERT(base + size > base,
	    ("size overflow %p %jx %jx", domain, (uintmax_t)base,
	    (uintmax_t)size));
	KASSERT((flags & ~IOMMU_PGF_WAITOK) == 0, ("invalid flags %x", flags));

	pg_sz = IOMMU_PAGE_SIZE;	/* No superpages: leaf pages only. */
	flags |= IOMMU_PGF_OBJL;	/* Object lock is already held. */

	for (sf = NULL; size > 0; base += pg_sz, size -= pg_sz) {
		pte = amdiommu_pgtbl_map_pte(domain, base,
		    domain->pglvl - 1, flags, &idx, &sf);
		/* Unmapping never allocates; the PTE page must exist. */
		KASSERT(pte != NULL,
		    ("sleeping or page missed %p %jx %d 0x%x",
		    domain, (uintmax_t)base, domain->pglvl - 1, flags));
		/* free_sf == false keeps the sf cache across iterations. */
		amdiommu_unmap_clear_pte(domain, base, domain->pglvl - 1,
		    flags, pte, &sf, entry, false);
		KASSERT(size >= pg_sz,
		    ("unmapping loop overflow %p %jx %jx %jx", domain,
		    (uintmax_t)base, (uintmax_t)size, (uintmax_t)pg_sz));
	}
	if (sf != NULL)
		iommu_unmap_pgtbl(sf);
	return (0);
}
379 
380 static int
amdiommu_unmap_buf(struct iommu_domain * iodom,struct iommu_map_entry * entry,int flags)381 amdiommu_unmap_buf(struct iommu_domain *iodom, struct iommu_map_entry *entry,
382     int flags)
383 {
384 	struct amdiommu_domain *domain;
385 	int error;
386 
387 	domain = IODOM2DOM(iodom);
388 
389 	AMDIOMMU_DOMAIN_PGLOCK(domain);
390 	error = amdiommu_unmap_buf_locked(domain, entry->start,
391 	    entry->end - entry->start, flags, entry);
392 	AMDIOMMU_DOMAIN_PGUNLOCK(domain);
393 	return (error);
394 }
395 
/* Map/unmap methods exported to the generic x86 IOMMU framework. */
const struct iommu_domain_map_ops amdiommu_domain_map_ops = {
	.map = amdiommu_map_buf,
	.unmap = amdiommu_unmap_buf,
};
400