xref: /freebsd/sys/x86/iommu/amd_idpgtbl.c (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2024 The FreeBSD Foundation
5  *
6  * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
7  * under sponsorship from the FreeBSD Foundation.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/malloc.h>
34 #include <sys/bus.h>
35 #include <sys/domainset.h>
36 #include <sys/interrupt.h>
37 #include <sys/kernel.h>
38 #include <sys/ktr.h>
39 #include <sys/lock.h>
40 #include <sys/memdesc.h>
41 #include <sys/mutex.h>
42 #include <sys/proc.h>
43 #include <sys/rwlock.h>
44 #include <sys/rman.h>
45 #include <sys/sf_buf.h>
46 #include <sys/sysctl.h>
47 #include <sys/taskqueue.h>
48 #include <sys/tree.h>
49 #include <sys/uio.h>
50 #include <sys/vmem.h>
51 #include <vm/vm.h>
52 #include <vm/vm_extern.h>
53 #include <vm/vm_kern.h>
54 #include <vm/vm_object.h>
55 #include <vm/vm_page.h>
56 #include <vm/vm_pager.h>
57 #include <vm/vm_map.h>
58 #include <dev/pci/pcireg.h>
59 #include <machine/atomic.h>
60 #include <machine/bus.h>
61 #include <machine/cpu.h>
62 #include <machine/md_var.h>
63 #include <machine/specialreg.h>
64 #include <x86/include/busdma_impl.h>
65 #include <dev/iommu/busdma_iommu.h>
66 #include <x86/iommu/amd_reg.h>
67 #include <x86/iommu/x86_iommu.h>
68 #include <x86/iommu/amd_iommu.h>
69 
/*
 * Forward declarations.  Both routines are called earlier in this file
 * than they are defined: amdiommu_unmap_clear_pte() from
 * amdiommu_free_pgtbl_pde(), and amdiommu_unmap_buf_locked() from the
 * failure path of amdiommu_map_buf_locked().
 */
static void amdiommu_unmap_clear_pte(struct amdiommu_domain *domain,
    iommu_gaddr_t base, int lvl, int flags, iommu_pte_t *pte,
    struct sf_buf **sf, struct iommu_map_entry *entry, bool free_sf);
static int amdiommu_unmap_buf_locked(struct amdiommu_domain *domain,
    iommu_gaddr_t base, iommu_gaddr_t size, int flags,
    struct iommu_map_entry *entry);
76 
77 int
78 amdiommu_domain_alloc_pgtbl(struct amdiommu_domain *domain)
79 {
80 	vm_page_t m;
81 	int dom;
82 
83 	KASSERT(domain->pgtbl_obj == NULL,
84 	    ("already initialized %p", domain));
85 
86 	domain->pgtbl_obj = vm_pager_allocate(OBJT_PHYS, NULL,
87 	    IDX_TO_OFF(pglvl_max_pages(domain->pglvl)), 0, 0, NULL);
88 	if (bus_get_domain(domain->iodom.iommu->dev, &dom) == 0)
89 		domain->pgtbl_obj->domain.dr_policy = DOMAINSET_PREF(dom);
90 	AMDIOMMU_DOMAIN_PGLOCK(domain);
91 	m = iommu_pgalloc(domain->pgtbl_obj, 0, IOMMU_PGF_WAITOK |
92 	    IOMMU_PGF_ZERO | IOMMU_PGF_OBJL);
93 	/* No implicit free of the top level page table page. */
94 	vm_page_wire(m);
95 	domain->pgtblr = m;
96 	AMDIOMMU_DOMAIN_PGUNLOCK(domain);
97 	AMDIOMMU_LOCK(domain->unit);
98 	domain->iodom.flags |= IOMMU_DOMAIN_PGTBL_INITED;
99 	AMDIOMMU_UNLOCK(domain->unit);
100 	return (0);
101 }
102 
103 void
104 amdiommu_domain_free_pgtbl(struct amdiommu_domain *domain)
105 {
106 	vm_object_t obj;
107 	vm_page_t m;
108 
109 	obj = domain->pgtbl_obj;
110 	if (obj == NULL) {
111 		KASSERT((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) != 0,
112 		    ("lost pagetable object domain %p", domain));
113 		return;
114 	}
115 	AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(domain);
116 	domain->pgtbl_obj = NULL;
117 	domain->pgtblr = NULL;
118 
119 	/* Obliterate ref_counts */
120 	VM_OBJECT_ASSERT_WLOCKED(obj);
121 	for (m = vm_page_lookup(obj, 0); m != NULL; m = vm_page_next(m))
122 		vm_page_clearref(m);
123 	VM_OBJECT_WUNLOCK(obj);
124 	vm_object_deallocate(obj);
125 }
126 
127 static iommu_pte_t *
128 amdiommu_pgtbl_map_pte(struct amdiommu_domain *domain, iommu_gaddr_t base,
129     int lvl, int flags, vm_pindex_t *idxp, struct sf_buf **sf)
130 {
131 	iommu_pte_t *pte, *ptep;
132 	struct sf_buf *sfp;
133 	vm_page_t m;
134 	vm_pindex_t idx, idx1;
135 
136 	idx = pglvl_pgtbl_get_pindex(domain->pglvl, base, lvl);
137 	if (*sf != NULL && idx == *idxp) {
138 		pte = (iommu_pte_t *)sf_buf_kva(*sf);
139 	} else {
140 		if (*sf != NULL)
141 			iommu_unmap_pgtbl(*sf);
142 		*idxp = idx;
143 retry:
144 		pte = iommu_map_pgtbl(domain->pgtbl_obj, idx, flags, sf);
145 		if (pte == NULL) {
146 			KASSERT(lvl > 0,
147 			    ("lost root page table page %p", domain));
148 			/*
149 			 * Page table page does not exist, allocate
150 			 * it and create a pte in the preceeding page level
151 			 * to reference the allocated page table page.
152 			 */
153 			m = iommu_pgalloc(domain->pgtbl_obj, idx, flags |
154 			    IOMMU_PGF_ZERO);
155 			if (m == NULL)
156 				return (NULL);
157 
158 			vm_page_wire(m);
159 
160 			sfp = NULL;
161 			ptep = amdiommu_pgtbl_map_pte(domain, base, lvl - 1,
162 			    flags, &idx1, &sfp);
163 			if (ptep == NULL) {
164 				KASSERT(m->pindex != 0,
165 				    ("loosing root page %p", domain));
166 				vm_page_unwire_noq(m);
167 				iommu_pgfree(domain->pgtbl_obj, m->pindex,
168 				    flags, NULL);
169 				return (NULL);
170 			}
171 			ptep->pte = VM_PAGE_TO_PHYS(m) |  AMDIOMMU_PTE_IR |
172 			    AMDIOMMU_PTE_IW | AMDIOMMU_PTE_PR |
173 			    ((domain->pglvl - lvl) << AMDIOMMU_PTE_NLVL_SHIFT);
174 			vm_page_wire(sf_buf_page(sfp));
175 			vm_page_unwire_noq(m);
176 			iommu_unmap_pgtbl(sfp);
177 			/* Only executed once. */
178 			goto retry;
179 		}
180 	}
181 	pte += pglvl_pgtbl_pte_off(domain->pglvl, base, lvl);
182 	return (pte);
183 }
184 
185 static int
186 amdiommu_map_buf_locked(struct amdiommu_domain *domain, iommu_gaddr_t base,
187     iommu_gaddr_t size, vm_page_t *ma, uint64_t pflags, int flags,
188     struct iommu_map_entry *entry)
189 {
190 	iommu_pte_t *pte;
191 	struct sf_buf *sf;
192 	iommu_gaddr_t base1;
193 	vm_pindex_t pi, idx;
194 
195 	AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(domain);
196 
197 	base1 = base;
198 	flags |= IOMMU_PGF_OBJL;
199 	idx = -1;
200 	pte = NULL;
201 	sf = NULL;
202 
203 	for (pi = 0; size > 0; base += IOMMU_PAGE_SIZE, size -= IOMMU_PAGE_SIZE,
204 	    pi++) {
205 		KASSERT(size >= IOMMU_PAGE_SIZE,
206 		    ("mapping loop overflow %p %jx %jx %jx", domain,
207 		    (uintmax_t)base, (uintmax_t)size, (uintmax_t)IOMMU_PAGE_SIZE));
208 		pte = amdiommu_pgtbl_map_pte(domain, base, domain->pglvl - 1,
209 		    flags, &idx, &sf);
210 		if (pte == NULL) {
211 			KASSERT((flags & IOMMU_PGF_WAITOK) == 0,
212 			    ("failed waitable pte alloc %p", domain));
213 			if (sf != NULL)
214 				iommu_unmap_pgtbl(sf);
215 			amdiommu_unmap_buf_locked(domain, base1, base - base1,
216 			    flags, entry);
217 			return (ENOMEM);
218 		}
219 		/* next level 0, no superpages */
220 		pte->pte = VM_PAGE_TO_PHYS(ma[pi]) | pflags | AMDIOMMU_PTE_PR;
221 		vm_page_wire(sf_buf_page(sf));
222 	}
223 	if (sf != NULL)
224 		iommu_unmap_pgtbl(sf);
225 	return (0);
226 }
227 
228 static int
229 amdiommu_map_buf(struct iommu_domain *iodom, struct iommu_map_entry *entry,
230     vm_page_t *ma, uint64_t eflags, int flags)
231 {
232 	struct amdiommu_domain *domain;
233 	uint64_t pflags;
234 	iommu_gaddr_t base, size;
235 	int error;
236 
237 	base = entry->start;
238 	size = entry->end - entry->start;
239 	pflags = ((eflags & IOMMU_MAP_ENTRY_READ) != 0 ? AMDIOMMU_PTE_IR : 0) |
240 	    ((eflags & IOMMU_MAP_ENTRY_WRITE) != 0 ? AMDIOMMU_PTE_IW : 0) |
241 	    ((eflags & IOMMU_MAP_ENTRY_SNOOP) != 0 ? AMDIOMMU_PTE_FC : 0);
242 	/* IOMMU_MAP_ENTRY_TM ignored */
243 
244 	domain = IODOM2DOM(iodom);
245 
246 	KASSERT((iodom->flags & IOMMU_DOMAIN_IDMAP) == 0,
247 	    ("modifying idmap pagetable domain %p", domain));
248 	KASSERT((base & IOMMU_PAGE_MASK) == 0,
249 	    ("non-aligned base %p %jx %jx", domain, (uintmax_t)base,
250 	    (uintmax_t)size));
251 	KASSERT((size & IOMMU_PAGE_MASK) == 0,
252 	    ("non-aligned size %p %jx %jx", domain, (uintmax_t)base,
253 	    (uintmax_t)size));
254 	KASSERT(size > 0, ("zero size %p %jx %jx", domain, (uintmax_t)base,
255 	    (uintmax_t)size));
256 	KASSERT(base < iodom->end,
257 	    ("base too high %p %jx %jx end %jx", domain, (uintmax_t)base,
258 	    (uintmax_t)size, (uintmax_t)iodom->end));
259 	KASSERT(base + size < iodom->end,
260 	    ("end too high %p %jx %jx end %jx", domain, (uintmax_t)base,
261 	    (uintmax_t)size, (uintmax_t)iodom->end));
262 	KASSERT(base + size > base,
263 	    ("size overflow %p %jx %jx", domain, (uintmax_t)base,
264 	    (uintmax_t)size));
265 	KASSERT((pflags & (AMDIOMMU_PTE_IR | AMDIOMMU_PTE_IW)) != 0,
266 	    ("neither read nor write %jx", (uintmax_t)pflags));
267 	KASSERT((pflags & ~(AMDIOMMU_PTE_IR | AMDIOMMU_PTE_IW | AMDIOMMU_PTE_FC
268 	    )) == 0,
269 	    ("invalid pte flags %jx", (uintmax_t)pflags));
270 	KASSERT((flags & ~IOMMU_PGF_WAITOK) == 0, ("invalid flags %x", flags));
271 
272 	AMDIOMMU_DOMAIN_PGLOCK(domain);
273 	error = amdiommu_map_buf_locked(domain, base, size, ma, pflags,
274 	    flags, entry);
275 	AMDIOMMU_DOMAIN_PGUNLOCK(domain);
276 
277 	/*
278 	 * XXXKIB invalidation seems to be needed even for non-valid->valid
279 	 * updates.  Recheck.
280 	 */
281 	iommu_qi_invalidate_sync(iodom, base, size,
282 	    (flags & IOMMU_PGF_WAITOK) != 0);
283 	return (error);
284 }
285 
286 static void
287 amdiommu_free_pgtbl_pde(struct amdiommu_domain *domain, iommu_gaddr_t base,
288     int lvl, int flags, struct iommu_map_entry *entry)
289 {
290 	struct sf_buf *sf;
291 	iommu_pte_t *pde;
292 	vm_pindex_t idx;
293 
294 	sf = NULL;
295 	pde = amdiommu_pgtbl_map_pte(domain, base, lvl, flags, &idx, &sf);
296 	amdiommu_unmap_clear_pte(domain, base, lvl, flags, pde, &sf, entry,
297 	    true);
298 }
299 
300 static void
301 amdiommu_unmap_clear_pte(struct amdiommu_domain *domain, iommu_gaddr_t base,
302     int lvl, int flags, iommu_pte_t *pte, struct sf_buf **sf,
303     struct iommu_map_entry *entry, bool free_sf)
304 {
305 	vm_page_t m;
306 
307 	pte->pte = 0;
308 	m = sf_buf_page(*sf);
309 	if (free_sf) {
310 		iommu_unmap_pgtbl(*sf);
311 		*sf = NULL;
312 	}
313 	if (!vm_page_unwire_noq(m))
314 		return;
315 	KASSERT(lvl != 0,
316 	    ("lost reference (lvl) on root pg domain %p base %jx lvl %d",
317 	    domain, (uintmax_t)base, lvl));
318 	KASSERT(m->pindex != 0,
319 	    ("lost reference (idx) on root pg domain %p base %jx lvl %d",
320 	    domain, (uintmax_t)base, lvl));
321 	iommu_pgfree(domain->pgtbl_obj, m->pindex, flags, entry);
322 	amdiommu_free_pgtbl_pde(domain, base, lvl - 1, flags, entry);
323 }
324 
325 static int
326 amdiommu_unmap_buf_locked(struct amdiommu_domain *domain, iommu_gaddr_t base,
327     iommu_gaddr_t size, int flags, struct iommu_map_entry *entry)
328 {
329 	iommu_pte_t *pte;
330 	struct sf_buf *sf;
331 	vm_pindex_t idx;
332 	iommu_gaddr_t pg_sz;
333 
334 	AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(domain);
335 	if (size == 0)
336 		return (0);
337 
338 	KASSERT((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) == 0,
339 	    ("modifying idmap pagetable domain %p", domain));
340 	KASSERT((base & IOMMU_PAGE_MASK) == 0,
341 	    ("non-aligned base %p %jx %jx", domain, (uintmax_t)base,
342 	    (uintmax_t)size));
343 	KASSERT((size & IOMMU_PAGE_MASK) == 0,
344 	    ("non-aligned size %p %jx %jx", domain, (uintmax_t)base,
345 	    (uintmax_t)size));
346 	KASSERT(base < DOM2IODOM(domain)->end,
347 	    ("base too high %p %jx %jx end %jx", domain, (uintmax_t)base,
348 	    (uintmax_t)size, (uintmax_t)DOM2IODOM(domain)->end));
349 	KASSERT(base + size < DOM2IODOM(domain)->end,
350 	    ("end too high %p %jx %jx end %jx", domain, (uintmax_t)base,
351 	    (uintmax_t)size, (uintmax_t)DOM2IODOM(domain)->end));
352 	KASSERT(base + size > base,
353 	    ("size overflow %p %jx %jx", domain, (uintmax_t)base,
354 	    (uintmax_t)size));
355 	KASSERT((flags & ~IOMMU_PGF_WAITOK) == 0, ("invalid flags %x", flags));
356 
357 	pg_sz = IOMMU_PAGE_SIZE;
358 	flags |= IOMMU_PGF_OBJL;
359 
360 	for (sf = NULL; size > 0; base += pg_sz, size -= pg_sz) {
361 		pte = amdiommu_pgtbl_map_pte(domain, base,
362 		    domain->pglvl - 1, flags, &idx, &sf);
363 		KASSERT(pte != NULL,
364 		    ("sleeping or page missed %p %jx %d 0x%x",
365 		    domain, (uintmax_t)base, domain->pglvl - 1, flags));
366 		amdiommu_unmap_clear_pte(domain, base, domain->pglvl - 1,
367 		    flags, pte, &sf, entry, false);
368 		KASSERT(size >= pg_sz,
369 		    ("unmapping loop overflow %p %jx %jx %jx", domain,
370 		    (uintmax_t)base, (uintmax_t)size, (uintmax_t)pg_sz));
371 	}
372 	if (sf != NULL)
373 		iommu_unmap_pgtbl(sf);
374 	return (0);
375 }
376 
377 static int
378 amdiommu_unmap_buf(struct iommu_domain *iodom, struct iommu_map_entry *entry,
379     int flags)
380 {
381 	struct amdiommu_domain *domain;
382 	int error;
383 
384 	domain = IODOM2DOM(iodom);
385 
386 	AMDIOMMU_DOMAIN_PGLOCK(domain);
387 	error = amdiommu_unmap_buf_locked(domain, entry->start,
388 	    entry->end - entry->start, flags, entry);
389 	AMDIOMMU_DOMAIN_PGUNLOCK(domain);
390 	return (error);
391 }
392 
/*
 * Map/unmap method table for AMD IOMMU domains, wiring the generic
 * iommu_domain_map_ops entry points to the page table routines above.
 */
const struct iommu_domain_map_ops amdiommu_domain_map_ops = {
	.map = amdiommu_map_buf,
	.unmap = amdiommu_unmap_buf,
};
396 };
397