xref: /freebsd/sys/riscv/iommu/iommu_pmap.c (revision bcecad2c24aa500913559c00f1be8b364a3ff150)
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2026 Ruslan Bukin <br@bsdpad.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_page.h>

#include <riscv/iommu/iommu_pmap.h>

/*
 * Boundary values for the page table page index space:
 *
 * L3 pages: [0, NUL2E)
 * L2 pages: [NUL2E, NUL2E + NUL1E)
 * L1 pages: [NUL2E + NUL1E, NUL2E + NUL1E + NUL0E)
 *
 * Note that these ranges are used in both SV39 and SV48 mode.  In SV39 mode the
 * ranges are not fully populated since there are at most Ln_ENTRIES^2 L3 pages
 * in a set of page tables.
 */
#define	NUL0E		Ln_ENTRIES
#define	NUL1E		(Ln_ENTRIES * NUL0E)
#define	NUL2E		(Ln_ENTRIES * NUL1E)

#define	pmap_l1_pindex(v)	(NUL2E + ((v) >> L1_SHIFT))
#define	pmap_l2_pindex(v)	((v) >> L2_SHIFT)
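
/*
 * Worked example (illustrative, not from the original file): assuming 4 KiB
 * pages and Ln_ENTRIES == 512, the ranges above work out to
 *
 *	NUL0E = 512
 *	NUL1E = 512 * 512    = 262144
 *	NUL2E = 512 * 262144 = 134217728
 *
 * so, with L2_SHIFT == 21 and L1_SHIFT == 30, the L3 page table page backing
 * va 0x40000000 has pindex pmap_l2_pindex(va) = 512, and the L2 page above it
 * has pindex pmap_l1_pindex(va) = NUL2E + 1.
 */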

#define	pmap_clear(pte)			pmap_store(pte, 0)
#define	pmap_clear_bits(pte, bits)	atomic_clear_64(pte, bits)
#define	pmap_load_store(pte, entry)	atomic_swap_64(pte, entry)
#define	pmap_load_clear(pte)		pmap_load_store(pte, 0)
#define	pmap_load(pte)			atomic_load_64(pte)
#define	pmap_store(pte, entry)		atomic_store_64(pte, entry)
#define	pmap_store_bits(pte, bits)	atomic_set_64(pte, bits)

#define	pmap_l0_index(va)	(((va) >> L0_SHIFT) & Ln_ADDR_MASK)
#define	pmap_l1_index(va)	(((va) >> L1_SHIFT) & Ln_ADDR_MASK)
#define	pmap_l2_index(va)	(((va) >> L2_SHIFT) & Ln_ADDR_MASK)
#define	pmap_l3_index(va)	(((va) >> L3_SHIFT) & Ln_ADDR_MASK)

#define	PTE_TO_PHYS(pte) \
    ((((pte) & ~PTE_HI_MASK) >> PTE_PPN0_S) * PAGE_SIZE)
#define	L2PTE_TO_PHYS(l2) \
    ((((l2) & ~PTE_HI_MASK) >> PTE_PPN1_S) << L2_SHIFT)
#define	L1PTE_TO_PHYS(l1) \
    ((((l1) & ~PTE_HI_MASK) >> PTE_PPN2_S) << L1_SHIFT)
#define PTE_TO_VM_PAGE(pte) PHYS_TO_VM_PAGE(PTE_TO_PHYS(pte))
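
/*
 * Illustrative decode (not from the original file): assuming PTE_PPN0_S == 10
 * and 4 KiB pages, a leaf PTE whose PPN field holds 0x80000 (i.e. a page at
 * physical address 0x80000000) gives PTE_TO_PHYS(pte) =
 * ((pte & ~PTE_HI_MASK) >> 10) * 4096 = 0x80000 * 4096 = 0x80000000.
 */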

/********************/
/* Inline functions */
/********************/

static __inline pd_entry_t *
pmap_l0(struct riscv_iommu_pmap *pmap, vm_offset_t va)
{
	KASSERT(pmap->pm_mode != PMAP_MODE_SV39,
	    ("%s: in SV39 mode", __func__));
	KASSERT(VIRT_IS_VALID(va),
	    ("%s: malformed virtual address %#lx", __func__, va));
	return (&pmap->pm_top[pmap_l0_index(va)]);
}

static __inline pd_entry_t *
pmap_l0_to_l1(struct riscv_iommu_pmap *pmap, pd_entry_t *l0, vm_offset_t va)
{
	vm_paddr_t phys;
	pd_entry_t *l1;

	KASSERT(pmap->pm_mode != PMAP_MODE_SV39,
	    ("%s: in SV39 mode", __func__));
	phys = PTE_TO_PHYS(pmap_load(l0));
	l1 = (pd_entry_t *)PHYS_TO_DMAP(phys);

	return (&l1[pmap_l1_index(va)]);
}

static __inline pd_entry_t *
pmap_l1(struct riscv_iommu_pmap *pmap, vm_offset_t va)
{
	pd_entry_t *l0;

	KASSERT(VIRT_IS_VALID(va),
	    ("%s: malformed virtual address %#lx", __func__, va));
	if (pmap->pm_mode == PMAP_MODE_SV39) {
		return (&pmap->pm_top[pmap_l1_index(va)]);
	} else {
		l0 = pmap_l0(pmap, va);
		if ((pmap_load(l0) & PTE_V) == 0)
			return (NULL);
		if ((pmap_load(l0) & PTE_RX) != 0)
			return (NULL);
		return (pmap_l0_to_l1(pmap, l0, va));
	}
}

static __inline pd_entry_t *
pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va)
{
	vm_paddr_t phys;
	pd_entry_t *l2;

	phys = PTE_TO_PHYS(pmap_load(l1));
	l2 = (pd_entry_t *)PHYS_TO_DMAP(phys);

	return (&l2[pmap_l2_index(va)]);
}

static __inline pd_entry_t *
pmap_l2(struct riscv_iommu_pmap *pmap, vm_offset_t va)
{
	pd_entry_t *l1;

	l1 = pmap_l1(pmap, va);
	if (l1 == NULL)
		return (NULL);
	if ((pmap_load(l1) & PTE_V) == 0)
		return (NULL);
	if ((pmap_load(l1) & PTE_RX) != 0)
		return (NULL);

	return (pmap_l1_to_l2(l1, va));
}

static __inline pt_entry_t *
pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va)
{
	vm_paddr_t phys;
	pt_entry_t *l3;

	phys = PTE_TO_PHYS(pmap_load(l2));
	l3 = (pt_entry_t *)PHYS_TO_DMAP(phys);

	return (&l3[pmap_l3_index(va)]);
}

static __inline pt_entry_t *
pmap_l3(struct riscv_iommu_pmap *pmap, vm_offset_t va)
{
	pd_entry_t *l2;

	l2 = pmap_l2(pmap, va);
	if (l2 == NULL)
		return (NULL);
	if ((pmap_load(l2) & PTE_V) == 0)
		return (NULL);
	if ((pmap_load(l2) & PTE_RX) != 0)
		return (NULL);

	return (pmap_l2_to_l3(l2, va));
}

static __inline void
pmap_resident_count_inc(struct riscv_iommu_pmap *pmap, int count)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	pmap->sp_resident_count += count;
}

static __inline void
pmap_resident_count_dec(struct riscv_iommu_pmap *pmap, int count)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	KASSERT(pmap->sp_resident_count >= count,
	    ("pmap %p resident count underflow %ld %d", pmap,
	    pmap->sp_resident_count, count));
	pmap->sp_resident_count -= count;
}

/***************************************************
 * Page table page management routines.....
 ***************************************************/

int
iommu_pmap_pinit(struct riscv_iommu_pmap *pmap, enum pmap_mode pm_mode)
{
	vm_paddr_t topphys;
	vm_page_t m;

	m = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_ZERO |
	    VM_ALLOC_WAITOK);
	topphys = VM_PAGE_TO_PHYS(m);
	pmap->pm_top = (pd_entry_t *)PHYS_TO_DMAP(topphys);
	pmap->pm_mode = pm_mode;

	switch (pm_mode) {
	case PMAP_MODE_SV39:
		pmap->pm_satp = SATP_MODE_SV39;
		break;
	case PMAP_MODE_SV48:
		pmap->pm_satp = SATP_MODE_SV48;
		break;
	default:
		panic("Unknown virtual memory system");
	}

	pmap->pm_satp |= (topphys >> PAGE_SHIFT);

#ifdef INVARIANTS
	pmap->sp_resident_count = 0;
#endif

	mtx_init(&pmap->pm_mtx, "iommu pmap", NULL, MTX_DEF);

	return (1);
}
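
/*
 * Illustrative example (not from the original file): assuming SATP_MODE_SHIFT
 * is 60, so that SATP_MODE_SV39 == (8UL << 60) and SATP_MODE_SV48 ==
 * (9UL << 60), an SV48 pmap whose top-level page table page sits at physical
 * address 0x80200000 leaves iommu_pmap_pinit() with pm_satp ==
 * (9UL << 60) | 0x80200: the mode in bits [63:60] and the root page table PPN
 * in the low bits.
 */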

/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 * Should only be called if the map contains no valid mappings.
 */
void
iommu_pmap_release(struct riscv_iommu_pmap *pmap)
{
	vm_page_t m;

	KASSERT(pmap->sp_resident_count == 0,
	    ("pmap_release: pmap resident count %ld != 0",
	    pmap->sp_resident_count));

	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_top));
	vm_page_unwire_noq(m);
	vm_page_free_zero(m);
	mtx_destroy(&pmap->pm_mtx);
}

/*
 * This routine is called if the desired page table page does not exist.
 *
 * If page table page allocation fails, this routine may sleep before
 * returning NULL.  It sleeps only if a lock pointer was given.
 *
 * Note: If a page allocation fails at page table level two or three,
 * one or two pages may be held during the wait, only to be released
 * afterwards.  This conservative approach is easily argued to avoid
 * race conditions.
 */
static vm_page_t
_pmap_alloc_l3(struct riscv_iommu_pmap *pmap, vm_pindex_t ptepindex)
{
	vm_page_t m, pdpg;
	pt_entry_t entry;
	vm_paddr_t phys;
	pn_t pn;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);

	/*
	 * Allocate a page table page.
	 */
	m = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_ZERO);
	if (m == NULL) {
		/*
		 * Indicate the need to retry.  While waiting, the page table
		 * page may have been allocated.
		 */
		return (NULL);
	}
	m->pindex = ptepindex;

	/*
	 * Map the pagetable page into the process address space, if
	 * it isn't already there.
	 */
	pn = VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT;
	if (ptepindex >= NUL2E + NUL1E) {
		pd_entry_t *l0;
		vm_pindex_t l0index;

		KASSERT(pmap->pm_mode != PMAP_MODE_SV39,
		    ("%s: pindex %#lx in SV39 mode", __func__, ptepindex));
		KASSERT(ptepindex < NUL2E + NUL1E + NUL0E,
		    ("%s: pindex %#lx out of range", __func__, ptepindex));

		l0index = ptepindex - (NUL2E + NUL1E);
		l0 = &pmap->pm_top[l0index];
		KASSERT((pmap_load(l0) & PTE_V) == 0,
		    ("%s: L0 entry %#lx is valid", __func__, pmap_load(l0)));

		entry = PTE_V | (pn << PTE_PPN0_S);
		pmap_store(l0, entry);
	} else if (ptepindex >= NUL2E) {
		pd_entry_t *l0, *l1;
		vm_pindex_t l0index, l1index;

		l1index = ptepindex - NUL2E;
		if (pmap->pm_mode == PMAP_MODE_SV39) {
			l1 = &pmap->pm_top[l1index];
		} else {
			l0index = l1index >> Ln_ENTRIES_SHIFT;
			l0 = &pmap->pm_top[l0index];
			if (pmap_load(l0) == 0) {
				/* Recurse to allocate the L1 page. */
				if (_pmap_alloc_l3(pmap,
				    NUL2E + NUL1E + l0index) == NULL)
					goto fail;
				phys = PTE_TO_PHYS(pmap_load(l0));
			} else {
				phys = PTE_TO_PHYS(pmap_load(l0));
				pdpg = PHYS_TO_VM_PAGE(phys);
				pdpg->ref_count++;
			}
			l1 = (pd_entry_t *)PHYS_TO_DMAP(phys);
			l1 = &l1[ptepindex & Ln_ADDR_MASK];
		}
		KASSERT((pmap_load(l1) & PTE_V) == 0,
		    ("%s: L1 entry %#lx is valid", __func__, pmap_load(l1)));

		entry = PTE_V | (pn << PTE_PPN0_S);
		pmap_store(l1, entry);
	} else {
		vm_pindex_t l0index, l1index;
		pd_entry_t *l0, *l1, *l2;

		l1index = ptepindex >> (L1_SHIFT - L2_SHIFT);
		if (pmap->pm_mode == PMAP_MODE_SV39) {
			l1 = &pmap->pm_top[l1index];
			if (pmap_load(l1) == 0) {
				/* recurse for allocating page dir */
				if (_pmap_alloc_l3(pmap, NUL2E + l1index)
				    == NULL)
					goto fail;
			} else {
				pdpg = PTE_TO_VM_PAGE(pmap_load(l1));
				pdpg->ref_count++;
			}
		} else {
			l0index = l1index >> Ln_ENTRIES_SHIFT;
			l0 = &pmap->pm_top[l0index];
			if (pmap_load(l0) == 0) {
				/* Recurse to allocate the L1 entry. */
				if (_pmap_alloc_l3(pmap, NUL2E + l1index)
				    == NULL)
					goto fail;
				phys = PTE_TO_PHYS(pmap_load(l0));
				l1 = (pd_entry_t *)PHYS_TO_DMAP(phys);
				l1 = &l1[l1index & Ln_ADDR_MASK];
			} else {
				phys = PTE_TO_PHYS(pmap_load(l0));
				l1 = (pd_entry_t *)PHYS_TO_DMAP(phys);
				l1 = &l1[l1index & Ln_ADDR_MASK];
				if (pmap_load(l1) == 0) {
					/* Recurse to allocate the L2 page. */
					if (_pmap_alloc_l3(pmap,
					    NUL2E + l1index) == NULL)
						goto fail;
				} else {
					pdpg = PTE_TO_VM_PAGE(pmap_load(l1));
					pdpg->ref_count++;
				}
			}
		}

		phys = PTE_TO_PHYS(pmap_load(l1));
		l2 = (pd_entry_t *)PHYS_TO_DMAP(phys);
		l2 = &l2[ptepindex & Ln_ADDR_MASK];
		KASSERT((pmap_load(l2) & PTE_V) == 0,
		    ("%s: L2 entry %#lx is valid", __func__, pmap_load(l2)));

		entry = PTE_V | (pn << PTE_PPN0_S);
		pmap_store(l2, entry);
	}

	pmap_resident_count_inc(pmap, 1);

	return (m);

fail:
	vm_page_unwire_noq(m);
	vm_page_free_zero(m);
	return (NULL);
}
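
/*
 * Worked example (illustrative, not from the original file): assuming
 * Ln_ENTRIES == 512 and L2_SHIFT == 21, allocating the L3 page table page for
 * va 0x40000000 uses ptepindex = pmap_l2_pindex(va) = 512.  Since 512 < NUL2E,
 * the final "else" branch above runs; if the covering L2 page (and, in SV48
 * mode, the L1 page) is not yet present, _pmap_alloc_l3() first recurses with
 * pindex NUL2E + 1 (and NUL2E + NUL1E + 0) to allocate it.
 */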

/*
 * Remove a single IOMMU entry.
 */
int
iommu_pmap_remove(struct riscv_iommu_pmap *pmap, vm_offset_t va)
{
	pt_entry_t *l3;
	int rc;

	PMAP_LOCK(pmap);

	l3 = pmap_l3(pmap, va);
	if (l3 != NULL) {
		pmap_resident_count_dec(pmap, 1);
		pmap_clear(l3);
		rc = KERN_SUCCESS;
	} else
		rc = KERN_FAILURE;

	PMAP_UNLOCK(pmap);

	return (rc);
}

/* Add a single IOMMU entry. This function does not sleep. */
int
iommu_pmap_enter(struct riscv_iommu_pmap *pmap, vm_offset_t va, vm_paddr_t pa,
    vm_prot_t prot, u_int flags)
{
	pd_entry_t *l2, l2e;
	pt_entry_t new_l3;
	pt_entry_t *l3;
	vm_page_t mpte;
	pn_t pn;
	int rv;

	pn = (pa / PAGE_SIZE);

	new_l3 = PTE_V | PTE_R | PTE_A;
	if (prot & VM_PROT_EXECUTE)
		new_l3 |= PTE_X;
	if (flags & VM_PROT_WRITE)
		new_l3 |= PTE_D;
	if (prot & VM_PROT_WRITE)
		new_l3 |= PTE_W;
	if (va < VM_MAX_USER_ADDRESS)
		new_l3 |= PTE_U;

	new_l3 |= (pn << PTE_PPN0_S);
	new_l3 |= PTE_MA_IO;

	/*
	 * Set modified bit gratuitously for writeable mappings if
	 * the page is unmanaged. We do not want to take a fault
	 * to do the dirty bit accounting for these mappings.
	 */
	if (prot & VM_PROT_WRITE)
		new_l3 |= PTE_D;

	CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa);

	mpte = NULL;
	PMAP_LOCK(pmap);

	l2 = pmap_l2(pmap, va);
	if (l2 != NULL && ((l2e = pmap_load(l2)) & PTE_V) != 0 &&
	    ((l2e & PTE_RWX) == 0)) {
		l3 = pmap_l2_to_l3(l2, va);
	} else if (va < VM_MAXUSER_ADDRESS) {
		mpte = _pmap_alloc_l3(pmap, pmap_l2_pindex(va));
		if (mpte == NULL) {
			CTR0(KTR_PMAP, "pmap_enter: mpte == NULL");
			rv = KERN_RESOURCE_SHORTAGE;
			goto out;
		}
		l3 = pmap_l3(pmap, va);
	} else
		panic("pmap_enter: missing L3 table for kernel va %#lx", va);

	KASSERT((pmap_load(l3) & PTE_V) == 0, ("l3 is valid"));

	pmap_store(l3, new_l3);
	pmap_resident_count_inc(pmap, 1);

	rv = KERN_SUCCESS;
out:
	PMAP_UNLOCK(pmap);

	return (rv);
}
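
/*
 * Illustrative usage sketch (hypothetical; not part of this file): a caller
 * managing an IOMMU address space might drive this API roughly as follows,
 * where "p", "va" and "pa" are made-up names and values:
 *
 *	struct riscv_iommu_pmap p;
 *
 *	iommu_pmap_pinit(&p, PMAP_MODE_SV48);
 *	iommu_pmap_enter(&p, va, pa, VM_PROT_READ | VM_PROT_WRITE,
 *	    VM_PROT_READ | VM_PROT_WRITE);
 *	...
 *	iommu_pmap_remove(&p, va);
 *	iommu_pmap_remove_pages(&p);	(free intermediate page table pages)
 *	iommu_pmap_release(&p);		(free the top-level page)
 */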

static void
iommu_pmap_remove_pages_sv48(struct riscv_iommu_pmap *pmap)
{
	pd_entry_t l0e, *l1, l1e, *l2, l2e, *l3, l3e;
	vm_paddr_t pa0, pa1, pa;
	vm_page_t m0, m1, m;
	int i, j, k, l;

	PMAP_LOCK(pmap);

	for (i = 0; i < Ln_ENTRIES; i++) {
		l0e = pmap->pm_top[i];
		if ((l0e & PTE_V) == 0)
			continue;
		pa0 = PTE_TO_PHYS(l0e);
		m0 = PHYS_TO_VM_PAGE(pa0);
		l1 = (pd_entry_t *)PHYS_TO_DMAP(pa0);

		for (j = 0; j < Ln_ENTRIES; j++) {
			l1e = l1[j];
			if ((l1e & PTE_V) == 0)
				continue;
			pa1 = PTE_TO_PHYS(l1e);
			m1 = PHYS_TO_VM_PAGE(pa1);
			l2 = (pd_entry_t *)PHYS_TO_DMAP(pa1);

			for (k = 0; k < Ln_ENTRIES; k++) {
				l2e = l2[k];
				if ((l2e & PTE_V) == 0)
					continue;
				pa = PTE_TO_PHYS(l2e);
				m = PHYS_TO_VM_PAGE(pa);
				l3 = (pt_entry_t *)PHYS_TO_DMAP(pa);

				for (l = 0; l < Ln_ENTRIES; l++) {
					l3e = l3[l];
					if ((l3e & PTE_V) == 0)
						continue;
					panic("%s: l3e found (idx %d %d %d %d)",
					    __func__, i, j, k, l);
				}

				vm_page_unwire_noq(m1);
				vm_page_unwire_noq(m);
				pmap_resident_count_dec(pmap, 1);
				vm_page_free(m);
				pmap_clear(&l2[k]);
			}

			vm_page_unwire_noq(m0);
			pmap_resident_count_dec(pmap, 1);
			vm_page_free(m1);
			pmap_clear(&l1[j]);
		}

		pmap_resident_count_dec(pmap, 1);
		vm_page_free(m0);
		pmap_clear(&pmap->pm_top[i]);
	}

	KASSERT(pmap->sp_resident_count == 0,
	    ("Invalid resident count %jd", pmap->sp_resident_count));

	PMAP_UNLOCK(pmap);
}

static void
iommu_pmap_remove_pages_sv39(struct riscv_iommu_pmap *pmap)
{
	pd_entry_t l1e, *l2, l2e, *l3, l3e;
	vm_paddr_t pa1, pa;
	vm_page_t m1, m;
	int j, k, l;

	PMAP_LOCK(pmap);

	for (j = 0; j < Ln_ENTRIES; j++) {
		l1e = pmap->pm_top[j];
		if ((l1e & PTE_V) == 0)
			continue;
		pa1 = PTE_TO_PHYS(l1e);
		m1 = PHYS_TO_VM_PAGE(pa1);
		l2 = (pd_entry_t *)PHYS_TO_DMAP(pa1);

		for (k = 0; k < Ln_ENTRIES; k++) {
			l2e = l2[k];
			if ((l2e & PTE_V) == 0)
				continue;
			pa = PTE_TO_PHYS(l2e);
			m = PHYS_TO_VM_PAGE(pa);
			l3 = (pt_entry_t *)PHYS_TO_DMAP(pa);

			for (l = 0; l < Ln_ENTRIES; l++) {
				l3e = l3[l];
				if ((l3e & PTE_V) == 0)
					continue;
				panic("%s: l3e found (idx %d %d %d)",
				    __func__, j, k, l);
			}

			vm_page_unwire_noq(m1);
			vm_page_unwire_noq(m);
			pmap_resident_count_dec(pmap, 1);
			vm_page_free(m);
			pmap_clear(&l2[k]);
		}

		pmap_resident_count_dec(pmap, 1);
		vm_page_free(m1);
		pmap_clear(&pmap->pm_top[j]);
	}

	KASSERT(pmap->sp_resident_count == 0,
	    ("Invalid resident count %jd", pmap->sp_resident_count));

	PMAP_UNLOCK(pmap);
}

void
iommu_pmap_remove_pages(struct riscv_iommu_pmap *pmap)
{

	switch (pmap->pm_mode) {
	case PMAP_MODE_SV39:
		iommu_pmap_remove_pages_sv39(pmap);
		break;
	case PMAP_MODE_SV48:
		iommu_pmap_remove_pages_sv48(pmap);
		break;
	default:
		panic("Unknown virtual memory system");
	}
}
630