xref: /freebsd/sys/x86/iommu/iommu_utils.c (revision a64729f5077d77e13b9497cb33ecb3c82e606ee8)
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013, 2014, 2024 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "opt_acpi.h"
#if defined(__amd64__)
#define	DEV_APIC
#else
#include "opt_apic.h"
#endif
#include "opt_ddb.h"

#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sf_buf.h>
#include <sys/sysctl.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/rman.h>
#include <sys/rwlock.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/cpu.h>
#include <x86/include/busdma_impl.h>
#include <dev/iommu/busdma_iommu.h>
#include <dev/iommu/iommu.h>
#include <x86/iommu/x86_iommu.h>
#include <x86/iommu/iommu_intrmap.h>
#ifdef DEV_APIC
#include "pcib_if.h"
#include <machine/intr_machdep.h>
#include <x86/apicreg.h>
#include <x86/apicvar.h>
#endif

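/*
 * Allocate (or look up) the page-table page at index idx in the page
 * table object obj.  The IOMMU_PGF_* flags select zeroing, sleepable
 * allocation, and whether the object is already locked by the caller;
 * with IOMMU_PGF_NOALLOC only the lookup is performed.  Pages are
 * allocated below iommu_high and counted in iommu_tbl_pagecnt.
 */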
vm_page_t
iommu_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags)
{
	vm_page_t m;
	int zeroed, aflags;

	zeroed = (flags & IOMMU_PGF_ZERO) != 0 ? VM_ALLOC_ZERO : 0;
	aflags = zeroed | VM_ALLOC_NOBUSY | VM_ALLOC_SYSTEM | VM_ALLOC_NODUMP |
	    ((flags & IOMMU_PGF_WAITOK) != 0 ? VM_ALLOC_WAITFAIL :
	    VM_ALLOC_NOWAIT);
	for (;;) {
		if ((flags & IOMMU_PGF_OBJL) == 0)
			VM_OBJECT_WLOCK(obj);
		m = vm_page_lookup(obj, idx);
		if ((flags & IOMMU_PGF_NOALLOC) != 0 || m != NULL) {
			if ((flags & IOMMU_PGF_OBJL) == 0)
				VM_OBJECT_WUNLOCK(obj);
			break;
		}
		m = vm_page_alloc_contig(obj, idx, aflags, 1, 0,
		    iommu_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
		if ((flags & IOMMU_PGF_OBJL) == 0)
			VM_OBJECT_WUNLOCK(obj);
		if (m != NULL) {
			if (zeroed && (m->flags & PG_ZERO) == 0)
				pmap_zero_page(m);
			atomic_add_int(&iommu_tbl_pagecnt, 1);
			break;
		}
		if ((flags & IOMMU_PGF_WAITOK) == 0)
			break;
	}
	return (m);
}

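/*
 * Free the page-table page at index idx.  If entry is NULL the page is
 * freed immediately; otherwise it is unlinked from the object but kept
 * exclusively busy and queued on entry->pgtbl_free for deferred
 * freeing.
 */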
void
iommu_pgfree(vm_object_t obj, vm_pindex_t idx, int flags,
    struct iommu_map_entry *entry)
{
	vm_page_t m;

	if ((flags & IOMMU_PGF_OBJL) == 0)
		VM_OBJECT_WLOCK(obj);
	m = vm_page_grab(obj, idx, VM_ALLOC_NOCREAT);
	if (m != NULL) {
		if (entry == NULL) {
			vm_page_free(m);
			atomic_subtract_int(&iommu_tbl_pagecnt, 1);
		} else {
			vm_page_remove_xbusy(m);	/* keep page busy */
			SLIST_INSERT_HEAD(&entry->pgtbl_free, m, plinks.s.ss);
		}
	}
	if ((flags & IOMMU_PGF_OBJL) == 0)
		VM_OBJECT_WUNLOCK(obj);
}

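/*
 * Map the page-table page at index idx into KVA through a CPU-private
 * sf_buf and return its kernel address; *sf must later be passed to
 * iommu_unmap_pgtbl().  The page is allocated first if IOMMU_PGF_ALLOC
 * is set.  The current thread is left pinned while the mapping exists.
 */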
void *
iommu_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags,
    struct sf_buf **sf)
{
	vm_page_t m;
	bool allocated;

	if ((flags & IOMMU_PGF_OBJL) == 0)
		VM_OBJECT_WLOCK(obj);
	m = vm_page_lookup(obj, idx);
	if (m == NULL && (flags & IOMMU_PGF_ALLOC) != 0) {
		m = iommu_pgalloc(obj, idx, flags | IOMMU_PGF_OBJL);
		allocated = true;
	} else
		allocated = false;
	if (m == NULL) {
		if ((flags & IOMMU_PGF_OBJL) == 0)
			VM_OBJECT_WUNLOCK(obj);
		return (NULL);
	}
	/* Sleepable allocations cannot fail. */
	if ((flags & IOMMU_PGF_WAITOK) != 0)
		VM_OBJECT_WUNLOCK(obj);
	sched_pin();
	*sf = sf_buf_alloc(m, SFB_CPUPRIVATE | ((flags & IOMMU_PGF_WAITOK)
	    == 0 ? SFB_NOWAIT : 0));
	if (*sf == NULL) {
		sched_unpin();
		if (allocated) {
			VM_OBJECT_ASSERT_WLOCKED(obj);
			iommu_pgfree(obj, m->pindex, flags | IOMMU_PGF_OBJL,
			    NULL);
		}
		if ((flags & IOMMU_PGF_OBJL) == 0)
			VM_OBJECT_WUNLOCK(obj);
		return (NULL);
	}
	if ((flags & (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL)) ==
	    (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL))
		VM_OBJECT_WLOCK(obj);
	else if ((flags & (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL)) == 0)
		VM_OBJECT_WUNLOCK(obj);
	return ((void *)sf_buf_kva(*sf));
}

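/* Release the sf_buf mapping obtained from iommu_map_pgtbl() and unpin. */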
void
iommu_unmap_pgtbl(struct sf_buf *sf)
{

	sf_buf_free(sf);
	sched_unpin();
}

iommu_haddr_t iommu_high;
int iommu_tbl_pagecnt;

SYSCTL_NODE(_hw_iommu, OID_AUTO, dmar, CTLFLAG_RD | CTLFLAG_MPSAFE,
    NULL, "");
SYSCTL_INT(_hw_iommu, OID_AUTO, tbl_pagecnt, CTLFLAG_RD,
    &iommu_tbl_pagecnt, 0,
    "Count of pages used for IOMMU pagetables");

int iommu_qi_batch_coalesce = 100;
SYSCTL_INT(_hw_iommu, OID_AUTO, batch_coalesce, CTLFLAG_RWTUN,
    &iommu_qi_batch_coalesce, 0,
    "Number of QI batches between interrupts");

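/*
 * Stub backend used while no hardware IOMMU driver has registered
 * itself via set_x86_iommu(): device lookups find no unit, and
 * interrupt remapping requests are refused or ignored.
 */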
static struct iommu_unit *
x86_no_iommu_find(device_t dev, bool verbose)
{
	return (NULL);
}

static int
x86_no_iommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count)
{
	return (EOPNOTSUPP);
}

static int
x86_no_iommu_map_msi_intr(device_t src, u_int cpu, u_int vector,
    u_int cookie, uint64_t *addr, uint32_t *data)
{
	return (EOPNOTSUPP);
}

static int
x86_no_iommu_unmap_msi_intr(device_t src, u_int cookie)
{
	return (0);
}

static int
x86_no_iommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector,
    bool edge, bool activehi, int irq, u_int *cookie, uint32_t *hi,
    uint32_t *lo)
{
	return (EOPNOTSUPP);
}

static int
x86_no_iommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie)
{
	return (0);
}

static struct x86_iommu x86_no_iommu = {
	.find = x86_no_iommu_find,
	.alloc_msi_intr = x86_no_iommu_alloc_msi_intr,
	.map_msi_intr = x86_no_iommu_map_msi_intr,
	.unmap_msi_intr = x86_no_iommu_unmap_msi_intr,
	.map_ioapic_intr = x86_no_iommu_map_ioapic_intr,
	.unmap_ioapic_intr = x86_no_iommu_unmap_ioapic_intr,
};

static struct x86_iommu *x86_iommu = &x86_no_iommu;

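/*
 * Register the hardware IOMMU backend (DMAR or AMD IOMMU).  The
 * wrappers below dispatch the machine-independent iommu entry points
 * through the registered ops vector.
 */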
void
set_x86_iommu(struct x86_iommu *x)
{
	MPASS(x86_iommu == &x86_no_iommu);
	x86_iommu = x;
}

struct x86_iommu *
get_x86_iommu(void)
{
	return (x86_iommu);
}

void
iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
    bool cansleep)
{
	x86_iommu->domain_unload_entry(entry, free, cansleep);
}

void
iommu_domain_unload(struct iommu_domain *iodom,
    struct iommu_map_entries_tailq *entries, bool cansleep)
{
	x86_iommu->domain_unload(iodom, entries, cansleep);
}

struct iommu_ctx *
iommu_get_ctx(struct iommu_unit *iommu, device_t dev, uint16_t rid,
    bool id_mapped, bool rmrr_init)
{
	return (x86_iommu->get_ctx(iommu, dev, rid, id_mapped, rmrr_init));
}

void
iommu_free_ctx_locked(struct iommu_unit *iommu, struct iommu_ctx *context)
{
	x86_iommu->free_ctx_locked(iommu, context);
}

void
iommu_free_ctx(struct iommu_ctx *context)
{
	x86_iommu->free_ctx(context);
}

struct iommu_unit *
iommu_find(device_t dev, bool verbose)
{
	return (x86_iommu->find(dev, verbose));
}

int
iommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count)
{
	return (x86_iommu->alloc_msi_intr(src, cookies, count));
}

int
iommu_map_msi_intr(device_t src, u_int cpu, u_int vector, u_int cookie,
    uint64_t *addr, uint32_t *data)
{
	return (x86_iommu->map_msi_intr(src, cpu, vector, cookie,
	    addr, data));
}

int
iommu_unmap_msi_intr(device_t src, u_int cookie)
{
	return (x86_iommu->unmap_msi_intr(src, cookie));
}

int
iommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector, bool edge,
    bool activehi, int irq, u_int *cookie, uint32_t *hi, uint32_t *lo)
{
	return (x86_iommu->map_ioapic_intr(ioapic_id, cpu, vector, edge,
	    activehi, irq, cookie, hi, lo));
}

int
iommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie)
{
	return (x86_iommu->unmap_ioapic_intr(ioapic_id, cookie));
}

void
iommu_unit_pre_instantiate_ctx(struct iommu_unit *unit)
{
	x86_iommu->unit_pre_instantiate_ctx(unit);
}

#define	IOMMU2X86C(iommu)	(x86_iommu->get_x86_common(iommu))

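/*
 * Return true if the invalidation wait descriptor identified by pseq
 * has been completed by the hardware, comparing first the generation
 * and then the sequence number written back by the wait descriptor.
 */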
static bool
iommu_qi_seq_processed(struct iommu_unit *unit,
    const struct iommu_qi_genseq *pseq)
{
	struct x86_unit_common *x86c;
	u_int gen;

	x86c = IOMMU2X86C(unit);
	gen = x86c->inv_waitd_gen;
	return (pseq->gen < gen || (pseq->gen == gen && pseq->seq <=
	    atomic_load_64(&x86c->inv_waitd_seq_hw)));
}

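/*
 * Assign the next generation/sequence pair for an invalidation wait
 * descriptor and, if emit_wait is set, queue the descriptor itself.
 * When the 32-bit sequence counter is about to wrap, a synchronous
 * wait is emitted and the generation is bumped first, so that
 * iommu_qi_seq_processed() keeps giving correct answers.
 */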
void
iommu_qi_emit_wait_seq(struct iommu_unit *unit, struct iommu_qi_genseq *pseq,
    bool emit_wait)
{
	struct x86_unit_common *x86c;
	struct iommu_qi_genseq gsec;
	uint32_t seq;

	KASSERT(pseq != NULL, ("wait descriptor with no place for seq"));
	IOMMU_ASSERT_LOCKED(unit);
	x86c = IOMMU2X86C(unit);

	if (x86c->inv_waitd_seq == 0xffffffff) {
		gsec.gen = x86c->inv_waitd_gen;
		gsec.seq = x86c->inv_waitd_seq;
		x86_iommu->qi_ensure(unit, 1);
		x86_iommu->qi_emit_wait_descr(unit, gsec.seq, false,
		    true, false);
		x86_iommu->qi_advance_tail(unit);
		while (!iommu_qi_seq_processed(unit, &gsec))
			cpu_spinwait();
		x86c->inv_waitd_gen++;
		x86c->inv_waitd_seq = 1;
	}
	seq = x86c->inv_waitd_seq++;
	pseq->gen = x86c->inv_waitd_gen;
	pseq->seq = seq;
	if (emit_wait) {
		x86_iommu->qi_ensure(unit, 1);
		x86_iommu->qi_emit_wait_descr(unit, seq, true, true, false);
	}
}

/*
 * To avoid missed wakeups, callers must increment the unit's waiters count
 * before advancing the tail past the wait descriptor.
 */
void
iommu_qi_wait_for_seq(struct iommu_unit *unit, const struct iommu_qi_genseq *
    gseq, bool nowait)
{
	struct x86_unit_common *x86c;

	IOMMU_ASSERT_LOCKED(unit);
	x86c = IOMMU2X86C(unit);

	KASSERT(x86c->inv_seq_waiters > 0, ("%s: no waiters", __func__));
	while (!iommu_qi_seq_processed(unit, gseq)) {
		if (cold || nowait) {
			cpu_spinwait();
		} else {
			msleep(&x86c->inv_seq_waiters, &unit->lock, 0,
			    "dmarse", hz);
		}
	}
	x86c->inv_seq_waiters--;
}

/*
 * The caller must not be using the entry's dmamap_link field.
 */
void
iommu_qi_invalidate_locked(struct iommu_domain *domain,
    struct iommu_map_entry *entry, bool emit_wait)
{
	struct iommu_unit *unit;
	struct x86_unit_common *x86c;

	unit = domain->iommu;
	x86c = IOMMU2X86C(unit);
	IOMMU_ASSERT_LOCKED(unit);

	x86_iommu->qi_invalidate_emit(domain, entry->start, entry->end -
	    entry->start, &entry->gseq, emit_wait);

	/*
	 * To avoid a data race in dmar_qi_task(), the entry's gseq must be
	 * initialized before the entry is added to the TLB flush list, and the
	 * entry must be added to that list before the tail is advanced.  More
	 * precisely, the tail must not be advanced past the wait descriptor
	 * that will generate the interrupt that schedules dmar_qi_task() for
	 * execution before the entry is added to the list.  While an earlier
	 * call to dmar_qi_ensure() might have advanced the tail, it will not
	 * advance it past the wait descriptor.
	 *
	 * See the definition of struct dmar_unit for more information on
	 * synchronization.
	 */
	entry->tlb_flush_next = NULL;
	atomic_store_rel_ptr((uintptr_t *)&x86c->tlb_flush_tail->
	    tlb_flush_next, (uintptr_t)entry);
	x86c->tlb_flush_tail = entry;

	x86_iommu->qi_advance_tail(unit);
}

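/*
 * Synchronously invalidate the IOTLB range [base, base + size): emit
 * the invalidation with a trailing wait descriptor and block (or spin,
 * if cansleep is false) until the hardware completes it.
 */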
void
iommu_qi_invalidate_sync(struct iommu_domain *domain, iommu_gaddr_t base,
    iommu_gaddr_t size, bool cansleep)
{
	struct iommu_unit *unit;
	struct iommu_qi_genseq gseq;

	unit = domain->iommu;
	IOMMU_LOCK(unit);
	x86_iommu->qi_invalidate_emit(domain, base, size, &gseq, true);

	/*
	 * To avoid a missed wakeup in iommu_qi_task(), the unit's
	 * waiters count must be incremented before the tail is
	 * advanced.
	 */
	IOMMU2X86C(unit)->inv_seq_waiters++;

	x86_iommu->qi_advance_tail(unit);
	iommu_qi_wait_for_seq(unit, &gseq, !cansleep);
	IOMMU_UNLOCK(unit);
}

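/*
 * Walk the singly-linked TLB flush list and release every map entry
 * whose queued invalidation has completed, stopping at the first entry
 * that is still pending.
 */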
void
iommu_qi_drain_tlb_flush(struct iommu_unit *unit)
{
	struct x86_unit_common *x86c;
	struct iommu_map_entry *entry, *head;

	x86c = IOMMU2X86C(unit);
	for (head = x86c->tlb_flush_head;; head = entry) {
		entry = (struct iommu_map_entry *)
		    atomic_load_acq_ptr((uintptr_t *)&head->tlb_flush_next);
		if (entry == NULL ||
		    !iommu_qi_seq_processed(unit, &entry->gseq))
			break;
		x86c->tlb_flush_head = entry;
		iommu_gas_free_entry(head);
		if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
			iommu_gas_free_region(entry);
		else
			iommu_gas_free_space(entry);
	}
}

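/*
 * Common part of the invalidation-queue setup: allocate the dummy head
 * of the TLB flush list, create the QI taskqueue, size the queue from
 * the hw.iommu.qi_size tunable (clamped to the unit's maximum), and
 * allocate the DMA-visible queue buffer below iommu_high.
 */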
void
iommu_qi_common_init(struct iommu_unit *unit, task_fn_t qi_task)
{
	struct x86_unit_common *x86c;
	u_int qi_sz;

	x86c = IOMMU2X86C(unit);

	x86c->tlb_flush_head = x86c->tlb_flush_tail =
	    iommu_gas_alloc_entry(NULL, 0);
	TASK_INIT(&x86c->qi_task, 0, qi_task, unit);
	x86c->qi_taskqueue = taskqueue_create_fast("iommuqf", M_WAITOK,
	    taskqueue_thread_enqueue, &x86c->qi_taskqueue);
	taskqueue_start_threads(&x86c->qi_taskqueue, 1, PI_AV,
	    "iommu%d qi taskq", unit->unit);

	x86c->inv_waitd_gen = 0;
	x86c->inv_waitd_seq = 1;

	qi_sz = 3;
	TUNABLE_INT_FETCH("hw.iommu.qi_size", &qi_sz);
	if (qi_sz > x86c->qi_buf_maxsz)
		qi_sz = x86c->qi_buf_maxsz;
	x86c->inv_queue_size = (1ULL << qi_sz) * PAGE_SIZE;
	/* Reserve one descriptor to prevent wraparound. */
	x86c->inv_queue_avail = x86c->inv_queue_size -
	    x86c->qi_cmd_sz;

	/*
	 * The invalidation queue reads by DMARs/AMDIOMMUs are always
	 * coherent.
	 */
	x86c->inv_queue = kmem_alloc_contig(x86c->inv_queue_size,
	    M_WAITOK | M_ZERO, 0, iommu_high, PAGE_SIZE, 0,
	    VM_MEMATTR_DEFAULT);
	x86c->inv_waitd_seq_hw_phys = pmap_kextract(
	    (vm_offset_t)&x86c->inv_waitd_seq_hw);
}

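/*
 * Common part of the invalidation-queue teardown: drain and destroy
 * the QI taskqueue, wait for all outstanding invalidations to finish,
 * then let the caller disable the queue before its buffer is released.
 */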
void
iommu_qi_common_fini(struct iommu_unit *unit, void (*disable_qi)(
    struct iommu_unit *))
{
	struct x86_unit_common *x86c;
	struct iommu_qi_genseq gseq;

	x86c = IOMMU2X86C(unit);

	taskqueue_drain(x86c->qi_taskqueue, &x86c->qi_task);
	taskqueue_free(x86c->qi_taskqueue);
	x86c->qi_taskqueue = NULL;

	IOMMU_LOCK(unit);
	/* quiesce */
	x86_iommu->qi_ensure(unit, 1);
	iommu_qi_emit_wait_seq(unit, &gseq, true);
	/* See iommu_qi_invalidate_locked(). */
	x86c->inv_seq_waiters++;
	x86_iommu->qi_advance_tail(unit);
	iommu_qi_wait_for_seq(unit, &gseq, false);
	/* Only after the quiesce, disable the queue. */
	disable_qi(unit);
	KASSERT(x86c->inv_seq_waiters == 0,
	    ("iommu%d: waiters on disabled queue", unit->unit));
	IOMMU_UNLOCK(unit);

	kmem_free(x86c->inv_queue, x86c->inv_queue_size);
	x86c->inv_queue = NULL;
	x86c->inv_queue_size = 0;
}

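/*
 * Allocate and set up the MSI-X interrupt with index idx for the IOMMU
 * unit, recording the programmed MSI address/data pair so the driver
 * can write them into the hardware registers.  On failure the
 * partially configured resources are released in reverse order.
 */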
int
iommu_alloc_irq(struct iommu_unit *unit, int idx)
{
	device_t dev, pcib;
	struct iommu_msi_data *dmd;
	uint64_t msi_addr;
	uint32_t msi_data;
	int error;

	MPASS(idx >= 0 && idx < IOMMU_MAX_MSI);

	dev = unit->dev;
	dmd = &IOMMU2X86C(unit)->intrs[idx];
	pcib = device_get_parent(device_get_parent(dev)); /* Really not pcib */
	error = PCIB_ALLOC_MSIX(pcib, dev, &dmd->irq);
	if (error != 0) {
		device_printf(dev, "cannot allocate %s interrupt, %d\n",
		    dmd->name, error);
		goto err1;
	}
	error = bus_set_resource(dev, SYS_RES_IRQ, dmd->irq_rid,
	    dmd->irq, 1);
	if (error != 0) {
		device_printf(dev, "cannot set %s interrupt resource, %d\n",
		    dmd->name, error);
		goto err2;
	}
	dmd->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
	    &dmd->irq_rid, RF_ACTIVE);
	if (dmd->irq_res == NULL) {
		device_printf(dev,
		    "cannot allocate resource for %s interrupt\n", dmd->name);
		error = ENXIO;
		goto err3;
	}
	error = bus_setup_intr(dev, dmd->irq_res, INTR_TYPE_MISC,
	    dmd->handler, NULL, unit, &dmd->intr_handle);
	if (error != 0) {
		device_printf(dev, "cannot setup %s interrupt, %d\n",
		    dmd->name, error);
		goto err4;
	}
	bus_describe_intr(dev, dmd->irq_res, dmd->intr_handle, "%s", dmd->name);
	error = PCIB_MAP_MSI(pcib, dev, dmd->irq, &msi_addr, &msi_data);
	if (error != 0) {
		device_printf(dev, "cannot map %s interrupt, %d\n",
		    dmd->name, error);
		goto err5;
	}

	dmd->msi_data = msi_data;
	dmd->msi_addr = msi_addr;

	return (0);

err5:
	bus_teardown_intr(dev, dmd->irq_res, dmd->intr_handle);
err4:
	bus_release_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq_res);
err3:
	bus_delete_resource(dev, SYS_RES_IRQ, dmd->irq_rid);
err2:
	PCIB_RELEASE_MSIX(pcib, dev, dmd->irq);
	dmd->irq = -1;
err1:
	return (error);
}

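/* Tear down and release the MSI-X interrupt with index idx, if any. */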
void
iommu_release_intr(struct iommu_unit *unit, int idx)
{
	device_t dev;
	struct iommu_msi_data *dmd;

	MPASS(idx >= 0 && idx < IOMMU_MAX_MSI);

	dmd = &IOMMU2X86C(unit)->intrs[idx];
	if (dmd->handler == NULL || dmd->irq == -1)
		return;
	dev = unit->dev;

	bus_teardown_intr(dev, dmd->irq_res, dmd->intr_handle);
	bus_release_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq_res);
	bus_delete_resource(dev, SYS_RES_IRQ, dmd->irq_rid);
	PCIB_RELEASE_MSIX(device_get_parent(device_get_parent(dev)),
	    dev, dmd->irq);
	dmd->irq = -1;
}

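/*
 * Initialize the busdma tag of a newly created context so that DMA
 * requests are routed through the IOMMU, with all limits capped at the
 * domain's addressable end.
 */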
void
iommu_device_tag_init(struct iommu_ctx *ctx, device_t dev)
{
	bus_addr_t maxaddr;

	maxaddr = MIN(ctx->domain->end, BUS_SPACE_MAXADDR);
	ctx->tag->common.impl = &bus_dma_iommu_impl;
	ctx->tag->common.boundary = 0;
	ctx->tag->common.lowaddr = maxaddr;
	ctx->tag->common.highaddr = maxaddr;
	ctx->tag->common.maxsize = maxaddr;
	ctx->tag->common.nsegments = BUS_SPACE_UNRESTRICTED;
	ctx->tag->common.maxsegsz = maxaddr;
	ctx->tag->ctx = ctx;
	ctx->tag->owner = dev;
}

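/*
 * Return the entry's guest address range to the domain and, depending
 * on the free argument, either free the entry or just clear its flags
 * for reuse.
 */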
void
iommu_domain_free_entry(struct iommu_map_entry *entry, bool free)
{
	if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
		iommu_gas_free_region(entry);
	else
		iommu_gas_free_space(entry);
	if (free)
		iommu_gas_free_entry(entry);
	else
		entry->flags = 0;
}

/*
 * Index of the pte for the guest address base in the page table at
 * the level lvl.
 */
int
pglvl_pgtbl_pte_off(int pglvl, iommu_gaddr_t base, int lvl)
{

	base >>= IOMMU_PAGE_SHIFT + (pglvl - lvl - 1) *
	    IOMMU_NPTEPGSHIFT;
	return (base & IOMMU_PTEMASK);
}

/*
 * Returns the page index of the page table page in the page table
 * object, which maps the given address base at the page table level
 * lvl.
 */
vm_pindex_t
pglvl_pgtbl_get_pindex(int pglvl, iommu_gaddr_t base, int lvl)
{
	vm_pindex_t idx, pidx;
	int i;

	KASSERT(lvl >= 0 && lvl < pglvl,
	    ("wrong lvl %d %d", pglvl, lvl));

	for (pidx = idx = 0, i = 0; i < lvl; i++, pidx = idx) {
		idx = pglvl_pgtbl_pte_off(pglvl, base, i) +
		    pidx * IOMMU_NPTEPG + 1;
	}
	return (idx);
}

/*
 * Calculate the total amount of page table pages needed to map the
 * whole bus address space on the context with the selected agaw.
 */
vm_pindex_t
pglvl_max_pages(int pglvl)
{
	vm_pindex_t res;
	int i;

	for (res = 0, i = pglvl; i > 0; i--) {
		res *= IOMMU_NPTEPG;
		res++;
	}
	return (res);
}

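/*
 * Return the size of the address range covered by a single pte at
 * level lvl in a page table with total_pglvl levels, i.e.
 * IOMMU_PAGE_SIZE << ((total_pglvl - lvl - 1) * IOMMU_NPTEPGSHIFT).
 */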
iommu_gaddr_t
pglvl_page_size(int total_pglvl, int lvl)
{
	int rlvl;
	static const iommu_gaddr_t pg_sz[] = {
		(iommu_gaddr_t)IOMMU_PAGE_SIZE,
		(iommu_gaddr_t)IOMMU_PAGE_SIZE << IOMMU_NPTEPGSHIFT,
		(iommu_gaddr_t)IOMMU_PAGE_SIZE << (2 * IOMMU_NPTEPGSHIFT),
		(iommu_gaddr_t)IOMMU_PAGE_SIZE << (3 * IOMMU_NPTEPGSHIFT),
		(iommu_gaddr_t)IOMMU_PAGE_SIZE << (4 * IOMMU_NPTEPGSHIFT),
		(iommu_gaddr_t)IOMMU_PAGE_SIZE << (5 * IOMMU_NPTEPGSHIFT),
		(iommu_gaddr_t)IOMMU_PAGE_SIZE << (6 * IOMMU_NPTEPGSHIFT),
	};

	KASSERT(lvl >= 0 && lvl < total_pglvl,
	    ("total %d lvl %d", total_pglvl, lvl));
	rlvl = total_pglvl - lvl - 1;
	KASSERT(rlvl < nitems(pg_sz), ("sizeof pg_sz lvl %d", lvl));
	return (pg_sz[rlvl]);
}

#ifdef DDB
#include <ddb/ddb.h>
#include <ddb/db_lex.h>

void
iommu_db_print_domain_entry(const struct iommu_map_entry *entry)
{
	struct iommu_map_entry *l, *r;

	db_printf(
	    "    start %jx end %jx first %jx last %jx free_down %jx flags %x ",
	    entry->start, entry->end, entry->first, entry->last,
	    entry->free_down, entry->flags);
	db_printf("left ");
	l = RB_LEFT(entry, rb_entry);
	if (l == NULL)
		db_printf("NULL ");
	else
		db_printf("%jx ", l->start);
	db_printf("right ");
	r = RB_RIGHT(entry, rb_entry);
	if (r == NULL)
		db_printf("NULL");
	else
		db_printf("%jx", r->start);
	db_printf("\n");
}

void
iommu_db_print_ctx(struct iommu_ctx *ctx)
{
	db_printf(
	    "    @%p pci%d:%d:%d refs %d flags %#x loads %lu unloads %lu\n",
	    ctx, pci_get_bus(ctx->tag->owner),
	    pci_get_slot(ctx->tag->owner),
	    pci_get_function(ctx->tag->owner), ctx->refs,
	    ctx->flags, ctx->loads, ctx->unloads);
}

void
iommu_db_domain_print_contexts(struct iommu_domain *iodom)
{
	struct iommu_ctx *ctx;

	if (LIST_EMPTY(&iodom->contexts))
		return;

	db_printf("  Contexts:\n");
	LIST_FOREACH(ctx, &iodom->contexts, link)
		iommu_db_print_ctx(ctx);
}

void
iommu_db_domain_print_mappings(struct iommu_domain *iodom)
{
	struct iommu_map_entry *entry;

	db_printf("    mapped:\n");
	RB_FOREACH(entry, iommu_gas_entries_tree, &iodom->rb_root) {
		iommu_db_print_domain_entry(entry);
		if (db_pager_quit)
			break;
	}
	if (db_pager_quit)
		return;
	db_printf("    unloading:\n");
	TAILQ_FOREACH(entry, &iodom->unload_entries, dmamap_link) {
		iommu_db_print_domain_entry(entry);
		if (db_pager_quit)
			break;
	}
}

#endif