xref: /freebsd/sys/x86/iommu/iommu_utils.c (revision 53bb5613a8a15363718b6e6de8d965bf9a2c5469)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2013, 2014, 2024 The FreeBSD Foundation
5  *
6  * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
7  * under sponsorship from the FreeBSD Foundation.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include "opt_acpi.h"
32 #if defined(__amd64__)
33 #define	DEV_APIC
34 #else
35 #include "opt_apic.h"
36 #endif
37 
38 #include <sys/systm.h>
39 #include <sys/bus.h>
40 #include <sys/kernel.h>
41 #include <sys/lock.h>
42 #include <sys/malloc.h>
43 #include <sys/memdesc.h>
44 #include <sys/mutex.h>
45 #include <sys/sf_buf.h>
46 #include <sys/sysctl.h>
47 #include <sys/proc.h>
48 #include <sys/sched.h>
49 #include <sys/rman.h>
50 #include <sys/rwlock.h>
51 #include <sys/taskqueue.h>
52 #include <sys/tree.h>
53 #include <vm/vm.h>
54 #include <vm/vm_extern.h>
55 #include <vm/vm_kern.h>
56 #include <vm/vm_map.h>
57 #include <vm/vm_object.h>
58 #include <vm/vm_page.h>
59 #include <dev/pci/pcireg.h>
60 #include <dev/pci/pcivar.h>
61 #include <machine/atomic.h>
62 #include <machine/bus.h>
63 #include <machine/cpu.h>
64 #include <x86/include/busdma_impl.h>
65 #include <dev/iommu/busdma_iommu.h>
66 #include <dev/iommu/iommu.h>
67 #include <x86/iommu/x86_iommu.h>
68 #include <x86/iommu/iommu_intrmap.h>
69 #ifdef DEV_APIC
70 #include "pcib_if.h"
71 #include <machine/intr_machdep.h>
72 #include <x86/apicreg.h>
73 #include <x86/apicvar.h>
74 #endif
75 
76 vm_page_t
77 iommu_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags)
78 {
79 	vm_page_t m;
80 	int zeroed, aflags;
81 
82 	zeroed = (flags & IOMMU_PGF_ZERO) != 0 ? VM_ALLOC_ZERO : 0;
83 	aflags = zeroed | VM_ALLOC_NOBUSY | VM_ALLOC_SYSTEM | VM_ALLOC_NODUMP |
84 	    ((flags & IOMMU_PGF_WAITOK) != 0 ? VM_ALLOC_WAITFAIL :
85 	    VM_ALLOC_NOWAIT);
86 	for (;;) {
87 		if ((flags & IOMMU_PGF_OBJL) == 0)
88 			VM_OBJECT_WLOCK(obj);
89 		m = vm_page_lookup(obj, idx);
90 		if ((flags & IOMMU_PGF_NOALLOC) != 0 || m != NULL) {
91 			if ((flags & IOMMU_PGF_OBJL) == 0)
92 				VM_OBJECT_WUNLOCK(obj);
93 			break;
94 		}
95 		m = vm_page_alloc_contig(obj, idx, aflags, 1, 0,
96 		    iommu_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
97 		if ((flags & IOMMU_PGF_OBJL) == 0)
98 			VM_OBJECT_WUNLOCK(obj);
99 		if (m != NULL) {
100 			if (zeroed && (m->flags & PG_ZERO) == 0)
101 				pmap_zero_page(m);
102 			atomic_add_int(&iommu_tbl_pagecnt, 1);
103 			break;
104 		}
105 		if ((flags & IOMMU_PGF_WAITOK) == 0)
106 			break;
107 	}
108 	return (m);
109 }
110 
111 void
112 iommu_pgfree(vm_object_t obj, vm_pindex_t idx, int flags,
113     struct iommu_map_entry *entry)
114 {
115 	vm_page_t m;
116 
117 	if ((flags & IOMMU_PGF_OBJL) == 0)
118 		VM_OBJECT_WLOCK(obj);
119 	m = vm_page_grab(obj, idx, VM_ALLOC_NOCREAT);
120 	if (m != NULL) {
121 		if (entry == NULL) {
122 			vm_page_free(m);
123 			atomic_subtract_int(&iommu_tbl_pagecnt, 1);
124 		} else {
125 			vm_page_remove_xbusy(m);	/* keep page busy */
126 			SLIST_INSERT_HEAD(&entry->pgtbl_free, m, plinks.s.ss);
127 		}
128 	}
129 	if ((flags & IOMMU_PGF_OBJL) == 0)
130 		VM_OBJECT_WUNLOCK(obj);
131 }
132 
133 void *
134 iommu_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags,
135     struct sf_buf **sf)
136 {
137 	vm_page_t m;
138 	bool allocated;
139 
140 	if ((flags & IOMMU_PGF_OBJL) == 0)
141 		VM_OBJECT_WLOCK(obj);
142 	m = vm_page_lookup(obj, idx);
143 	if (m == NULL && (flags & IOMMU_PGF_ALLOC) != 0) {
144 		m = iommu_pgalloc(obj, idx, flags | IOMMU_PGF_OBJL);
145 		allocated = true;
146 	} else
147 		allocated = false;
148 	if (m == NULL) {
149 		if ((flags & IOMMU_PGF_OBJL) == 0)
150 			VM_OBJECT_WUNLOCK(obj);
151 		return (NULL);
152 	}
153 	/* Sleepable allocations cannot fail. */
154 	if ((flags & IOMMU_PGF_WAITOK) != 0)
155 		VM_OBJECT_WUNLOCK(obj);
156 	sched_pin();
157 	*sf = sf_buf_alloc(m, SFB_CPUPRIVATE | ((flags & IOMMU_PGF_WAITOK)
158 	    == 0 ? SFB_NOWAIT : 0));
159 	if (*sf == NULL) {
160 		sched_unpin();
161 		if (allocated) {
162 			VM_OBJECT_ASSERT_WLOCKED(obj);
163 			iommu_pgfree(obj, m->pindex, flags | IOMMU_PGF_OBJL,
164 			    NULL);
165 		}
166 		if ((flags & IOMMU_PGF_OBJL) == 0)
167 			VM_OBJECT_WUNLOCK(obj);
168 		return (NULL);
169 	}
170 	if ((flags & (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL)) ==
171 	    (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL))
172 		VM_OBJECT_WLOCK(obj);
173 	else if ((flags & (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL)) == 0)
174 		VM_OBJECT_WUNLOCK(obj);
175 	return ((void *)sf_buf_kva(*sf));
176 }
177 
178 void
179 iommu_unmap_pgtbl(struct sf_buf *sf)
180 {
181 
182 	sf_buf_free(sf);
183 	sched_unpin();
184 }
185 
186 iommu_haddr_t iommu_high;
187 int iommu_tbl_pagecnt;
188 
189 SYSCTL_NODE(_hw_iommu, OID_AUTO, dmar, CTLFLAG_RD | CTLFLAG_MPSAFE,
190     NULL, "");
191 SYSCTL_INT(_hw_iommu, OID_AUTO, tbl_pagecnt, CTLFLAG_RD,
192     &iommu_tbl_pagecnt, 0,
193     "Count of pages used for IOMMU pagetables");
194 
195 int iommu_qi_batch_coalesce = 100;
196 SYSCTL_INT(_hw_iommu, OID_AUTO, batch_coalesce, CTLFLAG_RWTUN,
197     &iommu_qi_batch_coalesce, 0,
198     "Number of qi batches between interrupt");
199 
200 static struct iommu_unit *
201 x86_no_iommu_find(device_t dev, bool verbose)
202 {
203 	return (NULL);
204 }
205 
206 static int
207 x86_no_iommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count)
208 {
209 	return (EOPNOTSUPP);
210 }
211 
212 static int
213 x86_no_iommu_map_msi_intr(device_t src, u_int cpu, u_int vector,
214     u_int cookie, uint64_t *addr, uint32_t *data)
215 {
216 	return (EOPNOTSUPP);
217 }
218 
219 static int
220 x86_no_iommu_unmap_msi_intr(device_t src, u_int cookie)
221 {
222 	return (0);
223 }
224 
225 static int
226 x86_no_iommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector,
227     bool edge, bool activehi, int irq, u_int *cookie, uint32_t *hi,
228     uint32_t *lo)
229 {
230 	return (EOPNOTSUPP);
231 }
232 
233 static int
234 x86_no_iommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie)
235 {
236 	return (0);
237 }
238 
239 static struct x86_iommu x86_no_iommu = {
240 	.find = x86_no_iommu_find,
241 	.alloc_msi_intr = x86_no_iommu_alloc_msi_intr,
242 	.map_msi_intr = x86_no_iommu_map_msi_intr,
243 	.unmap_msi_intr = x86_no_iommu_unmap_msi_intr,
244 	.map_ioapic_intr = x86_no_iommu_map_ioapic_intr,
245 	.unmap_ioapic_intr = x86_no_iommu_unmap_ioapic_intr,
246 };
247 
248 static struct x86_iommu *x86_iommu = &x86_no_iommu;
249 
250 void
251 set_x86_iommu(struct x86_iommu *x)
252 {
253 	MPASS(x86_iommu == &x86_no_iommu);
254 	x86_iommu = x;
255 }
256 
257 struct x86_iommu *
258 get_x86_iommu(void)
259 {
260 	return (x86_iommu);
261 }
262 
263 void
264 iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
265     bool cansleep)
266 {
267 	x86_iommu->domain_unload_entry(entry, free, cansleep);
268 }
269 
270 void
271 iommu_domain_unload(struct iommu_domain *iodom,
272     struct iommu_map_entries_tailq *entries, bool cansleep)
273 {
274 	x86_iommu->domain_unload(iodom, entries, cansleep);
275 }
276 
277 struct iommu_ctx *
278 iommu_get_ctx(struct iommu_unit *iommu, device_t dev, uint16_t rid,
279     bool id_mapped, bool rmrr_init)
280 {
281 	return (x86_iommu->get_ctx(iommu, dev, rid, id_mapped, rmrr_init));
282 }
283 
284 void
285 iommu_free_ctx_locked(struct iommu_unit *iommu, struct iommu_ctx *context)
286 {
287 	x86_iommu->free_ctx_locked(iommu, context);
288 }
289 
290 void
291 iommu_free_ctx(struct iommu_ctx *context)
292 {
293 	x86_iommu->free_ctx(context);
294 }
295 
296 struct iommu_unit *
297 iommu_find(device_t dev, bool verbose)
298 {
299 	return (x86_iommu->find(dev, verbose));
300 }
301 
302 int
303 iommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count)
304 {
305 	return (x86_iommu->alloc_msi_intr(src, cookies, count));
306 }
307 
308 int
309 iommu_map_msi_intr(device_t src, u_int cpu, u_int vector, u_int cookie,
310     uint64_t *addr, uint32_t *data)
311 {
312 	return (x86_iommu->map_msi_intr(src, cpu, vector, cookie,
313 	    addr, data));
314 }
315 
316 int
317 iommu_unmap_msi_intr(device_t src, u_int cookie)
318 {
319 	return (x86_iommu->unmap_msi_intr(src, cookie));
320 }
321 
322 int
323 iommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector, bool edge,
324     bool activehi, int irq, u_int *cookie, uint32_t *hi, uint32_t *lo)
325 {
326 	return (x86_iommu->map_ioapic_intr(ioapic_id, cpu, vector, edge,
327 	    activehi, irq, cookie, hi, lo));
328 }
329 
330 int
331 iommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie)
332 {
333 	return (x86_iommu->unmap_ioapic_intr(ioapic_id, cookie));
334 }
335 
336 void
337 iommu_unit_pre_instantiate_ctx(struct iommu_unit *unit)
338 {
339 	x86_iommu->unit_pre_instantiate_ctx(unit);
340 }
341 
342 #define	IOMMU2X86C(iommu)	(x86_iommu->get_x86_common(iommu))
343 
344 static bool
345 iommu_qi_seq_processed(struct iommu_unit *unit,
346     const struct iommu_qi_genseq *pseq)
347 {
348 	struct x86_unit_common *x86c;
349 	u_int gen;
350 
351 	x86c = IOMMU2X86C(unit);
352 	gen = x86c->inv_waitd_gen;
353 	return (pseq->gen < gen || (pseq->gen == gen && pseq->seq <=
354 	    atomic_load_64(&x86c->inv_waitd_seq_hw)));
355 }
356 
357 void
358 iommu_qi_emit_wait_seq(struct iommu_unit *unit, struct iommu_qi_genseq *pseq,
359     bool emit_wait)
360 {
361 	struct x86_unit_common *x86c;
362 	struct iommu_qi_genseq gsec;
363 	uint32_t seq;
364 
365 	KASSERT(pseq != NULL, ("wait descriptor with no place for seq"));
366 	IOMMU_ASSERT_LOCKED(unit);
367 	x86c = IOMMU2X86C(unit);
368 
369 	if (x86c->inv_waitd_seq == 0xffffffff) {
370 		gsec.gen = x86c->inv_waitd_gen;
371 		gsec.seq = x86c->inv_waitd_seq;
372 		x86_iommu->qi_ensure(unit, 1);
373 		x86_iommu->qi_emit_wait_descr(unit, gsec.seq, false,
374 		    true, false);
375 		x86_iommu->qi_advance_tail(unit);
376 		while (!iommu_qi_seq_processed(unit, &gsec))
377 			cpu_spinwait();
378 		x86c->inv_waitd_gen++;
379 		x86c->inv_waitd_seq = 1;
380 	}
381 	seq = x86c->inv_waitd_seq++;
382 	pseq->gen = x86c->inv_waitd_gen;
383 	pseq->seq = seq;
384 	if (emit_wait) {
385 		x86_iommu->qi_ensure(unit, 1);
386 		x86_iommu->qi_emit_wait_descr(unit, seq, true, true, false);
387 	}
388 }
389 
390 /*
391  * To avoid missed wakeups, callers must increment the unit's waiters count
392  * before advancing the tail past the wait descriptor.
393  */
394 void
395 iommu_qi_wait_for_seq(struct iommu_unit *unit, const struct iommu_qi_genseq *
396     gseq, bool nowait)
397 {
398 	struct x86_unit_common *x86c;
399 
400 	IOMMU_ASSERT_LOCKED(unit);
401 	x86c = IOMMU2X86C(unit);
402 
403 	KASSERT(x86c->inv_seq_waiters > 0, ("%s: no waiters", __func__));
404 	while (!iommu_qi_seq_processed(unit, gseq)) {
405 		if (cold || nowait) {
406 			cpu_spinwait();
407 		} else {
408 			msleep(&x86c->inv_seq_waiters, &unit->lock, 0,
409 			    "dmarse", hz);
410 		}
411 	}
412 	x86c->inv_seq_waiters--;
413 }
414 
415 /*
416  * The caller must not be using the entry's dmamap_link field.
417  */
418 void
419 iommu_qi_invalidate_locked(struct iommu_domain *domain,
420     struct iommu_map_entry *entry, bool emit_wait)
421 {
422 	struct iommu_unit *unit;
423 	struct x86_unit_common *x86c;
424 
425 	unit = domain->iommu;
426 	x86c = IOMMU2X86C(unit);
427 	IOMMU_ASSERT_LOCKED(unit);
428 
429 	x86_iommu->qi_invalidate_emit(domain, entry->start, entry->end -
430 	    entry->start, &entry->gseq, emit_wait);
431 
432 	/*
433 	 * To avoid a data race in dmar_qi_task(), the entry's gseq must be
434 	 * initialized before the entry is added to the TLB flush list, and the
435 	 * entry must be added to that list before the tail is advanced.  More
436 	 * precisely, the tail must not be advanced past the wait descriptor
437 	 * that will generate the interrupt that schedules dmar_qi_task() for
438 	 * execution before the entry is added to the list.  While an earlier
439 	 * call to dmar_qi_ensure() might have advanced the tail, it will not
440 	 * advance it past the wait descriptor.
441 	 *
442 	 * See the definition of struct dmar_unit for more information on
443 	 * synchronization.
444 	 */
445 	entry->tlb_flush_next = NULL;
446 	atomic_store_rel_ptr((uintptr_t *)&x86c->tlb_flush_tail->
447 	    tlb_flush_next, (uintptr_t)entry);
448 	x86c->tlb_flush_tail = entry;
449 
450 	x86_iommu->qi_advance_tail(unit);
451 }
452 
453 void
454 iommu_qi_invalidate_sync(struct iommu_domain *domain, iommu_gaddr_t base,
455     iommu_gaddr_t size, bool cansleep)
456 {
457 	struct iommu_unit *unit;
458 	struct iommu_qi_genseq gseq;
459 
460 	unit = domain->iommu;
461 	IOMMU_LOCK(unit);
462 	x86_iommu->qi_invalidate_emit(domain, base, size, &gseq, true);
463 
464 	/*
465 	 * To avoid a missed wakeup in iommu_qi_task(), the unit's
466 	 * waiters count must be incremented before the tail is
467 	 * advanced.
468 	 */
469 	IOMMU2X86C(unit)->inv_seq_waiters++;
470 
471 	x86_iommu->qi_advance_tail(unit);
472 	iommu_qi_wait_for_seq(unit, &gseq, !cansleep);
473 	IOMMU_UNLOCK(unit);
474 }
475 
476 void
477 iommu_qi_drain_tlb_flush(struct iommu_unit *unit)
478 {
479 	struct x86_unit_common *x86c;
480 	struct iommu_map_entry *entry, *head;
481 
482 	x86c = IOMMU2X86C(unit);
483 	for (head = x86c->tlb_flush_head;; head = entry) {
484 		entry = (struct iommu_map_entry *)
485 		    atomic_load_acq_ptr((uintptr_t *)&head->tlb_flush_next);
486 		if (entry == NULL ||
487 		    !iommu_qi_seq_processed(unit, &entry->gseq))
488 			break;
489 		x86c->tlb_flush_head = entry;
490 		iommu_gas_free_entry(head);
491 		if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
492 			iommu_gas_free_region(entry);
493 		else
494 			iommu_gas_free_space(entry);
495 	}
496 }
497 
498 void
499 iommu_qi_common_init(struct iommu_unit *unit, task_fn_t qi_task)
500 {
501 	struct x86_unit_common *x86c;
502 	u_int qi_sz;
503 
504 	x86c = IOMMU2X86C(unit);
505 
506 	x86c->tlb_flush_head = x86c->tlb_flush_tail =
507             iommu_gas_alloc_entry(NULL, 0);
508 	TASK_INIT(&x86c->qi_task, 0, qi_task, unit);
509 	x86c->qi_taskqueue = taskqueue_create_fast("iommuqf", M_WAITOK,
510 	    taskqueue_thread_enqueue, &x86c->qi_taskqueue);
511 	taskqueue_start_threads(&x86c->qi_taskqueue, 1, PI_AV,
512 	    "iommu%d qi taskq", unit->unit);
513 
514 	x86c->inv_waitd_gen = 0;
515 	x86c->inv_waitd_seq = 1;
516 
517 	qi_sz = 3;
518 	TUNABLE_INT_FETCH("hw.iommu.qi_size", &qi_sz);
519 	if (qi_sz > x86c->qi_buf_maxsz)
520 		qi_sz = x86c->qi_buf_maxsz;
521 	x86c->inv_queue_size = (1ULL << qi_sz) * PAGE_SIZE;
522 	/* Reserve one descriptor to prevent wraparound. */
523 	x86c->inv_queue_avail = x86c->inv_queue_size -
524 	    x86c->qi_cmd_sz;
525 
526 	/*
527 	 * The invalidation queue reads by DMARs/AMDIOMMUs are always
528 	 * coherent.
529 	 */
530 	x86c->inv_queue = kmem_alloc_contig(x86c->inv_queue_size,
531 	    M_WAITOK | M_ZERO, 0, iommu_high, PAGE_SIZE, 0,
532 	    VM_MEMATTR_DEFAULT);
533 	x86c->inv_waitd_seq_hw_phys = pmap_kextract(
534 	    (vm_offset_t)&x86c->inv_waitd_seq_hw);
535 }
536 
537 void
538 iommu_qi_common_fini(struct iommu_unit *unit, void (*disable_qi)(
539     struct iommu_unit *))
540 {
541 	struct x86_unit_common *x86c;
542 	struct iommu_qi_genseq gseq;
543 
544 	x86c = IOMMU2X86C(unit);
545 
546 	taskqueue_drain(x86c->qi_taskqueue, &x86c->qi_task);
547 	taskqueue_free(x86c->qi_taskqueue);
548 	x86c->qi_taskqueue = NULL;
549 
550 	IOMMU_LOCK(unit);
551 	/* quisce */
552 	x86_iommu->qi_ensure(unit, 1);
553 	iommu_qi_emit_wait_seq(unit, &gseq, true);
554 	/* See iommu_qi_invalidate_locked(). */
555 	x86c->inv_seq_waiters++;
556 	x86_iommu->qi_advance_tail(unit);
557 	iommu_qi_wait_for_seq(unit, &gseq, false);
558 	/* only after the quisce, disable queue */
559 	disable_qi(unit);
560 	KASSERT(x86c->inv_seq_waiters == 0,
561 	    ("iommu%d: waiters on disabled queue", unit->unit));
562 	IOMMU_UNLOCK(unit);
563 
564 	kmem_free(x86c->inv_queue, x86c->inv_queue_size);
565 	x86c->inv_queue = NULL;
566 	x86c->inv_queue_size = 0;
567 }
568 
569 int
570 iommu_alloc_irq(struct iommu_unit *unit, int idx)
571 {
572 	device_t dev, pcib;
573 	struct iommu_msi_data *dmd;
574 	uint64_t msi_addr;
575 	uint32_t msi_data;
576 	int error;
577 
578 	MPASS(idx >= 0 || idx < IOMMU_MAX_MSI);
579 
580 	dev = unit->dev;
581 	dmd = &IOMMU2X86C(unit)->intrs[idx];
582 	pcib = device_get_parent(device_get_parent(dev)); /* Really not pcib */
583 	error = PCIB_ALLOC_MSIX(pcib, dev, &dmd->irq);
584 	if (error != 0) {
585 		device_printf(dev, "cannot allocate %s interrupt, %d\n",
586 		    dmd->name, error);
587 		goto err1;
588 	}
589 	error = bus_set_resource(dev, SYS_RES_IRQ, dmd->irq_rid,
590 	    dmd->irq, 1);
591 	if (error != 0) {
592 		device_printf(dev, "cannot set %s interrupt resource, %d\n",
593 		    dmd->name, error);
594 		goto err2;
595 	}
596 	dmd->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
597 	    &dmd->irq_rid, RF_ACTIVE);
598 	if (dmd->irq_res == NULL) {
599 		device_printf(dev,
600 		    "cannot allocate resource for %s interrupt\n", dmd->name);
601 		error = ENXIO;
602 		goto err3;
603 	}
604 	error = bus_setup_intr(dev, dmd->irq_res, INTR_TYPE_MISC,
605 	    dmd->handler, NULL, unit, &dmd->intr_handle);
606 	if (error != 0) {
607 		device_printf(dev, "cannot setup %s interrupt, %d\n",
608 		    dmd->name, error);
609 		goto err4;
610 	}
611 	bus_describe_intr(dev, dmd->irq_res, dmd->intr_handle, "%s", dmd->name);
612 	error = PCIB_MAP_MSI(pcib, dev, dmd->irq, &msi_addr, &msi_data);
613 	if (error != 0) {
614 		device_printf(dev, "cannot map %s interrupt, %d\n",
615 		    dmd->name, error);
616 		goto err5;
617 	}
618 
619 	dmd->msi_data = msi_data;
620 	dmd->msi_addr = msi_addr;
621 
622 	return (0);
623 
624 err5:
625 	bus_teardown_intr(dev, dmd->irq_res, dmd->intr_handle);
626 err4:
627 	bus_release_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq_res);
628 err3:
629 	bus_delete_resource(dev, SYS_RES_IRQ, dmd->irq_rid);
630 err2:
631 	PCIB_RELEASE_MSIX(pcib, dev, dmd->irq);
632 	dmd->irq = -1;
633 err1:
634 	return (error);
635 }
636 
637 void
638 iommu_release_intr(struct iommu_unit *unit, int idx)
639 {
640 	device_t dev;
641 	struct iommu_msi_data *dmd;
642 
643 	MPASS(idx >= 0 || idx < IOMMU_MAX_MSI);
644 
645 	dmd = &IOMMU2X86C(unit)->intrs[idx];
646 	if (dmd->handler == NULL || dmd->irq == -1)
647 		return;
648 	dev = unit->dev;
649 
650 	bus_teardown_intr(dev, dmd->irq_res, dmd->intr_handle);
651 	bus_release_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq_res);
652 	bus_delete_resource(dev, SYS_RES_IRQ, dmd->irq_rid);
653 	PCIB_RELEASE_MSIX(device_get_parent(device_get_parent(dev)),
654 	    dev, dmd->irq);
655 	dmd->irq = -1;
656 }
657 
658 void
659 iommu_device_tag_init(struct iommu_ctx *ctx, device_t dev)
660 {
661 	bus_addr_t maxaddr;
662 
663 	maxaddr = MIN(ctx->domain->end, BUS_SPACE_MAXADDR);
664 	ctx->tag->common.impl = &bus_dma_iommu_impl;
665 	ctx->tag->common.boundary = 0;
666 	ctx->tag->common.lowaddr = maxaddr;
667 	ctx->tag->common.highaddr = maxaddr;
668 	ctx->tag->common.maxsize = maxaddr;
669 	ctx->tag->common.nsegments = BUS_SPACE_UNRESTRICTED;
670 	ctx->tag->common.maxsegsz = maxaddr;
671 	ctx->tag->ctx = ctx;
672 	ctx->tag->owner = dev;
673 }
674 
675 void
676 iommu_domain_free_entry(struct iommu_map_entry *entry, bool free)
677 {
678 	if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
679 		iommu_gas_free_region(entry);
680 	else
681 		iommu_gas_free_space(entry);
682 	if (free)
683 		iommu_gas_free_entry(entry);
684 	else
685 		entry->flags = 0;
686 }
687 
688 /*
689  * Index of the pte for the guest address base in the page table at
690  * the level lvl.
691  */
692 int
693 pglvl_pgtbl_pte_off(int pglvl, iommu_gaddr_t base, int lvl)
694 {
695 
696 	base >>= IOMMU_PAGE_SHIFT + (pglvl - lvl - 1) *
697 	    IOMMU_NPTEPGSHIFT;
698 	return (base & IOMMU_PTEMASK);
699 }
700 
701 /*
702  * Returns the page index of the page table page in the page table
703  * object, which maps the given address base at the page table level
704  * lvl.
705  */
706 vm_pindex_t
707 pglvl_pgtbl_get_pindex(int pglvl, iommu_gaddr_t base, int lvl)
708 {
709 	vm_pindex_t idx, pidx;
710 	int i;
711 
712 	KASSERT(lvl >= 0 && lvl < pglvl,
713 	    ("wrong lvl %d %d", pglvl, lvl));
714 
715 	for (pidx = idx = 0, i = 0; i < lvl; i++, pidx = idx) {
716 		idx = pglvl_pgtbl_pte_off(pglvl, base, i) +
717 		    pidx * IOMMU_NPTEPG + 1;
718 	}
719 	return (idx);
720 }
721 
722 /*
723  * Calculate the total amount of page table pages needed to map the
724  * whole bus address space on the context with the selected agaw.
725  */
726 vm_pindex_t
727 pglvl_max_pages(int pglvl)
728 {
729 	vm_pindex_t res;
730 	int i;
731 
732 	for (res = 0, i = pglvl; i > 0; i--) {
733 		res *= IOMMU_NPTEPG;
734 		res++;
735 	}
736 	return (res);
737 }
738 
739 iommu_gaddr_t
740 pglvl_page_size(int total_pglvl, int lvl)
741 {
742 	int rlvl;
743 	static const iommu_gaddr_t pg_sz[] = {
744 		(iommu_gaddr_t)IOMMU_PAGE_SIZE,
745 		(iommu_gaddr_t)IOMMU_PAGE_SIZE << IOMMU_NPTEPGSHIFT,
746 		(iommu_gaddr_t)IOMMU_PAGE_SIZE << (2 * IOMMU_NPTEPGSHIFT),
747 		(iommu_gaddr_t)IOMMU_PAGE_SIZE << (3 * IOMMU_NPTEPGSHIFT),
748 		(iommu_gaddr_t)IOMMU_PAGE_SIZE << (4 * IOMMU_NPTEPGSHIFT),
749 		(iommu_gaddr_t)IOMMU_PAGE_SIZE << (5 * IOMMU_NPTEPGSHIFT),
750 		(iommu_gaddr_t)IOMMU_PAGE_SIZE << (6 * IOMMU_NPTEPGSHIFT),
751 	};
752 
753 	KASSERT(lvl >= 0 && lvl < total_pglvl,
754 	    ("total %d lvl %d", total_pglvl, lvl));
755 	rlvl = total_pglvl - lvl - 1;
756 	KASSERT(rlvl < nitems(pg_sz), ("sizeof pg_sz lvl %d", lvl));
757 	return (pg_sz[rlvl]);
758 }
759