xref: /freebsd/sys/x86/iommu/intel_utils.c (revision e6bfd18d21b225af6a0ed67ceeaf1293b7b9eba5)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2013 The FreeBSD Foundation
5  *
6  * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
7  * under sponsorship from the FreeBSD Foundation.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include <sys/param.h>
35 #include <sys/bus.h>
36 #include <sys/kernel.h>
37 #include <sys/lock.h>
38 #include <sys/malloc.h>
39 #include <sys/memdesc.h>
40 #include <sys/mutex.h>
41 #include <sys/proc.h>
42 #include <sys/queue.h>
43 #include <sys/rman.h>
44 #include <sys/rwlock.h>
45 #include <sys/sched.h>
46 #include <sys/sf_buf.h>
47 #include <sys/sysctl.h>
48 #include <sys/systm.h>
49 #include <sys/taskqueue.h>
50 #include <sys/time.h>
51 #include <sys/tree.h>
52 #include <sys/vmem.h>
53 #include <vm/vm.h>
54 #include <vm/vm_extern.h>
55 #include <vm/vm_kern.h>
56 #include <vm/vm_object.h>
57 #include <vm/vm_page.h>
58 #include <vm/vm_map.h>
59 #include <vm/vm_pageout.h>
60 #include <dev/pci/pcireg.h>
61 #include <dev/pci/pcivar.h>
62 #include <machine/bus.h>
63 #include <machine/cpu.h>
64 #include <machine/intr_machdep.h>
65 #include <x86/include/apicvar.h>
66 #include <x86/include/busdma_impl.h>
67 #include <dev/iommu/busdma_iommu.h>
68 #include <x86/iommu/intel_reg.h>
69 #include <x86/iommu/intel_dmar.h>
70 
/*
 * Convert the ND (number of domains supported) encoding from the
 * capability register into a mask covering the valid domain-id bits.
 *
 * The VT-d specification defines the domain-id width as (4 + 2 * nd)
 * bits for nd in [0, 6]; nd == 7 is reserved and maps to an empty mask.
 * The caller must not pass a value above 6.
 */
u_int
dmar_nd2mask(u_int nd)
{
	static const u_int masks[] = {
		0x000f,	/* nd == 0:  4-bit domain-id */
		0x003f,	/* nd == 1:  6-bit domain-id */
		0x00ff,	/* nd == 2:  8-bit domain-id */
		0x03ff,	/* nd == 3: 10-bit domain-id */
		0x0fff,	/* nd == 4: 12-bit domain-id */
		0x3fff,	/* nd == 5: 14-bit domain-id */
		0xffff,	/* nd == 6: 16-bit domain-id */
		0x0000,	/* nd == 7 reserved */
	};

	KASSERT(nd <= 6, ("number of domains %u", nd));
	return (masks[nd]);
}
88 
89 static const struct sagaw_bits_tag {
90 	int agaw;
91 	int cap;
92 	int awlvl;
93 	int pglvl;
94 } sagaw_bits[] = {
95 	{.agaw = 30, .cap = DMAR_CAP_SAGAW_2LVL, .awlvl = DMAR_CTX2_AW_2LVL,
96 	    .pglvl = 2},
97 	{.agaw = 39, .cap = DMAR_CAP_SAGAW_3LVL, .awlvl = DMAR_CTX2_AW_3LVL,
98 	    .pglvl = 3},
99 	{.agaw = 48, .cap = DMAR_CAP_SAGAW_4LVL, .awlvl = DMAR_CTX2_AW_4LVL,
100 	    .pglvl = 4},
101 	{.agaw = 57, .cap = DMAR_CAP_SAGAW_5LVL, .awlvl = DMAR_CTX2_AW_5LVL,
102 	    .pglvl = 5}
103 	/*
104 	 * 6-level paging (DMAR_CAP_SAGAW_6LVL) is not supported on any
105 	 * current VT-d hardware and its SAGAW field value is listed as
106 	 * reserved in the VT-d spec.  If support is added in the future,
107 	 * this structure and the logic in dmar_maxaddr2mgaw() will need
108 	 * to change to avoid attempted comparison against 1ULL << 64.
109 	 */
110 };
111 
112 bool
113 dmar_pglvl_supported(struct dmar_unit *unit, int pglvl)
114 {
115 	int i;
116 
117 	for (i = 0; i < nitems(sagaw_bits); i++) {
118 		if (sagaw_bits[i].pglvl != pglvl)
119 			continue;
120 		if ((DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap) != 0)
121 			return (true);
122 	}
123 	return (false);
124 }
125 
126 int
127 domain_set_agaw(struct dmar_domain *domain, int mgaw)
128 {
129 	int sagaw, i;
130 
131 	domain->mgaw = mgaw;
132 	sagaw = DMAR_CAP_SAGAW(domain->dmar->hw_cap);
133 	for (i = 0; i < nitems(sagaw_bits); i++) {
134 		if (sagaw_bits[i].agaw >= mgaw) {
135 			domain->agaw = sagaw_bits[i].agaw;
136 			domain->pglvl = sagaw_bits[i].pglvl;
137 			domain->awlvl = sagaw_bits[i].awlvl;
138 			return (0);
139 		}
140 	}
141 	device_printf(domain->dmar->dev,
142 	    "context request mgaw %d: no agaw found, sagaw %x\n",
143 	    mgaw, sagaw);
144 	return (EINVAL);
145 }
146 
147 /*
148  * Find a best fit mgaw for the given maxaddr:
149  *   - if allow_less is false, must find sagaw which maps all requested
150  *     addresses (used by identity mappings);
151  *   - if allow_less is true, and no supported sagaw can map all requested
152  *     address space, accept the biggest sagaw, whatever is it.
153  */
154 int
155 dmar_maxaddr2mgaw(struct dmar_unit *unit, iommu_gaddr_t maxaddr, bool allow_less)
156 {
157 	int i;
158 
159 	for (i = 0; i < nitems(sagaw_bits); i++) {
160 		if ((1ULL << sagaw_bits[i].agaw) >= maxaddr &&
161 		    (DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap) != 0)
162 			break;
163 	}
164 	if (allow_less && i == nitems(sagaw_bits)) {
165 		do {
166 			i--;
167 		} while ((DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap)
168 		    == 0);
169 	}
170 	if (i < nitems(sagaw_bits))
171 		return (sagaw_bits[i].agaw);
172 	KASSERT(0, ("no mgaw for maxaddr %jx allow_less %d",
173 	    (uintmax_t) maxaddr, allow_less));
174 	return (-1);
175 }
176 
177 /*
178  * Calculate the total amount of page table pages needed to map the
179  * whole bus address space on the context with the selected agaw.
180  */
181 vm_pindex_t
182 pglvl_max_pages(int pglvl)
183 {
184 	vm_pindex_t res;
185 	int i;
186 
187 	for (res = 0, i = pglvl; i > 0; i--) {
188 		res *= DMAR_NPTEPG;
189 		res++;
190 	}
191 	return (res);
192 }
193 
194 /*
195  * Return true if the page table level lvl supports the superpage for
196  * the context ctx.
197  */
198 int
199 domain_is_sp_lvl(struct dmar_domain *domain, int lvl)
200 {
201 	int alvl, cap_sps;
202 	static const int sagaw_sp[] = {
203 		DMAR_CAP_SPS_2M,
204 		DMAR_CAP_SPS_1G,
205 		DMAR_CAP_SPS_512G,
206 		DMAR_CAP_SPS_1T
207 	};
208 
209 	alvl = domain->pglvl - lvl - 1;
210 	cap_sps = DMAR_CAP_SPS(domain->dmar->hw_cap);
211 	return (alvl < nitems(sagaw_sp) && (sagaw_sp[alvl] & cap_sps) != 0);
212 }
213 
214 iommu_gaddr_t
215 pglvl_page_size(int total_pglvl, int lvl)
216 {
217 	int rlvl;
218 	static const iommu_gaddr_t pg_sz[] = {
219 		(iommu_gaddr_t)DMAR_PAGE_SIZE,
220 		(iommu_gaddr_t)DMAR_PAGE_SIZE << DMAR_NPTEPGSHIFT,
221 		(iommu_gaddr_t)DMAR_PAGE_SIZE << (2 * DMAR_NPTEPGSHIFT),
222 		(iommu_gaddr_t)DMAR_PAGE_SIZE << (3 * DMAR_NPTEPGSHIFT),
223 		(iommu_gaddr_t)DMAR_PAGE_SIZE << (4 * DMAR_NPTEPGSHIFT),
224 		(iommu_gaddr_t)DMAR_PAGE_SIZE << (5 * DMAR_NPTEPGSHIFT)
225 	};
226 
227 	KASSERT(lvl >= 0 && lvl < total_pglvl,
228 	    ("total %d lvl %d", total_pglvl, lvl));
229 	rlvl = total_pglvl - lvl - 1;
230 	KASSERT(rlvl < nitems(pg_sz), ("sizeof pg_sz lvl %d", lvl));
231 	return (pg_sz[rlvl]);
232 }
233 
234 iommu_gaddr_t
235 domain_page_size(struct dmar_domain *domain, int lvl)
236 {
237 
238 	return (pglvl_page_size(domain->pglvl, lvl));
239 }
240 
241 int
242 calc_am(struct dmar_unit *unit, iommu_gaddr_t base, iommu_gaddr_t size,
243     iommu_gaddr_t *isizep)
244 {
245 	iommu_gaddr_t isize;
246 	int am;
247 
248 	for (am = DMAR_CAP_MAMV(unit->hw_cap);; am--) {
249 		isize = 1ULL << (am + DMAR_PAGE_SHIFT);
250 		if ((base & (isize - 1)) == 0 && size >= isize)
251 			break;
252 		if (am == 0)
253 			break;
254 	}
255 	*isizep = isize;
256 	return (am);
257 }
258 
259 iommu_haddr_t dmar_high;
260 int haw;
261 int dmar_tbl_pagecnt;
262 
263 vm_page_t
264 dmar_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags)
265 {
266 	vm_page_t m;
267 	int zeroed, aflags;
268 
269 	zeroed = (flags & IOMMU_PGF_ZERO) != 0 ? VM_ALLOC_ZERO : 0;
270 	aflags = zeroed | VM_ALLOC_NOBUSY | VM_ALLOC_SYSTEM | VM_ALLOC_NODUMP |
271 	    ((flags & IOMMU_PGF_WAITOK) != 0 ? VM_ALLOC_WAITFAIL :
272 	    VM_ALLOC_NOWAIT);
273 	for (;;) {
274 		if ((flags & IOMMU_PGF_OBJL) == 0)
275 			VM_OBJECT_WLOCK(obj);
276 		m = vm_page_lookup(obj, idx);
277 		if ((flags & IOMMU_PGF_NOALLOC) != 0 || m != NULL) {
278 			if ((flags & IOMMU_PGF_OBJL) == 0)
279 				VM_OBJECT_WUNLOCK(obj);
280 			break;
281 		}
282 		m = vm_page_alloc_contig(obj, idx, aflags, 1, 0,
283 		    dmar_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
284 		if ((flags & IOMMU_PGF_OBJL) == 0)
285 			VM_OBJECT_WUNLOCK(obj);
286 		if (m != NULL) {
287 			if (zeroed && (m->flags & PG_ZERO) == 0)
288 				pmap_zero_page(m);
289 			atomic_add_int(&dmar_tbl_pagecnt, 1);
290 			break;
291 		}
292 		if ((flags & IOMMU_PGF_WAITOK) == 0)
293 			break;
294 	}
295 	return (m);
296 }
297 
298 void
299 dmar_pgfree(vm_object_t obj, vm_pindex_t idx, int flags)
300 {
301 	vm_page_t m;
302 
303 	if ((flags & IOMMU_PGF_OBJL) == 0)
304 		VM_OBJECT_WLOCK(obj);
305 	m = vm_page_grab(obj, idx, VM_ALLOC_NOCREAT);
306 	if (m != NULL) {
307 		vm_page_free(m);
308 		atomic_subtract_int(&dmar_tbl_pagecnt, 1);
309 	}
310 	if ((flags & IOMMU_PGF_OBJL) == 0)
311 		VM_OBJECT_WUNLOCK(obj);
312 }
313 
314 void *
315 dmar_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags,
316     struct sf_buf **sf)
317 {
318 	vm_page_t m;
319 	bool allocated;
320 
321 	if ((flags & IOMMU_PGF_OBJL) == 0)
322 		VM_OBJECT_WLOCK(obj);
323 	m = vm_page_lookup(obj, idx);
324 	if (m == NULL && (flags & IOMMU_PGF_ALLOC) != 0) {
325 		m = dmar_pgalloc(obj, idx, flags | IOMMU_PGF_OBJL);
326 		allocated = true;
327 	} else
328 		allocated = false;
329 	if (m == NULL) {
330 		if ((flags & IOMMU_PGF_OBJL) == 0)
331 			VM_OBJECT_WUNLOCK(obj);
332 		return (NULL);
333 	}
334 	/* Sleepable allocations cannot fail. */
335 	if ((flags & IOMMU_PGF_WAITOK) != 0)
336 		VM_OBJECT_WUNLOCK(obj);
337 	sched_pin();
338 	*sf = sf_buf_alloc(m, SFB_CPUPRIVATE | ((flags & IOMMU_PGF_WAITOK)
339 	    == 0 ? SFB_NOWAIT : 0));
340 	if (*sf == NULL) {
341 		sched_unpin();
342 		if (allocated) {
343 			VM_OBJECT_ASSERT_WLOCKED(obj);
344 			dmar_pgfree(obj, m->pindex, flags | IOMMU_PGF_OBJL);
345 		}
346 		if ((flags & IOMMU_PGF_OBJL) == 0)
347 			VM_OBJECT_WUNLOCK(obj);
348 		return (NULL);
349 	}
350 	if ((flags & (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL)) ==
351 	    (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL))
352 		VM_OBJECT_WLOCK(obj);
353 	else if ((flags & (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL)) == 0)
354 		VM_OBJECT_WUNLOCK(obj);
355 	return ((void *)sf_buf_kva(*sf));
356 }
357 
/*
 * Undo a successful dmar_map_pgtbl(): release the sf_buf mapping and
 * drop the CPU pin taken when the mapping was created.
 */
void
dmar_unmap_pgtbl(struct sf_buf *sf)
{

	sf_buf_free(sf);	/* tear down the mapping first */
	sched_unpin();		/* pairs with sched_pin() in dmar_map_pgtbl() */
}
365 
366 static void
367 dmar_flush_transl_to_ram(struct dmar_unit *unit, void *dst, size_t sz)
368 {
369 
370 	if (DMAR_IS_COHERENT(unit))
371 		return;
372 	/*
373 	 * If DMAR does not snoop paging structures accesses, flush
374 	 * CPU cache to memory.
375 	 */
376 	pmap_force_invalidate_cache_range((uintptr_t)dst, (uintptr_t)dst + sz);
377 }
378 
379 void
380 dmar_flush_pte_to_ram(struct dmar_unit *unit, dmar_pte_t *dst)
381 {
382 
383 	dmar_flush_transl_to_ram(unit, dst, sizeof(*dst));
384 }
385 
386 void
387 dmar_flush_ctx_to_ram(struct dmar_unit *unit, dmar_ctx_entry_t *dst)
388 {
389 
390 	dmar_flush_transl_to_ram(unit, dst, sizeof(*dst));
391 }
392 
393 void
394 dmar_flush_root_to_ram(struct dmar_unit *unit, dmar_root_entry_t *dst)
395 {
396 
397 	dmar_flush_transl_to_ram(unit, dst, sizeof(*dst));
398 }
399 
400 /*
401  * Load the root entry pointer into the hardware, busily waiting for
402  * the completion.
403  */
404 int
405 dmar_load_root_entry_ptr(struct dmar_unit *unit)
406 {
407 	vm_page_t root_entry;
408 	int error;
409 
410 	/*
411 	 * Access to the GCMD register must be serialized while the
412 	 * command is submitted.
413 	 */
414 	DMAR_ASSERT_LOCKED(unit);
415 
416 	VM_OBJECT_RLOCK(unit->ctx_obj);
417 	root_entry = vm_page_lookup(unit->ctx_obj, 0);
418 	VM_OBJECT_RUNLOCK(unit->ctx_obj);
419 	dmar_write8(unit, DMAR_RTADDR_REG, VM_PAGE_TO_PHYS(root_entry));
420 	dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd | DMAR_GCMD_SRTP);
421 	DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_RTPS)
422 	    != 0));
423 	return (error);
424 }
425 
426 /*
427  * Globally invalidate the context entries cache, busily waiting for
428  * the completion.
429  */
430 int
431 dmar_inv_ctx_glob(struct dmar_unit *unit)
432 {
433 	int error;
434 
435 	/*
436 	 * Access to the CCMD register must be serialized while the
437 	 * command is submitted.
438 	 */
439 	DMAR_ASSERT_LOCKED(unit);
440 	KASSERT(!unit->qi_enabled, ("QI enabled"));
441 
442 	/*
443 	 * The DMAR_CCMD_ICC bit in the upper dword should be written
444 	 * after the low dword write is completed.  Amd64
445 	 * dmar_write8() does not have this issue, i386 dmar_write8()
446 	 * writes the upper dword last.
447 	 */
448 	dmar_write8(unit, DMAR_CCMD_REG, DMAR_CCMD_ICC | DMAR_CCMD_CIRG_GLOB);
449 	DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_CCMD_REG + 4) & DMAR_CCMD_ICC32)
450 	    == 0));
451 	return (error);
452 }
453 
454 /*
455  * Globally invalidate the IOTLB, busily waiting for the completion.
456  */
457 int
458 dmar_inv_iotlb_glob(struct dmar_unit *unit)
459 {
460 	int error, reg;
461 
462 	DMAR_ASSERT_LOCKED(unit);
463 	KASSERT(!unit->qi_enabled, ("QI enabled"));
464 
465 	reg = 16 * DMAR_ECAP_IRO(unit->hw_ecap);
466 	/* See a comment about DMAR_CCMD_ICC in dmar_inv_ctx_glob. */
467 	dmar_write8(unit, reg + DMAR_IOTLB_REG_OFF, DMAR_IOTLB_IVT |
468 	    DMAR_IOTLB_IIRG_GLB | DMAR_IOTLB_DR | DMAR_IOTLB_DW);
469 	DMAR_WAIT_UNTIL(((dmar_read4(unit, reg + DMAR_IOTLB_REG_OFF + 4) &
470 	    DMAR_IOTLB_IVT32) == 0));
471 	return (error);
472 }
473 
474 /*
475  * Flush the chipset write buffers.  See 11.1 "Write Buffer Flushing"
476  * in the architecture specification.
477  */
478 int
479 dmar_flush_write_bufs(struct dmar_unit *unit)
480 {
481 	int error;
482 
483 	DMAR_ASSERT_LOCKED(unit);
484 
485 	/*
486 	 * DMAR_GCMD_WBF is only valid when CAP_RWBF is reported.
487 	 */
488 	KASSERT((unit->hw_cap & DMAR_CAP_RWBF) != 0,
489 	    ("dmar%d: no RWBF", unit->iommu.unit));
490 
491 	dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd | DMAR_GCMD_WBF);
492 	DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_WBFS)
493 	    != 0));
494 	return (error);
495 }
496 
497 /*
498  * Some BIOSes protect memory region they reside in by using DMAR to
499  * prevent devices from doing any DMA transactions to that part of RAM.
500  * AMI refers to this as "DMA Control Guarantee".
501  * We need to disable this when address translation is enabled.
502  */
503 int
504 dmar_disable_protected_regions(struct dmar_unit *unit)
505 {
506 	uint32_t reg;
507 	int error;
508 
509 	DMAR_ASSERT_LOCKED(unit);
510 
511 	/* Check if we support the feature. */
512 	if ((unit->hw_cap & (DMAR_CAP_PLMR | DMAR_CAP_PHMR)) == 0)
513 		return (0);
514 
515 	reg = dmar_read4(unit, DMAR_PMEN_REG);
516 	if ((reg & DMAR_PMEN_EPM) == 0)
517 		return (0);
518 
519 	reg &= ~DMAR_PMEN_EPM;
520 	dmar_write4(unit, DMAR_PMEN_REG, reg);
521 	DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_PMEN_REG) & DMAR_PMEN_PRS)
522 	    != 0));
523 
524 	return (error);
525 }
526 
527 int
528 dmar_enable_translation(struct dmar_unit *unit)
529 {
530 	int error;
531 
532 	DMAR_ASSERT_LOCKED(unit);
533 	unit->hw_gcmd |= DMAR_GCMD_TE;
534 	dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd);
535 	DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_TES)
536 	    != 0));
537 	return (error);
538 }
539 
540 int
541 dmar_disable_translation(struct dmar_unit *unit)
542 {
543 	int error;
544 
545 	DMAR_ASSERT_LOCKED(unit);
546 	unit->hw_gcmd &= ~DMAR_GCMD_TE;
547 	dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd);
548 	DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_TES)
549 	    == 0));
550 	return (error);
551 }
552 
553 int
554 dmar_load_irt_ptr(struct dmar_unit *unit)
555 {
556 	uint64_t irta, s;
557 	int error;
558 
559 	DMAR_ASSERT_LOCKED(unit);
560 	irta = unit->irt_phys;
561 	if (DMAR_X2APIC(unit))
562 		irta |= DMAR_IRTA_EIME;
563 	s = fls(unit->irte_cnt) - 2;
564 	KASSERT(unit->irte_cnt >= 2 && s <= DMAR_IRTA_S_MASK &&
565 	    powerof2(unit->irte_cnt),
566 	    ("IRTA_REG_S overflow %x", unit->irte_cnt));
567 	irta |= s;
568 	dmar_write8(unit, DMAR_IRTA_REG, irta);
569 	dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd | DMAR_GCMD_SIRTP);
570 	DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_IRTPS)
571 	    != 0));
572 	return (error);
573 }
574 
575 int
576 dmar_enable_ir(struct dmar_unit *unit)
577 {
578 	int error;
579 
580 	DMAR_ASSERT_LOCKED(unit);
581 	unit->hw_gcmd |= DMAR_GCMD_IRE;
582 	unit->hw_gcmd &= ~DMAR_GCMD_CFI;
583 	dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd);
584 	DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_IRES)
585 	    != 0));
586 	return (error);
587 }
588 
589 int
590 dmar_disable_ir(struct dmar_unit *unit)
591 {
592 	int error;
593 
594 	DMAR_ASSERT_LOCKED(unit);
595 	unit->hw_gcmd &= ~DMAR_GCMD_IRE;
596 	dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd);
597 	DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_IRES)
598 	    == 0));
599 	return (error);
600 }
601 
/*
 * Declare and initialize the three flag bits used by the barrier with
 * the number barrier_id, which must name a variable in the scope of
 * the expansion.  Each barrier owns three consecutive bits of
 * barrier_flags, starting at bit barrier_id * 3:
 *   f_done	the barrier has been passed;
 *   f_inproc	some thread is inside of the barrier;
 *   f_wakeup	a thread sleeps waiting for the barrier to be passed.
 * The expansion starts with a declaration and has no trailing
 * semicolon.
 */
#define BARRIER_F				\
	u_int f_done, f_inproc, f_wakeup;	\
						\
	f_done = 1 << (barrier_id * 3);		\
	f_inproc = f_done << 1;			\
	f_wakeup = f_done << 2
608 
609 bool
610 dmar_barrier_enter(struct dmar_unit *dmar, u_int barrier_id)
611 {
612 	BARRIER_F;
613 
614 	DMAR_LOCK(dmar);
615 	if ((dmar->barrier_flags & f_done) != 0) {
616 		DMAR_UNLOCK(dmar);
617 		return (false);
618 	}
619 
620 	if ((dmar->barrier_flags & f_inproc) != 0) {
621 		while ((dmar->barrier_flags & f_inproc) != 0) {
622 			dmar->barrier_flags |= f_wakeup;
623 			msleep(&dmar->barrier_flags, &dmar->iommu.lock, 0,
624 			    "dmarb", 0);
625 		}
626 		KASSERT((dmar->barrier_flags & f_done) != 0,
627 		    ("dmar%d barrier %d missing done", dmar->iommu.unit,
628 		    barrier_id));
629 		DMAR_UNLOCK(dmar);
630 		return (false);
631 	}
632 
633 	dmar->barrier_flags |= f_inproc;
634 	DMAR_UNLOCK(dmar);
635 	return (true);
636 }
637 
638 void
639 dmar_barrier_exit(struct dmar_unit *dmar, u_int barrier_id)
640 {
641 	BARRIER_F;
642 
643 	DMAR_ASSERT_LOCKED(dmar);
644 	KASSERT((dmar->barrier_flags & (f_done | f_inproc)) == f_inproc,
645 	    ("dmar%d barrier %d missed entry", dmar->iommu.unit, barrier_id));
646 	dmar->barrier_flags |= f_done;
647 	if ((dmar->barrier_flags & f_wakeup) != 0)
648 		wakeup(&dmar->barrier_flags);
649 	dmar->barrier_flags &= ~(f_inproc | f_wakeup);
650 	DMAR_UNLOCK(dmar);
651 }
652 
/* Exported as the batch_coalesce sysctl. */
int dmar_batch_coalesce = 100;

/* Timeout for command wait; the initial value is one millisecond. */
struct timespec dmar_hw_timeout = {
	.tv_sec = 0,
	.tv_nsec = 1000000
};

/* Nanoseconds per second, for the conversions below. */
static const uint64_t dmar_timeout_nsec_per_sec = 1000000000;

/*
 * Set dmar_hw_timeout from a value expressed in nanoseconds.
 */
void
dmar_update_timeout(uint64_t newval)
{
	uint64_t sec, nsec;

	sec = newval / dmar_timeout_nsec_per_sec;
	nsec = newval % dmar_timeout_nsec_per_sec;

	/* XXXKIB not atomic */
	dmar_hw_timeout.tv_sec = sec;
	dmar_hw_timeout.tv_nsec = nsec;
}

/*
 * Return dmar_hw_timeout expressed in nanoseconds.
 */
uint64_t
dmar_get_timeout(void)
{
	uint64_t ns;

	ns = (uint64_t)dmar_hw_timeout.tv_sec;
	ns *= dmar_timeout_nsec_per_sec;
	ns += dmar_hw_timeout.tv_nsec;
	return (ns);
}
677 
678 static int
679 dmar_timeout_sysctl(SYSCTL_HANDLER_ARGS)
680 {
681 	uint64_t val;
682 	int error;
683 
684 	val = dmar_get_timeout();
685 	error = sysctl_handle_long(oidp, &val, 0, req);
686 	if (error != 0 || req->newptr == NULL)
687 		return (error);
688 	dmar_update_timeout(val);
689 	return (error);
690 }
691 
692 static SYSCTL_NODE(_hw_iommu, OID_AUTO, dmar, CTLFLAG_RD | CTLFLAG_MPSAFE,
693     NULL, "");
694 SYSCTL_INT(_hw_iommu_dmar, OID_AUTO, tbl_pagecnt, CTLFLAG_RD,
695     &dmar_tbl_pagecnt, 0,
696     "Count of pages used for DMAR pagetables");
697 SYSCTL_INT(_hw_iommu_dmar, OID_AUTO, batch_coalesce, CTLFLAG_RWTUN,
698     &dmar_batch_coalesce, 0,
699     "Number of qi batches between interrupt");
700 SYSCTL_PROC(_hw_iommu_dmar, OID_AUTO, timeout,
701     CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
702     dmar_timeout_sysctl, "QU",
703     "Timeout for command wait, in nanoseconds");
704