xref: /freebsd/sys/x86/iommu/intel_utils.c (revision 22cf89c938886d14f5796fc49f9f020c23ea8eaf)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2013 The FreeBSD Foundation
5  *
6  * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
7  * under sponsorship from the FreeBSD Foundation.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include <sys/cdefs.h>
32 #include <sys/param.h>
33 #include <sys/bus.h>
34 #include <sys/kernel.h>
35 #include <sys/lock.h>
36 #include <sys/malloc.h>
37 #include <sys/memdesc.h>
38 #include <sys/mutex.h>
39 #include <sys/proc.h>
40 #include <sys/queue.h>
41 #include <sys/rman.h>
42 #include <sys/rwlock.h>
43 #include <sys/sched.h>
44 #include <sys/sf_buf.h>
45 #include <sys/sysctl.h>
46 #include <sys/systm.h>
47 #include <sys/taskqueue.h>
48 #include <sys/time.h>
49 #include <sys/tree.h>
50 #include <sys/vmem.h>
51 #include <vm/vm.h>
52 #include <vm/vm_extern.h>
53 #include <vm/vm_kern.h>
54 #include <vm/vm_object.h>
55 #include <vm/vm_page.h>
56 #include <vm/vm_map.h>
57 #include <vm/vm_pageout.h>
58 #include <dev/pci/pcireg.h>
59 #include <dev/pci/pcivar.h>
60 #include <machine/bus.h>
61 #include <machine/cpu.h>
62 #include <machine/intr_machdep.h>
63 #include <x86/include/apicvar.h>
64 #include <x86/include/busdma_impl.h>
65 #include <dev/iommu/busdma_iommu.h>
66 #include <x86/iommu/intel_reg.h>
67 #include <x86/iommu/intel_dmar.h>
68 
/*
 * Translate the ND (number of domains) capability encoding into a mask
 * covering the valid domain-id bits.  Encoding nd selects domain ids
 * that are 4 + 2 * nd bits wide, so the mask is
 * (1 << (4 + 2 * nd)) - 1.  Encoding 7 is reserved and maps to an empty
 * mask; passing it trips the assertion on kernels that check KASSERT.
 */
u_int
dmar_nd2mask(u_int nd)
{
	static const u_int masks[] = {
		0x000f,	/* nd == 0:  4-bit domain ids */
		0x003f,	/* nd == 1:  6-bit domain ids */
		0x00ff,	/* nd == 2:  8-bit domain ids */
		0x03ff,	/* nd == 3: 10-bit domain ids */
		0x0fff,	/* nd == 4: 12-bit domain ids */
		0x3fff,	/* nd == 5: 14-bit domain ids */
		0xffff,	/* nd == 6: 16-bit domain ids */
		0x0000,	/* nd == 7 reserved */
	};

	KASSERT(nd <= 6, ("number of domains %d", nd));
	return (masks[nd]);
}
86 
/*
 * Known adjusted guest address widths, one row per page-table depth,
 * listed in order of increasing width.  The lookups in this file scan
 * the rows from the top and stop at the first match.
 */
87 static const struct sagaw_bits_tag {
88 	int agaw;	/* address width, in bits */
89 	int cap;	/* DMAR_CAP_SAGAW_* bit tested against hw_cap */
90 	int awlvl;	/* DMAR_CTX2_AW_* value stored in domain->awlvl */
91 	int pglvl;	/* number of page-table levels */
92 } sagaw_bits[] = {
93 	{.agaw = 30, .cap = DMAR_CAP_SAGAW_2LVL, .awlvl = DMAR_CTX2_AW_2LVL,
94 	    .pglvl = 2},
95 	{.agaw = 39, .cap = DMAR_CAP_SAGAW_3LVL, .awlvl = DMAR_CTX2_AW_3LVL,
96 	    .pglvl = 3},
97 	{.agaw = 48, .cap = DMAR_CAP_SAGAW_4LVL, .awlvl = DMAR_CTX2_AW_4LVL,
98 	    .pglvl = 4},
99 	{.agaw = 57, .cap = DMAR_CAP_SAGAW_5LVL, .awlvl = DMAR_CTX2_AW_5LVL,
100 	    .pglvl = 5}
101 	/*
102 	 * 6-level paging (DMAR_CAP_SAGAW_6LVL) is not supported on any
103 	 * current VT-d hardware and its SAGAW field value is listed as
104 	 * reserved in the VT-d spec.  If support is added in the future,
105 	 * this structure and the logic in dmar_maxaddr2mgaw() will need
106 	 * to change to avoid attempted comparison against 1ULL << 64.
107 	 */
108 };
109 
110 bool
111 dmar_pglvl_supported(struct dmar_unit *unit, int pglvl)
112 {
113 	int i;
114 
115 	for (i = 0; i < nitems(sagaw_bits); i++) {
116 		if (sagaw_bits[i].pglvl != pglvl)
117 			continue;
118 		if ((DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap) != 0)
119 			return (true);
120 	}
121 	return (false);
122 }
123 
124 int
125 domain_set_agaw(struct dmar_domain *domain, int mgaw)
126 {
127 	int sagaw, i;
128 
129 	domain->mgaw = mgaw;
130 	sagaw = DMAR_CAP_SAGAW(domain->dmar->hw_cap);
131 	for (i = 0; i < nitems(sagaw_bits); i++) {
132 		if (sagaw_bits[i].agaw >= mgaw) {
133 			domain->agaw = sagaw_bits[i].agaw;
134 			domain->pglvl = sagaw_bits[i].pglvl;
135 			domain->awlvl = sagaw_bits[i].awlvl;
136 			return (0);
137 		}
138 	}
139 	device_printf(domain->dmar->dev,
140 	    "context request mgaw %d: no agaw found, sagaw %x\n",
141 	    mgaw, sagaw);
142 	return (EINVAL);
143 }
144 
145 /*
146  * Find a best fit mgaw for the given maxaddr:
147  *   - if allow_less is false, must find sagaw which maps all requested
148  *     addresses (used by identity mappings);
149  *   - if allow_less is true, and no supported sagaw can map all requested
150  *     address space, accept the biggest sagaw, whatever is it.
151  */
152 int
153 dmar_maxaddr2mgaw(struct dmar_unit *unit, iommu_gaddr_t maxaddr, bool allow_less)
154 {
155 	int i;
156 
157 	for (i = 0; i < nitems(sagaw_bits); i++) {
158 		if ((1ULL << sagaw_bits[i].agaw) >= maxaddr &&
159 		    (DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap) != 0)
160 			break;
161 	}
162 	if (allow_less && i == nitems(sagaw_bits)) {
163 		do {
164 			i--;
165 		} while ((DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap)
166 		    == 0);
167 	}
168 	if (i < nitems(sagaw_bits))
169 		return (sagaw_bits[i].agaw);
170 	KASSERT(0, ("no mgaw for maxaddr %jx allow_less %d",
171 	    (uintmax_t) maxaddr, allow_less));
172 	return (-1);
173 }
174 
175 /*
176  * Calculate the total amount of page table pages needed to map the
177  * whole bus address space on the context with the selected agaw.
178  */
179 vm_pindex_t
180 pglvl_max_pages(int pglvl)
181 {
182 	vm_pindex_t res;
183 	int i;
184 
185 	for (res = 0, i = pglvl; i > 0; i--) {
186 		res *= DMAR_NPTEPG;
187 		res++;
188 	}
189 	return (res);
190 }
191 
192 /*
193  * Return true if the page table level lvl supports the superpage for
194  * the context ctx.
195  */
196 int
197 domain_is_sp_lvl(struct dmar_domain *domain, int lvl)
198 {
199 	int alvl, cap_sps;
200 	static const int sagaw_sp[] = {
201 		DMAR_CAP_SPS_2M,
202 		DMAR_CAP_SPS_1G,
203 		DMAR_CAP_SPS_512G,
204 		DMAR_CAP_SPS_1T
205 	};
206 
207 	alvl = domain->pglvl - lvl - 1;
208 	cap_sps = DMAR_CAP_SPS(domain->dmar->hw_cap);
209 	return (alvl < nitems(sagaw_sp) && (sagaw_sp[alvl] & cap_sps) != 0);
210 }
211 
212 iommu_gaddr_t
213 pglvl_page_size(int total_pglvl, int lvl)
214 {
215 	int rlvl;
216 	static const iommu_gaddr_t pg_sz[] = {
217 		(iommu_gaddr_t)DMAR_PAGE_SIZE,
218 		(iommu_gaddr_t)DMAR_PAGE_SIZE << DMAR_NPTEPGSHIFT,
219 		(iommu_gaddr_t)DMAR_PAGE_SIZE << (2 * DMAR_NPTEPGSHIFT),
220 		(iommu_gaddr_t)DMAR_PAGE_SIZE << (3 * DMAR_NPTEPGSHIFT),
221 		(iommu_gaddr_t)DMAR_PAGE_SIZE << (4 * DMAR_NPTEPGSHIFT),
222 		(iommu_gaddr_t)DMAR_PAGE_SIZE << (5 * DMAR_NPTEPGSHIFT)
223 	};
224 
225 	KASSERT(lvl >= 0 && lvl < total_pglvl,
226 	    ("total %d lvl %d", total_pglvl, lvl));
227 	rlvl = total_pglvl - lvl - 1;
228 	KASSERT(rlvl < nitems(pg_sz), ("sizeof pg_sz lvl %d", lvl));
229 	return (pg_sz[rlvl]);
230 }
231 
/*
 * Return the size of the region mapped by one entry at level lvl of
 * the domain's page table.  Thin wrapper that passes domain->pglvl to
 * pglvl_page_size().
 */
232 iommu_gaddr_t
233 domain_page_size(struct dmar_domain *domain, int lvl)
234 {
235 
236 	return (pglvl_page_size(domain->pglvl, lvl));
237 }
238 
239 int
240 calc_am(struct dmar_unit *unit, iommu_gaddr_t base, iommu_gaddr_t size,
241     iommu_gaddr_t *isizep)
242 {
243 	iommu_gaddr_t isize;
244 	int am;
245 
246 	for (am = DMAR_CAP_MAMV(unit->hw_cap);; am--) {
247 		isize = 1ULL << (am + DMAR_PAGE_SHIFT);
248 		if ((base & (isize - 1)) == 0 && size >= isize)
249 			break;
250 		if (am == 0)
251 			break;
252 	}
253 	*isizep = isize;
254 	return (am);
255 }
256 
/*
 * Passed to vm_page_alloc_contig() by dmar_pgalloc(), in the argument
 * that follows the 0 lower bound.
 * NOTE(review): presumably the highest physical address usable for
 * page-table pages -- confirm where it is initialized.
 */
257 iommu_haddr_t dmar_high;
/*
 * Not referenced by the code in this file.
 * NOTE(review): presumably the host address width -- confirm.
 */
258 int haw;
/*
 * Running count of page-table pages: incremented in dmar_pgalloc(),
 * decremented in dmar_pgfree(), exported read-only through sysctl.
 */
259 int dmar_tbl_pagecnt;
260 
/*
 * Return the page at index idx of obj, allocating it if it does not
 * exist yet.  flags is a mask of IOMMU_PGF_* bits:
 *   IOMMU_PGF_OBJL    - the object lock is not touched here (callers
 *                       in this file pass it while holding the write
 *                       lock); without it the write lock is taken and
 *                       released around every lookup/allocation attempt;
 *   IOMMU_PGF_NOALLOC - only look the page up, never allocate;
 *   IOMMU_PGF_ZERO    - the newly allocated page must be zero-filled;
 *   IOMMU_PGF_WAITOK  - allocate with VM_ALLOC_WAITFAIL and retry after
 *                       a failed attempt; otherwise a single
 *                       VM_ALLOC_NOWAIT attempt is made.
 * Returns the page, or NULL when it does not exist and was not (or
 * could not be) allocated.  dmar_tbl_pagecnt is incremented for every
 * page allocated here.
 */
261 vm_page_t
262 dmar_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags)
263 {
264 	vm_page_t m;
265 	int zeroed, aflags;
266 
	/* zeroed doubles as an allocation flag and as a boolean below. */
267 	zeroed = (flags & IOMMU_PGF_ZERO) != 0 ? VM_ALLOC_ZERO : 0;
268 	aflags = zeroed | VM_ALLOC_NOBUSY | VM_ALLOC_SYSTEM | VM_ALLOC_NODUMP |
269 	    ((flags & IOMMU_PGF_WAITOK) != 0 ? VM_ALLOC_WAITFAIL :
270 	    VM_ALLOC_NOWAIT);
271 	for (;;) {
272 		if ((flags & IOMMU_PGF_OBJL) == 0)
273 			VM_OBJECT_WLOCK(obj);
		/* Re-check on every pass: the page may already exist. */
274 		m = vm_page_lookup(obj, idx);
275 		if ((flags & IOMMU_PGF_NOALLOC) != 0 || m != NULL) {
276 			if ((flags & IOMMU_PGF_OBJL) == 0)
277 				VM_OBJECT_WUNLOCK(obj);
278 			break;
279 		}
		/* One page, physical range [0, dmar_high], page aligned. */
280 		m = vm_page_alloc_contig(obj, idx, aflags, 1, 0,
281 		    dmar_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
282 		if ((flags & IOMMU_PGF_OBJL) == 0)
283 			VM_OBJECT_WUNLOCK(obj);
284 		if (m != NULL) {
			/* Zero by hand if the allocator did not do it. */
285 			if (zeroed && (m->flags & PG_ZERO) == 0)
286 				pmap_zero_page(m);
287 			atomic_add_int(&dmar_tbl_pagecnt, 1);
288 			break;
289 		}
		/* Allocation failed; only sleepable callers retry. */
290 		if ((flags & IOMMU_PGF_WAITOK) == 0)
291 			break;
292 	}
293 	return (m);
294 }
295 
296 void
297 dmar_pgfree(vm_object_t obj, vm_pindex_t idx, int flags)
298 {
299 	vm_page_t m;
300 
301 	if ((flags & IOMMU_PGF_OBJL) == 0)
302 		VM_OBJECT_WLOCK(obj);
303 	m = vm_page_grab(obj, idx, VM_ALLOC_NOCREAT);
304 	if (m != NULL) {
305 		vm_page_free(m);
306 		atomic_subtract_int(&dmar_tbl_pagecnt, 1);
307 	}
308 	if ((flags & IOMMU_PGF_OBJL) == 0)
309 		VM_OBJECT_WUNLOCK(obj);
310 }
311 
/*
 * Map the page at index idx of obj into kernel virtual address space
 * through an sf_buf and return the mapped address.  With
 * IOMMU_PGF_ALLOC a missing page is allocated through dmar_pgalloc().
 * On success *sf holds the sf_buf and the thread is left pinned by
 * sched_pin(); dmar_unmap_pgtbl() undoes both.  Returns NULL when the
 * page does not exist (and was not allocated) or when no sf_buf could
 * be obtained; in the latter case a page allocated here is freed again.
 *
 * Object lock protocol, by flags:
 *   without IOMMU_PGF_OBJL the write lock is taken on entry and is
 *   released again before returning;
 *   with IOMMU_PGF_OBJL the lock is not taken here and is write-locked
 *   when a successful call returns;
 *   with IOMMU_PGF_WAITOK the lock is dropped around sf_buf_alloc(),
 *   regardless of IOMMU_PGF_OBJL.
 */
312 void *
313 dmar_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags,
314     struct sf_buf **sf)
315 {
316 	vm_page_t m;
317 	bool allocated;
318 
319 	if ((flags & IOMMU_PGF_OBJL) == 0)
320 		VM_OBJECT_WLOCK(obj);
321 	m = vm_page_lookup(obj, idx);
322 	if (m == NULL && (flags & IOMMU_PGF_ALLOC) != 0) {
		/* The lock is held here, so tell dmar_pgalloc() to leave it. */
323 		m = dmar_pgalloc(obj, idx, flags | IOMMU_PGF_OBJL);
324 		allocated = true;
325 	} else
326 		allocated = false;
327 	if (m == NULL) {
328 		if ((flags & IOMMU_PGF_OBJL) == 0)
329 			VM_OBJECT_WUNLOCK(obj);
330 		return (NULL);
331 	}
332 	/* Sleepable allocations cannot fail. */
333 	if ((flags & IOMMU_PGF_WAITOK) != 0)
334 		VM_OBJECT_WUNLOCK(obj);
	/* Pin before requesting the SFB_CPUPRIVATE mapping. */
335 	sched_pin();
336 	*sf = sf_buf_alloc(m, SFB_CPUPRIVATE | ((flags & IOMMU_PGF_WAITOK)
337 	    == 0 ? SFB_NOWAIT : 0));
338 	if (*sf == NULL) {
		/*
		 * Per the comment above, this is only expected for
		 * non-sleepable requests, for which the lock was not
		 * dropped.
		 */
339 		sched_unpin();
340 		if (allocated) {
341 			VM_OBJECT_ASSERT_WLOCKED(obj);
342 			dmar_pgfree(obj, m->pindex, flags | IOMMU_PGF_OBJL);
343 		}
344 		if ((flags & IOMMU_PGF_OBJL) == 0)
345 			VM_OBJECT_WUNLOCK(obj);
346 		return (NULL);
347 	}
	/*
	 * Restore the lock state described in the header: re-take the
	 * lock dropped for a sleepable OBJL call, or release the lock
	 * taken on entry for a non-sleepable call without OBJL.  The
	 * other two flag combinations are already in their final state.
	 */
348 	if ((flags & (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL)) ==
349 	    (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL))
350 		VM_OBJECT_WLOCK(obj);
351 	else if ((flags & (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL)) == 0)
352 		VM_OBJECT_WUNLOCK(obj);
353 	return ((void *)sf_buf_kva(*sf));
354 }
355 
/*
 * Undo a successful dmar_map_pgtbl(): release the sf_buf and drop the
 * pin that dmar_map_pgtbl() left on the thread.
 */
356 void
357 dmar_unmap_pgtbl(struct sf_buf *sf)
358 {
359 
360 	sf_buf_free(sf);
361 	sched_unpin();
362 }
363 
/*
 * Make sz bytes of a translation structure at dst visible to the DMAR
 * unit.  Nothing is done when DMAR_IS_COHERENT() holds for the unit;
 * otherwise the CPU cache lines covering [dst, dst + sz) are written
 * back to memory, since the unit does not snoop accesses to the paging
 * structures.
 */
static void
dmar_flush_transl_to_ram(struct dmar_unit *unit, void *dst, size_t sz)
{
	uintptr_t start;

	if (!(DMAR_IS_COHERENT(unit))) {
		start = (uintptr_t)dst;
		pmap_force_invalidate_cache_range(start, start + sz);
	}
}
376 
/*
 * Flush a single page table entry at dst towards memory for the given
 * unit; see dmar_flush_transl_to_ram() for when this is a no-op.
 */
377 void
378 dmar_flush_pte_to_ram(struct dmar_unit *unit, dmar_pte_t *dst)
379 {
380 
381 	dmar_flush_transl_to_ram(unit, dst, sizeof(*dst));
382 }
383 
/*
 * Flush a single context entry at dst towards memory for the given
 * unit; see dmar_flush_transl_to_ram() for when this is a no-op.
 */
384 void
385 dmar_flush_ctx_to_ram(struct dmar_unit *unit, dmar_ctx_entry_t *dst)
386 {
387 
388 	dmar_flush_transl_to_ram(unit, dst, sizeof(*dst));
389 }
390 
/*
 * Flush a single root entry at dst towards memory for the given unit;
 * see dmar_flush_transl_to_ram() for when this is a no-op.
 */
391 void
392 dmar_flush_root_to_ram(struct dmar_unit *unit, dmar_root_entry_t *dst)
393 {
394 
395 	dmar_flush_transl_to_ram(unit, dst, sizeof(*dst));
396 }
397 
398 /*
399  * Load the root entry pointer into the hardware, busily waiting for
400  * the completion.
 *
 * The root table is the page at index 0 of unit->ctx_obj.  The caller
 * must hold the DMAR lock.  The return value is whatever
 * DMAR_WAIT_UNTIL() leaves in error; no other statement here assigns it.
 * NOTE(review): the macro is defined outside this file -- confirm it
 * sets error on every path.
401  */
402 int
403 dmar_load_root_entry_ptr(struct dmar_unit *unit)
404 {
405 	vm_page_t root_entry;
406 	int error;
407 
408 	/*
409 	 * Access to the GCMD register must be serialized while the
410 	 * command is submitted.
411 	 */
412 	DMAR_ASSERT_LOCKED(unit);
413 
414 	VM_OBJECT_RLOCK(unit->ctx_obj);
415 	root_entry = vm_page_lookup(unit->ctx_obj, 0);
416 	VM_OBJECT_RUNLOCK(unit->ctx_obj);
	/* NOTE(review): root_entry is used without a NULL check. */
417 	dmar_write8(unit, DMAR_RTADDR_REG, VM_PAGE_TO_PHYS(root_entry));
	/* SRTP is sent as a one-shot command; hw_gcmd itself is not modified. */
418 	dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd | DMAR_GCMD_SRTP);
419 	DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_RTPS)
420 	    != 0));
421 	return (error);
422 }
423 
424 /*
425  * Globally invalidate the context entries cache, busily waiting for
426  * the completion.
 *
 * Register-based path: the caller must hold the DMAR lock and queued
 * invalidation must not be enabled on the unit.  The return value is
 * whatever DMAR_WAIT_UNTIL() leaves in error (macro defined outside
 * this file).
427  */
428 int
429 dmar_inv_ctx_glob(struct dmar_unit *unit)
430 {
431 	int error;
432 
433 	/*
434 	 * Access to the CCMD register must be serialized while the
435 	 * command is submitted.
436 	 */
437 	DMAR_ASSERT_LOCKED(unit);
438 	KASSERT(!unit->qi_enabled, ("QI enabled"));
439 
440 	/*
441 	 * The DMAR_CCMD_ICC bit in the upper dword should be written
442 	 * after the low dword write is completed.  Amd64
443 	 * dmar_write8() does not have this issue, i386 dmar_write8()
444 	 * writes the upper dword last.
445 	 */
446 	dmar_write8(unit, DMAR_CCMD_REG, DMAR_CCMD_ICC | DMAR_CCMD_CIRG_GLOB);
	/* Poll the upper dword until the ICC bit reads back as zero. */
447 	DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_CCMD_REG + 4) & DMAR_CCMD_ICC32)
448 	    == 0));
449 	return (error);
450 }
451 
452 /*
453  * Globally invalidate the IOTLB, busily waiting for the completion.
 *
 * Register-based path: the caller must hold the DMAR lock and queued
 * invalidation must not be enabled on the unit.  The return value is
 * whatever DMAR_WAIT_UNTIL() leaves in error (macro defined outside
 * this file).
454  */
455 int
456 dmar_inv_iotlb_glob(struct dmar_unit *unit)
457 {
458 	int error, reg;
459 
460 	DMAR_ASSERT_LOCKED(unit);
461 	KASSERT(!unit->qi_enabled, ("QI enabled"));
462 
	/*
	 * Base offset of the IOTLB registers: the IRO field of the
	 * extended capability register, scaled by 16.
	 */
463 	reg = 16 * DMAR_ECAP_IRO(unit->hw_ecap);
464 	/* See a comment about DMAR_CCMD_ICC in dmar_inv_ctx_glob. */
465 	dmar_write8(unit, reg + DMAR_IOTLB_REG_OFF, DMAR_IOTLB_IVT |
466 	    DMAR_IOTLB_IIRG_GLB | DMAR_IOTLB_DR | DMAR_IOTLB_DW);
	/* Poll the upper dword until the IVT bit reads back as zero. */
467 	DMAR_WAIT_UNTIL(((dmar_read4(unit, reg + DMAR_IOTLB_REG_OFF + 4) &
468 	    DMAR_IOTLB_IVT32) == 0));
469 	return (error);
470 }
471 
472 /*
473  * Flush the chipset write buffers.  See 11.1 "Write Buffer Flushing"
474  * in the architecture specification.
 *
 * The caller must hold the DMAR lock and the unit must report
 * DMAR_CAP_RWBF.  The return value is whatever DMAR_WAIT_UNTIL() leaves
 * in error (macro defined outside this file).
475  */
476 int
477 dmar_flush_write_bufs(struct dmar_unit *unit)
478 {
479 	int error;
480 
481 	DMAR_ASSERT_LOCKED(unit);
482 
483 	/*
484 	 * DMAR_GCMD_WBF is only valid when CAP_RWBF is reported.
485 	 */
486 	KASSERT((unit->hw_cap & DMAR_CAP_RWBF) != 0,
487 	    ("dmar%d: no RWBF", unit->iommu.unit));
488 
	/* WBF is sent as a one-shot command; hw_gcmd itself is not modified. */
489 	dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd | DMAR_GCMD_WBF);
	/*
	 * NOTE(review): this polls for WBFS != 0, i.e. for the status bit
	 * to be set -- confirm the intended completion condition against
	 * the WBFS definition in the specification.
	 */
490 	DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_WBFS)
491 	    != 0));
492 	return (error);
493 }
494 
495 /*
496  * Some BIOSes protect memory region they reside in by using DMAR to
497  * prevent devices from doing any DMA transactions to that part of RAM.
498  * AMI refers to this as "DMA Control Guarantee".
499  * We need to disable this when address translation is enabled.
500  */
501 int
502 dmar_disable_protected_regions(struct dmar_unit *unit)
503 {
504 	uint32_t reg;
505 	int error;
506 
507 	DMAR_ASSERT_LOCKED(unit);
508 
509 	/* Check if we support the feature. */
510 	if ((unit->hw_cap & (DMAR_CAP_PLMR | DMAR_CAP_PHMR)) == 0)
511 		return (0);
512 
513 	reg = dmar_read4(unit, DMAR_PMEN_REG);
514 	if ((reg & DMAR_PMEN_EPM) == 0)
515 		return (0);
516 
517 	reg &= ~DMAR_PMEN_EPM;
518 	dmar_write4(unit, DMAR_PMEN_REG, reg);
519 	DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_PMEN_REG) & DMAR_PMEN_PRS)
520 	    != 0));
521 
522 	return (error);
523 }
524 
/*
 * Turn DMA remapping on: set DMAR_GCMD_TE in the cached command word
 * unit->hw_gcmd, write it to the GCMD register and busily wait until
 * DMAR_GSTS_TES is set in the status register.  The caller must hold
 * the DMAR lock.  The return value is whatever DMAR_WAIT_UNTIL() leaves
 * in error (macro defined outside this file).
 */
525 int
526 dmar_enable_translation(struct dmar_unit *unit)
527 {
528 	int error;
529 
530 	DMAR_ASSERT_LOCKED(unit);
531 	unit->hw_gcmd |= DMAR_GCMD_TE;
532 	dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd);
533 	DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_TES)
534 	    != 0));
535 	return (error);
536 }
537 
/*
 * Turn DMA remapping off: clear DMAR_GCMD_TE in the cached command word
 * unit->hw_gcmd, write it to the GCMD register and busily wait until
 * DMAR_GSTS_TES reads back as zero.  The caller must hold the DMAR
 * lock.  The return value is whatever DMAR_WAIT_UNTIL() leaves in error
 * (macro defined outside this file).
 */
538 int
539 dmar_disable_translation(struct dmar_unit *unit)
540 {
541 	int error;
542 
543 	DMAR_ASSERT_LOCKED(unit);
544 	unit->hw_gcmd &= ~DMAR_GCMD_TE;
545 	dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd);
546 	DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_TES)
547 	    == 0));
548 	return (error);
549 }
550 
/*
 * Load the interrupt remapping table pointer into the hardware, busily
 * waiting for DMAR_GSTS_IRTPS.  The value written to the IRTA register
 * combines unit->irt_phys, DMAR_IRTA_EIME when DMAR_X2APIC() holds for
 * the unit, and a size field derived from unit->irte_cnt.  The caller
 * must hold the DMAR lock.  The return value is whatever
 * DMAR_WAIT_UNTIL() leaves in error (macro defined outside this file).
 */
551 int
552 dmar_load_irt_ptr(struct dmar_unit *unit)
553 {
554 	uint64_t irta, s;
555 	int error;
556 
557 	DMAR_ASSERT_LOCKED(unit);
558 	irta = unit->irt_phys;
559 	if (DMAR_X2APIC(unit))
560 		irta |= DMAR_IRTA_EIME;
	/*
	 * Size field: fls() gives the 1-based index of the highest set
	 * bit, so for a power-of-two count 2^k this is k - 1.  The
	 * assertion below rejects counts that are below 2, not a power
	 * of two, or too large for the field.
	 */
561 	s = fls(unit->irte_cnt) - 2;
562 	KASSERT(unit->irte_cnt >= 2 && s <= DMAR_IRTA_S_MASK &&
563 	    powerof2(unit->irte_cnt),
564 	    ("IRTA_REG_S overflow %x", unit->irte_cnt));
565 	irta |= s;
566 	dmar_write8(unit, DMAR_IRTA_REG, irta);
	/* SIRTP is sent as a one-shot command; hw_gcmd itself is not modified. */
567 	dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd | DMAR_GCMD_SIRTP);
568 	DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_IRTPS)
569 	    != 0));
570 	return (error);
571 }
572 
/*
 * Turn interrupt remapping on: set DMAR_GCMD_IRE and clear
 * DMAR_GCMD_CFI in the cached command word unit->hw_gcmd, write it to
 * the GCMD register and busily wait until DMAR_GSTS_IRES is set.  The
 * caller must hold the DMAR lock.  The return value is whatever
 * DMAR_WAIT_UNTIL() leaves in error (macro defined outside this file).
 */
573 int
574 dmar_enable_ir(struct dmar_unit *unit)
575 {
576 	int error;
577 
578 	DMAR_ASSERT_LOCKED(unit);
579 	unit->hw_gcmd |= DMAR_GCMD_IRE;
580 	unit->hw_gcmd &= ~DMAR_GCMD_CFI;
581 	dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd);
582 	DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_IRES)
583 	    != 0));
584 	return (error);
585 }
586 
/*
 * Turn interrupt remapping off: clear DMAR_GCMD_IRE in the cached
 * command word unit->hw_gcmd, write it to the GCMD register and busily
 * wait until DMAR_GSTS_IRES reads back as zero.  The caller must hold
 * the DMAR lock.  The return value is whatever DMAR_WAIT_UNTIL() leaves
 * in error (macro defined outside this file).
 */
587 int
588 dmar_disable_ir(struct dmar_unit *unit)
589 {
590 	int error;
591 
592 	DMAR_ASSERT_LOCKED(unit);
593 	unit->hw_gcmd &= ~DMAR_GCMD_IRE;
594 	dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd);
595 	DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_IRES)
596 	    == 0));
597 	return (error);
598 }
599 
/*
 * Declare the three flag masks of the barrier selected by barrier_id,
 * which must be in scope where the macro is used.  Every barrier owns
 * three consecutive bits of barrier_flags, starting at bit
 * barrier_id * 3: done, in progress, and wakeup requested.
 */
#define BARRIER_F						\
	u_int f_done = 1 << (barrier_id * 3);			\
	u_int f_inproc = 1 << (barrier_id * 3 + 1);		\
	u_int f_wakeup = 1 << (barrier_id * 3 + 2)
606 
607 bool
608 dmar_barrier_enter(struct dmar_unit *dmar, u_int barrier_id)
609 {
610 	BARRIER_F;
611 
612 	DMAR_LOCK(dmar);
613 	if ((dmar->barrier_flags & f_done) != 0) {
614 		DMAR_UNLOCK(dmar);
615 		return (false);
616 	}
617 
618 	if ((dmar->barrier_flags & f_inproc) != 0) {
619 		while ((dmar->barrier_flags & f_inproc) != 0) {
620 			dmar->barrier_flags |= f_wakeup;
621 			msleep(&dmar->barrier_flags, &dmar->iommu.lock, 0,
622 			    "dmarb", 0);
623 		}
624 		KASSERT((dmar->barrier_flags & f_done) != 0,
625 		    ("dmar%d barrier %d missing done", dmar->iommu.unit,
626 		    barrier_id));
627 		DMAR_UNLOCK(dmar);
628 		return (false);
629 	}
630 
631 	dmar->barrier_flags |= f_inproc;
632 	DMAR_UNLOCK(dmar);
633 	return (true);
634 }
635 
/*
 * Finish the work claimed through dmar_barrier_enter() for barrier
 * barrier_id: mark the barrier done, wake up any threads that asked
 * for it, and clear the in-progress and wakeup flags.  The DMAR lock
 * must be held on entry and is released before returning.
 */
636 void
637 dmar_barrier_exit(struct dmar_unit *dmar, u_int barrier_id)
638 {
639 	BARRIER_F;
640 
641 	DMAR_ASSERT_LOCKED(dmar);
	/* The barrier must be in progress and not yet done. */
642 	KASSERT((dmar->barrier_flags & (f_done | f_inproc)) == f_inproc,
643 	    ("dmar%d barrier %d missed entry", dmar->iommu.unit, barrier_id));
644 	dmar->barrier_flags |= f_done;
645 	if ((dmar->barrier_flags & f_wakeup) != 0)
646 		wakeup(&dmar->barrier_flags);
647 	dmar->barrier_flags &= ~(f_inproc | f_wakeup);
648 	DMAR_UNLOCK(dmar);
649 }
650 
/* Exported as the batch_coalesce sysctl: "Number of qi batches between interrupt". */
651 int dmar_batch_coalesce = 100;
/*
 * Timeout for command waits, read and written through
 * dmar_get_timeout() and dmar_update_timeout().  The initial value is
 * 1000000 ns, i.e. one millisecond.
 */
652 struct timespec dmar_hw_timeout = {
653 	.tv_sec = 0,
654 	.tv_nsec = 1000000
655 };
656 
/* Nanoseconds per second; divisor for the timespec conversions below. */
657 static const uint64_t d = 1000000000;
658 
/*
 * Set the command wait timeout from newval, a count of nanoseconds,
 * by splitting it into whole seconds and the nanosecond remainder.
 * The two fields are stored by separate assignments.
 */
659 void
660 dmar_update_timeout(uint64_t newval)
661 {
662 
663 	/* XXXKIB not atomic */
664 	dmar_hw_timeout.tv_sec = newval / d;
665 	dmar_hw_timeout.tv_nsec = newval % d;
666 }
667 
/*
 * Return the current command wait timeout as a single count of
 * nanoseconds.
 */
668 uint64_t
669 dmar_get_timeout(void)
670 {
671 
672 	return ((uint64_t)dmar_hw_timeout.tv_sec * d +
673 	    dmar_hw_timeout.tv_nsec);
674 }
675 
676 static int
677 dmar_timeout_sysctl(SYSCTL_HANDLER_ARGS)
678 {
679 	uint64_t val;
680 	int error;
681 
682 	val = dmar_get_timeout();
683 	error = sysctl_handle_long(oidp, &val, 0, req);
684 	if (error != 0 || req->newptr == NULL)
685 		return (error);
686 	dmar_update_timeout(val);
687 	return (error);
688 }
689 
/* Parent node for the DMAR sysctls, placed under _hw_iommu. */
690 static SYSCTL_NODE(_hw_iommu, OID_AUTO, dmar, CTLFLAG_RD | CTLFLAG_MPSAFE,
691     NULL, "");
/* Read-only view of dmar_tbl_pagecnt. */
692 SYSCTL_INT(_hw_iommu_dmar, OID_AUTO, tbl_pagecnt, CTLFLAG_RD,
693     &dmar_tbl_pagecnt, 0,
694     "Count of pages used for DMAR pagetables");
/* Read-write view of dmar_batch_coalesce, declared with CTLFLAG_RWTUN. */
695 SYSCTL_INT(_hw_iommu_dmar, OID_AUTO, batch_coalesce, CTLFLAG_RWTUN,
696     &dmar_batch_coalesce, 0,
697     "Number of qi batches between interrupt");
/* 64-bit unsigned nanosecond timeout, served by dmar_timeout_sysctl(). */
698 SYSCTL_PROC(_hw_iommu_dmar, OID_AUTO, timeout,
699     CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
700     dmar_timeout_sysctl, "QU",
701     "Timeout for command wait, in nanoseconds");
702