xref: /freebsd/sys/x86/iommu/intel_utils.c (revision 86be9f0dd5f5aeff00ca4a7411250f5ded912563)
1*86be9f0dSKonstantin Belousov /*-
2*86be9f0dSKonstantin Belousov  * Copyright (c) 2013 The FreeBSD Foundation
3*86be9f0dSKonstantin Belousov  * All rights reserved.
4*86be9f0dSKonstantin Belousov  *
5*86be9f0dSKonstantin Belousov  * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
6*86be9f0dSKonstantin Belousov  * under sponsorship from the FreeBSD Foundation.
7*86be9f0dSKonstantin Belousov  *
8*86be9f0dSKonstantin Belousov  * Redistribution and use in source and binary forms, with or without
9*86be9f0dSKonstantin Belousov  * modification, are permitted provided that the following conditions
10*86be9f0dSKonstantin Belousov  * are met:
11*86be9f0dSKonstantin Belousov  * 1. Redistributions of source code must retain the above copyright
12*86be9f0dSKonstantin Belousov  *    notice, this list of conditions and the following disclaimer.
13*86be9f0dSKonstantin Belousov  * 2. Redistributions in binary form must reproduce the above copyright
14*86be9f0dSKonstantin Belousov  *    notice, this list of conditions and the following disclaimer in the
15*86be9f0dSKonstantin Belousov  *    documentation and/or other materials provided with the distribution.
16*86be9f0dSKonstantin Belousov  *
17*86be9f0dSKonstantin Belousov  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18*86be9f0dSKonstantin Belousov  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19*86be9f0dSKonstantin Belousov  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20*86be9f0dSKonstantin Belousov  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21*86be9f0dSKonstantin Belousov  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22*86be9f0dSKonstantin Belousov  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23*86be9f0dSKonstantin Belousov  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24*86be9f0dSKonstantin Belousov  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25*86be9f0dSKonstantin Belousov  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26*86be9f0dSKonstantin Belousov  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27*86be9f0dSKonstantin Belousov  * SUCH DAMAGE.
28*86be9f0dSKonstantin Belousov  */
29*86be9f0dSKonstantin Belousov 
30*86be9f0dSKonstantin Belousov #include <sys/cdefs.h>
31*86be9f0dSKonstantin Belousov __FBSDID("$FreeBSD$");
32*86be9f0dSKonstantin Belousov 
33*86be9f0dSKonstantin Belousov #include <sys/param.h>
34*86be9f0dSKonstantin Belousov #include <sys/bus.h>
35*86be9f0dSKonstantin Belousov #include <sys/kernel.h>
36*86be9f0dSKonstantin Belousov #include <sys/lock.h>
37*86be9f0dSKonstantin Belousov #include <sys/malloc.h>
38*86be9f0dSKonstantin Belousov #include <sys/memdesc.h>
39*86be9f0dSKonstantin Belousov #include <sys/mutex.h>
40*86be9f0dSKonstantin Belousov #include <sys/proc.h>
41*86be9f0dSKonstantin Belousov #include <sys/queue.h>
42*86be9f0dSKonstantin Belousov #include <sys/rman.h>
43*86be9f0dSKonstantin Belousov #include <sys/rwlock.h>
44*86be9f0dSKonstantin Belousov #include <sys/sched.h>
45*86be9f0dSKonstantin Belousov #include <sys/sf_buf.h>
46*86be9f0dSKonstantin Belousov #include <sys/sysctl.h>
47*86be9f0dSKonstantin Belousov #include <sys/systm.h>
48*86be9f0dSKonstantin Belousov #include <sys/taskqueue.h>
49*86be9f0dSKonstantin Belousov #include <sys/tree.h>
50*86be9f0dSKonstantin Belousov #include <vm/vm.h>
51*86be9f0dSKonstantin Belousov #include <vm/vm_extern.h>
52*86be9f0dSKonstantin Belousov #include <vm/vm_kern.h>
53*86be9f0dSKonstantin Belousov #include <vm/vm_object.h>
54*86be9f0dSKonstantin Belousov #include <vm/vm_page.h>
55*86be9f0dSKonstantin Belousov #include <vm/vm_map.h>
56*86be9f0dSKonstantin Belousov #include <vm/vm_pageout.h>
57*86be9f0dSKonstantin Belousov #include <machine/bus.h>
58*86be9f0dSKonstantin Belousov #include <machine/cpu.h>
59*86be9f0dSKonstantin Belousov #include <x86/include/busdma_impl.h>
60*86be9f0dSKonstantin Belousov #include <x86/iommu/intel_reg.h>
61*86be9f0dSKonstantin Belousov #include <x86/iommu/busdma_dmar.h>
62*86be9f0dSKonstantin Belousov #include <x86/iommu/intel_dmar.h>
63*86be9f0dSKonstantin Belousov 
64*86be9f0dSKonstantin Belousov u_int
65*86be9f0dSKonstantin Belousov dmar_nd2mask(u_int nd)
66*86be9f0dSKonstantin Belousov {
67*86be9f0dSKonstantin Belousov 	static const u_int masks[] = {
68*86be9f0dSKonstantin Belousov 		0x000f,	/* nd == 0 */
69*86be9f0dSKonstantin Belousov 		0x002f,	/* nd == 1 */
70*86be9f0dSKonstantin Belousov 		0x00ff,	/* nd == 2 */
71*86be9f0dSKonstantin Belousov 		0x02ff,	/* nd == 3 */
72*86be9f0dSKonstantin Belousov 		0x0fff,	/* nd == 4 */
73*86be9f0dSKonstantin Belousov 		0x2fff,	/* nd == 5 */
74*86be9f0dSKonstantin Belousov 		0xffff,	/* nd == 6 */
75*86be9f0dSKonstantin Belousov 		0x0000,	/* nd == 7 reserved */
76*86be9f0dSKonstantin Belousov 	};
77*86be9f0dSKonstantin Belousov 
78*86be9f0dSKonstantin Belousov 	KASSERT(nd <= 6, ("number of domains %d", nd));
79*86be9f0dSKonstantin Belousov 	return (masks[nd]);
80*86be9f0dSKonstantin Belousov }
81*86be9f0dSKonstantin Belousov 
/*
 * Table relating each adjusted guest address width (agaw) the hardware
 * may support to its DMAR capability bit, the context-entry address
 * width encoding, and the corresponding number of page table levels.
 * The entries are ordered by increasing agaw; dmar_maxaddr2mgaw() and
 * ctx_set_agaw() depend on that ordering for their first-fit scans.
 */
static const struct sagaw_bits_tag {
	int agaw;	/* adjusted guest address width, in bits */
	int cap;	/* DMAR_CAP_SAGAW_* capability bit */
	int awlvl;	/* DMAR_CTX2_AW_* context-entry encoding */
	int pglvl;	/* number of page table levels */
} sagaw_bits[] = {
	{.agaw = 30, .cap = DMAR_CAP_SAGAW_2LVL, .awlvl = DMAR_CTX2_AW_2LVL,
	    .pglvl = 2},
	{.agaw = 39, .cap = DMAR_CAP_SAGAW_3LVL, .awlvl = DMAR_CTX2_AW_3LVL,
	    .pglvl = 3},
	{.agaw = 48, .cap = DMAR_CAP_SAGAW_4LVL, .awlvl = DMAR_CTX2_AW_4LVL,
	    .pglvl = 4},
	{.agaw = 57, .cap = DMAR_CAP_SAGAW_5LVL, .awlvl = DMAR_CTX2_AW_5LVL,
	    .pglvl = 5},
	{.agaw = 64, .cap = DMAR_CAP_SAGAW_6LVL, .awlvl = DMAR_CTX2_AW_6LVL,
	    .pglvl = 6}
};
/* Number of entries in sagaw_bits. */
#define SIZEOF_SAGAW_BITS (sizeof(sagaw_bits) / sizeof(sagaw_bits[0]))
100*86be9f0dSKonstantin Belousov 
101*86be9f0dSKonstantin Belousov bool
102*86be9f0dSKonstantin Belousov dmar_pglvl_supported(struct dmar_unit *unit, int pglvl)
103*86be9f0dSKonstantin Belousov {
104*86be9f0dSKonstantin Belousov 	int i;
105*86be9f0dSKonstantin Belousov 
106*86be9f0dSKonstantin Belousov 	for (i = 0; i < SIZEOF_SAGAW_BITS; i++) {
107*86be9f0dSKonstantin Belousov 		if (sagaw_bits[i].pglvl != pglvl)
108*86be9f0dSKonstantin Belousov 			continue;
109*86be9f0dSKonstantin Belousov 		if ((DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap) != 0)
110*86be9f0dSKonstantin Belousov 			return (true);
111*86be9f0dSKonstantin Belousov 	}
112*86be9f0dSKonstantin Belousov 	return (false);
113*86be9f0dSKonstantin Belousov }
114*86be9f0dSKonstantin Belousov 
115*86be9f0dSKonstantin Belousov int
116*86be9f0dSKonstantin Belousov ctx_set_agaw(struct dmar_ctx *ctx, int mgaw)
117*86be9f0dSKonstantin Belousov {
118*86be9f0dSKonstantin Belousov 	int sagaw, i;
119*86be9f0dSKonstantin Belousov 
120*86be9f0dSKonstantin Belousov 	ctx->mgaw = mgaw;
121*86be9f0dSKonstantin Belousov 	sagaw = DMAR_CAP_SAGAW(ctx->dmar->hw_cap);
122*86be9f0dSKonstantin Belousov 	for (i = 0; i < SIZEOF_SAGAW_BITS; i++) {
123*86be9f0dSKonstantin Belousov 		if (sagaw_bits[i].agaw >= mgaw) {
124*86be9f0dSKonstantin Belousov 			ctx->agaw = sagaw_bits[i].agaw;
125*86be9f0dSKonstantin Belousov 			ctx->pglvl = sagaw_bits[i].pglvl;
126*86be9f0dSKonstantin Belousov 			ctx->awlvl = sagaw_bits[i].awlvl;
127*86be9f0dSKonstantin Belousov 			return (0);
128*86be9f0dSKonstantin Belousov 		}
129*86be9f0dSKonstantin Belousov 	}
130*86be9f0dSKonstantin Belousov 	device_printf(ctx->dmar->dev,
131*86be9f0dSKonstantin Belousov 	    "context request mgaw %d for pci%d:%d:%d:%d, "
132*86be9f0dSKonstantin Belousov 	    "no agaw found, sagaw %x\n", mgaw, ctx->dmar->segment, ctx->bus,
133*86be9f0dSKonstantin Belousov 	     ctx->slot, ctx->func, sagaw);
134*86be9f0dSKonstantin Belousov 	return (EINVAL);
135*86be9f0dSKonstantin Belousov }
136*86be9f0dSKonstantin Belousov 
137*86be9f0dSKonstantin Belousov /*
138*86be9f0dSKonstantin Belousov  * Find a best fit mgaw for the given maxaddr:
139*86be9f0dSKonstantin Belousov  *   - if allow_less is false, must find sagaw which maps all requested
140*86be9f0dSKonstantin Belousov  *     addresses (used by identity mappings);
141*86be9f0dSKonstantin Belousov  *   - if allow_less is true, and no supported sagaw can map all requested
142*86be9f0dSKonstantin Belousov  *     address space, accept the biggest sagaw, whatever is it.
143*86be9f0dSKonstantin Belousov  */
144*86be9f0dSKonstantin Belousov int
145*86be9f0dSKonstantin Belousov dmar_maxaddr2mgaw(struct dmar_unit *unit, dmar_gaddr_t maxaddr, bool allow_less)
146*86be9f0dSKonstantin Belousov {
147*86be9f0dSKonstantin Belousov 	int i;
148*86be9f0dSKonstantin Belousov 
149*86be9f0dSKonstantin Belousov 	for (i = 0; i < SIZEOF_SAGAW_BITS; i++) {
150*86be9f0dSKonstantin Belousov 		if ((1ULL << sagaw_bits[i].agaw) >= maxaddr &&
151*86be9f0dSKonstantin Belousov 		    (DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap) != 0)
152*86be9f0dSKonstantin Belousov 			break;
153*86be9f0dSKonstantin Belousov 	}
154*86be9f0dSKonstantin Belousov 	if (allow_less && i == SIZEOF_SAGAW_BITS) {
155*86be9f0dSKonstantin Belousov 		do {
156*86be9f0dSKonstantin Belousov 			i--;
157*86be9f0dSKonstantin Belousov 		} while ((DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap)
158*86be9f0dSKonstantin Belousov 		    == 0);
159*86be9f0dSKonstantin Belousov 	}
160*86be9f0dSKonstantin Belousov 	if (i < SIZEOF_SAGAW_BITS)
161*86be9f0dSKonstantin Belousov 		return (sagaw_bits[i].agaw);
162*86be9f0dSKonstantin Belousov 	KASSERT(0, ("no mgaw for maxaddr %jx allow_less %d",
163*86be9f0dSKonstantin Belousov 	    (uintmax_t) maxaddr, allow_less));
164*86be9f0dSKonstantin Belousov 	return (-1);
165*86be9f0dSKonstantin Belousov }
166*86be9f0dSKonstantin Belousov 
167*86be9f0dSKonstantin Belousov /*
168*86be9f0dSKonstantin Belousov  * Calculate the total amount of page table pages needed to map the
169*86be9f0dSKonstantin Belousov  * whole bus address space on the context with the selected agaw.
170*86be9f0dSKonstantin Belousov  */
171*86be9f0dSKonstantin Belousov vm_pindex_t
172*86be9f0dSKonstantin Belousov pglvl_max_pages(int pglvl)
173*86be9f0dSKonstantin Belousov {
174*86be9f0dSKonstantin Belousov 	vm_pindex_t res;
175*86be9f0dSKonstantin Belousov 	int i;
176*86be9f0dSKonstantin Belousov 
177*86be9f0dSKonstantin Belousov 	for (res = 0, i = pglvl; i > 0; i--) {
178*86be9f0dSKonstantin Belousov 		res *= DMAR_NPTEPG;
179*86be9f0dSKonstantin Belousov 		res++;
180*86be9f0dSKonstantin Belousov 	}
181*86be9f0dSKonstantin Belousov 	return (res);
182*86be9f0dSKonstantin Belousov }
183*86be9f0dSKonstantin Belousov 
184*86be9f0dSKonstantin Belousov /*
185*86be9f0dSKonstantin Belousov  * Return true if the page table level lvl supports the superpage for
186*86be9f0dSKonstantin Belousov  * the context ctx.
187*86be9f0dSKonstantin Belousov  */
188*86be9f0dSKonstantin Belousov int
189*86be9f0dSKonstantin Belousov ctx_is_sp_lvl(struct dmar_ctx *ctx, int lvl)
190*86be9f0dSKonstantin Belousov {
191*86be9f0dSKonstantin Belousov 	int alvl, cap_sps;
192*86be9f0dSKonstantin Belousov 	static const int sagaw_sp[] = {
193*86be9f0dSKonstantin Belousov 		DMAR_CAP_SPS_2M,
194*86be9f0dSKonstantin Belousov 		DMAR_CAP_SPS_1G,
195*86be9f0dSKonstantin Belousov 		DMAR_CAP_SPS_512G,
196*86be9f0dSKonstantin Belousov 		DMAR_CAP_SPS_1T
197*86be9f0dSKonstantin Belousov 	};
198*86be9f0dSKonstantin Belousov 
199*86be9f0dSKonstantin Belousov 	alvl = ctx->pglvl - lvl - 1;
200*86be9f0dSKonstantin Belousov 	cap_sps = DMAR_CAP_SPS(ctx->dmar->hw_cap);
201*86be9f0dSKonstantin Belousov 	return (alvl < sizeof(sagaw_sp) / sizeof(sagaw_sp[0]) &&
202*86be9f0dSKonstantin Belousov 	    (sagaw_sp[alvl] & cap_sps) != 0);
203*86be9f0dSKonstantin Belousov }
204*86be9f0dSKonstantin Belousov 
205*86be9f0dSKonstantin Belousov dmar_gaddr_t
206*86be9f0dSKonstantin Belousov pglvl_page_size(int total_pglvl, int lvl)
207*86be9f0dSKonstantin Belousov {
208*86be9f0dSKonstantin Belousov 	int rlvl;
209*86be9f0dSKonstantin Belousov 	static const dmar_gaddr_t pg_sz[] = {
210*86be9f0dSKonstantin Belousov 		(dmar_gaddr_t)DMAR_PAGE_SIZE,
211*86be9f0dSKonstantin Belousov 		(dmar_gaddr_t)DMAR_PAGE_SIZE << DMAR_NPTEPGSHIFT,
212*86be9f0dSKonstantin Belousov 		(dmar_gaddr_t)DMAR_PAGE_SIZE << (2 * DMAR_NPTEPGSHIFT),
213*86be9f0dSKonstantin Belousov 		(dmar_gaddr_t)DMAR_PAGE_SIZE << (3 * DMAR_NPTEPGSHIFT),
214*86be9f0dSKonstantin Belousov 		(dmar_gaddr_t)DMAR_PAGE_SIZE << (4 * DMAR_NPTEPGSHIFT),
215*86be9f0dSKonstantin Belousov 		(dmar_gaddr_t)DMAR_PAGE_SIZE << (5 * DMAR_NPTEPGSHIFT)
216*86be9f0dSKonstantin Belousov 	};
217*86be9f0dSKonstantin Belousov 
218*86be9f0dSKonstantin Belousov 	KASSERT(lvl >= 0 && lvl < total_pglvl,
219*86be9f0dSKonstantin Belousov 	    ("total %d lvl %d", total_pglvl, lvl));
220*86be9f0dSKonstantin Belousov 	rlvl = total_pglvl - lvl - 1;
221*86be9f0dSKonstantin Belousov 	KASSERT(rlvl < sizeof(pg_sz) / sizeof(pg_sz[0]),
222*86be9f0dSKonstantin Belousov 	    ("sizeof pg_sz lvl %d", lvl));
223*86be9f0dSKonstantin Belousov 	return (pg_sz[rlvl]);
224*86be9f0dSKonstantin Belousov }
225*86be9f0dSKonstantin Belousov 
/*
 * Return the size of the address range mapped by a single page table
 * entry at level lvl for the given context.
 */
dmar_gaddr_t
ctx_page_size(struct dmar_ctx *ctx, int lvl)
{

	return (pglvl_page_size(ctx->pglvl, lvl));
}
232*86be9f0dSKonstantin Belousov 
/* Upper bound on physical addresses usable for page table pages. */
dmar_haddr_t dmar_high;
/* Host address width.  NOTE(review): not set or read in this file — confirm use. */
int haw;
/* Count of pages allocated for DMAR page tables; exported via sysctl below. */
int dmar_tbl_pagecnt;
236*86be9f0dSKonstantin Belousov 
/*
 * Look up the page table page at index idx in obj, allocating it if
 * absent and DMAR_PGF_NOALLOC is not set.  Pages are allocated below
 * dmar_high.  The object lock is taken and dropped here unless the
 * caller already holds it (DMAR_PGF_OBJL).  With DMAR_PGF_WAITOK the
 * function sleeps for memory and retries; otherwise a failed
 * allocation returns NULL.
 */
vm_page_t
dmar_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags)
{
	vm_page_t m;
	int zeroed;

	zeroed = (flags & DMAR_PGF_ZERO) != 0 ? VM_ALLOC_ZERO : 0;
	for (;;) {
		if ((flags & DMAR_PGF_OBJL) == 0)
			VM_OBJECT_WLOCK(obj);
		m = vm_page_lookup(obj, idx);
		if ((flags & DMAR_PGF_NOALLOC) != 0 || m != NULL) {
			/* Page found, or the caller forbade allocation. */
			if ((flags & DMAR_PGF_OBJL) == 0)
				VM_OBJECT_WUNLOCK(obj);
			break;
		}
		m = vm_page_alloc_contig(obj, idx, VM_ALLOC_NOBUSY |
		    VM_ALLOC_SYSTEM | VM_ALLOC_NODUMP | zeroed, 1, 0,
		    dmar_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
		if ((flags & DMAR_PGF_OBJL) == 0)
			VM_OBJECT_WUNLOCK(obj);
		if (m != NULL) {
			/* PG_ZERO means the page came pre-zeroed. */
			if (zeroed && (m->flags & PG_ZERO) == 0)
				pmap_zero_page(m);
			atomic_add_int(&dmar_tbl_pagecnt, 1);
			break;
		}
		if ((flags & DMAR_PGF_WAITOK) == 0)
			break;
		/* Drop the caller-held object lock while sleeping. */
		if ((flags & DMAR_PGF_OBJL) != 0)
			VM_OBJECT_WUNLOCK(obj);
		VM_WAIT;
		if ((flags & DMAR_PGF_OBJL) != 0)
			VM_OBJECT_WLOCK(obj);
	}
	return (m);
}
274*86be9f0dSKonstantin Belousov 
275*86be9f0dSKonstantin Belousov void
276*86be9f0dSKonstantin Belousov dmar_pgfree(vm_object_t obj, vm_pindex_t idx, int flags)
277*86be9f0dSKonstantin Belousov {
278*86be9f0dSKonstantin Belousov 	vm_page_t m;
279*86be9f0dSKonstantin Belousov 
280*86be9f0dSKonstantin Belousov 	if ((flags & DMAR_PGF_OBJL) == 0)
281*86be9f0dSKonstantin Belousov 		VM_OBJECT_WLOCK(obj);
282*86be9f0dSKonstantin Belousov 	m = vm_page_lookup(obj, idx);
283*86be9f0dSKonstantin Belousov 	if (m != NULL) {
284*86be9f0dSKonstantin Belousov 		vm_page_free(m);
285*86be9f0dSKonstantin Belousov 		atomic_subtract_int(&dmar_tbl_pagecnt, 1);
286*86be9f0dSKonstantin Belousov 	}
287*86be9f0dSKonstantin Belousov 	if ((flags & DMAR_PGF_OBJL) == 0)
288*86be9f0dSKonstantin Belousov 		VM_OBJECT_WUNLOCK(obj);
289*86be9f0dSKonstantin Belousov }
290*86be9f0dSKonstantin Belousov 
/*
 * Map the page table page at index idx in obj into KVA through a
 * CPU-private sf_buf, optionally allocating the page (DMAR_PGF_ALLOC).
 * On success the current thread is left pinned and *sf records the
 * buffer for a later dmar_unmap_pgtbl(); the return value is the KVA
 * of the mapping.  On failure NULL is returned and a freshly
 * allocated page is freed again.  The object lock is held on return
 * exactly when the caller passed DMAR_PGF_OBJL.
 */
void *
dmar_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags,
    struct sf_buf **sf)
{
	vm_page_t m;
	bool allocated;

	if ((flags & DMAR_PGF_OBJL) == 0)
		VM_OBJECT_WLOCK(obj);
	m = vm_page_lookup(obj, idx);
	if (m == NULL && (flags & DMAR_PGF_ALLOC) != 0) {
		m = dmar_pgalloc(obj, idx, flags | DMAR_PGF_OBJL);
		allocated = true;
	} else
		allocated = false;
	if (m == NULL) {
		if ((flags & DMAR_PGF_OBJL) == 0)
			VM_OBJECT_WUNLOCK(obj);
		return (NULL);
	}
	/* Sleepable allocations cannot fail. */
	if ((flags & DMAR_PGF_WAITOK) != 0)
		VM_OBJECT_WUNLOCK(obj);
	sched_pin();
	*sf = sf_buf_alloc(m, SFB_CPUPRIVATE | ((flags & DMAR_PGF_WAITOK)
	    == 0 ? SFB_NOWAIT : 0));
	if (*sf == NULL) {
		/* sf_buf shortage in the nowait case; undo our work. */
		sched_unpin();
		if (allocated) {
			VM_OBJECT_ASSERT_WLOCKED(obj);
			dmar_pgfree(obj, m->pindex, flags | DMAR_PGF_OBJL);
		}
		if ((flags & DMAR_PGF_OBJL) == 0)
			VM_OBJECT_WUNLOCK(obj);
		return (NULL);
	}
	/*
	 * Restore the lock state the caller expects: the lock was
	 * dropped above for a sleepable sf_buf_alloc(), so reacquire
	 * it if the caller holds it (OBJL), or, in the nowait case
	 * without OBJL, drop the lock taken locally at entry.
	 */
	if ((flags & (DMAR_PGF_WAITOK | DMAR_PGF_OBJL)) ==
	    (DMAR_PGF_WAITOK | DMAR_PGF_OBJL))
		VM_OBJECT_WLOCK(obj);
	else if ((flags & (DMAR_PGF_WAITOK | DMAR_PGF_OBJL)) == 0)
		VM_OBJECT_WUNLOCK(obj);
	return ((void *)sf_buf_kva(*sf));
}
334*86be9f0dSKonstantin Belousov 
/*
 * Tear down a mapping established by dmar_map_pgtbl(): release the
 * sf_buf and unpin the thread, then, for non-coherent units, write
 * the page's cache lines back so the hardware sees the updates.
 */
void
dmar_unmap_pgtbl(struct sf_buf *sf, bool coherent)
{
	vm_page_t m;

	m = sf_buf_page(sf);
	sf_buf_free(sf);
	sched_unpin();

	/*
	 * If DMAR does not snoop paging structures accesses, flush
	 * CPU cache to memory.
	 */
	if (!coherent)
		pmap_invalidate_cache_pages(&m, 1);
}
351*86be9f0dSKonstantin Belousov 
/*
 * Load the root entry pointer into the hardware, busily waiting for
 * the completion.  The root table is page 0 of the unit's ctx_obj;
 * its physical address is written to RTADDR and latched by issuing
 * the SRTP command through GCMD, completion being signalled by
 * GSTS.RTPS.
 */
int
dmar_load_root_entry_ptr(struct dmar_unit *unit)
{
	vm_page_t root_entry;

	/*
	 * Access to the GCMD register must be serialized while the
	 * command is submitted.
	 */
	DMAR_ASSERT_LOCKED(unit);

	/* A read lock would suffice for the lookup; see disabled lines. */
	/* VM_OBJECT_RLOCK(unit->ctx_obj); */
	VM_OBJECT_WLOCK(unit->ctx_obj);
	root_entry = vm_page_lookup(unit->ctx_obj, 0);
	/* VM_OBJECT_RUNLOCK(unit->ctx_obj); */
	VM_OBJECT_WUNLOCK(unit->ctx_obj);
	dmar_write8(unit, DMAR_RTADDR_REG, VM_PAGE_TO_PHYS(root_entry));
	dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd | DMAR_GCMD_SRTP);
	/* XXXKIB should have a timeout */
	while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_RTPS) == 0)
		cpu_spinwait();
	return (0);
}
379*86be9f0dSKonstantin Belousov 
/*
 * Globally invalidate the context entries cache, busily waiting for
 * the completion.  The command is submitted through CCMD and is done
 * when the hardware clears the ICC bit in the register's upper dword.
 */
int
dmar_inv_ctx_glob(struct dmar_unit *unit)
{

	/*
	 * Access to the CCMD register must be serialized while the
	 * command is submitted.
	 */
	DMAR_ASSERT_LOCKED(unit);

	/*
	 * The DMAR_CCMD_ICC bit in the upper dword should be written
	 * after the low dword write is completed.  Amd64
	 * dmar_write8() does not have this issue, i386 dmar_write8()
	 * writes the upper dword last.
	 */
	dmar_write8(unit, DMAR_CCMD_REG, DMAR_CCMD_ICC | DMAR_CCMD_CIRG_GLOB);
	/* XXXKIB should have a timeout */
	while ((dmar_read4(unit, DMAR_CCMD_REG + 4) & DMAR_CCMD_ICC32) != 0)
		cpu_spinwait();
	return (0);
}
406*86be9f0dSKonstantin Belousov 
/*
 * Globally invalidate the IOTLB, busily waiting for the completion.
 * The IOTLB registers live at an offset reported by ECAP.IRO, in
 * 16-byte units; completion is signalled by the hardware clearing the
 * IVT bit in the register's upper dword.
 */
int
dmar_inv_iotlb_glob(struct dmar_unit *unit)
{
	int reg;

	DMAR_ASSERT_LOCKED(unit);

	reg = 16 * DMAR_ECAP_IRO(unit->hw_ecap);
	/* See a comment about DMAR_CCMD_ICC in dmar_inv_ctx_glob. */
	dmar_write8(unit, reg + DMAR_IOTLB_REG_OFF, DMAR_IOTLB_IVT |
	    DMAR_IOTLB_IIRG_GLB | DMAR_IOTLB_DR | DMAR_IOTLB_DW);
	/* XXXKIB should have a timeout */
	while ((dmar_read4(unit, reg + DMAR_IOTLB_REG_OFF + 4) &
	    DMAR_IOTLB_IVT32) != 0)
		cpu_spinwait();
	return (0);
}
427*86be9f0dSKonstantin Belousov 
/*
 * Flush the chipset write buffers.  See 11.1 "Write Buffer Flushing"
 * in the architecture specification.  Issues the WBF command through
 * GCMD and spins until GSTS.WBFS reports completion.
 */
int
dmar_flush_write_bufs(struct dmar_unit *unit)
{

	DMAR_ASSERT_LOCKED(unit);

	/*
	 * DMAR_GCMD_WBF is only valid when CAP_RWBF is reported.
	 */
	KASSERT((unit->hw_cap & DMAR_CAP_RWBF) != 0,
	    ("dmar%d: no RWBF", unit->unit));

	dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd | DMAR_GCMD_WBF);
	/* XXXKIB should have a timeout */
	while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_WBFS) == 0)
		cpu_spinwait();
	return (0);
}
450*86be9f0dSKonstantin Belousov 
/*
 * Enable DMA remapping: set GCMD.TE in the cached command value,
 * write it out, and spin until the hardware acknowledges through
 * GSTS.TES.
 */
int
dmar_enable_translation(struct dmar_unit *unit)
{

	DMAR_ASSERT_LOCKED(unit);
	unit->hw_gcmd |= DMAR_GCMD_TE;
	dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd);
	/* XXXKIB should have a timeout */
	while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_TES) == 0)
		cpu_spinwait();
	return (0);
}
463*86be9f0dSKonstantin Belousov 
/*
 * Disable DMA remapping: clear GCMD.TE in the cached command value,
 * write it out, and spin until GSTS.TES goes to zero.
 */
int
dmar_disable_translation(struct dmar_unit *unit)
{

	DMAR_ASSERT_LOCKED(unit);
	unit->hw_gcmd &= ~DMAR_GCMD_TE;
	dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd);
	/* XXXKIB should have a timeout */
	while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_TES) != 0)
		cpu_spinwait();
	return (0);
}
476*86be9f0dSKonstantin Belousov 
477*86be9f0dSKonstantin Belousov void
478*86be9f0dSKonstantin Belousov dmar_enable_intr(struct dmar_unit *unit)
479*86be9f0dSKonstantin Belousov {
480*86be9f0dSKonstantin Belousov 	uint32_t fectl;
481*86be9f0dSKonstantin Belousov 
482*86be9f0dSKonstantin Belousov 	fectl = dmar_read4(unit, DMAR_FECTL_REG);
483*86be9f0dSKonstantin Belousov 	fectl &= ~DMAR_FECTL_IM;
484*86be9f0dSKonstantin Belousov 	dmar_write4(unit, DMAR_FECTL_REG, fectl);
485*86be9f0dSKonstantin Belousov }
486*86be9f0dSKonstantin Belousov 
487*86be9f0dSKonstantin Belousov void
488*86be9f0dSKonstantin Belousov dmar_disable_intr(struct dmar_unit *unit)
489*86be9f0dSKonstantin Belousov {
490*86be9f0dSKonstantin Belousov 	uint32_t fectl;
491*86be9f0dSKonstantin Belousov 
492*86be9f0dSKonstantin Belousov 	fectl = dmar_read4(unit, DMAR_FECTL_REG);
493*86be9f0dSKonstantin Belousov 	dmar_write4(unit, DMAR_FECTL_REG, fectl | DMAR_FECTL_IM);
494*86be9f0dSKonstantin Belousov }
495*86be9f0dSKonstantin Belousov 
/*
 * Common prologue for the barrier functions: derive the three flag
 * bits for the given barrier_id from its slot in dmar->barrier_flags.
 * Each barrier occupies three consecutive bits: done, in-progress,
 * and wakeup-requested.
 */
#define BARRIER_F				\
	u_int f_done, f_inproc, f_wakeup;	\
						\
	f_done = 1 << (barrier_id * 3);		\
	f_inproc = 1 << (barrier_id * 3 + 1);	\
	f_wakeup = 1 << (barrier_id * 3 + 2)
502*86be9f0dSKonstantin Belousov 
/*
 * Enter the once-only barrier identified by barrier_id.  Returns true
 * if the caller won the race and must perform the initialization and
 * then call dmar_barrier_exit(); returns false if the work was
 * already done, possibly after sleeping until a concurrent
 * initializer finished.
 */
bool
dmar_barrier_enter(struct dmar_unit *dmar, u_int barrier_id)
{
	BARRIER_F;

	DMAR_LOCK(dmar);
	if ((dmar->barrier_flags & f_done) != 0) {
		/* Initialization already completed. */
		DMAR_UNLOCK(dmar);
		return (false);
	}

	if ((dmar->barrier_flags & f_inproc) != 0) {
		/* Another thread is initializing; request a wakeup and wait. */
		while ((dmar->barrier_flags & f_inproc) != 0) {
			dmar->barrier_flags |= f_wakeup;
			msleep(&dmar->barrier_flags, &dmar->lock, 0,
			    "dmarb", 0);
		}
		KASSERT((dmar->barrier_flags & f_done) != 0,
		    ("dmar%d barrier %d missing done", dmar->unit, barrier_id));
		DMAR_UNLOCK(dmar);
		return (false);
	}

	/* We are the initializer; mark the barrier in progress. */
	dmar->barrier_flags |= f_inproc;
	DMAR_UNLOCK(dmar);
	return (true);
}
530*86be9f0dSKonstantin Belousov 
/*
 * Complete the barrier entered by a successful dmar_barrier_enter():
 * mark the work done, wake any waiters that requested it, and clear
 * the transient in-progress and wakeup bits.  Called with the DMAR
 * lock held; drops it.
 */
void
dmar_barrier_exit(struct dmar_unit *dmar, u_int barrier_id)
{
	BARRIER_F;

	DMAR_ASSERT_LOCKED(dmar);
	KASSERT((dmar->barrier_flags & (f_done | f_inproc)) == f_inproc,
	    ("dmar%d barrier %d missed entry", dmar->unit, barrier_id));
	dmar->barrier_flags |= f_done;
	if ((dmar->barrier_flags & f_wakeup) != 0)
		wakeup(&dmar->barrier_flags);
	dmar->barrier_flags &= ~(f_inproc | f_wakeup);
	DMAR_UNLOCK(dmar);
}
545*86be9f0dSKonstantin Belousov 
/* Non-zero to log the matching of PCI devices to DMAR paths. */
int dmar_match_verbose;

/* hw.dmar sysctl/tunable tree. */
static SYSCTL_NODE(_hw, OID_AUTO, dmar, CTLFLAG_RD, NULL,
    "");
SYSCTL_INT(_hw_dmar, OID_AUTO, tbl_pagecnt, CTLFLAG_RD | CTLFLAG_TUN,
    &dmar_tbl_pagecnt, 0,
    "Count of pages used for DMAR pagetables");
SYSCTL_INT(_hw_dmar, OID_AUTO, match_verbose, CTLFLAG_RW | CTLFLAG_TUN,
    &dmar_match_verbose, 0,
    "Verbose matching of the PCI devices to DMAR paths");
#ifdef INVARIANTS
/* Debug-only knob: enable expensive RB-tree consistency checks. */
int dmar_check_free;
SYSCTL_INT(_hw_dmar, OID_AUTO, check_free, CTLFLAG_RW | CTLFLAG_TUN,
    &dmar_check_free, 0,
    "Check the GPA RBtree for free_down and free_after validity");
#endif
562*86be9f0dSKonstantin Belousov 
563