xref: /freebsd/sys/amd64/vmm/intel/vtd.c (revision 366f60834ff8ef709f132fe8976c96a5e2caace9)
1*366f6083SPeter Grehan /*-
2*366f6083SPeter Grehan  * Copyright (c) 2011 NetApp, Inc.
3*366f6083SPeter Grehan  * All rights reserved.
4*366f6083SPeter Grehan  *
5*366f6083SPeter Grehan  * Redistribution and use in source and binary forms, with or without
6*366f6083SPeter Grehan  * modification, are permitted provided that the following conditions
7*366f6083SPeter Grehan  * are met:
8*366f6083SPeter Grehan  * 1. Redistributions of source code must retain the above copyright
9*366f6083SPeter Grehan  *    notice, this list of conditions and the following disclaimer.
10*366f6083SPeter Grehan  * 2. Redistributions in binary form must reproduce the above copyright
11*366f6083SPeter Grehan  *    notice, this list of conditions and the following disclaimer in the
12*366f6083SPeter Grehan  *    documentation and/or other materials provided with the distribution.
13*366f6083SPeter Grehan  *
14*366f6083SPeter Grehan  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15*366f6083SPeter Grehan  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16*366f6083SPeter Grehan  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17*366f6083SPeter Grehan  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18*366f6083SPeter Grehan  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19*366f6083SPeter Grehan  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20*366f6083SPeter Grehan  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21*366f6083SPeter Grehan  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22*366f6083SPeter Grehan  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23*366f6083SPeter Grehan  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24*366f6083SPeter Grehan  * SUCH DAMAGE.
25*366f6083SPeter Grehan  *
26*366f6083SPeter Grehan  * $FreeBSD$
27*366f6083SPeter Grehan  */
28*366f6083SPeter Grehan 
29*366f6083SPeter Grehan #include <sys/cdefs.h>
30*366f6083SPeter Grehan __FBSDID("$FreeBSD$");
31*366f6083SPeter Grehan 
32*366f6083SPeter Grehan #include <sys/param.h>
33*366f6083SPeter Grehan #include <sys/kernel.h>
34*366f6083SPeter Grehan #include <sys/systm.h>
35*366f6083SPeter Grehan #include <sys/malloc.h>
36*366f6083SPeter Grehan 
37*366f6083SPeter Grehan #include <vm/vm.h>
38*366f6083SPeter Grehan #include <vm/pmap.h>
39*366f6083SPeter Grehan 
40*366f6083SPeter Grehan #include <dev/pci/pcireg.h>
41*366f6083SPeter Grehan 
42*366f6083SPeter Grehan #include <machine/pmap.h>
43*366f6083SPeter Grehan #include <machine/vmparam.h>
44*366f6083SPeter Grehan #include <machine/pci_cfgreg.h>
45*366f6083SPeter Grehan 
46*366f6083SPeter Grehan #include "io/iommu.h"
47*366f6083SPeter Grehan 
/*
 * The register layout and bit definitions in this file follow the
 * "Intel Virtualization Technology for Directed I/O" Architecture Spec,
 * September 2008.
 */

/*
 * Leading registers of a remapping unit, see Section 10.4 "Register
 * Descriptions".  Every field is volatile so that each access in the code
 * results in an actual load or store.  The byte offsets noted below follow
 * from the sizes of the fields.
 */
struct vtdmap {
	volatile uint32_t	version;	/* 0x00 */
	volatile uint32_t	res0;		/* 0x04: keeps 'cap' at 0x08 */
	volatile uint64_t	cap;		/* 0x08: decoded by VTD_CAP_* */
	volatile uint64_t	ext_cap;	/* 0x10: decoded by VTD_ECAP_* */
	volatile uint32_t	gcr;		/* 0x18: written with VTD_GCR_* */
	volatile uint32_t	gsr;		/* 0x1c: polled with VTD_GSR_* */
	volatile uint64_t	rta;		/* 0x20: root table address */
	volatile uint64_t	ccr;		/* 0x28: written with VTD_CCR_* */
};
64*366f6083SPeter Grehan 
/* Field extractors for the capability register ('cap'). */
#define	VTD_CAP_SAGAW(cap)	(((cap) >> 8) & 0x1F)	/* supported AGAWs */
#define	VTD_CAP_ND(cap)		((cap) & 0x7)		/* number of domains */
#define	VTD_CAP_CM(cap)		(((cap) >> 7) & 0x1)	/* caching mode */
#define	VTD_CAP_SPS(cap)	(((cap) >> 34) & 0xF)	/* super page sizes */
#define	VTD_CAP_RWBF(cap)	(((cap) >> 4) & 0x1)	/* write buffer flush */

/* Field extractors for the extended capability register ('ext_cap'). */
#define	VTD_ECAP_DI(ecap)	(((ecap) >> 2) & 0x1)
#define	VTD_ECAP_COHERENCY(ecap) ((ecap) & 0x1)
#define	VTD_ECAP_IRO(ecap)	(((ecap) >> 8) & 0x3FF)	/* IOTLB reg offset */

/*
 * Bits of the 32-bit 'gcr' and 'gsr' registers.  The constants are unsigned:
 * shifting a signed 1 into bit 31 is undefined behavior and the resulting
 * negative value would sign-extend if it were ever widened to 64 bits.
 */
#define	VTD_GCR_WBF		(1U << 27)
#define	VTD_GCR_SRTP		(1U << 30)
#define	VTD_GCR_TE		(1U << 31)

#define	VTD_GSR_WBFS		(1U << 27)
#define	VTD_GSR_RTPS		(1U << 30)
#define	VTD_GSR_TES		(1U << 31)

#define	VTD_CCR_ICC		(1UL << 63)	/* invalidate context cache */
#define	VTD_CCR_CIRG_GLOBAL	(1UL << 61)	/* global invalidation */

#define	VTD_IIR_IVT		(1UL << 63)	/* invalidation IOTLB */
#define	VTD_IIR_IIRG_GLOBAL	(1ULL << 60)	/* global IOTLB invalidation */
#define	VTD_IIR_IIRG_DOMAIN	(2ULL << 60)	/* domain IOTLB invalidation */
#define	VTD_IIR_IIRG_PAGE	(3ULL << 60)	/* page IOTLB invalidation */
#define	VTD_IIR_DRAIN_READS	(1ULL << 49)	/* drain pending DMA reads */
#define	VTD_IIR_DRAIN_WRITES	(1ULL << 48)	/* drain pending DMA writes */
#define	VTD_IIR_DOMAIN_P	32

/* Flags in the low word of root-table and context-table entries. */
#define	VTD_ROOT_PRESENT	0x1
#define	VTD_CTX_PRESENT		0x1
#define	VTD_CTX_TT_ALL		(1UL << 2)

/* Page table entry bits and the mask selecting the physical address. */
#define	VTD_PTE_RD		(1UL << 0)
#define	VTD_PTE_WR		(1UL << 1)
#define	VTD_PTE_SUPERPAGE	(1UL << 7)
#define	VTD_PTE_ADDR_M		(0x000FFFFFFFFFF000UL)
102*366f6083SPeter Grehan 
103*366f6083SPeter Grehan struct domain {
104*366f6083SPeter Grehan 	uint64_t	*ptp;		/* first level page table page */
105*366f6083SPeter Grehan 	int		pt_levels;	/* number of page table levels */
106*366f6083SPeter Grehan 	int		addrwidth;	/* 'AW' field in context entry */
107*366f6083SPeter Grehan 	int		spsmask;	/* supported super page sizes */
108*366f6083SPeter Grehan 	u_int		id;		/* domain id */
109*366f6083SPeter Grehan 	vm_paddr_t	maxaddr;	/* highest address to be mapped */
110*366f6083SPeter Grehan 	SLIST_ENTRY(domain) next;
111*366f6083SPeter Grehan };
112*366f6083SPeter Grehan 
113*366f6083SPeter Grehan static SLIST_HEAD(, domain) domhead;
114*366f6083SPeter Grehan 
115*366f6083SPeter Grehan #define	DRHD_MAX_UNITS	8
116*366f6083SPeter Grehan static int		drhd_num;
117*366f6083SPeter Grehan static struct vtdmap	*vtdmaps[DRHD_MAX_UNITS];
118*366f6083SPeter Grehan static int		max_domains;
119*366f6083SPeter Grehan typedef int		(*drhd_ident_func_t)(void);
120*366f6083SPeter Grehan 
121*366f6083SPeter Grehan static uint64_t root_table[PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);
122*366f6083SPeter Grehan static uint64_t ctx_tables[256][PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);
123*366f6083SPeter Grehan 
124*366f6083SPeter Grehan static MALLOC_DEFINE(M_VTD, "vtd", "vtd");
125*366f6083SPeter Grehan 
126*366f6083SPeter Grehan /*
127*366f6083SPeter Grehan  * Config space register definitions from the "Intel 5520 and 5500" datasheet.
128*366f6083SPeter Grehan  */
129*366f6083SPeter Grehan static int
130*366f6083SPeter Grehan tylersburg_vtd_ident(void)
131*366f6083SPeter Grehan {
132*366f6083SPeter Grehan 	int units, nlbus;
133*366f6083SPeter Grehan 	uint16_t did, vid;
134*366f6083SPeter Grehan 	uint32_t miscsts, vtbar;
135*366f6083SPeter Grehan 
136*366f6083SPeter Grehan 	const int bus = 0;
137*366f6083SPeter Grehan 	const int slot = 20;
138*366f6083SPeter Grehan 	const int func = 0;
139*366f6083SPeter Grehan 
140*366f6083SPeter Grehan 	units = 0;
141*366f6083SPeter Grehan 
142*366f6083SPeter Grehan 	vid = pci_cfgregread(bus, slot, func, PCIR_VENDOR, 2);
143*366f6083SPeter Grehan 	did = pci_cfgregread(bus, slot, func, PCIR_DEVICE, 2);
144*366f6083SPeter Grehan 	if (vid != 0x8086 || did != 0x342E)
145*366f6083SPeter Grehan 		goto done;
146*366f6083SPeter Grehan 
147*366f6083SPeter Grehan 	/*
148*366f6083SPeter Grehan 	 * Check if this is a dual IOH configuration.
149*366f6083SPeter Grehan 	 */
150*366f6083SPeter Grehan 	miscsts = pci_cfgregread(bus, slot, func, 0x9C, 4);
151*366f6083SPeter Grehan 	if (miscsts & (1 << 25))
152*366f6083SPeter Grehan 		nlbus = pci_cfgregread(bus, slot, func, 0x160, 1);
153*366f6083SPeter Grehan 	else
154*366f6083SPeter Grehan 		nlbus = -1;
155*366f6083SPeter Grehan 
156*366f6083SPeter Grehan 	vtbar = pci_cfgregread(bus, slot, func, 0x180, 4);
157*366f6083SPeter Grehan 	if (vtbar & 0x1) {
158*366f6083SPeter Grehan 		vtdmaps[units++] = (struct vtdmap *)
159*366f6083SPeter Grehan 					PHYS_TO_DMAP(vtbar & 0xffffe000);
160*366f6083SPeter Grehan 	} else if (bootverbose)
161*366f6083SPeter Grehan 		printf("VT-d unit in legacy IOH is disabled!\n");
162*366f6083SPeter Grehan 
163*366f6083SPeter Grehan 	if (nlbus != -1) {
164*366f6083SPeter Grehan 		vtbar = pci_cfgregread(nlbus, slot, func, 0x180, 4);
165*366f6083SPeter Grehan 		if (vtbar & 0x1) {
166*366f6083SPeter Grehan 			vtdmaps[units++] = (struct vtdmap *)
167*366f6083SPeter Grehan 					   PHYS_TO_DMAP(vtbar & 0xffffe000);
168*366f6083SPeter Grehan 		} else if (bootverbose)
169*366f6083SPeter Grehan 			printf("VT-d unit in non-legacy IOH is disabled!\n");
170*366f6083SPeter Grehan 	}
171*366f6083SPeter Grehan done:
172*366f6083SPeter Grehan 	return (units);
173*366f6083SPeter Grehan }
174*366f6083SPeter Grehan 
175*366f6083SPeter Grehan static drhd_ident_func_t drhd_ident_funcs[] = {
176*366f6083SPeter Grehan 	tylersburg_vtd_ident,
177*366f6083SPeter Grehan 	NULL
178*366f6083SPeter Grehan };
179*366f6083SPeter Grehan 
180*366f6083SPeter Grehan static int
181*366f6083SPeter Grehan vtd_max_domains(struct vtdmap *vtdmap)
182*366f6083SPeter Grehan {
183*366f6083SPeter Grehan 	int nd;
184*366f6083SPeter Grehan 
185*366f6083SPeter Grehan 	nd = VTD_CAP_ND(vtdmap->cap);
186*366f6083SPeter Grehan 
187*366f6083SPeter Grehan 	switch (nd) {
188*366f6083SPeter Grehan 	case 0:
189*366f6083SPeter Grehan 		return (16);
190*366f6083SPeter Grehan 	case 1:
191*366f6083SPeter Grehan 		return (64);
192*366f6083SPeter Grehan 	case 2:
193*366f6083SPeter Grehan 		return (256);
194*366f6083SPeter Grehan 	case 3:
195*366f6083SPeter Grehan 		return (1024);
196*366f6083SPeter Grehan 	case 4:
197*366f6083SPeter Grehan 		return (4 * 1024);
198*366f6083SPeter Grehan 	case 5:
199*366f6083SPeter Grehan 		return (16 * 1024);
200*366f6083SPeter Grehan 	case 6:
201*366f6083SPeter Grehan 		return (64 * 1024);
202*366f6083SPeter Grehan 	default:
203*366f6083SPeter Grehan 		panic("vtd_max_domains: invalid value of nd (0x%0x)", nd);
204*366f6083SPeter Grehan 	}
205*366f6083SPeter Grehan }
206*366f6083SPeter Grehan 
207*366f6083SPeter Grehan static u_int
208*366f6083SPeter Grehan domain_id(void)
209*366f6083SPeter Grehan {
210*366f6083SPeter Grehan 	u_int id;
211*366f6083SPeter Grehan 	struct domain *dom;
212*366f6083SPeter Grehan 
213*366f6083SPeter Grehan 	/* Skip domain id 0 - it is reserved when Caching Mode field is set */
214*366f6083SPeter Grehan 	for (id = 1; id < max_domains; id++) {
215*366f6083SPeter Grehan 		SLIST_FOREACH(dom, &domhead, next) {
216*366f6083SPeter Grehan 			if (dom->id == id)
217*366f6083SPeter Grehan 				break;
218*366f6083SPeter Grehan 		}
219*366f6083SPeter Grehan 		if (dom == NULL)
220*366f6083SPeter Grehan 			break;		/* found it */
221*366f6083SPeter Grehan 	}
222*366f6083SPeter Grehan 
223*366f6083SPeter Grehan 	if (id >= max_domains)
224*366f6083SPeter Grehan 		panic("domain ids exhausted");
225*366f6083SPeter Grehan 
226*366f6083SPeter Grehan 	return (id);
227*366f6083SPeter Grehan }
228*366f6083SPeter Grehan 
229*366f6083SPeter Grehan static void
230*366f6083SPeter Grehan vtd_wbflush(struct vtdmap *vtdmap)
231*366f6083SPeter Grehan {
232*366f6083SPeter Grehan 
233*366f6083SPeter Grehan 	if (VTD_ECAP_COHERENCY(vtdmap->ext_cap) == 0)
234*366f6083SPeter Grehan 		pmap_invalidate_cache();
235*366f6083SPeter Grehan 
236*366f6083SPeter Grehan 	if (VTD_CAP_RWBF(vtdmap->cap)) {
237*366f6083SPeter Grehan 		vtdmap->gcr = VTD_GCR_WBF;
238*366f6083SPeter Grehan 		while ((vtdmap->gsr & VTD_GSR_WBFS) != 0)
239*366f6083SPeter Grehan 			;
240*366f6083SPeter Grehan 	}
241*366f6083SPeter Grehan }
242*366f6083SPeter Grehan 
243*366f6083SPeter Grehan static void
244*366f6083SPeter Grehan vtd_ctx_global_invalidate(struct vtdmap *vtdmap)
245*366f6083SPeter Grehan {
246*366f6083SPeter Grehan 
247*366f6083SPeter Grehan 	vtdmap->ccr = VTD_CCR_ICC | VTD_CCR_CIRG_GLOBAL;
248*366f6083SPeter Grehan 	while ((vtdmap->ccr & VTD_CCR_ICC) != 0)
249*366f6083SPeter Grehan 		;
250*366f6083SPeter Grehan }
251*366f6083SPeter Grehan 
252*366f6083SPeter Grehan static void
253*366f6083SPeter Grehan vtd_iotlb_global_invalidate(struct vtdmap *vtdmap)
254*366f6083SPeter Grehan {
255*366f6083SPeter Grehan 	int offset;
256*366f6083SPeter Grehan 	volatile uint64_t *iotlb_reg, val;
257*366f6083SPeter Grehan 
258*366f6083SPeter Grehan 	vtd_wbflush(vtdmap);
259*366f6083SPeter Grehan 
260*366f6083SPeter Grehan 	offset = VTD_ECAP_IRO(vtdmap->ext_cap) * 16;
261*366f6083SPeter Grehan 	iotlb_reg = (volatile uint64_t *)((caddr_t)vtdmap + offset + 8);
262*366f6083SPeter Grehan 
263*366f6083SPeter Grehan 	*iotlb_reg =  VTD_IIR_IVT | VTD_IIR_IIRG_GLOBAL |
264*366f6083SPeter Grehan 		      VTD_IIR_DRAIN_READS | VTD_IIR_DRAIN_WRITES;
265*366f6083SPeter Grehan 
266*366f6083SPeter Grehan 	while (1) {
267*366f6083SPeter Grehan 		val = *iotlb_reg;
268*366f6083SPeter Grehan 		if ((val & VTD_IIR_IVT) == 0)
269*366f6083SPeter Grehan 			break;
270*366f6083SPeter Grehan 	}
271*366f6083SPeter Grehan }
272*366f6083SPeter Grehan 
273*366f6083SPeter Grehan static void
274*366f6083SPeter Grehan vtd_translation_enable(struct vtdmap *vtdmap)
275*366f6083SPeter Grehan {
276*366f6083SPeter Grehan 
277*366f6083SPeter Grehan 	vtdmap->gcr = VTD_GCR_TE;
278*366f6083SPeter Grehan 	while ((vtdmap->gsr & VTD_GSR_TES) == 0)
279*366f6083SPeter Grehan 		;
280*366f6083SPeter Grehan }
281*366f6083SPeter Grehan 
282*366f6083SPeter Grehan static void
283*366f6083SPeter Grehan vtd_translation_disable(struct vtdmap *vtdmap)
284*366f6083SPeter Grehan {
285*366f6083SPeter Grehan 
286*366f6083SPeter Grehan 	vtdmap->gcr = 0;
287*366f6083SPeter Grehan 	while ((vtdmap->gsr & VTD_GSR_TES) != 0)
288*366f6083SPeter Grehan 		;
289*366f6083SPeter Grehan }
290*366f6083SPeter Grehan 
291*366f6083SPeter Grehan static int
292*366f6083SPeter Grehan vtd_init(void)
293*366f6083SPeter Grehan {
294*366f6083SPeter Grehan 	int i, units;
295*366f6083SPeter Grehan 	struct vtdmap *vtdmap;
296*366f6083SPeter Grehan 	vm_paddr_t ctx_paddr;
297*366f6083SPeter Grehan 
298*366f6083SPeter Grehan 	for (i = 0; drhd_ident_funcs[i] != NULL; i++) {
299*366f6083SPeter Grehan 		units = (*drhd_ident_funcs[i])();
300*366f6083SPeter Grehan 		if (units > 0)
301*366f6083SPeter Grehan 			break;
302*366f6083SPeter Grehan 	}
303*366f6083SPeter Grehan 
304*366f6083SPeter Grehan 	if (units <= 0)
305*366f6083SPeter Grehan 		return (ENXIO);
306*366f6083SPeter Grehan 
307*366f6083SPeter Grehan 	drhd_num = units;
308*366f6083SPeter Grehan 	vtdmap = vtdmaps[0];
309*366f6083SPeter Grehan 
310*366f6083SPeter Grehan 	if (VTD_CAP_CM(vtdmap->cap) != 0)
311*366f6083SPeter Grehan 		panic("vtd_init: invalid caching mode");
312*366f6083SPeter Grehan 
313*366f6083SPeter Grehan 	max_domains = vtd_max_domains(vtdmap);
314*366f6083SPeter Grehan 
315*366f6083SPeter Grehan 	/*
316*366f6083SPeter Grehan 	 * Set up the root-table to point to the context-entry tables
317*366f6083SPeter Grehan 	 */
318*366f6083SPeter Grehan 	for (i = 0; i < 256; i++) {
319*366f6083SPeter Grehan 		ctx_paddr = vtophys(ctx_tables[i]);
320*366f6083SPeter Grehan 		if (ctx_paddr & PAGE_MASK)
321*366f6083SPeter Grehan 			panic("ctx table (0x%0lx) not page aligned", ctx_paddr);
322*366f6083SPeter Grehan 
323*366f6083SPeter Grehan 		root_table[i * 2] = ctx_paddr | VTD_ROOT_PRESENT;
324*366f6083SPeter Grehan 	}
325*366f6083SPeter Grehan 
326*366f6083SPeter Grehan 	return (0);
327*366f6083SPeter Grehan }
328*366f6083SPeter Grehan 
/*
 * Counterpart of vtd_init().  Intentionally empty: this function releases
 * nothing.
 */
static void
vtd_cleanup(void)
{
	/* nothing to do */
}
333*366f6083SPeter Grehan 
334*366f6083SPeter Grehan static void
335*366f6083SPeter Grehan vtd_enable(void)
336*366f6083SPeter Grehan {
337*366f6083SPeter Grehan 	int i;
338*366f6083SPeter Grehan 	struct vtdmap *vtdmap;
339*366f6083SPeter Grehan 
340*366f6083SPeter Grehan 	for (i = 0; i < drhd_num; i++) {
341*366f6083SPeter Grehan 		vtdmap = vtdmaps[i];
342*366f6083SPeter Grehan 		vtd_wbflush(vtdmap);
343*366f6083SPeter Grehan 
344*366f6083SPeter Grehan 		/* Update the root table address */
345*366f6083SPeter Grehan 		vtdmap->rta = vtophys(root_table);
346*366f6083SPeter Grehan 		vtdmap->gcr = VTD_GCR_SRTP;
347*366f6083SPeter Grehan 		while ((vtdmap->gsr & VTD_GSR_RTPS) == 0)
348*366f6083SPeter Grehan 			;
349*366f6083SPeter Grehan 
350*366f6083SPeter Grehan 		vtd_ctx_global_invalidate(vtdmap);
351*366f6083SPeter Grehan 		vtd_iotlb_global_invalidate(vtdmap);
352*366f6083SPeter Grehan 
353*366f6083SPeter Grehan 		vtd_translation_enable(vtdmap);
354*366f6083SPeter Grehan 	}
355*366f6083SPeter Grehan }
356*366f6083SPeter Grehan 
357*366f6083SPeter Grehan static void
358*366f6083SPeter Grehan vtd_disable(void)
359*366f6083SPeter Grehan {
360*366f6083SPeter Grehan 	int i;
361*366f6083SPeter Grehan 	struct vtdmap *vtdmap;
362*366f6083SPeter Grehan 
363*366f6083SPeter Grehan 	for (i = 0; i < drhd_num; i++) {
364*366f6083SPeter Grehan 		vtdmap = vtdmaps[i];
365*366f6083SPeter Grehan 		vtd_translation_disable(vtdmap);
366*366f6083SPeter Grehan 	}
367*366f6083SPeter Grehan }
368*366f6083SPeter Grehan 
369*366f6083SPeter Grehan static void
370*366f6083SPeter Grehan vtd_add_device(void *arg, int bus, int slot, int func)
371*366f6083SPeter Grehan {
372*366f6083SPeter Grehan 	int idx;
373*366f6083SPeter Grehan 	uint64_t *ctxp;
374*366f6083SPeter Grehan 	struct domain *dom = arg;
375*366f6083SPeter Grehan 	vm_paddr_t pt_paddr;
376*366f6083SPeter Grehan 	struct vtdmap *vtdmap;
377*366f6083SPeter Grehan 
378*366f6083SPeter Grehan 	if (bus < 0 || bus > PCI_BUSMAX ||
379*366f6083SPeter Grehan 	    slot < 0 || slot > PCI_SLOTMAX ||
380*366f6083SPeter Grehan 	    func < 0 || func > PCI_FUNCMAX)
381*366f6083SPeter Grehan 		panic("vtd_add_device: invalid bsf %d/%d/%d", bus, slot, func);
382*366f6083SPeter Grehan 
383*366f6083SPeter Grehan 	vtdmap = vtdmaps[0];
384*366f6083SPeter Grehan 	ctxp = ctx_tables[bus];
385*366f6083SPeter Grehan 	pt_paddr = vtophys(dom->ptp);
386*366f6083SPeter Grehan 	idx = (slot << 3 | func) * 2;
387*366f6083SPeter Grehan 
388*366f6083SPeter Grehan 	if (ctxp[idx] & VTD_CTX_PRESENT) {
389*366f6083SPeter Grehan 		panic("vtd_add_device: device %d/%d/%d is already owned by "
390*366f6083SPeter Grehan 		      "domain %d", bus, slot, func,
391*366f6083SPeter Grehan 		      (uint16_t)(ctxp[idx + 1] >> 8));
392*366f6083SPeter Grehan 	}
393*366f6083SPeter Grehan 
394*366f6083SPeter Grehan 	/*
395*366f6083SPeter Grehan 	 * Order is important. The 'present' bit is set only after all fields
396*366f6083SPeter Grehan 	 * of the context pointer are initialized.
397*366f6083SPeter Grehan 	 */
398*366f6083SPeter Grehan 	ctxp[idx + 1] = dom->addrwidth | (dom->id << 8);
399*366f6083SPeter Grehan 
400*366f6083SPeter Grehan 	if (VTD_ECAP_DI(vtdmap->ext_cap))
401*366f6083SPeter Grehan 		ctxp[idx] = VTD_CTX_TT_ALL;
402*366f6083SPeter Grehan 	else
403*366f6083SPeter Grehan 		ctxp[idx] = 0;
404*366f6083SPeter Grehan 
405*366f6083SPeter Grehan 	ctxp[idx] |= pt_paddr | VTD_CTX_PRESENT;
406*366f6083SPeter Grehan 
407*366f6083SPeter Grehan 	/*
408*366f6083SPeter Grehan 	 * 'Not Present' entries are not cached in either the Context Cache
409*366f6083SPeter Grehan 	 * or in the IOTLB, so there is no need to invalidate either of them.
410*366f6083SPeter Grehan 	 */
411*366f6083SPeter Grehan }
412*366f6083SPeter Grehan 
413*366f6083SPeter Grehan static void
414*366f6083SPeter Grehan vtd_remove_device(void *arg, int bus, int slot, int func)
415*366f6083SPeter Grehan {
416*366f6083SPeter Grehan 	int i, idx;
417*366f6083SPeter Grehan 	uint64_t *ctxp;
418*366f6083SPeter Grehan 	struct vtdmap *vtdmap;
419*366f6083SPeter Grehan 
420*366f6083SPeter Grehan 	if (bus < 0 || bus > PCI_BUSMAX ||
421*366f6083SPeter Grehan 	    slot < 0 || slot > PCI_SLOTMAX ||
422*366f6083SPeter Grehan 	    func < 0 || func > PCI_FUNCMAX)
423*366f6083SPeter Grehan 		panic("vtd_add_device: invalid bsf %d/%d/%d", bus, slot, func);
424*366f6083SPeter Grehan 
425*366f6083SPeter Grehan 	ctxp = ctx_tables[bus];
426*366f6083SPeter Grehan 	idx = (slot << 3 | func) * 2;
427*366f6083SPeter Grehan 
428*366f6083SPeter Grehan 	/*
429*366f6083SPeter Grehan 	 * Order is important. The 'present' bit is must be cleared first.
430*366f6083SPeter Grehan 	 */
431*366f6083SPeter Grehan 	ctxp[idx] = 0;
432*366f6083SPeter Grehan 	ctxp[idx + 1] = 0;
433*366f6083SPeter Grehan 
434*366f6083SPeter Grehan 	/*
435*366f6083SPeter Grehan 	 * Invalidate the Context Cache and the IOTLB.
436*366f6083SPeter Grehan 	 *
437*366f6083SPeter Grehan 	 * XXX use device-selective invalidation for Context Cache
438*366f6083SPeter Grehan 	 * XXX use domain-selective invalidation for IOTLB
439*366f6083SPeter Grehan 	 */
440*366f6083SPeter Grehan 	for (i = 0; i < drhd_num; i++) {
441*366f6083SPeter Grehan 		vtdmap = vtdmaps[i];
442*366f6083SPeter Grehan 		vtd_ctx_global_invalidate(vtdmap);
443*366f6083SPeter Grehan 		vtd_iotlb_global_invalidate(vtdmap);
444*366f6083SPeter Grehan 	}
445*366f6083SPeter Grehan }
446*366f6083SPeter Grehan 
447*366f6083SPeter Grehan static uint64_t
448*366f6083SPeter Grehan vtd_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len)
449*366f6083SPeter Grehan {
450*366f6083SPeter Grehan 	struct domain *dom;
451*366f6083SPeter Grehan 	int i, spshift, ptpshift, ptpindex, nlevels;
452*366f6083SPeter Grehan 	uint64_t spsize, *ptp;
453*366f6083SPeter Grehan 
454*366f6083SPeter Grehan 	dom = arg;
455*366f6083SPeter Grehan 	ptpindex = 0;
456*366f6083SPeter Grehan 	ptpshift = 0;
457*366f6083SPeter Grehan 
458*366f6083SPeter Grehan 	if (gpa & PAGE_MASK)
459*366f6083SPeter Grehan 		panic("vtd_create_mapping: unaligned gpa 0x%0lx", gpa);
460*366f6083SPeter Grehan 
461*366f6083SPeter Grehan 	if (hpa & PAGE_MASK)
462*366f6083SPeter Grehan 		panic("vtd_create_mapping: unaligned hpa 0x%0lx", hpa);
463*366f6083SPeter Grehan 
464*366f6083SPeter Grehan 	if (len & PAGE_MASK)
465*366f6083SPeter Grehan 		panic("vtd_create_mapping: unaligned len 0x%0lx", len);
466*366f6083SPeter Grehan 
467*366f6083SPeter Grehan 	/*
468*366f6083SPeter Grehan 	 * Compute the size of the mapping that we can accomodate.
469*366f6083SPeter Grehan 	 *
470*366f6083SPeter Grehan 	 * This is based on three factors:
471*366f6083SPeter Grehan 	 * - supported super page size
472*366f6083SPeter Grehan 	 * - alignment of the region starting at 'gpa' and 'hpa'
473*366f6083SPeter Grehan 	 * - length of the region 'len'
474*366f6083SPeter Grehan 	 */
475*366f6083SPeter Grehan 	spshift = 48;
476*366f6083SPeter Grehan 	for (i = 3; i >= 0; i--) {
477*366f6083SPeter Grehan 		spsize = 1UL << spshift;
478*366f6083SPeter Grehan 		if ((dom->spsmask & (1 << i)) != 0 &&
479*366f6083SPeter Grehan 		    (gpa & (spsize - 1)) == 0 &&
480*366f6083SPeter Grehan 		    (hpa & (spsize - 1)) == 0 &&
481*366f6083SPeter Grehan 		    (len >= spsize)) {
482*366f6083SPeter Grehan 			break;
483*366f6083SPeter Grehan 		}
484*366f6083SPeter Grehan 		spshift -= 9;
485*366f6083SPeter Grehan 	}
486*366f6083SPeter Grehan 
487*366f6083SPeter Grehan 	ptp = dom->ptp;
488*366f6083SPeter Grehan 	nlevels = dom->pt_levels;
489*366f6083SPeter Grehan 	while (--nlevels >= 0) {
490*366f6083SPeter Grehan 		ptpshift = 12 + nlevels * 9;
491*366f6083SPeter Grehan 		ptpindex = (gpa >> ptpshift) & 0x1FF;
492*366f6083SPeter Grehan 
493*366f6083SPeter Grehan 		/* We have reached the leaf mapping */
494*366f6083SPeter Grehan 		if (spshift >= ptpshift) {
495*366f6083SPeter Grehan 			break;
496*366f6083SPeter Grehan 		}
497*366f6083SPeter Grehan 
498*366f6083SPeter Grehan 		/*
499*366f6083SPeter Grehan 		 * We are working on a non-leaf page table page.
500*366f6083SPeter Grehan 		 *
501*366f6083SPeter Grehan 		 * Create a downstream page table page if necessary and point
502*366f6083SPeter Grehan 		 * to it from the current page table.
503*366f6083SPeter Grehan 		 */
504*366f6083SPeter Grehan 		if (ptp[ptpindex] == 0) {
505*366f6083SPeter Grehan 			void *nlp = malloc(PAGE_SIZE, M_VTD, M_WAITOK | M_ZERO);
506*366f6083SPeter Grehan 			ptp[ptpindex] = vtophys(nlp)| VTD_PTE_RD | VTD_PTE_WR;
507*366f6083SPeter Grehan 		}
508*366f6083SPeter Grehan 
509*366f6083SPeter Grehan 		ptp = (uint64_t *)PHYS_TO_DMAP(ptp[ptpindex] & VTD_PTE_ADDR_M);
510*366f6083SPeter Grehan 	}
511*366f6083SPeter Grehan 
512*366f6083SPeter Grehan 	if ((gpa & ((1UL << ptpshift) - 1)) != 0)
513*366f6083SPeter Grehan 		panic("gpa 0x%lx and ptpshift %d mismatch", gpa, ptpshift);
514*366f6083SPeter Grehan 
515*366f6083SPeter Grehan 	/*
516*366f6083SPeter Grehan 	 * Create a 'gpa' -> 'hpa' mapping
517*366f6083SPeter Grehan 	 */
518*366f6083SPeter Grehan 	ptp[ptpindex] = hpa | VTD_PTE_RD | VTD_PTE_WR;
519*366f6083SPeter Grehan 
520*366f6083SPeter Grehan 	if (nlevels > 0)
521*366f6083SPeter Grehan 		ptp[ptpindex] |= VTD_PTE_SUPERPAGE;
522*366f6083SPeter Grehan 
523*366f6083SPeter Grehan 	return (1UL << ptpshift);
524*366f6083SPeter Grehan }
525*366f6083SPeter Grehan 
526*366f6083SPeter Grehan static void *
527*366f6083SPeter Grehan vtd_create_domain(vm_paddr_t maxaddr)
528*366f6083SPeter Grehan {
529*366f6083SPeter Grehan 	struct domain *dom;
530*366f6083SPeter Grehan 	vm_paddr_t addr;
531*366f6083SPeter Grehan 	int tmp, i, gaw, agaw, sagaw, res, pt_levels, addrwidth;
532*366f6083SPeter Grehan 	struct vtdmap *vtdmap;
533*366f6083SPeter Grehan 
534*366f6083SPeter Grehan 	if (drhd_num <= 0)
535*366f6083SPeter Grehan 		panic("vtd_create_domain: no dma remapping hardware available");
536*366f6083SPeter Grehan 
537*366f6083SPeter Grehan 	vtdmap = vtdmaps[0];
538*366f6083SPeter Grehan 
539*366f6083SPeter Grehan 	/*
540*366f6083SPeter Grehan 	 * Calculate AGAW.
541*366f6083SPeter Grehan 	 * Section 3.4.2 "Adjusted Guest Address Width", Architecture Spec.
542*366f6083SPeter Grehan 	 */
543*366f6083SPeter Grehan 	addr = 0;
544*366f6083SPeter Grehan 	for (gaw = 0; addr < maxaddr; gaw++)
545*366f6083SPeter Grehan 		addr = 1ULL << gaw;
546*366f6083SPeter Grehan 
547*366f6083SPeter Grehan 	res = (gaw - 12) % 9;
548*366f6083SPeter Grehan 	if (res == 0)
549*366f6083SPeter Grehan 		agaw = gaw;
550*366f6083SPeter Grehan 	else
551*366f6083SPeter Grehan 		agaw = gaw + 9 - res;
552*366f6083SPeter Grehan 
553*366f6083SPeter Grehan 	if (agaw > 64)
554*366f6083SPeter Grehan 		agaw = 64;
555*366f6083SPeter Grehan 
556*366f6083SPeter Grehan 	/*
557*366f6083SPeter Grehan 	 * Select the smallest Supported AGAW and the corresponding number
558*366f6083SPeter Grehan 	 * of page table levels.
559*366f6083SPeter Grehan 	 */
560*366f6083SPeter Grehan 	pt_levels = 2;
561*366f6083SPeter Grehan 	sagaw = 30;
562*366f6083SPeter Grehan 	addrwidth = 0;
563*366f6083SPeter Grehan 	tmp = VTD_CAP_SAGAW(vtdmap->cap);
564*366f6083SPeter Grehan 	for (i = 0; i < 5; i++) {
565*366f6083SPeter Grehan 		if ((tmp & (1 << i)) != 0 && sagaw >= agaw)
566*366f6083SPeter Grehan 			break;
567*366f6083SPeter Grehan 		pt_levels++;
568*366f6083SPeter Grehan 		addrwidth++;
569*366f6083SPeter Grehan 		sagaw += 9;
570*366f6083SPeter Grehan 		if (sagaw > 64)
571*366f6083SPeter Grehan 			sagaw = 64;
572*366f6083SPeter Grehan 	}
573*366f6083SPeter Grehan 
574*366f6083SPeter Grehan 	if (i >= 5) {
575*366f6083SPeter Grehan 		panic("vtd_create_domain: SAGAW 0x%lx does not support AGAW %d",
576*366f6083SPeter Grehan 		      VTD_CAP_SAGAW(vtdmap->cap), agaw);
577*366f6083SPeter Grehan 	}
578*366f6083SPeter Grehan 
579*366f6083SPeter Grehan 	dom = malloc(sizeof(struct domain), M_VTD, M_ZERO | M_WAITOK);
580*366f6083SPeter Grehan 	dom->pt_levels = pt_levels;
581*366f6083SPeter Grehan 	dom->addrwidth = addrwidth;
582*366f6083SPeter Grehan 	dom->spsmask = VTD_CAP_SPS(vtdmap->cap);
583*366f6083SPeter Grehan 	dom->id = domain_id();
584*366f6083SPeter Grehan 	dom->maxaddr = maxaddr;
585*366f6083SPeter Grehan 	dom->ptp = malloc(PAGE_SIZE, M_VTD, M_ZERO | M_WAITOK);
586*366f6083SPeter Grehan 	if ((uintptr_t)dom->ptp & PAGE_MASK)
587*366f6083SPeter Grehan 		panic("vtd_create_domain: ptp (%p) not page aligned", dom->ptp);
588*366f6083SPeter Grehan 
589*366f6083SPeter Grehan 	SLIST_INSERT_HEAD(&domhead, dom, next);
590*366f6083SPeter Grehan 
591*366f6083SPeter Grehan 	return (dom);
592*366f6083SPeter Grehan }
593*366f6083SPeter Grehan 
594*366f6083SPeter Grehan static void
595*366f6083SPeter Grehan vtd_free_ptp(uint64_t *ptp, int level)
596*366f6083SPeter Grehan {
597*366f6083SPeter Grehan 	int i;
598*366f6083SPeter Grehan 	uint64_t *nlp;
599*366f6083SPeter Grehan 
600*366f6083SPeter Grehan 	if (level > 1) {
601*366f6083SPeter Grehan 		for (i = 0; i < 512; i++) {
602*366f6083SPeter Grehan 			if ((ptp[i] & (VTD_PTE_RD | VTD_PTE_WR)) == 0)
603*366f6083SPeter Grehan 				continue;
604*366f6083SPeter Grehan 			if ((ptp[i] & VTD_PTE_SUPERPAGE) != 0)
605*366f6083SPeter Grehan 				continue;
606*366f6083SPeter Grehan 			nlp = (uint64_t *)PHYS_TO_DMAP(ptp[i] & VTD_PTE_ADDR_M);
607*366f6083SPeter Grehan 			vtd_free_ptp(nlp, level - 1);
608*366f6083SPeter Grehan 		}
609*366f6083SPeter Grehan 	}
610*366f6083SPeter Grehan 
611*366f6083SPeter Grehan 	bzero(ptp, PAGE_SIZE);
612*366f6083SPeter Grehan 	free(ptp, M_VTD);
613*366f6083SPeter Grehan }
614*366f6083SPeter Grehan 
615*366f6083SPeter Grehan static void
616*366f6083SPeter Grehan vtd_destroy_domain(void *arg)
617*366f6083SPeter Grehan {
618*366f6083SPeter Grehan 	struct domain *dom;
619*366f6083SPeter Grehan 
620*366f6083SPeter Grehan 	dom = arg;
621*366f6083SPeter Grehan 
622*366f6083SPeter Grehan 	SLIST_REMOVE(&domhead, dom, domain, next);
623*366f6083SPeter Grehan 	vtd_free_ptp(dom->ptp, dom->pt_levels);
624*366f6083SPeter Grehan 	free(dom, M_VTD);
625*366f6083SPeter Grehan }
626*366f6083SPeter Grehan 
627*366f6083SPeter Grehan struct iommu_ops iommu_ops_intel = {
628*366f6083SPeter Grehan 	vtd_init,
629*366f6083SPeter Grehan 	vtd_cleanup,
630*366f6083SPeter Grehan 	vtd_enable,
631*366f6083SPeter Grehan 	vtd_disable,
632*366f6083SPeter Grehan 	vtd_create_domain,
633*366f6083SPeter Grehan 	vtd_destroy_domain,
634*366f6083SPeter Grehan 	vtd_create_mapping,
635*366f6083SPeter Grehan 	vtd_add_device,
636*366f6083SPeter Grehan 	vtd_remove_device,
637*366f6083SPeter Grehan };
638