xref: /freebsd/sys/amd64/vmm/intel/vtd.c (revision a86672509c47e89dae2bdd233ebf855a482e0e4e)
1366f6083SPeter Grehan /*-
2366f6083SPeter Grehan  * Copyright (c) 2011 NetApp, Inc.
3366f6083SPeter Grehan  * All rights reserved.
4366f6083SPeter Grehan  *
5366f6083SPeter Grehan  * Redistribution and use in source and binary forms, with or without
6366f6083SPeter Grehan  * modification, are permitted provided that the following conditions
7366f6083SPeter Grehan  * are met:
8366f6083SPeter Grehan  * 1. Redistributions of source code must retain the above copyright
9366f6083SPeter Grehan  *    notice, this list of conditions and the following disclaimer.
10366f6083SPeter Grehan  * 2. Redistributions in binary form must reproduce the above copyright
11366f6083SPeter Grehan  *    notice, this list of conditions and the following disclaimer in the
12366f6083SPeter Grehan  *    documentation and/or other materials provided with the distribution.
13366f6083SPeter Grehan  *
14366f6083SPeter Grehan  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15366f6083SPeter Grehan  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16366f6083SPeter Grehan  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17366f6083SPeter Grehan  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18366f6083SPeter Grehan  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19366f6083SPeter Grehan  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20366f6083SPeter Grehan  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21366f6083SPeter Grehan  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22366f6083SPeter Grehan  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23366f6083SPeter Grehan  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24366f6083SPeter Grehan  * SUCH DAMAGE.
25366f6083SPeter Grehan  *
26366f6083SPeter Grehan  * $FreeBSD$
27366f6083SPeter Grehan  */
28366f6083SPeter Grehan 
29366f6083SPeter Grehan #include <sys/cdefs.h>
30366f6083SPeter Grehan __FBSDID("$FreeBSD$");
31366f6083SPeter Grehan 
32366f6083SPeter Grehan #include <sys/param.h>
33366f6083SPeter Grehan #include <sys/kernel.h>
34366f6083SPeter Grehan #include <sys/systm.h>
35366f6083SPeter Grehan #include <sys/malloc.h>
36366f6083SPeter Grehan 
37366f6083SPeter Grehan #include <vm/vm.h>
38366f6083SPeter Grehan #include <vm/pmap.h>
39366f6083SPeter Grehan 
40366f6083SPeter Grehan #include <dev/pci/pcireg.h>
41366f6083SPeter Grehan 
42366f6083SPeter Grehan #include <machine/vmparam.h>
43f77e9829SNeel Natu #include <contrib/dev/acpica/include/acpi.h>
44366f6083SPeter Grehan 
45366f6083SPeter Grehan #include "io/iommu.h"
46366f6083SPeter Grehan 
47366f6083SPeter Grehan /*
48366f6083SPeter Grehan  * Documented in the "Intel Virtualization Technology for Directed I/O",
49366f6083SPeter Grehan  * Architecture Spec, September 2008.
50366f6083SPeter Grehan  */
51366f6083SPeter Grehan 
/*
 * Section 10.4 "Register Descriptions"
 *
 * Overlay for the first registers of a remapping unit.  All fields are
 * volatile because they are accessed through a mapping of the unit's
 * register page (see vtd_init()).  Byte offsets below follow from the
 * field sizes.
 */
struct vtdmap {
	volatile uint32_t	version;	/* 0x00 */
	volatile uint32_t	res0;		/* 0x04: padding */
	volatile uint64_t	cap;		/* 0x08: decoded by VTD_CAP_*() */
	volatile uint64_t	ext_cap;	/* 0x10: decoded by VTD_ECAP_*() */
	volatile uint32_t	gcr;		/* 0x18: written with VTD_GCR_* */
	volatile uint32_t	gsr;		/* 0x1c: polled for VTD_GSR_* */
	volatile uint64_t	rta;		/* 0x20: root table address */
	volatile uint64_t	ccr;		/* 0x28: written with VTD_CCR_* */
};
63366f6083SPeter Grehan 
/*
 * Field extractors for the 'cap' register.
 *
 * SAGAW: bitmask tested bit-by-bit in vtd_create_domain().
 * ND:    encoded domain count, decoded by vtd_max_domains().
 * CM:    caching mode; vtd_init() panics if it is set.
 * SPS:   supported super page sizes (see 'spsmask' in struct domain).
 * RWBF:  when set, vtd_wbflush() issues a VTD_GCR_WBF command.
 */
#define	VTD_CAP_SAGAW(cap)	(((cap) >> 8) & 0x1F)
#define	VTD_CAP_ND(cap)		((cap) & 0x7)
#define	VTD_CAP_CM(cap)		(((cap) >> 7) & 0x1)
#define	VTD_CAP_SPS(cap)	(((cap) >> 34) & 0xF)
#define	VTD_CAP_RWBF(cap)	(((cap) >> 4) & 0x1)

/*
 * Field extractors for the 'ext_cap' register.
 *
 * DI:        when set, vtd_add_device() sets VTD_CTX_TT_ALL in the entry.
 * COHERENCY: when clear, vtd_wbflush() calls pmap_invalidate_cache().
 * IRO:       multiplied by 16 to locate the IOTLB registers.
 */
#define	VTD_ECAP_DI(ecap)	(((ecap) >> 2) & 0x1)
#define	VTD_ECAP_COHERENCY(ecap) ((ecap) & 0x1)
#define	VTD_ECAP_IRO(ecap)	(((ecap) >> 8) & 0x3FF)

/* Command bits written to 'gcr'. */
#define	VTD_GCR_WBF		(1 << 27)
#define	VTD_GCR_SRTP		(1 << 30)
#define	VTD_GCR_TE		(1U << 31)

/* Status bits polled in 'gsr'; each sits at the same position as its command. */
#define	VTD_GSR_WBFS		(1 << 27)
#define	VTD_GSR_RTPS		(1 << 30)
#define	VTD_GSR_TES		(1U << 31)

/* Bits written to 'ccr' by vtd_ctx_global_invalidate(). */
#define	VTD_CCR_ICC		(1UL << 63)	/* invalidate context cache */
#define	VTD_CCR_CIRG_GLOBAL	(1UL << 61)	/* global invalidation */

/*
 * Bits for the IOTLB invalidation register.  VTD_IIR_DOMAIN_P and the
 * domain/page granularities are defined but not referenced by the code
 * visible in this file.
 */
#define	VTD_IIR_IVT		(1UL << 63)	/* invalidation IOTLB */
#define	VTD_IIR_IIRG_GLOBAL	(1ULL << 60)	/* global IOTLB invalidation */
#define	VTD_IIR_IIRG_DOMAIN	(2ULL << 60)	/* domain IOTLB invalidation */
#define	VTD_IIR_IIRG_PAGE	(3ULL << 60)	/* page IOTLB invalidation */
#define	VTD_IIR_DRAIN_READS	(1ULL << 49)	/* drain pending DMA reads */
#define	VTD_IIR_DRAIN_WRITES	(1ULL << 48)	/* drain pending DMA writes */
#define	VTD_IIR_DOMAIN_P	32

/* Bits in root-table and context-table entries. */
#define	VTD_ROOT_PRESENT	0x1
#define	VTD_CTX_PRESENT		0x1
#define	VTD_CTX_TT_ALL		(1UL << 2)

/* Bits in page table entries built by vtd_update_mapping(). */
#define	VTD_PTE_RD		(1UL << 0)
#define	VTD_PTE_WR		(1UL << 1)
#define	VTD_PTE_SUPERPAGE	(1UL << 7)
#define	VTD_PTE_ADDR_M		(0x000FFFFFFFFFF000UL)

/*
 * Index, in uint64_t units, of the context entry for 'rid' within its
 * bus's context table.  Each entry occupies two consecutive uint64_t
 * slots (idx and idx + 1), hence the multiplication by 2.
 */
#define VTD_RID2IDX(rid)	(((rid) & 0xff) * 2)

103*a8667250SRyan Stone 
/*
 * Per-domain translation state.  Instances are created by
 * vtd_create_domain(), linked on 'domhead', and handed to callers as an
 * opaque 'void *'.
 */
struct domain {
	uint64_t	*ptp;		/* first level page table page */
	int		pt_levels;	/* number of page table levels */
	int		addrwidth;	/* 'AW' field in context entry */
	int		spsmask;	/* supported super page sizes */
	u_int		id;		/* domain id */
	vm_paddr_t	maxaddr;	/* highest address to be mapped */
	SLIST_ENTRY(domain) next;	/* linkage on 'domhead' */
};
113366f6083SPeter Grehan 
/* All domains currently allocated; searched by domain_id(). */
static SLIST_HEAD(, domain) domhead;

#define	DRHD_MAX_UNITS	8
static int		drhd_num;	/* number of valid 'vtdmaps' entries */
static struct vtdmap	*vtdmaps[DRHD_MAX_UNITS]; /* register maps, per unit */
static int		max_domains;	/* upper bound (exclusive) on domain ids */
/* NOTE(review): not referenced by the code visible in this file. */
typedef int		(*drhd_ident_func_t)(void);

/*
 * root_table[i * 2] is pointed at ctx_tables[i] by vtd_init();
 * ctx_tables is indexed by PCI bus number in vtd_add_device().
 */
static uint64_t root_table[PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);
static uint64_t ctx_tables[256][PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);

static MALLOC_DEFINE(M_VTD, "vtd", "vtd");
126366f6083SPeter Grehan 
127366f6083SPeter Grehan static int
128366f6083SPeter Grehan vtd_max_domains(struct vtdmap *vtdmap)
129366f6083SPeter Grehan {
130366f6083SPeter Grehan 	int nd;
131366f6083SPeter Grehan 
132366f6083SPeter Grehan 	nd = VTD_CAP_ND(vtdmap->cap);
133366f6083SPeter Grehan 
134366f6083SPeter Grehan 	switch (nd) {
135366f6083SPeter Grehan 	case 0:
136366f6083SPeter Grehan 		return (16);
137366f6083SPeter Grehan 	case 1:
138366f6083SPeter Grehan 		return (64);
139366f6083SPeter Grehan 	case 2:
140366f6083SPeter Grehan 		return (256);
141366f6083SPeter Grehan 	case 3:
142366f6083SPeter Grehan 		return (1024);
143366f6083SPeter Grehan 	case 4:
144366f6083SPeter Grehan 		return (4 * 1024);
145366f6083SPeter Grehan 	case 5:
146366f6083SPeter Grehan 		return (16 * 1024);
147366f6083SPeter Grehan 	case 6:
148366f6083SPeter Grehan 		return (64 * 1024);
149366f6083SPeter Grehan 	default:
150366f6083SPeter Grehan 		panic("vtd_max_domains: invalid value of nd (0x%0x)", nd);
151366f6083SPeter Grehan 	}
152366f6083SPeter Grehan }
153366f6083SPeter Grehan 
154366f6083SPeter Grehan static u_int
155366f6083SPeter Grehan domain_id(void)
156366f6083SPeter Grehan {
157366f6083SPeter Grehan 	u_int id;
158366f6083SPeter Grehan 	struct domain *dom;
159366f6083SPeter Grehan 
160366f6083SPeter Grehan 	/* Skip domain id 0 - it is reserved when Caching Mode field is set */
161366f6083SPeter Grehan 	for (id = 1; id < max_domains; id++) {
162366f6083SPeter Grehan 		SLIST_FOREACH(dom, &domhead, next) {
163366f6083SPeter Grehan 			if (dom->id == id)
164366f6083SPeter Grehan 				break;
165366f6083SPeter Grehan 		}
166366f6083SPeter Grehan 		if (dom == NULL)
167366f6083SPeter Grehan 			break;		/* found it */
168366f6083SPeter Grehan 	}
169366f6083SPeter Grehan 
170366f6083SPeter Grehan 	if (id >= max_domains)
171366f6083SPeter Grehan 		panic("domain ids exhausted");
172366f6083SPeter Grehan 
173366f6083SPeter Grehan 	return (id);
174366f6083SPeter Grehan }
175366f6083SPeter Grehan 
176366f6083SPeter Grehan static void
177366f6083SPeter Grehan vtd_wbflush(struct vtdmap *vtdmap)
178366f6083SPeter Grehan {
179366f6083SPeter Grehan 
180366f6083SPeter Grehan 	if (VTD_ECAP_COHERENCY(vtdmap->ext_cap) == 0)
181366f6083SPeter Grehan 		pmap_invalidate_cache();
182366f6083SPeter Grehan 
183366f6083SPeter Grehan 	if (VTD_CAP_RWBF(vtdmap->cap)) {
184366f6083SPeter Grehan 		vtdmap->gcr = VTD_GCR_WBF;
185366f6083SPeter Grehan 		while ((vtdmap->gsr & VTD_GSR_WBFS) != 0)
186366f6083SPeter Grehan 			;
187366f6083SPeter Grehan 	}
188366f6083SPeter Grehan }
189366f6083SPeter Grehan 
190366f6083SPeter Grehan static void
191366f6083SPeter Grehan vtd_ctx_global_invalidate(struct vtdmap *vtdmap)
192366f6083SPeter Grehan {
193366f6083SPeter Grehan 
194366f6083SPeter Grehan 	vtdmap->ccr = VTD_CCR_ICC | VTD_CCR_CIRG_GLOBAL;
195366f6083SPeter Grehan 	while ((vtdmap->ccr & VTD_CCR_ICC) != 0)
196366f6083SPeter Grehan 		;
197366f6083SPeter Grehan }
198366f6083SPeter Grehan 
199366f6083SPeter Grehan static void
200366f6083SPeter Grehan vtd_iotlb_global_invalidate(struct vtdmap *vtdmap)
201366f6083SPeter Grehan {
202366f6083SPeter Grehan 	int offset;
203366f6083SPeter Grehan 	volatile uint64_t *iotlb_reg, val;
204366f6083SPeter Grehan 
205366f6083SPeter Grehan 	vtd_wbflush(vtdmap);
206366f6083SPeter Grehan 
207366f6083SPeter Grehan 	offset = VTD_ECAP_IRO(vtdmap->ext_cap) * 16;
208366f6083SPeter Grehan 	iotlb_reg = (volatile uint64_t *)((caddr_t)vtdmap + offset + 8);
209366f6083SPeter Grehan 
210366f6083SPeter Grehan 	*iotlb_reg =  VTD_IIR_IVT | VTD_IIR_IIRG_GLOBAL |
211366f6083SPeter Grehan 		      VTD_IIR_DRAIN_READS | VTD_IIR_DRAIN_WRITES;
212366f6083SPeter Grehan 
213366f6083SPeter Grehan 	while (1) {
214366f6083SPeter Grehan 		val = *iotlb_reg;
215366f6083SPeter Grehan 		if ((val & VTD_IIR_IVT) == 0)
216366f6083SPeter Grehan 			break;
217366f6083SPeter Grehan 	}
218366f6083SPeter Grehan }
219366f6083SPeter Grehan 
220366f6083SPeter Grehan static void
221366f6083SPeter Grehan vtd_translation_enable(struct vtdmap *vtdmap)
222366f6083SPeter Grehan {
223366f6083SPeter Grehan 
224366f6083SPeter Grehan 	vtdmap->gcr = VTD_GCR_TE;
225366f6083SPeter Grehan 	while ((vtdmap->gsr & VTD_GSR_TES) == 0)
226366f6083SPeter Grehan 		;
227366f6083SPeter Grehan }
228366f6083SPeter Grehan 
229366f6083SPeter Grehan static void
230366f6083SPeter Grehan vtd_translation_disable(struct vtdmap *vtdmap)
231366f6083SPeter Grehan {
232366f6083SPeter Grehan 
233366f6083SPeter Grehan 	vtdmap->gcr = 0;
234366f6083SPeter Grehan 	while ((vtdmap->gsr & VTD_GSR_TES) != 0)
235366f6083SPeter Grehan 		;
236366f6083SPeter Grehan }
237366f6083SPeter Grehan 
238366f6083SPeter Grehan static int
239366f6083SPeter Grehan vtd_init(void)
240366f6083SPeter Grehan {
241f77e9829SNeel Natu 	int i, units, remaining;
242366f6083SPeter Grehan 	struct vtdmap *vtdmap;
243366f6083SPeter Grehan 	vm_paddr_t ctx_paddr;
244f77e9829SNeel Natu 	char *end, envname[32];
245f77e9829SNeel Natu 	unsigned long mapaddr;
246f77e9829SNeel Natu 	ACPI_STATUS status;
247f77e9829SNeel Natu 	ACPI_TABLE_DMAR *dmar;
248f77e9829SNeel Natu 	ACPI_DMAR_HEADER *hdr;
249f77e9829SNeel Natu 	ACPI_DMAR_HARDWARE_UNIT *drhd;
250366f6083SPeter Grehan 
251f77e9829SNeel Natu 	/*
252f77e9829SNeel Natu 	 * Allow the user to override the ACPI DMAR table by specifying the
253f77e9829SNeel Natu 	 * physical address of each remapping unit.
254f77e9829SNeel Natu 	 *
255f77e9829SNeel Natu 	 * The following example specifies two remapping units at
256f77e9829SNeel Natu 	 * physical addresses 0xfed90000 and 0xfeda0000 respectively.
257f77e9829SNeel Natu 	 * set vtd.regmap.0.addr=0xfed90000
258f77e9829SNeel Natu 	 * set vtd.regmap.1.addr=0xfeda0000
259f77e9829SNeel Natu 	 */
260f77e9829SNeel Natu 	for (units = 0; units < DRHD_MAX_UNITS; units++) {
261f77e9829SNeel Natu 		snprintf(envname, sizeof(envname), "vtd.regmap.%d.addr", units);
262f77e9829SNeel Natu 		if (getenv_ulong(envname, &mapaddr) == 0)
263366f6083SPeter Grehan 			break;
264f77e9829SNeel Natu 		vtdmaps[units] = (struct vtdmap *)PHYS_TO_DMAP(mapaddr);
265f77e9829SNeel Natu 	}
266f77e9829SNeel Natu 
267f77e9829SNeel Natu 	if (units > 0)
268f77e9829SNeel Natu 		goto skip_dmar;
269f77e9829SNeel Natu 
270f77e9829SNeel Natu 	/* Search for DMAR table. */
271f77e9829SNeel Natu 	status = AcpiGetTable(ACPI_SIG_DMAR, 0, (ACPI_TABLE_HEADER **)&dmar);
272f77e9829SNeel Natu 	if (ACPI_FAILURE(status))
273f77e9829SNeel Natu 		return (ENXIO);
274f77e9829SNeel Natu 
275f77e9829SNeel Natu 	end = (char *)dmar + dmar->Header.Length;
276f77e9829SNeel Natu 	remaining = dmar->Header.Length - sizeof(ACPI_TABLE_DMAR);
277f77e9829SNeel Natu 	while (remaining > sizeof(ACPI_DMAR_HEADER)) {
278f77e9829SNeel Natu 		hdr = (ACPI_DMAR_HEADER *)(end - remaining);
279f77e9829SNeel Natu 		if (hdr->Length > remaining)
280f77e9829SNeel Natu 			break;
281f77e9829SNeel Natu 		/*
282f77e9829SNeel Natu 		 * From Intel VT-d arch spec, version 1.3:
283f77e9829SNeel Natu 		 * BIOS implementations must report mapping structures
284f77e9829SNeel Natu 		 * in numerical order, i.e. All remapping structures of
285f77e9829SNeel Natu 		 * type 0 (DRHD) enumerated before remapping structures of
286f77e9829SNeel Natu 		 * type 1 (RMRR) and so forth.
287f77e9829SNeel Natu 		 */
288f77e9829SNeel Natu 		if (hdr->Type != ACPI_DMAR_TYPE_HARDWARE_UNIT)
289f77e9829SNeel Natu 			break;
290f77e9829SNeel Natu 
291f77e9829SNeel Natu 		drhd = (ACPI_DMAR_HARDWARE_UNIT *)hdr;
292f77e9829SNeel Natu 		vtdmaps[units++] = (struct vtdmap *)PHYS_TO_DMAP(drhd->Address);
293f77e9829SNeel Natu 		if (units >= DRHD_MAX_UNITS)
294f77e9829SNeel Natu 			break;
295f77e9829SNeel Natu 		remaining -= hdr->Length;
296366f6083SPeter Grehan 	}
297366f6083SPeter Grehan 
298366f6083SPeter Grehan 	if (units <= 0)
299366f6083SPeter Grehan 		return (ENXIO);
300366f6083SPeter Grehan 
301f77e9829SNeel Natu skip_dmar:
302366f6083SPeter Grehan 	drhd_num = units;
303366f6083SPeter Grehan 	vtdmap = vtdmaps[0];
304366f6083SPeter Grehan 
305366f6083SPeter Grehan 	if (VTD_CAP_CM(vtdmap->cap) != 0)
306366f6083SPeter Grehan 		panic("vtd_init: invalid caching mode");
307366f6083SPeter Grehan 
308366f6083SPeter Grehan 	max_domains = vtd_max_domains(vtdmap);
309366f6083SPeter Grehan 
310366f6083SPeter Grehan 	/*
311366f6083SPeter Grehan 	 * Set up the root-table to point to the context-entry tables
312366f6083SPeter Grehan 	 */
313366f6083SPeter Grehan 	for (i = 0; i < 256; i++) {
314366f6083SPeter Grehan 		ctx_paddr = vtophys(ctx_tables[i]);
315366f6083SPeter Grehan 		if (ctx_paddr & PAGE_MASK)
316366f6083SPeter Grehan 			panic("ctx table (0x%0lx) not page aligned", ctx_paddr);
317366f6083SPeter Grehan 
318366f6083SPeter Grehan 		root_table[i * 2] = ctx_paddr | VTD_ROOT_PRESENT;
319366f6083SPeter Grehan 	}
320366f6083SPeter Grehan 
321366f6083SPeter Grehan 	return (0);
322366f6083SPeter Grehan }
323366f6083SPeter Grehan 
/*
 * Counterpart of vtd_init(); there is currently nothing to undo.
 */
static void
vtd_cleanup(void)
{

}
328366f6083SPeter Grehan 
329366f6083SPeter Grehan static void
330366f6083SPeter Grehan vtd_enable(void)
331366f6083SPeter Grehan {
332366f6083SPeter Grehan 	int i;
333366f6083SPeter Grehan 	struct vtdmap *vtdmap;
334366f6083SPeter Grehan 
335366f6083SPeter Grehan 	for (i = 0; i < drhd_num; i++) {
336366f6083SPeter Grehan 		vtdmap = vtdmaps[i];
337366f6083SPeter Grehan 		vtd_wbflush(vtdmap);
338366f6083SPeter Grehan 
339366f6083SPeter Grehan 		/* Update the root table address */
340366f6083SPeter Grehan 		vtdmap->rta = vtophys(root_table);
341366f6083SPeter Grehan 		vtdmap->gcr = VTD_GCR_SRTP;
342366f6083SPeter Grehan 		while ((vtdmap->gsr & VTD_GSR_RTPS) == 0)
343366f6083SPeter Grehan 			;
344366f6083SPeter Grehan 
345366f6083SPeter Grehan 		vtd_ctx_global_invalidate(vtdmap);
346366f6083SPeter Grehan 		vtd_iotlb_global_invalidate(vtdmap);
347366f6083SPeter Grehan 
348366f6083SPeter Grehan 		vtd_translation_enable(vtdmap);
349366f6083SPeter Grehan 	}
350366f6083SPeter Grehan }
351366f6083SPeter Grehan 
352366f6083SPeter Grehan static void
353366f6083SPeter Grehan vtd_disable(void)
354366f6083SPeter Grehan {
355366f6083SPeter Grehan 	int i;
356366f6083SPeter Grehan 	struct vtdmap *vtdmap;
357366f6083SPeter Grehan 
358366f6083SPeter Grehan 	for (i = 0; i < drhd_num; i++) {
359366f6083SPeter Grehan 		vtdmap = vtdmaps[i];
360366f6083SPeter Grehan 		vtd_translation_disable(vtdmap);
361366f6083SPeter Grehan 	}
362366f6083SPeter Grehan }
363366f6083SPeter Grehan 
364366f6083SPeter Grehan static void
365*a8667250SRyan Stone vtd_add_device(void *arg, uint16_t rid)
366366f6083SPeter Grehan {
367366f6083SPeter Grehan 	int idx;
368366f6083SPeter Grehan 	uint64_t *ctxp;
369366f6083SPeter Grehan 	struct domain *dom = arg;
370366f6083SPeter Grehan 	vm_paddr_t pt_paddr;
371366f6083SPeter Grehan 	struct vtdmap *vtdmap;
372*a8667250SRyan Stone 	uint8_t bus;
373366f6083SPeter Grehan 
374366f6083SPeter Grehan 	vtdmap = vtdmaps[0];
375*a8667250SRyan Stone 	bus = PCI_RID2BUS(rid);
376366f6083SPeter Grehan 	ctxp = ctx_tables[bus];
377366f6083SPeter Grehan 	pt_paddr = vtophys(dom->ptp);
378*a8667250SRyan Stone 	idx = VTD_RID2IDX(rid);
379366f6083SPeter Grehan 
380366f6083SPeter Grehan 	if (ctxp[idx] & VTD_CTX_PRESENT) {
381*a8667250SRyan Stone 		panic("vtd_add_device: device %x is already owned by "
382*a8667250SRyan Stone 		      "domain %d", rid,
383366f6083SPeter Grehan 		      (uint16_t)(ctxp[idx + 1] >> 8));
384366f6083SPeter Grehan 	}
385366f6083SPeter Grehan 
386366f6083SPeter Grehan 	/*
387366f6083SPeter Grehan 	 * Order is important. The 'present' bit is set only after all fields
388366f6083SPeter Grehan 	 * of the context pointer are initialized.
389366f6083SPeter Grehan 	 */
390366f6083SPeter Grehan 	ctxp[idx + 1] = dom->addrwidth | (dom->id << 8);
391366f6083SPeter Grehan 
392366f6083SPeter Grehan 	if (VTD_ECAP_DI(vtdmap->ext_cap))
393366f6083SPeter Grehan 		ctxp[idx] = VTD_CTX_TT_ALL;
394366f6083SPeter Grehan 	else
395366f6083SPeter Grehan 		ctxp[idx] = 0;
396366f6083SPeter Grehan 
397366f6083SPeter Grehan 	ctxp[idx] |= pt_paddr | VTD_CTX_PRESENT;
398366f6083SPeter Grehan 
399366f6083SPeter Grehan 	/*
400366f6083SPeter Grehan 	 * 'Not Present' entries are not cached in either the Context Cache
401366f6083SPeter Grehan 	 * or in the IOTLB, so there is no need to invalidate either of them.
402366f6083SPeter Grehan 	 */
403366f6083SPeter Grehan }
404366f6083SPeter Grehan 
405366f6083SPeter Grehan static void
406*a8667250SRyan Stone vtd_remove_device(void *arg, uint16_t rid)
407366f6083SPeter Grehan {
408366f6083SPeter Grehan 	int i, idx;
409366f6083SPeter Grehan 	uint64_t *ctxp;
410366f6083SPeter Grehan 	struct vtdmap *vtdmap;
411*a8667250SRyan Stone 	uint8_t bus;
412366f6083SPeter Grehan 
413*a8667250SRyan Stone 	bus = PCI_RID2BUS(rid);
414366f6083SPeter Grehan 	ctxp = ctx_tables[bus];
415*a8667250SRyan Stone 	idx = VTD_RID2IDX(rid);
416366f6083SPeter Grehan 
417366f6083SPeter Grehan 	/*
418366f6083SPeter Grehan 	 * Order is important. The 'present' bit is must be cleared first.
419366f6083SPeter Grehan 	 */
420366f6083SPeter Grehan 	ctxp[idx] = 0;
421366f6083SPeter Grehan 	ctxp[idx + 1] = 0;
422366f6083SPeter Grehan 
423366f6083SPeter Grehan 	/*
424366f6083SPeter Grehan 	 * Invalidate the Context Cache and the IOTLB.
425366f6083SPeter Grehan 	 *
426366f6083SPeter Grehan 	 * XXX use device-selective invalidation for Context Cache
427366f6083SPeter Grehan 	 * XXX use domain-selective invalidation for IOTLB
428366f6083SPeter Grehan 	 */
429366f6083SPeter Grehan 	for (i = 0; i < drhd_num; i++) {
430366f6083SPeter Grehan 		vtdmap = vtdmaps[i];
431366f6083SPeter Grehan 		vtd_ctx_global_invalidate(vtdmap);
432366f6083SPeter Grehan 		vtd_iotlb_global_invalidate(vtdmap);
433366f6083SPeter Grehan 	}
434366f6083SPeter Grehan }
435366f6083SPeter Grehan 
4367ce04d0aSNeel Natu #define	CREATE_MAPPING	0
4377ce04d0aSNeel Natu #define	REMOVE_MAPPING	1
4387ce04d0aSNeel Natu 
439366f6083SPeter Grehan static uint64_t
4407ce04d0aSNeel Natu vtd_update_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len,
4417ce04d0aSNeel Natu 		   int remove)
442366f6083SPeter Grehan {
443366f6083SPeter Grehan 	struct domain *dom;
444366f6083SPeter Grehan 	int i, spshift, ptpshift, ptpindex, nlevels;
445366f6083SPeter Grehan 	uint64_t spsize, *ptp;
446366f6083SPeter Grehan 
447366f6083SPeter Grehan 	dom = arg;
448366f6083SPeter Grehan 	ptpindex = 0;
449366f6083SPeter Grehan 	ptpshift = 0;
450366f6083SPeter Grehan 
451366f6083SPeter Grehan 	if (gpa & PAGE_MASK)
452366f6083SPeter Grehan 		panic("vtd_create_mapping: unaligned gpa 0x%0lx", gpa);
453366f6083SPeter Grehan 
454366f6083SPeter Grehan 	if (hpa & PAGE_MASK)
455366f6083SPeter Grehan 		panic("vtd_create_mapping: unaligned hpa 0x%0lx", hpa);
456366f6083SPeter Grehan 
457366f6083SPeter Grehan 	if (len & PAGE_MASK)
458366f6083SPeter Grehan 		panic("vtd_create_mapping: unaligned len 0x%0lx", len);
459366f6083SPeter Grehan 
460366f6083SPeter Grehan 	/*
461366f6083SPeter Grehan 	 * Compute the size of the mapping that we can accomodate.
462366f6083SPeter Grehan 	 *
463366f6083SPeter Grehan 	 * This is based on three factors:
464366f6083SPeter Grehan 	 * - supported super page size
465366f6083SPeter Grehan 	 * - alignment of the region starting at 'gpa' and 'hpa'
466366f6083SPeter Grehan 	 * - length of the region 'len'
467366f6083SPeter Grehan 	 */
468366f6083SPeter Grehan 	spshift = 48;
469366f6083SPeter Grehan 	for (i = 3; i >= 0; i--) {
470366f6083SPeter Grehan 		spsize = 1UL << spshift;
471366f6083SPeter Grehan 		if ((dom->spsmask & (1 << i)) != 0 &&
472366f6083SPeter Grehan 		    (gpa & (spsize - 1)) == 0 &&
473366f6083SPeter Grehan 		    (hpa & (spsize - 1)) == 0 &&
474366f6083SPeter Grehan 		    (len >= spsize)) {
475366f6083SPeter Grehan 			break;
476366f6083SPeter Grehan 		}
477366f6083SPeter Grehan 		spshift -= 9;
478366f6083SPeter Grehan 	}
479366f6083SPeter Grehan 
480366f6083SPeter Grehan 	ptp = dom->ptp;
481366f6083SPeter Grehan 	nlevels = dom->pt_levels;
482366f6083SPeter Grehan 	while (--nlevels >= 0) {
483366f6083SPeter Grehan 		ptpshift = 12 + nlevels * 9;
484366f6083SPeter Grehan 		ptpindex = (gpa >> ptpshift) & 0x1FF;
485366f6083SPeter Grehan 
486366f6083SPeter Grehan 		/* We have reached the leaf mapping */
487366f6083SPeter Grehan 		if (spshift >= ptpshift) {
488366f6083SPeter Grehan 			break;
489366f6083SPeter Grehan 		}
490366f6083SPeter Grehan 
491366f6083SPeter Grehan 		/*
492366f6083SPeter Grehan 		 * We are working on a non-leaf page table page.
493366f6083SPeter Grehan 		 *
494366f6083SPeter Grehan 		 * Create a downstream page table page if necessary and point
495366f6083SPeter Grehan 		 * to it from the current page table.
496366f6083SPeter Grehan 		 */
497366f6083SPeter Grehan 		if (ptp[ptpindex] == 0) {
498366f6083SPeter Grehan 			void *nlp = malloc(PAGE_SIZE, M_VTD, M_WAITOK | M_ZERO);
499366f6083SPeter Grehan 			ptp[ptpindex] = vtophys(nlp)| VTD_PTE_RD | VTD_PTE_WR;
500366f6083SPeter Grehan 		}
501366f6083SPeter Grehan 
502366f6083SPeter Grehan 		ptp = (uint64_t *)PHYS_TO_DMAP(ptp[ptpindex] & VTD_PTE_ADDR_M);
503366f6083SPeter Grehan 	}
504366f6083SPeter Grehan 
505366f6083SPeter Grehan 	if ((gpa & ((1UL << ptpshift) - 1)) != 0)
506366f6083SPeter Grehan 		panic("gpa 0x%lx and ptpshift %d mismatch", gpa, ptpshift);
507366f6083SPeter Grehan 
508366f6083SPeter Grehan 	/*
5097ce04d0aSNeel Natu 	 * Update the 'gpa' -> 'hpa' mapping
510366f6083SPeter Grehan 	 */
5117ce04d0aSNeel Natu 	if (remove) {
5127ce04d0aSNeel Natu 		ptp[ptpindex] = 0;
5137ce04d0aSNeel Natu 	} else {
514366f6083SPeter Grehan 		ptp[ptpindex] = hpa | VTD_PTE_RD | VTD_PTE_WR;
515366f6083SPeter Grehan 
516366f6083SPeter Grehan 		if (nlevels > 0)
517366f6083SPeter Grehan 			ptp[ptpindex] |= VTD_PTE_SUPERPAGE;
5187ce04d0aSNeel Natu 	}
519366f6083SPeter Grehan 
520366f6083SPeter Grehan 	return (1UL << ptpshift);
521366f6083SPeter Grehan }
522366f6083SPeter Grehan 
5237ce04d0aSNeel Natu static uint64_t
5247ce04d0aSNeel Natu vtd_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len)
5257ce04d0aSNeel Natu {
5267ce04d0aSNeel Natu 
5277ce04d0aSNeel Natu 	return (vtd_update_mapping(arg, gpa, hpa, len, CREATE_MAPPING));
5287ce04d0aSNeel Natu }
5297ce04d0aSNeel Natu 
5307ce04d0aSNeel Natu static uint64_t
5317ce04d0aSNeel Natu vtd_remove_mapping(void *arg, vm_paddr_t gpa, uint64_t len)
5327ce04d0aSNeel Natu {
5337ce04d0aSNeel Natu 
5347ce04d0aSNeel Natu 	return (vtd_update_mapping(arg, gpa, 0, len, REMOVE_MAPPING));
5357ce04d0aSNeel Natu }
5367ce04d0aSNeel Natu 
5377ce04d0aSNeel Natu static void
5387ce04d0aSNeel Natu vtd_invalidate_tlb(void *dom)
5397ce04d0aSNeel Natu {
5407ce04d0aSNeel Natu 	int i;
5417ce04d0aSNeel Natu 	struct vtdmap *vtdmap;
5427ce04d0aSNeel Natu 
5437ce04d0aSNeel Natu 	/*
5447ce04d0aSNeel Natu 	 * Invalidate the IOTLB.
5457ce04d0aSNeel Natu 	 * XXX use domain-selective invalidation for IOTLB
5467ce04d0aSNeel Natu 	 */
5477ce04d0aSNeel Natu 	for (i = 0; i < drhd_num; i++) {
5487ce04d0aSNeel Natu 		vtdmap = vtdmaps[i];
5497ce04d0aSNeel Natu 		vtd_iotlb_global_invalidate(vtdmap);
5507ce04d0aSNeel Natu 	}
5517ce04d0aSNeel Natu }
5527ce04d0aSNeel Natu 
553366f6083SPeter Grehan static void *
554366f6083SPeter Grehan vtd_create_domain(vm_paddr_t maxaddr)
555366f6083SPeter Grehan {
556366f6083SPeter Grehan 	struct domain *dom;
557366f6083SPeter Grehan 	vm_paddr_t addr;
558366f6083SPeter Grehan 	int tmp, i, gaw, agaw, sagaw, res, pt_levels, addrwidth;
559366f6083SPeter Grehan 	struct vtdmap *vtdmap;
560366f6083SPeter Grehan 
561366f6083SPeter Grehan 	if (drhd_num <= 0)
562366f6083SPeter Grehan 		panic("vtd_create_domain: no dma remapping hardware available");
563366f6083SPeter Grehan 
564366f6083SPeter Grehan 	vtdmap = vtdmaps[0];
565366f6083SPeter Grehan 
566366f6083SPeter Grehan 	/*
567366f6083SPeter Grehan 	 * Calculate AGAW.
568366f6083SPeter Grehan 	 * Section 3.4.2 "Adjusted Guest Address Width", Architecture Spec.
569366f6083SPeter Grehan 	 */
570366f6083SPeter Grehan 	addr = 0;
571366f6083SPeter Grehan 	for (gaw = 0; addr < maxaddr; gaw++)
572366f6083SPeter Grehan 		addr = 1ULL << gaw;
573366f6083SPeter Grehan 
574366f6083SPeter Grehan 	res = (gaw - 12) % 9;
575366f6083SPeter Grehan 	if (res == 0)
576366f6083SPeter Grehan 		agaw = gaw;
577366f6083SPeter Grehan 	else
578366f6083SPeter Grehan 		agaw = gaw + 9 - res;
579366f6083SPeter Grehan 
580366f6083SPeter Grehan 	if (agaw > 64)
581366f6083SPeter Grehan 		agaw = 64;
582366f6083SPeter Grehan 
583366f6083SPeter Grehan 	/*
584366f6083SPeter Grehan 	 * Select the smallest Supported AGAW and the corresponding number
585366f6083SPeter Grehan 	 * of page table levels.
586366f6083SPeter Grehan 	 */
587366f6083SPeter Grehan 	pt_levels = 2;
588366f6083SPeter Grehan 	sagaw = 30;
589366f6083SPeter Grehan 	addrwidth = 0;
590366f6083SPeter Grehan 	tmp = VTD_CAP_SAGAW(vtdmap->cap);
591366f6083SPeter Grehan 	for (i = 0; i < 5; i++) {
592366f6083SPeter Grehan 		if ((tmp & (1 << i)) != 0 && sagaw >= agaw)
593366f6083SPeter Grehan 			break;
594366f6083SPeter Grehan 		pt_levels++;
595366f6083SPeter Grehan 		addrwidth++;
596366f6083SPeter Grehan 		sagaw += 9;
597366f6083SPeter Grehan 		if (sagaw > 64)
598366f6083SPeter Grehan 			sagaw = 64;
599366f6083SPeter Grehan 	}
600366f6083SPeter Grehan 
601366f6083SPeter Grehan 	if (i >= 5) {
602366f6083SPeter Grehan 		panic("vtd_create_domain: SAGAW 0x%lx does not support AGAW %d",
603366f6083SPeter Grehan 		      VTD_CAP_SAGAW(vtdmap->cap), agaw);
604366f6083SPeter Grehan 	}
605366f6083SPeter Grehan 
606366f6083SPeter Grehan 	dom = malloc(sizeof(struct domain), M_VTD, M_ZERO | M_WAITOK);
607366f6083SPeter Grehan 	dom->pt_levels = pt_levels;
608366f6083SPeter Grehan 	dom->addrwidth = addrwidth;
609366f6083SPeter Grehan 	dom->id = domain_id();
610366f6083SPeter Grehan 	dom->maxaddr = maxaddr;
611366f6083SPeter Grehan 	dom->ptp = malloc(PAGE_SIZE, M_VTD, M_ZERO | M_WAITOK);
612366f6083SPeter Grehan 	if ((uintptr_t)dom->ptp & PAGE_MASK)
613366f6083SPeter Grehan 		panic("vtd_create_domain: ptp (%p) not page aligned", dom->ptp);
614366f6083SPeter Grehan 
615b98940e5SNeel Natu #ifdef notyet
616b98940e5SNeel Natu 	/*
617b98940e5SNeel Natu 	 * XXX superpage mappings for the iommu do not work correctly.
618b98940e5SNeel Natu 	 *
619b98940e5SNeel Natu 	 * By default all physical memory is mapped into the host_domain.
620b98940e5SNeel Natu 	 * When a VM is allocated wired memory the pages belonging to it
621b98940e5SNeel Natu 	 * are removed from the host_domain and added to the vm's domain.
622b98940e5SNeel Natu 	 *
623b98940e5SNeel Natu 	 * If the page being removed was mapped using a superpage mapping
624b98940e5SNeel Natu 	 * in the host_domain then we need to demote the mapping before
625b98940e5SNeel Natu 	 * removing the page.
626b98940e5SNeel Natu 	 *
627b98940e5SNeel Natu 	 * There is not any code to deal with the demotion at the moment
628b98940e5SNeel Natu 	 * so we disable superpage mappings altogether.
629b98940e5SNeel Natu 	 */
630b98940e5SNeel Natu 	dom->spsmask = VTD_CAP_SPS(vtdmap->cap);
631b98940e5SNeel Natu #endif
632b98940e5SNeel Natu 
633366f6083SPeter Grehan 	SLIST_INSERT_HEAD(&domhead, dom, next);
634366f6083SPeter Grehan 
635366f6083SPeter Grehan 	return (dom);
636366f6083SPeter Grehan }
637366f6083SPeter Grehan 
638366f6083SPeter Grehan static void
639366f6083SPeter Grehan vtd_free_ptp(uint64_t *ptp, int level)
640366f6083SPeter Grehan {
641366f6083SPeter Grehan 	int i;
642366f6083SPeter Grehan 	uint64_t *nlp;
643366f6083SPeter Grehan 
644366f6083SPeter Grehan 	if (level > 1) {
645366f6083SPeter Grehan 		for (i = 0; i < 512; i++) {
646366f6083SPeter Grehan 			if ((ptp[i] & (VTD_PTE_RD | VTD_PTE_WR)) == 0)
647366f6083SPeter Grehan 				continue;
648366f6083SPeter Grehan 			if ((ptp[i] & VTD_PTE_SUPERPAGE) != 0)
649366f6083SPeter Grehan 				continue;
650366f6083SPeter Grehan 			nlp = (uint64_t *)PHYS_TO_DMAP(ptp[i] & VTD_PTE_ADDR_M);
651366f6083SPeter Grehan 			vtd_free_ptp(nlp, level - 1);
652366f6083SPeter Grehan 		}
653366f6083SPeter Grehan 	}
654366f6083SPeter Grehan 
655366f6083SPeter Grehan 	bzero(ptp, PAGE_SIZE);
656366f6083SPeter Grehan 	free(ptp, M_VTD);
657366f6083SPeter Grehan }
658366f6083SPeter Grehan 
659366f6083SPeter Grehan static void
660366f6083SPeter Grehan vtd_destroy_domain(void *arg)
661366f6083SPeter Grehan {
662366f6083SPeter Grehan 	struct domain *dom;
663366f6083SPeter Grehan 
664366f6083SPeter Grehan 	dom = arg;
665366f6083SPeter Grehan 
666366f6083SPeter Grehan 	SLIST_REMOVE(&domhead, dom, domain, next);
667366f6083SPeter Grehan 	vtd_free_ptp(dom->ptp, dom->pt_levels);
668366f6083SPeter Grehan 	free(dom, M_VTD);
669366f6083SPeter Grehan }
670366f6083SPeter Grehan 
/*
 * Operations vector exported for the Intel VT-d implementation.
 *
 * The initializer is positional, so the order of the entries below must
 * match the member order of 'struct iommu_ops' (declared outside this
 * file, presumably in "io/iommu.h" - confirm before reordering).
 */
struct iommu_ops iommu_ops_intel = {
	vtd_init,
	vtd_cleanup,
	vtd_enable,
	vtd_disable,
	vtd_create_domain,
	vtd_destroy_domain,
	vtd_create_mapping,
	vtd_remove_mapping,
	vtd_add_device,
	vtd_remove_device,
	vtd_invalidate_tlb,
};
684