xref: /freebsd/sys/amd64/vmm/intel/vtd.c (revision 4ec234c813eed05c166859bba82c882e40826eb9)
1 /*-
2  * Copyright (c) 2011 NetApp, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/kernel.h>
34 #include <sys/systm.h>
35 #include <sys/malloc.h>
36 
37 #include <vm/vm.h>
38 #include <vm/pmap.h>
39 
40 #include <dev/pci/pcireg.h>
41 
42 #include <machine/vmparam.h>
43 #include <contrib/dev/acpica/include/acpi.h>
44 
45 #include "io/iommu.h"
46 
47 /*
48  * Documented in the "Intel Virtualization Technology for Directed I/O",
49  * Architecture Spec, September 2008.
50  */
51 
52 /* Section 10.4 "Register Descriptions" */
/*
 * Overlay for the leading registers of one remapping unit.  Pointers of
 * this type are produced by PHYS_TO_DMAP() on the unit's register base,
 * and every member is volatile so each access reaches the device.
 * The byte offsets noted below follow from the member types.
 */
struct vtdmap {
	volatile uint32_t	version;	/* 0x00 */
	volatile uint32_t	res0;		/* 0x04: padding up to 'cap' */
	volatile uint64_t	cap;		/* 0x08: decoded by VTD_CAP_*() */
	volatile uint64_t	ext_cap;	/* 0x10: decoded by VTD_ECAP_*() */
	volatile uint32_t	gcr;		/* 0x18: commands, VTD_GCR_* bits */
	volatile uint32_t	gsr;		/* 0x1c: status, VTD_GSR_* bits */
	volatile uint64_t	rta;		/* 0x20: root table physical address */
	volatile uint64_t	ccr;		/* 0x28: context cache command, VTD_CCR_* */
};
63 
/*
 * Field extractors for the 'cap' register:
 *   SAGAW - bitmask of supported adjusted guest address widths
 *   ND    - encoded number of domains (see vtd_max_domains())
 *   CM    - caching mode; vtd_init() refuses to run when it is set
 *   SPS   - bitmask of supported super page sizes
 *   RWBF  - when set, vtd_wbflush() issues a write-buffer flush command
 */
#define	VTD_CAP_SAGAW(cap)	(((cap) >> 8) & 0x1F)
#define	VTD_CAP_ND(cap)		((cap) & 0x7)
#define	VTD_CAP_CM(cap)		(((cap) >> 7) & 0x1)
#define	VTD_CAP_SPS(cap)	(((cap) >> 34) & 0xF)
#define	VTD_CAP_RWBF(cap)	(((cap) >> 4) & 0x1)

/*
 * Field extractors for the 'ext_cap' register:
 *   DI        - selects VTD_CTX_TT_ALL in vtd_add_device()
 *   COHERENCY - when clear, vtd_wbflush() flushes the CPU caches
 *   IRO       - offset, in 16-byte units, used to locate the IOTLB
 *               invalidation register
 */
#define	VTD_ECAP_DI(ecap)	(((ecap) >> 2) & 0x1)
#define	VTD_ECAP_COHERENCY(ecap) ((ecap) & 0x1)
#define	VTD_ECAP_IRO(ecap)	(((ecap) >> 8) & 0x3FF)

/* Command bits written to 'gcr'. */
#define	VTD_GCR_WBF		(1 << 27)
#define	VTD_GCR_SRTP		(1 << 30)
#define	VTD_GCR_TE		(1U << 31)

/* Status bits polled in 'gsr'; positions mirror the VTD_GCR_* commands. */
#define	VTD_GSR_WBFS		(1 << 27)
#define	VTD_GSR_RTPS		(1 << 30)
#define	VTD_GSR_TES		(1U << 31)

/* Bits of the context cache command register 'ccr'. */
#define	VTD_CCR_ICC		(1UL << 63)	/* invalidate context cache */
#define	VTD_CCR_CIRG_GLOBAL	(1UL << 61)	/* global invalidation */

/* Bits of the IOTLB invalidation register. */
#define	VTD_IIR_IVT		(1UL << 63)	/* invalidation IOTLB */
#define	VTD_IIR_IIRG_GLOBAL	(1ULL << 60)	/* global IOTLB invalidation */
#define	VTD_IIR_IIRG_DOMAIN	(2ULL << 60)	/* domain IOTLB invalidation */
#define	VTD_IIR_IIRG_PAGE	(3ULL << 60)	/* page IOTLB invalidation */
#define	VTD_IIR_DRAIN_READS	(1ULL << 49)	/* drain pending DMA reads */
#define	VTD_IIR_DRAIN_WRITES	(1ULL << 48)	/* drain pending DMA writes */
#define	VTD_IIR_DOMAIN_P	32

/* Flags stored in the low word of root and context entries. */
#define	VTD_ROOT_PRESENT	0x1
#define	VTD_CTX_PRESENT		0x1
#define	VTD_CTX_TT_ALL		(1UL << 2)

/* Page table entry permission/size flags and the physical address mask. */
#define	VTD_PTE_RD		(1UL << 0)
#define	VTD_PTE_WR		(1UL << 1)
#define	VTD_PTE_SUPERPAGE	(1UL << 7)
#define	VTD_PTE_ADDR_M		(0x000FFFFFFFFFF000UL)
101 
/*
 * Translation state for one DMA domain.  Instances are allocated by
 * vtd_create_domain(), released by vtd_destroy_domain() and handed to the
 * other entry points as an opaque 'void *'.
 */
struct domain {
	uint64_t	*ptp;		/* first level page table page */
	int		pt_levels;	/* number of page table levels */
	int		addrwidth;	/* 'AW' field in context entry */
	int		spsmask;	/* supported super page sizes */
	u_int		id;		/* domain id */
	vm_paddr_t	maxaddr;	/* highest address to be mapped */
	SLIST_ENTRY(domain) next;	/* linkage on 'domhead' */
};
111 
/* Every live domain; searched by domain_id() to find an unused id. */
static SLIST_HEAD(, domain) domhead;

#define	DRHD_MAX_UNITS	8		/* capacity of vtdmaps[] */
static int		drhd_num;	/* number of valid vtdmaps[] entries */
static struct vtdmap	*vtdmaps[DRHD_MAX_UNITS];	/* register windows */
static int		max_domains;	/* set from unit 0 in vtd_init() */
typedef int		(*drhd_ident_func_t)(void);

/*
 * Statically allocated, page-aligned translation tables shared by all
 * remapping units.  Entries are two 64-bit words wide: vtd_init() fills
 * root_table[bus * 2] with the address of ctx_tables[bus], and
 * vtd_add_device() indexes a context table by (slot << 3 | func) * 2.
 */
static uint64_t root_table[PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);
static uint64_t ctx_tables[256][PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);

/* malloc(9) type used for domains and page table pages. */
static MALLOC_DEFINE(M_VTD, "vtd", "vtd");
124 
125 static int
126 vtd_max_domains(struct vtdmap *vtdmap)
127 {
128 	int nd;
129 
130 	nd = VTD_CAP_ND(vtdmap->cap);
131 
132 	switch (nd) {
133 	case 0:
134 		return (16);
135 	case 1:
136 		return (64);
137 	case 2:
138 		return (256);
139 	case 3:
140 		return (1024);
141 	case 4:
142 		return (4 * 1024);
143 	case 5:
144 		return (16 * 1024);
145 	case 6:
146 		return (64 * 1024);
147 	default:
148 		panic("vtd_max_domains: invalid value of nd (0x%0x)", nd);
149 	}
150 }
151 
152 static u_int
153 domain_id(void)
154 {
155 	u_int id;
156 	struct domain *dom;
157 
158 	/* Skip domain id 0 - it is reserved when Caching Mode field is set */
159 	for (id = 1; id < max_domains; id++) {
160 		SLIST_FOREACH(dom, &domhead, next) {
161 			if (dom->id == id)
162 				break;
163 		}
164 		if (dom == NULL)
165 			break;		/* found it */
166 	}
167 
168 	if (id >= max_domains)
169 		panic("domain ids exhausted");
170 
171 	return (id);
172 }
173 
174 static void
175 vtd_wbflush(struct vtdmap *vtdmap)
176 {
177 
178 	if (VTD_ECAP_COHERENCY(vtdmap->ext_cap) == 0)
179 		pmap_invalidate_cache();
180 
181 	if (VTD_CAP_RWBF(vtdmap->cap)) {
182 		vtdmap->gcr = VTD_GCR_WBF;
183 		while ((vtdmap->gsr & VTD_GSR_WBFS) != 0)
184 			;
185 	}
186 }
187 
188 static void
189 vtd_ctx_global_invalidate(struct vtdmap *vtdmap)
190 {
191 
192 	vtdmap->ccr = VTD_CCR_ICC | VTD_CCR_CIRG_GLOBAL;
193 	while ((vtdmap->ccr & VTD_CCR_ICC) != 0)
194 		;
195 }
196 
197 static void
198 vtd_iotlb_global_invalidate(struct vtdmap *vtdmap)
199 {
200 	int offset;
201 	volatile uint64_t *iotlb_reg, val;
202 
203 	vtd_wbflush(vtdmap);
204 
205 	offset = VTD_ECAP_IRO(vtdmap->ext_cap) * 16;
206 	iotlb_reg = (volatile uint64_t *)((caddr_t)vtdmap + offset + 8);
207 
208 	*iotlb_reg =  VTD_IIR_IVT | VTD_IIR_IIRG_GLOBAL |
209 		      VTD_IIR_DRAIN_READS | VTD_IIR_DRAIN_WRITES;
210 
211 	while (1) {
212 		val = *iotlb_reg;
213 		if ((val & VTD_IIR_IVT) == 0)
214 			break;
215 	}
216 }
217 
218 static void
219 vtd_translation_enable(struct vtdmap *vtdmap)
220 {
221 
222 	vtdmap->gcr = VTD_GCR_TE;
223 	while ((vtdmap->gsr & VTD_GSR_TES) == 0)
224 		;
225 }
226 
227 static void
228 vtd_translation_disable(struct vtdmap *vtdmap)
229 {
230 
231 	vtdmap->gcr = 0;
232 	while ((vtdmap->gsr & VTD_GSR_TES) != 0)
233 		;
234 }
235 
236 static int
237 vtd_init(void)
238 {
239 	int i, units, remaining;
240 	struct vtdmap *vtdmap;
241 	vm_paddr_t ctx_paddr;
242 	char *end, envname[32];
243 	unsigned long mapaddr;
244 	ACPI_STATUS status;
245 	ACPI_TABLE_DMAR *dmar;
246 	ACPI_DMAR_HEADER *hdr;
247 	ACPI_DMAR_HARDWARE_UNIT *drhd;
248 
249 	/*
250 	 * Allow the user to override the ACPI DMAR table by specifying the
251 	 * physical address of each remapping unit.
252 	 *
253 	 * The following example specifies two remapping units at
254 	 * physical addresses 0xfed90000 and 0xfeda0000 respectively.
255 	 * set vtd.regmap.0.addr=0xfed90000
256 	 * set vtd.regmap.1.addr=0xfeda0000
257 	 */
258 	for (units = 0; units < DRHD_MAX_UNITS; units++) {
259 		snprintf(envname, sizeof(envname), "vtd.regmap.%d.addr", units);
260 		if (getenv_ulong(envname, &mapaddr) == 0)
261 			break;
262 		vtdmaps[units] = (struct vtdmap *)PHYS_TO_DMAP(mapaddr);
263 	}
264 
265 	if (units > 0)
266 		goto skip_dmar;
267 
268 	/* Search for DMAR table. */
269 	status = AcpiGetTable(ACPI_SIG_DMAR, 0, (ACPI_TABLE_HEADER **)&dmar);
270 	if (ACPI_FAILURE(status))
271 		return (ENXIO);
272 
273 	end = (char *)dmar + dmar->Header.Length;
274 	remaining = dmar->Header.Length - sizeof(ACPI_TABLE_DMAR);
275 	while (remaining > sizeof(ACPI_DMAR_HEADER)) {
276 		hdr = (ACPI_DMAR_HEADER *)(end - remaining);
277 		if (hdr->Length > remaining)
278 			break;
279 		/*
280 		 * From Intel VT-d arch spec, version 1.3:
281 		 * BIOS implementations must report mapping structures
282 		 * in numerical order, i.e. All remapping structures of
283 		 * type 0 (DRHD) enumerated before remapping structures of
284 		 * type 1 (RMRR) and so forth.
285 		 */
286 		if (hdr->Type != ACPI_DMAR_TYPE_HARDWARE_UNIT)
287 			break;
288 
289 		drhd = (ACPI_DMAR_HARDWARE_UNIT *)hdr;
290 		vtdmaps[units++] = (struct vtdmap *)PHYS_TO_DMAP(drhd->Address);
291 		if (units >= DRHD_MAX_UNITS)
292 			break;
293 		remaining -= hdr->Length;
294 	}
295 
296 	if (units <= 0)
297 		return (ENXIO);
298 
299 skip_dmar:
300 	drhd_num = units;
301 	vtdmap = vtdmaps[0];
302 
303 	if (VTD_CAP_CM(vtdmap->cap) != 0)
304 		panic("vtd_init: invalid caching mode");
305 
306 	max_domains = vtd_max_domains(vtdmap);
307 
308 	/*
309 	 * Set up the root-table to point to the context-entry tables
310 	 */
311 	for (i = 0; i < 256; i++) {
312 		ctx_paddr = vtophys(ctx_tables[i]);
313 		if (ctx_paddr & PAGE_MASK)
314 			panic("ctx table (0x%0lx) not page aligned", ctx_paddr);
315 
316 		root_table[i * 2] = ctx_paddr | VTD_ROOT_PRESENT;
317 	}
318 
319 	return (0);
320 }
321 
/*
 * Cleanup hook listed in iommu_ops_intel.  The body is intentionally
 * empty: nothing set up by vtd_init() is released here.
 */
static void
vtd_cleanup(void)
{
}
326 
327 static void
328 vtd_enable(void)
329 {
330 	int i;
331 	struct vtdmap *vtdmap;
332 
333 	for (i = 0; i < drhd_num; i++) {
334 		vtdmap = vtdmaps[i];
335 		vtd_wbflush(vtdmap);
336 
337 		/* Update the root table address */
338 		vtdmap->rta = vtophys(root_table);
339 		vtdmap->gcr = VTD_GCR_SRTP;
340 		while ((vtdmap->gsr & VTD_GSR_RTPS) == 0)
341 			;
342 
343 		vtd_ctx_global_invalidate(vtdmap);
344 		vtd_iotlb_global_invalidate(vtdmap);
345 
346 		vtd_translation_enable(vtdmap);
347 	}
348 }
349 
350 static void
351 vtd_disable(void)
352 {
353 	int i;
354 	struct vtdmap *vtdmap;
355 
356 	for (i = 0; i < drhd_num; i++) {
357 		vtdmap = vtdmaps[i];
358 		vtd_translation_disable(vtdmap);
359 	}
360 }
361 
362 static void
363 vtd_add_device(void *arg, int bus, int slot, int func)
364 {
365 	int idx;
366 	uint64_t *ctxp;
367 	struct domain *dom = arg;
368 	vm_paddr_t pt_paddr;
369 	struct vtdmap *vtdmap;
370 
371 	if (bus < 0 || bus > PCI_BUSMAX ||
372 	    slot < 0 || slot > PCI_SLOTMAX ||
373 	    func < 0 || func > PCI_FUNCMAX)
374 		panic("vtd_add_device: invalid bsf %d/%d/%d", bus, slot, func);
375 
376 	vtdmap = vtdmaps[0];
377 	ctxp = ctx_tables[bus];
378 	pt_paddr = vtophys(dom->ptp);
379 	idx = (slot << 3 | func) * 2;
380 
381 	if (ctxp[idx] & VTD_CTX_PRESENT) {
382 		panic("vtd_add_device: device %d/%d/%d is already owned by "
383 		      "domain %d", bus, slot, func,
384 		      (uint16_t)(ctxp[idx + 1] >> 8));
385 	}
386 
387 	/*
388 	 * Order is important. The 'present' bit is set only after all fields
389 	 * of the context pointer are initialized.
390 	 */
391 	ctxp[idx + 1] = dom->addrwidth | (dom->id << 8);
392 
393 	if (VTD_ECAP_DI(vtdmap->ext_cap))
394 		ctxp[idx] = VTD_CTX_TT_ALL;
395 	else
396 		ctxp[idx] = 0;
397 
398 	ctxp[idx] |= pt_paddr | VTD_CTX_PRESENT;
399 
400 	/*
401 	 * 'Not Present' entries are not cached in either the Context Cache
402 	 * or in the IOTLB, so there is no need to invalidate either of them.
403 	 */
404 }
405 
406 static void
407 vtd_remove_device(void *arg, int bus, int slot, int func)
408 {
409 	int i, idx;
410 	uint64_t *ctxp;
411 	struct vtdmap *vtdmap;
412 
413 	if (bus < 0 || bus > PCI_BUSMAX ||
414 	    slot < 0 || slot > PCI_SLOTMAX ||
415 	    func < 0 || func > PCI_FUNCMAX)
416 		panic("vtd_add_device: invalid bsf %d/%d/%d", bus, slot, func);
417 
418 	ctxp = ctx_tables[bus];
419 	idx = (slot << 3 | func) * 2;
420 
421 	/*
422 	 * Order is important. The 'present' bit is must be cleared first.
423 	 */
424 	ctxp[idx] = 0;
425 	ctxp[idx + 1] = 0;
426 
427 	/*
428 	 * Invalidate the Context Cache and the IOTLB.
429 	 *
430 	 * XXX use device-selective invalidation for Context Cache
431 	 * XXX use domain-selective invalidation for IOTLB
432 	 */
433 	for (i = 0; i < drhd_num; i++) {
434 		vtdmap = vtdmaps[i];
435 		vtd_ctx_global_invalidate(vtdmap);
436 		vtd_iotlb_global_invalidate(vtdmap);
437 	}
438 }
439 
440 #define	CREATE_MAPPING	0
441 #define	REMOVE_MAPPING	1
442 
443 static uint64_t
444 vtd_update_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len,
445 		   int remove)
446 {
447 	struct domain *dom;
448 	int i, spshift, ptpshift, ptpindex, nlevels;
449 	uint64_t spsize, *ptp;
450 
451 	dom = arg;
452 	ptpindex = 0;
453 	ptpshift = 0;
454 
455 	if (gpa & PAGE_MASK)
456 		panic("vtd_create_mapping: unaligned gpa 0x%0lx", gpa);
457 
458 	if (hpa & PAGE_MASK)
459 		panic("vtd_create_mapping: unaligned hpa 0x%0lx", hpa);
460 
461 	if (len & PAGE_MASK)
462 		panic("vtd_create_mapping: unaligned len 0x%0lx", len);
463 
464 	/*
465 	 * Compute the size of the mapping that we can accomodate.
466 	 *
467 	 * This is based on three factors:
468 	 * - supported super page size
469 	 * - alignment of the region starting at 'gpa' and 'hpa'
470 	 * - length of the region 'len'
471 	 */
472 	spshift = 48;
473 	for (i = 3; i >= 0; i--) {
474 		spsize = 1UL << spshift;
475 		if ((dom->spsmask & (1 << i)) != 0 &&
476 		    (gpa & (spsize - 1)) == 0 &&
477 		    (hpa & (spsize - 1)) == 0 &&
478 		    (len >= spsize)) {
479 			break;
480 		}
481 		spshift -= 9;
482 	}
483 
484 	ptp = dom->ptp;
485 	nlevels = dom->pt_levels;
486 	while (--nlevels >= 0) {
487 		ptpshift = 12 + nlevels * 9;
488 		ptpindex = (gpa >> ptpshift) & 0x1FF;
489 
490 		/* We have reached the leaf mapping */
491 		if (spshift >= ptpshift) {
492 			break;
493 		}
494 
495 		/*
496 		 * We are working on a non-leaf page table page.
497 		 *
498 		 * Create a downstream page table page if necessary and point
499 		 * to it from the current page table.
500 		 */
501 		if (ptp[ptpindex] == 0) {
502 			void *nlp = malloc(PAGE_SIZE, M_VTD, M_WAITOK | M_ZERO);
503 			ptp[ptpindex] = vtophys(nlp)| VTD_PTE_RD | VTD_PTE_WR;
504 		}
505 
506 		ptp = (uint64_t *)PHYS_TO_DMAP(ptp[ptpindex] & VTD_PTE_ADDR_M);
507 	}
508 
509 	if ((gpa & ((1UL << ptpshift) - 1)) != 0)
510 		panic("gpa 0x%lx and ptpshift %d mismatch", gpa, ptpshift);
511 
512 	/*
513 	 * Update the 'gpa' -> 'hpa' mapping
514 	 */
515 	if (remove) {
516 		ptp[ptpindex] = 0;
517 	} else {
518 		ptp[ptpindex] = hpa | VTD_PTE_RD | VTD_PTE_WR;
519 
520 		if (nlevels > 0)
521 			ptp[ptpindex] |= VTD_PTE_SUPERPAGE;
522 	}
523 
524 	return (1UL << ptpshift);
525 }
526 
527 static uint64_t
528 vtd_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len)
529 {
530 
531 	return (vtd_update_mapping(arg, gpa, hpa, len, CREATE_MAPPING));
532 }
533 
534 static uint64_t
535 vtd_remove_mapping(void *arg, vm_paddr_t gpa, uint64_t len)
536 {
537 
538 	return (vtd_update_mapping(arg, gpa, 0, len, REMOVE_MAPPING));
539 }
540 
541 static void
542 vtd_invalidate_tlb(void *dom)
543 {
544 	int i;
545 	struct vtdmap *vtdmap;
546 
547 	/*
548 	 * Invalidate the IOTLB.
549 	 * XXX use domain-selective invalidation for IOTLB
550 	 */
551 	for (i = 0; i < drhd_num; i++) {
552 		vtdmap = vtdmaps[i];
553 		vtd_iotlb_global_invalidate(vtdmap);
554 	}
555 }
556 
557 static void *
558 vtd_create_domain(vm_paddr_t maxaddr)
559 {
560 	struct domain *dom;
561 	vm_paddr_t addr;
562 	int tmp, i, gaw, agaw, sagaw, res, pt_levels, addrwidth;
563 	struct vtdmap *vtdmap;
564 
565 	if (drhd_num <= 0)
566 		panic("vtd_create_domain: no dma remapping hardware available");
567 
568 	vtdmap = vtdmaps[0];
569 
570 	/*
571 	 * Calculate AGAW.
572 	 * Section 3.4.2 "Adjusted Guest Address Width", Architecture Spec.
573 	 */
574 	addr = 0;
575 	for (gaw = 0; addr < maxaddr; gaw++)
576 		addr = 1ULL << gaw;
577 
578 	res = (gaw - 12) % 9;
579 	if (res == 0)
580 		agaw = gaw;
581 	else
582 		agaw = gaw + 9 - res;
583 
584 	if (agaw > 64)
585 		agaw = 64;
586 
587 	/*
588 	 * Select the smallest Supported AGAW and the corresponding number
589 	 * of page table levels.
590 	 */
591 	pt_levels = 2;
592 	sagaw = 30;
593 	addrwidth = 0;
594 	tmp = VTD_CAP_SAGAW(vtdmap->cap);
595 	for (i = 0; i < 5; i++) {
596 		if ((tmp & (1 << i)) != 0 && sagaw >= agaw)
597 			break;
598 		pt_levels++;
599 		addrwidth++;
600 		sagaw += 9;
601 		if (sagaw > 64)
602 			sagaw = 64;
603 	}
604 
605 	if (i >= 5) {
606 		panic("vtd_create_domain: SAGAW 0x%lx does not support AGAW %d",
607 		      VTD_CAP_SAGAW(vtdmap->cap), agaw);
608 	}
609 
610 	dom = malloc(sizeof(struct domain), M_VTD, M_ZERO | M_WAITOK);
611 	dom->pt_levels = pt_levels;
612 	dom->addrwidth = addrwidth;
613 	dom->id = domain_id();
614 	dom->maxaddr = maxaddr;
615 	dom->ptp = malloc(PAGE_SIZE, M_VTD, M_ZERO | M_WAITOK);
616 	if ((uintptr_t)dom->ptp & PAGE_MASK)
617 		panic("vtd_create_domain: ptp (%p) not page aligned", dom->ptp);
618 
619 #ifdef notyet
620 	/*
621 	 * XXX superpage mappings for the iommu do not work correctly.
622 	 *
623 	 * By default all physical memory is mapped into the host_domain.
624 	 * When a VM is allocated wired memory the pages belonging to it
625 	 * are removed from the host_domain and added to the vm's domain.
626 	 *
627 	 * If the page being removed was mapped using a superpage mapping
628 	 * in the host_domain then we need to demote the mapping before
629 	 * removing the page.
630 	 *
631 	 * There is not any code to deal with the demotion at the moment
632 	 * so we disable superpage mappings altogether.
633 	 */
634 	dom->spsmask = VTD_CAP_SPS(vtdmap->cap);
635 #endif
636 
637 	SLIST_INSERT_HEAD(&domhead, dom, next);
638 
639 	return (dom);
640 }
641 
642 static void
643 vtd_free_ptp(uint64_t *ptp, int level)
644 {
645 	int i;
646 	uint64_t *nlp;
647 
648 	if (level > 1) {
649 		for (i = 0; i < 512; i++) {
650 			if ((ptp[i] & (VTD_PTE_RD | VTD_PTE_WR)) == 0)
651 				continue;
652 			if ((ptp[i] & VTD_PTE_SUPERPAGE) != 0)
653 				continue;
654 			nlp = (uint64_t *)PHYS_TO_DMAP(ptp[i] & VTD_PTE_ADDR_M);
655 			vtd_free_ptp(nlp, level - 1);
656 		}
657 	}
658 
659 	bzero(ptp, PAGE_SIZE);
660 	free(ptp, M_VTD);
661 }
662 
663 static void
664 vtd_destroy_domain(void *arg)
665 {
666 	struct domain *dom;
667 
668 	dom = arg;
669 
670 	SLIST_REMOVE(&domhead, dom, domain, next);
671 	vtd_free_ptp(dom->ptp, dom->pt_levels);
672 	free(dom, M_VTD);
673 }
674 
/*
 * Entry points exported by this file.
 *
 * The initializer is positional, so the order of the functions below has
 * to match the member order of 'struct iommu_ops'.
 * NOTE(review): that structure is declared in "io/iommu.h", which is not
 * visible here - verify the order against the header when changing it.
 */
struct iommu_ops iommu_ops_intel = {
	vtd_init,
	vtd_cleanup,
	vtd_enable,
	vtd_disable,
	vtd_create_domain,
	vtd_destroy_domain,
	vtd_create_mapping,
	vtd_remove_mapping,
	vtd_add_device,
	vtd_remove_device,
	vtd_invalidate_tlb,
};
688