xref: /freebsd/sys/amd64/vmm/intel/vtd.c (revision c1cdf6a42f0d951ba720688dfc6ce07608b02f6e)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2011 NetApp, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include <sys/param.h>
35 #include <sys/kernel.h>
36 #include <sys/systm.h>
37 #include <sys/malloc.h>
38 
39 #include <vm/vm.h>
40 #include <vm/pmap.h>
41 
42 #include <dev/pci/pcireg.h>
43 
44 #include <machine/vmparam.h>
45 #include <contrib/dev/acpica/include/acpi.h>
46 
47 #include "io/iommu.h"
48 
49 /*
50  * Documented in the "Intel Virtualization Technology for Directed I/O",
51  * Architecture Spec, September 2008.
52  */
53 
54 /* Section 10.4 "Register Descriptions" */
/*
 * Leading registers of a remapping unit, overlaid directly on the unit's
 * register page.  All fields are volatile so that every access in the
 * code below results in an actual load from or store to the device.
 */
struct vtdmap {
	volatile uint32_t	version;	/* 0x00 */
	volatile uint32_t	res0;		/* 0x04: not used by this file */
	volatile uint64_t	cap;		/* 0x08: decoded by VTD_CAP_* */
	volatile uint64_t	ext_cap;	/* 0x10: decoded by VTD_ECAP_* */
	volatile uint32_t	gcr;		/* 0x18: commands, VTD_GCR_* */
	volatile uint32_t	gsr;		/* 0x1c: status, VTD_GSR_* */
	volatile uint64_t	rta;		/* 0x20: root table address */
	volatile uint64_t	ccr;		/* 0x28: context cache, VTD_CCR_* */
};
65 
/* Field extractors for the 'cap' register. */
#define	VTD_CAP_SAGAW(cap)	(((cap) >> 8) & 0x1F)
#define	VTD_CAP_ND(cap)		((cap) & 0x7)
#define	VTD_CAP_CM(cap)		(((cap) >> 7) & 0x1)
#define	VTD_CAP_SPS(cap)	(((cap) >> 34) & 0xF)
#define	VTD_CAP_RWBF(cap)	(((cap) >> 4) & 0x1)

/* Field extractors for the 'ext_cap' register. */
#define	VTD_ECAP_DI(ecap)	(((ecap) >> 2) & 0x1)
#define	VTD_ECAP_COHERENCY(ecap) ((ecap) & 0x1)
#define	VTD_ECAP_IRO(ecap)	(((ecap) >> 8) & 0x3FF)

/* Commands written to 'gcr'. */
#define	VTD_GCR_WBF		(1 << 27)
#define	VTD_GCR_SRTP		(1 << 30)
#define	VTD_GCR_TE		(1U << 31)

/* Status bits polled in 'gsr'; positions mirror the 'gcr' commands. */
#define	VTD_GSR_WBFS		(1 << 27)
#define	VTD_GSR_RTPS		(1 << 30)
#define	VTD_GSR_TES		(1U << 31)

/* Context cache invalidation via 'ccr'. */
#define	VTD_CCR_ICC		(1UL << 63)	/* invalidate context cache */
#define	VTD_CCR_CIRG_GLOBAL	(1UL << 61)	/* global invalidation */

/* IOTLB invalidation register, located through VTD_ECAP_IRO(). */
#define	VTD_IIR_IVT		(1UL << 63)	/* invalidation IOTLB */
#define	VTD_IIR_IIRG_GLOBAL	(1ULL << 60)	/* global IOTLB invalidation */
#define	VTD_IIR_IIRG_DOMAIN	(2ULL << 60)	/* domain IOTLB invalidation */
#define	VTD_IIR_IIRG_PAGE	(3ULL << 60)	/* page IOTLB invalidation */
#define	VTD_IIR_DRAIN_READS	(1ULL << 49)	/* drain pending DMA reads */
#define	VTD_IIR_DRAIN_WRITES	(1ULL << 48)	/* drain pending DMA writes */
#define	VTD_IIR_DOMAIN_P	32

/* Root-table and context-table entry bits. */
#define	VTD_ROOT_PRESENT	0x1
#define	VTD_CTX_PRESENT		0x1
#define	VTD_CTX_TT_ALL		(1UL << 2)

/* Page table entry bits. */
#define	VTD_PTE_RD		(1UL << 0)
#define	VTD_PTE_WR		(1UL << 1)
#define	VTD_PTE_SUPERPAGE	(1UL << 7)
#define	VTD_PTE_ADDR_M		(0x000FFFFFFFFFF000UL)

/*
 * Index of the first 64-bit word of a device's context entry within its
 * bus's context table: the low 8 bits of the rid select the entry and
 * each entry occupies two words.
 */
#define VTD_RID2IDX(rid)	(((rid) & 0xff) * 2)
105 
106 struct domain {
107 	uint64_t	*ptp;		/* first level page table page */
108 	int		pt_levels;	/* number of page table levels */
109 	int		addrwidth;	/* 'AW' field in context entry */
110 	int		spsmask;	/* supported super page sizes */
111 	u_int		id;		/* domain id */
112 	vm_paddr_t	maxaddr;	/* highest address to be mapped */
113 	SLIST_ENTRY(domain) next;
114 };
115 
116 static SLIST_HEAD(, domain) domhead;
117 
118 #define	DRHD_MAX_UNITS	8
119 static int		drhd_num;
120 static struct vtdmap	*vtdmaps[DRHD_MAX_UNITS];
121 static int		max_domains;
122 typedef int		(*drhd_ident_func_t)(void);
123 
124 static uint64_t root_table[PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);
125 static uint64_t ctx_tables[256][PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);
126 
127 static MALLOC_DEFINE(M_VTD, "vtd", "vtd");
128 
129 static int
130 vtd_max_domains(struct vtdmap *vtdmap)
131 {
132 	int nd;
133 
134 	nd = VTD_CAP_ND(vtdmap->cap);
135 
136 	switch (nd) {
137 	case 0:
138 		return (16);
139 	case 1:
140 		return (64);
141 	case 2:
142 		return (256);
143 	case 3:
144 		return (1024);
145 	case 4:
146 		return (4 * 1024);
147 	case 5:
148 		return (16 * 1024);
149 	case 6:
150 		return (64 * 1024);
151 	default:
152 		panic("vtd_max_domains: invalid value of nd (0x%0x)", nd);
153 	}
154 }
155 
156 static u_int
157 domain_id(void)
158 {
159 	u_int id;
160 	struct domain *dom;
161 
162 	/* Skip domain id 0 - it is reserved when Caching Mode field is set */
163 	for (id = 1; id < max_domains; id++) {
164 		SLIST_FOREACH(dom, &domhead, next) {
165 			if (dom->id == id)
166 				break;
167 		}
168 		if (dom == NULL)
169 			break;		/* found it */
170 	}
171 
172 	if (id >= max_domains)
173 		panic("domain ids exhausted");
174 
175 	return (id);
176 }
177 
178 static void
179 vtd_wbflush(struct vtdmap *vtdmap)
180 {
181 
182 	if (VTD_ECAP_COHERENCY(vtdmap->ext_cap) == 0)
183 		pmap_invalidate_cache();
184 
185 	if (VTD_CAP_RWBF(vtdmap->cap)) {
186 		vtdmap->gcr = VTD_GCR_WBF;
187 		while ((vtdmap->gsr & VTD_GSR_WBFS) != 0)
188 			;
189 	}
190 }
191 
192 static void
193 vtd_ctx_global_invalidate(struct vtdmap *vtdmap)
194 {
195 
196 	vtdmap->ccr = VTD_CCR_ICC | VTD_CCR_CIRG_GLOBAL;
197 	while ((vtdmap->ccr & VTD_CCR_ICC) != 0)
198 		;
199 }
200 
201 static void
202 vtd_iotlb_global_invalidate(struct vtdmap *vtdmap)
203 {
204 	int offset;
205 	volatile uint64_t *iotlb_reg, val;
206 
207 	vtd_wbflush(vtdmap);
208 
209 	offset = VTD_ECAP_IRO(vtdmap->ext_cap) * 16;
210 	iotlb_reg = (volatile uint64_t *)((caddr_t)vtdmap + offset + 8);
211 
212 	*iotlb_reg =  VTD_IIR_IVT | VTD_IIR_IIRG_GLOBAL |
213 		      VTD_IIR_DRAIN_READS | VTD_IIR_DRAIN_WRITES;
214 
215 	while (1) {
216 		val = *iotlb_reg;
217 		if ((val & VTD_IIR_IVT) == 0)
218 			break;
219 	}
220 }
221 
222 static void
223 vtd_translation_enable(struct vtdmap *vtdmap)
224 {
225 
226 	vtdmap->gcr = VTD_GCR_TE;
227 	while ((vtdmap->gsr & VTD_GSR_TES) == 0)
228 		;
229 }
230 
231 static void
232 vtd_translation_disable(struct vtdmap *vtdmap)
233 {
234 
235 	vtdmap->gcr = 0;
236 	while ((vtdmap->gsr & VTD_GSR_TES) != 0)
237 		;
238 }
239 
240 static int
241 vtd_init(void)
242 {
243 	int i, units, remaining;
244 	struct vtdmap *vtdmap;
245 	vm_paddr_t ctx_paddr;
246 	char *end, envname[32];
247 	unsigned long mapaddr;
248 	ACPI_STATUS status;
249 	ACPI_TABLE_DMAR *dmar;
250 	ACPI_DMAR_HEADER *hdr;
251 	ACPI_DMAR_HARDWARE_UNIT *drhd;
252 
253 	/*
254 	 * Allow the user to override the ACPI DMAR table by specifying the
255 	 * physical address of each remapping unit.
256 	 *
257 	 * The following example specifies two remapping units at
258 	 * physical addresses 0xfed90000 and 0xfeda0000 respectively.
259 	 * set vtd.regmap.0.addr=0xfed90000
260 	 * set vtd.regmap.1.addr=0xfeda0000
261 	 */
262 	for (units = 0; units < DRHD_MAX_UNITS; units++) {
263 		snprintf(envname, sizeof(envname), "vtd.regmap.%d.addr", units);
264 		if (getenv_ulong(envname, &mapaddr) == 0)
265 			break;
266 		vtdmaps[units] = (struct vtdmap *)PHYS_TO_DMAP(mapaddr);
267 	}
268 
269 	if (units > 0)
270 		goto skip_dmar;
271 
272 	/* Search for DMAR table. */
273 	status = AcpiGetTable(ACPI_SIG_DMAR, 0, (ACPI_TABLE_HEADER **)&dmar);
274 	if (ACPI_FAILURE(status))
275 		return (ENXIO);
276 
277 	end = (char *)dmar + dmar->Header.Length;
278 	remaining = dmar->Header.Length - sizeof(ACPI_TABLE_DMAR);
279 	while (remaining > sizeof(ACPI_DMAR_HEADER)) {
280 		hdr = (ACPI_DMAR_HEADER *)(end - remaining);
281 		if (hdr->Length > remaining)
282 			break;
283 		/*
284 		 * From Intel VT-d arch spec, version 1.3:
285 		 * BIOS implementations must report mapping structures
286 		 * in numerical order, i.e. All remapping structures of
287 		 * type 0 (DRHD) enumerated before remapping structures of
288 		 * type 1 (RMRR) and so forth.
289 		 */
290 		if (hdr->Type != ACPI_DMAR_TYPE_HARDWARE_UNIT)
291 			break;
292 
293 		drhd = (ACPI_DMAR_HARDWARE_UNIT *)hdr;
294 		vtdmaps[units++] = (struct vtdmap *)PHYS_TO_DMAP(drhd->Address);
295 		if (units >= DRHD_MAX_UNITS)
296 			break;
297 		remaining -= hdr->Length;
298 	}
299 
300 	if (units <= 0)
301 		return (ENXIO);
302 
303 skip_dmar:
304 	drhd_num = units;
305 	vtdmap = vtdmaps[0];
306 
307 	if (VTD_CAP_CM(vtdmap->cap) != 0)
308 		panic("vtd_init: invalid caching mode");
309 
310 	max_domains = vtd_max_domains(vtdmap);
311 
312 	/*
313 	 * Set up the root-table to point to the context-entry tables
314 	 */
315 	for (i = 0; i < 256; i++) {
316 		ctx_paddr = vtophys(ctx_tables[i]);
317 		if (ctx_paddr & PAGE_MASK)
318 			panic("ctx table (0x%0lx) not page aligned", ctx_paddr);
319 
320 		root_table[i * 2] = ctx_paddr | VTD_ROOT_PRESENT;
321 	}
322 
323 	return (0);
324 }
325 
/*
 * Counterpart of vtd_init() in the ops table.  Nothing is released here:
 * the tables set up by vtd_init() are statically allocated.
 */
static void
vtd_cleanup(void)
{

}
330 
331 static void
332 vtd_enable(void)
333 {
334 	int i;
335 	struct vtdmap *vtdmap;
336 
337 	for (i = 0; i < drhd_num; i++) {
338 		vtdmap = vtdmaps[i];
339 		vtd_wbflush(vtdmap);
340 
341 		/* Update the root table address */
342 		vtdmap->rta = vtophys(root_table);
343 		vtdmap->gcr = VTD_GCR_SRTP;
344 		while ((vtdmap->gsr & VTD_GSR_RTPS) == 0)
345 			;
346 
347 		vtd_ctx_global_invalidate(vtdmap);
348 		vtd_iotlb_global_invalidate(vtdmap);
349 
350 		vtd_translation_enable(vtdmap);
351 	}
352 }
353 
354 static void
355 vtd_disable(void)
356 {
357 	int i;
358 	struct vtdmap *vtdmap;
359 
360 	for (i = 0; i < drhd_num; i++) {
361 		vtdmap = vtdmaps[i];
362 		vtd_translation_disable(vtdmap);
363 	}
364 }
365 
366 static void
367 vtd_add_device(void *arg, uint16_t rid)
368 {
369 	int idx;
370 	uint64_t *ctxp;
371 	struct domain *dom = arg;
372 	vm_paddr_t pt_paddr;
373 	struct vtdmap *vtdmap;
374 	uint8_t bus;
375 
376 	vtdmap = vtdmaps[0];
377 	bus = PCI_RID2BUS(rid);
378 	ctxp = ctx_tables[bus];
379 	pt_paddr = vtophys(dom->ptp);
380 	idx = VTD_RID2IDX(rid);
381 
382 	if (ctxp[idx] & VTD_CTX_PRESENT) {
383 		panic("vtd_add_device: device %x is already owned by "
384 		      "domain %d", rid,
385 		      (uint16_t)(ctxp[idx + 1] >> 8));
386 	}
387 
388 	/*
389 	 * Order is important. The 'present' bit is set only after all fields
390 	 * of the context pointer are initialized.
391 	 */
392 	ctxp[idx + 1] = dom->addrwidth | (dom->id << 8);
393 
394 	if (VTD_ECAP_DI(vtdmap->ext_cap))
395 		ctxp[idx] = VTD_CTX_TT_ALL;
396 	else
397 		ctxp[idx] = 0;
398 
399 	ctxp[idx] |= pt_paddr | VTD_CTX_PRESENT;
400 
401 	/*
402 	 * 'Not Present' entries are not cached in either the Context Cache
403 	 * or in the IOTLB, so there is no need to invalidate either of them.
404 	 */
405 }
406 
407 static void
408 vtd_remove_device(void *arg, uint16_t rid)
409 {
410 	int i, idx;
411 	uint64_t *ctxp;
412 	struct vtdmap *vtdmap;
413 	uint8_t bus;
414 
415 	bus = PCI_RID2BUS(rid);
416 	ctxp = ctx_tables[bus];
417 	idx = VTD_RID2IDX(rid);
418 
419 	/*
420 	 * Order is important. The 'present' bit is must be cleared first.
421 	 */
422 	ctxp[idx] = 0;
423 	ctxp[idx + 1] = 0;
424 
425 	/*
426 	 * Invalidate the Context Cache and the IOTLB.
427 	 *
428 	 * XXX use device-selective invalidation for Context Cache
429 	 * XXX use domain-selective invalidation for IOTLB
430 	 */
431 	for (i = 0; i < drhd_num; i++) {
432 		vtdmap = vtdmaps[i];
433 		vtd_ctx_global_invalidate(vtdmap);
434 		vtd_iotlb_global_invalidate(vtdmap);
435 	}
436 }
437 
438 #define	CREATE_MAPPING	0
439 #define	REMOVE_MAPPING	1
440 
441 static uint64_t
442 vtd_update_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len,
443 		   int remove)
444 {
445 	struct domain *dom;
446 	int i, spshift, ptpshift, ptpindex, nlevels;
447 	uint64_t spsize, *ptp;
448 
449 	dom = arg;
450 	ptpindex = 0;
451 	ptpshift = 0;
452 
453 	KASSERT(gpa + len > gpa, ("%s: invalid gpa range %#lx/%#lx", __func__,
454 	    gpa, len));
455 	KASSERT(gpa + len <= dom->maxaddr, ("%s: gpa range %#lx/%#lx beyond "
456 	    "domain maxaddr %#lx", __func__, gpa, len, dom->maxaddr));
457 
458 	if (gpa & PAGE_MASK)
459 		panic("vtd_create_mapping: unaligned gpa 0x%0lx", gpa);
460 
461 	if (hpa & PAGE_MASK)
462 		panic("vtd_create_mapping: unaligned hpa 0x%0lx", hpa);
463 
464 	if (len & PAGE_MASK)
465 		panic("vtd_create_mapping: unaligned len 0x%0lx", len);
466 
467 	/*
468 	 * Compute the size of the mapping that we can accommodate.
469 	 *
470 	 * This is based on three factors:
471 	 * - supported super page size
472 	 * - alignment of the region starting at 'gpa' and 'hpa'
473 	 * - length of the region 'len'
474 	 */
475 	spshift = 48;
476 	for (i = 3; i >= 0; i--) {
477 		spsize = 1UL << spshift;
478 		if ((dom->spsmask & (1 << i)) != 0 &&
479 		    (gpa & (spsize - 1)) == 0 &&
480 		    (hpa & (spsize - 1)) == 0 &&
481 		    (len >= spsize)) {
482 			break;
483 		}
484 		spshift -= 9;
485 	}
486 
487 	ptp = dom->ptp;
488 	nlevels = dom->pt_levels;
489 	while (--nlevels >= 0) {
490 		ptpshift = 12 + nlevels * 9;
491 		ptpindex = (gpa >> ptpshift) & 0x1FF;
492 
493 		/* We have reached the leaf mapping */
494 		if (spshift >= ptpshift) {
495 			break;
496 		}
497 
498 		/*
499 		 * We are working on a non-leaf page table page.
500 		 *
501 		 * Create a downstream page table page if necessary and point
502 		 * to it from the current page table.
503 		 */
504 		if (ptp[ptpindex] == 0) {
505 			void *nlp = malloc(PAGE_SIZE, M_VTD, M_WAITOK | M_ZERO);
506 			ptp[ptpindex] = vtophys(nlp)| VTD_PTE_RD | VTD_PTE_WR;
507 		}
508 
509 		ptp = (uint64_t *)PHYS_TO_DMAP(ptp[ptpindex] & VTD_PTE_ADDR_M);
510 	}
511 
512 	if ((gpa & ((1UL << ptpshift) - 1)) != 0)
513 		panic("gpa 0x%lx and ptpshift %d mismatch", gpa, ptpshift);
514 
515 	/*
516 	 * Update the 'gpa' -> 'hpa' mapping
517 	 */
518 	if (remove) {
519 		ptp[ptpindex] = 0;
520 	} else {
521 		ptp[ptpindex] = hpa | VTD_PTE_RD | VTD_PTE_WR;
522 
523 		if (nlevels > 0)
524 			ptp[ptpindex] |= VTD_PTE_SUPERPAGE;
525 	}
526 
527 	return (1UL << ptpshift);
528 }
529 
530 static uint64_t
531 vtd_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len)
532 {
533 
534 	return (vtd_update_mapping(arg, gpa, hpa, len, CREATE_MAPPING));
535 }
536 
537 static uint64_t
538 vtd_remove_mapping(void *arg, vm_paddr_t gpa, uint64_t len)
539 {
540 
541 	return (vtd_update_mapping(arg, gpa, 0, len, REMOVE_MAPPING));
542 }
543 
544 static void
545 vtd_invalidate_tlb(void *dom)
546 {
547 	int i;
548 	struct vtdmap *vtdmap;
549 
550 	/*
551 	 * Invalidate the IOTLB.
552 	 * XXX use domain-selective invalidation for IOTLB
553 	 */
554 	for (i = 0; i < drhd_num; i++) {
555 		vtdmap = vtdmaps[i];
556 		vtd_iotlb_global_invalidate(vtdmap);
557 	}
558 }
559 
560 static void *
561 vtd_create_domain(vm_paddr_t maxaddr)
562 {
563 	struct domain *dom;
564 	vm_paddr_t addr;
565 	int tmp, i, gaw, agaw, sagaw, res, pt_levels, addrwidth;
566 	struct vtdmap *vtdmap;
567 
568 	if (drhd_num <= 0)
569 		panic("vtd_create_domain: no dma remapping hardware available");
570 
571 	vtdmap = vtdmaps[0];
572 
573 	/*
574 	 * Calculate AGAW.
575 	 * Section 3.4.2 "Adjusted Guest Address Width", Architecture Spec.
576 	 */
577 	addr = 0;
578 	for (gaw = 0; addr < maxaddr; gaw++)
579 		addr = 1ULL << gaw;
580 
581 	res = (gaw - 12) % 9;
582 	if (res == 0)
583 		agaw = gaw;
584 	else
585 		agaw = gaw + 9 - res;
586 
587 	if (agaw > 64)
588 		agaw = 64;
589 
590 	/*
591 	 * Select the smallest Supported AGAW and the corresponding number
592 	 * of page table levels.
593 	 */
594 	pt_levels = 2;
595 	sagaw = 30;
596 	addrwidth = 0;
597 	tmp = VTD_CAP_SAGAW(vtdmap->cap);
598 	for (i = 0; i < 5; i++) {
599 		if ((tmp & (1 << i)) != 0 && sagaw >= agaw)
600 			break;
601 		pt_levels++;
602 		addrwidth++;
603 		sagaw += 9;
604 		if (sagaw > 64)
605 			sagaw = 64;
606 	}
607 
608 	if (i >= 5) {
609 		panic("vtd_create_domain: SAGAW 0x%lx does not support AGAW %d",
610 		      VTD_CAP_SAGAW(vtdmap->cap), agaw);
611 	}
612 
613 	dom = malloc(sizeof(struct domain), M_VTD, M_ZERO | M_WAITOK);
614 	dom->pt_levels = pt_levels;
615 	dom->addrwidth = addrwidth;
616 	dom->id = domain_id();
617 	dom->maxaddr = maxaddr;
618 	dom->ptp = malloc(PAGE_SIZE, M_VTD, M_ZERO | M_WAITOK);
619 	if ((uintptr_t)dom->ptp & PAGE_MASK)
620 		panic("vtd_create_domain: ptp (%p) not page aligned", dom->ptp);
621 
622 #ifdef notyet
623 	/*
624 	 * XXX superpage mappings for the iommu do not work correctly.
625 	 *
626 	 * By default all physical memory is mapped into the host_domain.
627 	 * When a VM is allocated wired memory the pages belonging to it
628 	 * are removed from the host_domain and added to the vm's domain.
629 	 *
630 	 * If the page being removed was mapped using a superpage mapping
631 	 * in the host_domain then we need to demote the mapping before
632 	 * removing the page.
633 	 *
634 	 * There is not any code to deal with the demotion at the moment
635 	 * so we disable superpage mappings altogether.
636 	 */
637 	dom->spsmask = VTD_CAP_SPS(vtdmap->cap);
638 #endif
639 
640 	SLIST_INSERT_HEAD(&domhead, dom, next);
641 
642 	return (dom);
643 }
644 
645 static void
646 vtd_free_ptp(uint64_t *ptp, int level)
647 {
648 	int i;
649 	uint64_t *nlp;
650 
651 	if (level > 1) {
652 		for (i = 0; i < 512; i++) {
653 			if ((ptp[i] & (VTD_PTE_RD | VTD_PTE_WR)) == 0)
654 				continue;
655 			if ((ptp[i] & VTD_PTE_SUPERPAGE) != 0)
656 				continue;
657 			nlp = (uint64_t *)PHYS_TO_DMAP(ptp[i] & VTD_PTE_ADDR_M);
658 			vtd_free_ptp(nlp, level - 1);
659 		}
660 	}
661 
662 	bzero(ptp, PAGE_SIZE);
663 	free(ptp, M_VTD);
664 }
665 
666 static void
667 vtd_destroy_domain(void *arg)
668 {
669 	struct domain *dom;
670 
671 	dom = arg;
672 
673 	SLIST_REMOVE(&domhead, dom, domain, next);
674 	vtd_free_ptp(dom->ptp, dom->pt_levels);
675 	free(dom, M_VTD);
676 }
677 
678 struct iommu_ops iommu_ops_intel = {
679 	vtd_init,
680 	vtd_cleanup,
681 	vtd_enable,
682 	vtd_disable,
683 	vtd_create_domain,
684 	vtd_destroy_domain,
685 	vtd_create_mapping,
686 	vtd_remove_mapping,
687 	vtd_add_device,
688 	vtd_remove_device,
689 	vtd_invalidate_tlb,
690 };
691