1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2011 NetApp, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/param.h>
30 #include <sys/kernel.h>
31 #include <sys/systm.h>
32 #include <sys/malloc.h>
33
34 #include <vm/vm.h>
35 #include <vm/pmap.h>
36
37 #include <dev/pci/pcireg.h>
38
39 #include <machine/vmparam.h>
40 #include <contrib/dev/acpica/include/acpi.h>
41
42 #include "io/iommu.h"
43
44 /*
45 * Documented in the "Intel Virtualization Technology for Directed I/O",
46 * Architecture Spec, September 2008.
47 */
48
/* Extract the INCLUDE_PCI_ALL flag (bit 0) from the 'Flags' field of a DRHD. */
#define VTD_DRHD_INCLUDE_PCI_ALL(Flags) (((Flags) >> 0) & 0x1)
50
/*
 * Section 10.4 "Register Descriptions"
 *
 * Overlay for the first registers of a remapping unit.  Instances are never
 * allocated; vtd_init() points them at the unit's register block through the
 * direct map, which is why every field is volatile.
 */
struct vtdmap {
	volatile uint32_t version;
	volatile uint32_t res0;
	volatile uint64_t cap;		/* decoded with the VTD_CAP_* macros */
	volatile uint64_t ext_cap;	/* decoded with the VTD_ECAP_* macros */
	volatile uint32_t gcr;		/* command bits, see VTD_GCR_* */
	volatile uint32_t gsr;		/* status bits, see VTD_GSR_* */
	volatile uint64_t rta;		/* physical address of the root table */
	volatile uint64_t ccr;		/* context cache commands, see VTD_CCR_* */
};
62
/* Field extractors for the 'cap' register. */
#define VTD_CAP_SAGAW(cap) (((cap) >> 8) & 0x1F)	/* supported AGAW bitmask */
#define VTD_CAP_ND(cap) ((cap) & 0x7)			/* encoded number of domains */
#define VTD_CAP_CM(cap) (((cap) >> 7) & 0x1)		/* caching mode */
#define VTD_CAP_SPS(cap) (((cap) >> 34) & 0xF)		/* supported super page sizes */
#define VTD_CAP_RWBF(cap) (((cap) >> 4) & 0x1)		/* write buffer flush needed */

/* Field extractors for the 'ext_cap' register. */
#define VTD_ECAP_DI(ecap) (((ecap) >> 2) & 0x1)
#define VTD_ECAP_COHERENCY(ecap) ((ecap) & 0x1)
/* IOTLB register offset, in units of 16 bytes from the register base. */
#define VTD_ECAP_IRO(ecap) (((ecap) >> 8) & 0x3FF)

/* Command bits written to 'gcr'. */
#define VTD_GCR_WBF (1 << 27)
#define VTD_GCR_SRTP (1 << 30)
#define VTD_GCR_TE (1U << 31)

/* Status bits read from 'gsr'; each mirrors the 'gcr' bit at the same position. */
#define VTD_GSR_WBFS (1 << 27)
#define VTD_GSR_RTPS (1 << 30)
#define VTD_GSR_TES (1U << 31)

/* Bits of the context command register ('ccr'). */
#define VTD_CCR_ICC (1UL << 63) /* invalidate context cache */
#define VTD_CCR_CIRG_GLOBAL (1UL << 61) /* global invalidation */

/* Bits of the IOTLB invalidate register located via VTD_ECAP_IRO(). */
#define VTD_IIR_IVT (1UL << 63) /* invalidation IOTLB */
#define VTD_IIR_IIRG_GLOBAL (1ULL << 60) /* global IOTLB invalidation */
#define VTD_IIR_IIRG_DOMAIN (2ULL << 60) /* domain IOTLB invalidation */
#define VTD_IIR_IIRG_PAGE (3ULL << 60) /* page IOTLB invalidation */
#define VTD_IIR_DRAIN_READS (1ULL << 49) /* drain pending DMA reads */
#define VTD_IIR_DRAIN_WRITES (1ULL << 48) /* drain pending DMA writes */
#define VTD_IIR_DOMAIN_P 32

/* Bits of root-table and context-table entries. */
#define VTD_ROOT_PRESENT 0x1
#define VTD_CTX_PRESENT 0x1
#define VTD_CTX_TT_ALL (1UL << 2)

/* Bits of a page table entry. */
#define VTD_PTE_RD (1UL << 0)
#define VTD_PTE_WR (1UL << 1)
#define VTD_PTE_SUPERPAGE (1UL << 7)
#define VTD_PTE_ADDR_M (0x000FFFFFFFFFF000UL)

/*
 * Index of the first of the two 64-bit words that make up the context entry
 * selected by the low 8 bits of 'rid' within a per-bus context table.
 */
#define VTD_RID2IDX(rid) (((rid) & 0xff) * 2)
102
/*
 * Software state for one translation domain.  Created by vtd_create_domain(),
 * linked on 'domhead' and released by vtd_destroy_domain().
 */
struct domain {
	uint64_t *ptp; /* first level page table page */
	int pt_levels; /* number of page table levels */
	int addrwidth; /* 'AW' field in context entry */
	int spsmask; /* supported super page sizes */
	u_int id; /* domain id */
	vm_paddr_t maxaddr; /* highest address to be mapped */
	SLIST_ENTRY(domain) next;
};

/* Every live domain; domain_id() scans it to find an unused id. */
static SLIST_HEAD(, domain) domhead;
114
/* Capacity of the drhds[] and vtdmaps[] arrays. */
#define DRHD_MAX_UNITS 16
/*
 * DRHD structures found in the ACPI DMAR table, indexed like vtdmaps[].
 * vtd_init() does not fill this array when the units are supplied through
 * the vtd.regmap.N.addr tunables.
 */
static ACPI_DMAR_HARDWARE_UNIT *drhds[DRHD_MAX_UNITS];
/* Number of valid entries in vtdmaps[]. */
static int drhd_num;
/* Register blocks of the remapping units, accessed through the direct map. */
static struct vtdmap *vtdmaps[DRHD_MAX_UNITS];
/* Smallest domain-id limit reported by any unit; set by vtd_init(). */
static int max_domains;
typedef int (*drhd_ident_func_t)(void);

/*
 * One page holding the root table: vtd_init() fills every other 64-bit word,
 * one per bus number, with the address of the matching context table.
 */
static uint64_t root_table[PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);
/* One page-sized context table per PCI bus number. */
static uint64_t ctx_tables[256][PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);

/* malloc(9) type used for domains and page table pages. */
static MALLOC_DEFINE(M_VTD, "vtd", "vtd");
126
127 static int
vtd_max_domains(struct vtdmap * vtdmap)128 vtd_max_domains(struct vtdmap *vtdmap)
129 {
130 int nd;
131
132 nd = VTD_CAP_ND(vtdmap->cap);
133
134 switch (nd) {
135 case 0:
136 return (16);
137 case 1:
138 return (64);
139 case 2:
140 return (256);
141 case 3:
142 return (1024);
143 case 4:
144 return (4 * 1024);
145 case 5:
146 return (16 * 1024);
147 case 6:
148 return (64 * 1024);
149 default:
150 panic("vtd_max_domains: invalid value of nd (0x%0x)", nd);
151 }
152 }
153
154 static u_int
domain_id(void)155 domain_id(void)
156 {
157 u_int id;
158 struct domain *dom;
159
160 /* Skip domain id 0 - it is reserved when Caching Mode field is set */
161 for (id = 1; id < max_domains; id++) {
162 SLIST_FOREACH(dom, &domhead, next) {
163 if (dom->id == id)
164 break;
165 }
166 if (dom == NULL)
167 break; /* found it */
168 }
169
170 if (id >= max_domains)
171 panic("domain ids exhausted");
172
173 return (id);
174 }
175
176 static struct vtdmap *
vtd_device_scope(uint16_t rid)177 vtd_device_scope(uint16_t rid)
178 {
179 int i, remaining, pathremaining;
180 char *end, *pathend;
181 struct vtdmap *vtdmap;
182 ACPI_DMAR_HARDWARE_UNIT *drhd;
183 ACPI_DMAR_DEVICE_SCOPE *device_scope;
184 ACPI_DMAR_PCI_PATH *path;
185
186 for (i = 0; i < drhd_num; i++) {
187 drhd = drhds[i];
188
189 if (VTD_DRHD_INCLUDE_PCI_ALL(drhd->Flags)) {
190 /*
191 * From Intel VT-d arch spec, version 3.0:
192 * If a DRHD structure with INCLUDE_PCI_ALL flag Set is reported
193 * for a Segment, it must be enumerated by BIOS after all other
194 * DRHD structures for the same Segment.
195 */
196 vtdmap = vtdmaps[i];
197 return(vtdmap);
198 }
199
200 end = (char *)drhd + drhd->Header.Length;
201 remaining = drhd->Header.Length - sizeof(ACPI_DMAR_HARDWARE_UNIT);
202 while (remaining > sizeof(ACPI_DMAR_DEVICE_SCOPE)) {
203 device_scope = (ACPI_DMAR_DEVICE_SCOPE *)(end - remaining);
204 remaining -= device_scope->Length;
205
206 switch (device_scope->EntryType){
207 /* 0x01 and 0x02 are PCI device entries */
208 case 0x01:
209 case 0x02:
210 break;
211 default:
212 continue;
213 }
214
215 if (PCI_RID2BUS(rid) != device_scope->Bus)
216 continue;
217
218 pathend = (char *)device_scope + device_scope->Length;
219 pathremaining = device_scope->Length - sizeof(ACPI_DMAR_DEVICE_SCOPE);
220 while (pathremaining >= sizeof(ACPI_DMAR_PCI_PATH)) {
221 path = (ACPI_DMAR_PCI_PATH *)(pathend - pathremaining);
222 pathremaining -= sizeof(ACPI_DMAR_PCI_PATH);
223
224 if (PCI_RID2SLOT(rid) != path->Device)
225 continue;
226 if (PCI_RID2FUNC(rid) != path->Function)
227 continue;
228
229 vtdmap = vtdmaps[i];
230 return (vtdmap);
231 }
232 }
233 }
234
235 /* No matching scope */
236 return (NULL);
237 }
238
239 static void
vtd_wbflush(struct vtdmap * vtdmap)240 vtd_wbflush(struct vtdmap *vtdmap)
241 {
242
243 if (VTD_ECAP_COHERENCY(vtdmap->ext_cap) == 0)
244 pmap_invalidate_cache();
245
246 if (VTD_CAP_RWBF(vtdmap->cap)) {
247 vtdmap->gcr = VTD_GCR_WBF;
248 while ((vtdmap->gsr & VTD_GSR_WBFS) != 0)
249 ;
250 }
251 }
252
253 static void
vtd_ctx_global_invalidate(struct vtdmap * vtdmap)254 vtd_ctx_global_invalidate(struct vtdmap *vtdmap)
255 {
256
257 vtdmap->ccr = VTD_CCR_ICC | VTD_CCR_CIRG_GLOBAL;
258 while ((vtdmap->ccr & VTD_CCR_ICC) != 0)
259 ;
260 }
261
262 static void
vtd_iotlb_global_invalidate(struct vtdmap * vtdmap)263 vtd_iotlb_global_invalidate(struct vtdmap *vtdmap)
264 {
265 int offset;
266 volatile uint64_t *iotlb_reg, val;
267
268 vtd_wbflush(vtdmap);
269
270 offset = VTD_ECAP_IRO(vtdmap->ext_cap) * 16;
271 iotlb_reg = (volatile uint64_t *)((caddr_t)vtdmap + offset + 8);
272
273 *iotlb_reg = VTD_IIR_IVT | VTD_IIR_IIRG_GLOBAL |
274 VTD_IIR_DRAIN_READS | VTD_IIR_DRAIN_WRITES;
275
276 while (1) {
277 val = *iotlb_reg;
278 if ((val & VTD_IIR_IVT) == 0)
279 break;
280 }
281 }
282
283 static void
vtd_translation_enable(struct vtdmap * vtdmap)284 vtd_translation_enable(struct vtdmap *vtdmap)
285 {
286
287 vtdmap->gcr = VTD_GCR_TE;
288 while ((vtdmap->gsr & VTD_GSR_TES) == 0)
289 ;
290 }
291
292 static void
vtd_translation_disable(struct vtdmap * vtdmap)293 vtd_translation_disable(struct vtdmap *vtdmap)
294 {
295
296 vtdmap->gcr = 0;
297 while ((vtdmap->gsr & VTD_GSR_TES) != 0)
298 ;
299 }
300
301 static int
vtd_init(void)302 vtd_init(void)
303 {
304 int i, units, remaining, tmp;
305 struct vtdmap *vtdmap;
306 vm_paddr_t ctx_paddr;
307 char *end, envname[32];
308 unsigned long mapaddr;
309 ACPI_STATUS status;
310 ACPI_TABLE_DMAR *dmar;
311 ACPI_DMAR_HEADER *hdr;
312 ACPI_DMAR_HARDWARE_UNIT *drhd;
313
314 /*
315 * Allow the user to override the ACPI DMAR table by specifying the
316 * physical address of each remapping unit.
317 *
318 * The following example specifies two remapping units at
319 * physical addresses 0xfed90000 and 0xfeda0000 respectively.
320 * set vtd.regmap.0.addr=0xfed90000
321 * set vtd.regmap.1.addr=0xfeda0000
322 */
323 for (units = 0; units < DRHD_MAX_UNITS; units++) {
324 snprintf(envname, sizeof(envname), "vtd.regmap.%d.addr", units);
325 if (getenv_ulong(envname, &mapaddr) == 0)
326 break;
327 vtdmaps[units] = (struct vtdmap *)PHYS_TO_DMAP(mapaddr);
328 }
329
330 if (units > 0)
331 goto skip_dmar;
332
333 /* Search for DMAR table. */
334 status = AcpiGetTable(ACPI_SIG_DMAR, 0, (ACPI_TABLE_HEADER **)&dmar);
335 if (ACPI_FAILURE(status))
336 return (ENXIO);
337
338 end = (char *)dmar + dmar->Header.Length;
339 remaining = dmar->Header.Length - sizeof(ACPI_TABLE_DMAR);
340 while (remaining > sizeof(ACPI_DMAR_HEADER)) {
341 hdr = (ACPI_DMAR_HEADER *)(end - remaining);
342 if (hdr->Length > remaining)
343 break;
344 /*
345 * From Intel VT-d arch spec, version 1.3:
346 * BIOS implementations must report mapping structures
347 * in numerical order, i.e. All remapping structures of
348 * type 0 (DRHD) enumerated before remapping structures of
349 * type 1 (RMRR) and so forth.
350 */
351 if (hdr->Type != ACPI_DMAR_TYPE_HARDWARE_UNIT)
352 break;
353
354 drhd = (ACPI_DMAR_HARDWARE_UNIT *)hdr;
355 drhds[units] = drhd;
356 vtdmaps[units] = (struct vtdmap *)PHYS_TO_DMAP(drhd->Address);
357 if (++units >= DRHD_MAX_UNITS)
358 break;
359 remaining -= hdr->Length;
360 }
361
362 if (units <= 0)
363 return (ENXIO);
364
365 skip_dmar:
366 drhd_num = units;
367
368 max_domains = 64 * 1024; /* maximum valid value */
369 for (i = 0; i < drhd_num; i++){
370 vtdmap = vtdmaps[i];
371
372 if (VTD_CAP_CM(vtdmap->cap) != 0)
373 panic("vtd_init: invalid caching mode");
374
375 /* take most compatible (minimum) value */
376 if ((tmp = vtd_max_domains(vtdmap)) < max_domains)
377 max_domains = tmp;
378 }
379
380 /*
381 * Set up the root-table to point to the context-entry tables
382 */
383 for (i = 0; i < 256; i++) {
384 ctx_paddr = vtophys(ctx_tables[i]);
385 if (ctx_paddr & PAGE_MASK)
386 panic("ctx table (0x%0lx) not page aligned", ctx_paddr);
387
388 root_table[i * 2] = ctx_paddr | VTD_ROOT_PRESENT;
389 }
390
391 return (0);
392 }
393
/*
 * 'cleanup' hook of iommu_ops_intel.  Intentionally empty: this file keeps
 * its tables in static storage, so there is nothing to release here.
 */
static void
vtd_cleanup(void)
{
}
398
399 static void
vtd_enable(void)400 vtd_enable(void)
401 {
402 int i;
403 struct vtdmap *vtdmap;
404
405 for (i = 0; i < drhd_num; i++) {
406 vtdmap = vtdmaps[i];
407 vtd_wbflush(vtdmap);
408
409 /* Update the root table address */
410 vtdmap->rta = vtophys(root_table);
411 vtdmap->gcr = VTD_GCR_SRTP;
412 while ((vtdmap->gsr & VTD_GSR_RTPS) == 0)
413 ;
414
415 vtd_ctx_global_invalidate(vtdmap);
416 vtd_iotlb_global_invalidate(vtdmap);
417
418 vtd_translation_enable(vtdmap);
419 }
420 }
421
422 static void
vtd_disable(void)423 vtd_disable(void)
424 {
425 int i;
426 struct vtdmap *vtdmap;
427
428 for (i = 0; i < drhd_num; i++) {
429 vtdmap = vtdmaps[i];
430 vtd_translation_disable(vtdmap);
431 }
432 }
433
434 static int
vtd_add_device(void * arg,device_t dev __unused,uint16_t rid)435 vtd_add_device(void *arg, device_t dev __unused, uint16_t rid)
436 {
437 int idx;
438 uint64_t *ctxp;
439 struct domain *dom = arg;
440 vm_paddr_t pt_paddr;
441 struct vtdmap *vtdmap;
442 uint8_t bus;
443
444 KASSERT(dom != NULL, ("domain is NULL"));
445
446 bus = PCI_RID2BUS(rid);
447 ctxp = ctx_tables[bus];
448 pt_paddr = vtophys(dom->ptp);
449 idx = VTD_RID2IDX(rid);
450
451 if (ctxp[idx] & VTD_CTX_PRESENT) {
452 panic("vtd_add_device: device %x is already owned by "
453 "domain %d", rid,
454 (uint16_t)(ctxp[idx + 1] >> 8));
455 }
456
457 if ((vtdmap = vtd_device_scope(rid)) == NULL)
458 panic("vtd_add_device: device %x is not in scope for "
459 "any DMA remapping unit", rid);
460
461 /*
462 * Order is important. The 'present' bit is set only after all fields
463 * of the context pointer are initialized.
464 */
465 ctxp[idx + 1] = dom->addrwidth | (dom->id << 8);
466
467 if (VTD_ECAP_DI(vtdmap->ext_cap))
468 ctxp[idx] = VTD_CTX_TT_ALL;
469 else
470 ctxp[idx] = 0;
471
472 ctxp[idx] |= pt_paddr | VTD_CTX_PRESENT;
473
474 /*
475 * 'Not Present' entries are not cached in either the Context Cache
476 * or in the IOTLB, so there is no need to invalidate either of them.
477 */
478 return (0);
479 }
480
481 static int
vtd_remove_device(void * arg,device_t dev __unused,uint16_t rid)482 vtd_remove_device(void *arg, device_t dev __unused, uint16_t rid)
483 {
484 int i, idx;
485 uint64_t *ctxp;
486 struct vtdmap *vtdmap;
487 uint8_t bus;
488
489 bus = PCI_RID2BUS(rid);
490 ctxp = ctx_tables[bus];
491 idx = VTD_RID2IDX(rid);
492
493 /*
494 * Order is important. The 'present' bit is must be cleared first.
495 */
496 ctxp[idx] = 0;
497 ctxp[idx + 1] = 0;
498
499 /*
500 * Invalidate the Context Cache and the IOTLB.
501 *
502 * XXX use device-selective invalidation for Context Cache
503 * XXX use domain-selective invalidation for IOTLB
504 */
505 for (i = 0; i < drhd_num; i++) {
506 vtdmap = vtdmaps[i];
507 vtd_ctx_global_invalidate(vtdmap);
508 vtd_iotlb_global_invalidate(vtdmap);
509 }
510 return (0);
511 }
512
/* Values for the 'remove' argument of vtd_update_mapping(). */
#define CREATE_MAPPING 0
#define REMOVE_MAPPING 1

/*
 * Install or remove a single translation for 'gpa' in the page tables of
 * the domain 'arg'.
 *
 * One call handles exactly one page table entry: the largest page size that
 * the domain's 'spsmask', the alignment of 'gpa' and 'hpa', and 'len' allow.
 * The return value is the number of bytes that entry covers, so a caller
 * that wants to cover all of 'len' has to call again for the remainder.
 *
 * 'gpa', 'hpa' and 'len' must be page aligned; the function panics if not.
 * 'remove' is CREATE_MAPPING or REMOVE_MAPPING; 'hpa' is not stored when
 * removing.
 *
 * Missing intermediate page table pages are allocated with M_WAITOK on the
 * way down, for removals as well as for creations.
 */
static uint64_t
vtd_update_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len,
    int remove)
{
	struct domain *dom;
	int i, spshift, ptpshift, ptpindex, nlevels;
	uint64_t spsize, *ptp;

	dom = arg;
	ptpindex = 0;
	ptpshift = 0;

	/* First assertion rejects a zero length and address wrap-around. */
	KASSERT(gpa + len > gpa, ("%s: invalid gpa range %#lx/%#lx", __func__,
	    gpa, len));
	KASSERT(gpa + len <= dom->maxaddr, ("%s: gpa range %#lx/%#lx beyond "
	    "domain maxaddr %#lx", __func__, gpa, len, dom->maxaddr));

	if (gpa & PAGE_MASK)
		panic("vtd_create_mapping: unaligned gpa 0x%0lx", gpa);

	if (hpa & PAGE_MASK)
		panic("vtd_create_mapping: unaligned hpa 0x%0lx", hpa);

	if (len & PAGE_MASK)
		panic("vtd_create_mapping: unaligned len 0x%0lx", len);

	/*
	 * Compute the size of the mapping that we can accommodate.
	 *
	 * This is based on three factors:
	 * - supported super page size
	 * - alignment of the region starting at 'gpa' and 'hpa'
	 * - length of the region 'len'
	 */
	/*
	 * Try shifts 48, 39, 30 and 21 (bits 3..0 of spsmask) from largest to
	 * smallest.  If none qualifies the loop ends with spshift == 12.
	 */
	spshift = 48;
	for (i = 3; i >= 0; i--) {
		spsize = 1UL << spshift;
		if ((dom->spsmask & (1 << i)) != 0 &&
		    (gpa & (spsize - 1)) == 0 &&
		    (hpa & (spsize - 1)) == 0 &&
		    (len >= spsize)) {
			break;
		}
		spshift -= 9;
	}

	/*
	 * Walk down from the top level table.  Each level resolves 9 bits of
	 * 'gpa'; the walk stops at the level whose entries cover at most the
	 * mapping size chosen above.
	 */
	ptp = dom->ptp;
	nlevels = dom->pt_levels;
	while (--nlevels >= 0) {
		ptpshift = 12 + nlevels * 9;
		ptpindex = (gpa >> ptpshift) & 0x1FF;

		/* We have reached the leaf mapping */
		if (spshift >= ptpshift) {
			break;
		}

		/*
		 * We are working on a non-leaf page table page.
		 *
		 * Create a downstream page table page if necessary and point
		 * to it from the current page table.
		 */
		if (ptp[ptpindex] == 0) {
			void *nlp = malloc(PAGE_SIZE, M_VTD, M_WAITOK | M_ZERO);
			ptp[ptpindex] = vtophys(nlp)| VTD_PTE_RD | VTD_PTE_WR;
		}

		ptp = (uint64_t *)PHYS_TO_DMAP(ptp[ptpindex] & VTD_PTE_ADDR_M);
	}

	/* 'gpa' must be aligned to the size of the entry we stopped at. */
	if ((gpa & ((1UL << ptpshift) - 1)) != 0)
		panic("gpa 0x%lx and ptpshift %d mismatch", gpa, ptpshift);

	/*
	 * Update the 'gpa' -> 'hpa' mapping
	 */
	if (remove) {
		ptp[ptpindex] = 0;
	} else {
		ptp[ptpindex] = hpa | VTD_PTE_RD | VTD_PTE_WR;

		/* Stopping above the last level means this is a superpage. */
		if (nlevels > 0)
			ptp[ptpindex] |= VTD_PTE_SUPERPAGE;
	}

	return (1UL << ptpshift);
}
604
605 static int
vtd_create_mapping(void * arg,vm_paddr_t gpa,vm_paddr_t hpa,uint64_t len,uint64_t * res_len)606 vtd_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len,
607 uint64_t *res_len)
608 {
609
610 *res_len = vtd_update_mapping(arg, gpa, hpa, len, CREATE_MAPPING);
611 return (0);
612 }
613
614 static int
vtd_remove_mapping(void * arg,vm_paddr_t gpa,uint64_t len,uint64_t * res_len)615 vtd_remove_mapping(void *arg, vm_paddr_t gpa, uint64_t len, uint64_t *res_len)
616 {
617
618 *res_len = vtd_update_mapping(arg, gpa, 0, len, REMOVE_MAPPING);
619 return (0);
620 }
621
622 static int
vtd_invalidate_tlb(void * dom)623 vtd_invalidate_tlb(void *dom)
624 {
625 int i;
626 struct vtdmap *vtdmap;
627
628 /*
629 * Invalidate the IOTLB.
630 * XXX use domain-selective invalidation for IOTLB
631 */
632 for (i = 0; i < drhd_num; i++) {
633 vtdmap = vtdmaps[i];
634 vtd_iotlb_global_invalidate(vtdmap);
635 }
636 return (0);
637 }
638
639 static void *
vtd_create_domain(vm_paddr_t maxaddr)640 vtd_create_domain(vm_paddr_t maxaddr)
641 {
642 struct domain *dom;
643 vm_paddr_t addr;
644 int tmp, i, gaw, agaw, sagaw, res, pt_levels, addrwidth;
645 struct vtdmap *vtdmap;
646
647 if (drhd_num <= 0)
648 panic("vtd_create_domain: no dma remapping hardware available");
649
650 /*
651 * Calculate AGAW.
652 * Section 3.4.2 "Adjusted Guest Address Width", Architecture Spec.
653 */
654 addr = 0;
655 for (gaw = 0; addr < maxaddr; gaw++)
656 addr = 1ULL << gaw;
657
658 res = (gaw - 12) % 9;
659 if (res == 0)
660 agaw = gaw;
661 else
662 agaw = gaw + 9 - res;
663
664 if (agaw > 64)
665 agaw = 64;
666
667 /*
668 * Select the smallest Supported AGAW and the corresponding number
669 * of page table levels.
670 */
671 pt_levels = 2;
672 sagaw = 30;
673 addrwidth = 0;
674
675 tmp = ~0;
676 for (i = 0; i < drhd_num; i++) {
677 vtdmap = vtdmaps[i];
678 /* take most compatible value */
679 tmp &= VTD_CAP_SAGAW(vtdmap->cap);
680 }
681
682 for (i = 0; i < 5; i++) {
683 if ((tmp & (1 << i)) != 0 && sagaw >= agaw)
684 break;
685 pt_levels++;
686 addrwidth++;
687 sagaw += 9;
688 if (sagaw > 64)
689 sagaw = 64;
690 }
691
692 if (i >= 5) {
693 panic("vtd_create_domain: SAGAW 0x%x does not support AGAW %d",
694 tmp, agaw);
695 }
696
697 dom = malloc(sizeof(struct domain), M_VTD, M_ZERO | M_WAITOK);
698 dom->pt_levels = pt_levels;
699 dom->addrwidth = addrwidth;
700 dom->id = domain_id();
701 dom->maxaddr = maxaddr;
702 dom->ptp = malloc(PAGE_SIZE, M_VTD, M_ZERO | M_WAITOK);
703 if ((uintptr_t)dom->ptp & PAGE_MASK)
704 panic("vtd_create_domain: ptp (%p) not page aligned", dom->ptp);
705
706 #ifdef notyet
707 /*
708 * XXX superpage mappings for the iommu do not work correctly.
709 *
710 * By default all physical memory is mapped into the host_domain.
711 * When a VM is allocated wired memory the pages belonging to it
712 * are removed from the host_domain and added to the vm's domain.
713 *
714 * If the page being removed was mapped using a superpage mapping
715 * in the host_domain then we need to demote the mapping before
716 * removing the page.
717 *
718 * There is not any code to deal with the demotion at the moment
719 * so we disable superpage mappings altogether.
720 */
721 dom->spsmask = ~0;
722 for (i = 0; i < drhd_num; i++) {
723 vtdmap = vtdmaps[i];
724 /* take most compatible value */
725 dom->spsmask &= VTD_CAP_SPS(vtdmap->cap);
726 }
727 #endif
728
729 SLIST_INSERT_HEAD(&domhead, dom, next);
730
731 return (dom);
732 }
733
734 static void
vtd_free_ptp(uint64_t * ptp,int level)735 vtd_free_ptp(uint64_t *ptp, int level)
736 {
737 int i;
738 uint64_t *nlp;
739
740 if (level > 1) {
741 for (i = 0; i < 512; i++) {
742 if ((ptp[i] & (VTD_PTE_RD | VTD_PTE_WR)) == 0)
743 continue;
744 if ((ptp[i] & VTD_PTE_SUPERPAGE) != 0)
745 continue;
746 nlp = (uint64_t *)PHYS_TO_DMAP(ptp[i] & VTD_PTE_ADDR_M);
747 vtd_free_ptp(nlp, level - 1);
748 }
749 }
750
751 bzero(ptp, PAGE_SIZE);
752 free(ptp, M_VTD);
753 }
754
755 static void
vtd_destroy_domain(void * arg)756 vtd_destroy_domain(void *arg)
757 {
758 struct domain *dom;
759
760 dom = arg;
761
762 SLIST_REMOVE(&domhead, dom, domain, next);
763 vtd_free_ptp(dom->ptp, dom->pt_levels);
764 free(dom, M_VTD);
765 }
766
/*
 * Operations table exporting the VT-d implementation in this file; it is
 * the only symbol here with external linkage.
 */
const struct iommu_ops iommu_ops_intel = {
	.init = vtd_init,
	.cleanup = vtd_cleanup,
	.enable = vtd_enable,
	.disable = vtd_disable,
	.create_domain = vtd_create_domain,
	.destroy_domain = vtd_destroy_domain,
	.create_mapping = vtd_create_mapping,
	.remove_mapping = vtd_remove_mapping,
	.add_device = vtd_add_device,
	.remove_device = vtd_remove_device,
	.invalidate_tlb = vtd_invalidate_tlb,
};
780