/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/rman.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_map.h>
#include <contrib/dev/acpica/include/acpi.h>
#include <contrib/dev/acpica/include/accommon.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <x86/include/busdma_impl.h>
#include <dev/iommu/busdma_iommu.h>
#include <x86/iommu/intel_reg.h>
#include <x86/iommu/x86_iommu.h>
#include <x86/iommu/intel_dmar.h>

static MALLOC_DEFINE(M_DMAR_CTX, "dmar_ctx", "Intel DMAR Context");
static MALLOC_DEFINE(M_DMAR_DOMAIN, "dmar_dom", "Intel DMAR Domain");

static void dmar_unref_domain_locked(struct dmar_unit *dmar,
    struct dmar_domain *domain);
static void dmar_domain_destroy(struct dmar_domain *domain);

static void dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx);
static void
dmar_ensure_ctx_page(struct dmar_unit *dmar, int bus)
{
        struct sf_buf *sf;
        dmar_root_entry_t *re;
        vm_page_t ctxm;

        /*
         * If the context page is already allocated, it is already
         * linked into the root entry, and there is nothing more to do.
         */
        ctxm = iommu_pgalloc(dmar->ctx_obj, 1 + bus, IOMMU_PGF_NOALLOC);
        if (ctxm != NULL)
                return;

        /*
         * The page is not present; allocate and link it.  Note that
         * another thread might execute this sequence in parallel.
         * This should be safe, because the context entries written by
         * both threads are equal.
         */
        TD_PREP_PINNED_ASSERT;
        ctxm = iommu_pgalloc(dmar->ctx_obj, 1 + bus, IOMMU_PGF_ZERO |
            IOMMU_PGF_WAITOK);
        re = iommu_map_pgtbl(dmar->ctx_obj, 0, IOMMU_PGF_NOALLOC, &sf);
        re += bus;
        dmar_pte_store(&re->r1, DMAR_ROOT_R1_P | (DMAR_ROOT_R1_CTP_MASK &
            VM_PAGE_TO_PHYS(ctxm)));
        dmar_flush_root_to_ram(dmar, re);
        iommu_unmap_pgtbl(sf);
        TD_PINNED_ASSERT;
}

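/*
 * Map the page of context entries covering the bus of the given
 * context and return a pointer to the entry for its request id.  The
 * caller unmaps the page through the returned sf_buf.
 */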
static dmar_ctx_entry_t *
dmar_map_ctx_entry(struct dmar_ctx *ctx, struct sf_buf **sfp)
{
        struct dmar_unit *dmar;
        dmar_ctx_entry_t *ctxp;

        dmar = CTX2DMAR(ctx);

        ctxp = iommu_map_pgtbl(dmar->ctx_obj, 1 + PCI_RID2BUS(ctx->context.rid),
            IOMMU_PGF_NOALLOC | IOMMU_PGF_WAITOK, sfp);
        ctxp += ctx->context.rid & 0xff;
        return (ctxp);
}

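/*
 * Fill a single context entry: the domain id and address width go into
 * the second doubleword pair, while the translation type and the root
 * of the page table (or pass-through when ctx_root is NULL) go into
 * the first.
 */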
static void
ctx_id_entry_init_one(dmar_ctx_entry_t *ctxp, struct dmar_domain *domain,
    vm_page_t ctx_root)
{
        /*
         * For an update due to a move, the store is not atomic.  It is
         * possible that the DMAR reads the upper doubleword while the
         * lower doubleword is not yet updated.  The domain id is
         * stored in the upper doubleword, while the table pointer is
         * in the lower one.
         *
         * There is no good solution, and for the same reason it is
         * wrong to clear the P bit in the ctx entry for the update.
         */
        dmar_pte_store1(&ctxp->ctx2, DMAR_CTX2_DID(domain->domain) |
            domain->awlvl);
        if (ctx_root == NULL) {
                dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_PASS | DMAR_CTX1_P);
        } else {
                dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_UNTR |
                    (DMAR_CTX1_ASR_MASK & VM_PAGE_TO_PHYS(ctx_root)) |
                    DMAR_CTX1_P);
        }
}

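/*
 * Initialize the context entry (or every entry on the bus for a
 * bus-wide context) to point at the domain's page table, or to
 * pass-through for an identity-mapped domain on a unit that supports
 * it, and flush the entries to RAM.
 */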
static void
ctx_id_entry_init(struct dmar_ctx *ctx, dmar_ctx_entry_t *ctxp, bool move,
    int busno)
{
        struct dmar_unit *unit;
        struct dmar_domain *domain;
        vm_page_t ctx_root;
        int i;

        domain = CTX2DOM(ctx);
        unit = DOM2DMAR(domain);
        KASSERT(move || (ctxp->ctx1 == 0 && ctxp->ctx2 == 0),
            ("dmar%d: initialized ctx entry %d:%d:%d 0x%jx 0x%jx",
            unit->iommu.unit, busno, pci_get_slot(ctx->context.tag->owner),
            pci_get_function(ctx->context.tag->owner),
            ctxp->ctx1, ctxp->ctx2));

        if ((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) != 0 &&
            (unit->hw_ecap & DMAR_ECAP_PT) != 0) {
                KASSERT(domain->pgtbl_obj == NULL,
                    ("ctx %p non-null pgtbl_obj", ctx));
                ctx_root = NULL;
        } else {
                ctx_root = iommu_pgalloc(domain->pgtbl_obj, 0,
                    IOMMU_PGF_NOALLOC);
        }

        if (iommu_is_buswide_ctx(DMAR2IOMMU(unit), busno)) {
                MPASS(!move);
                for (i = 0; i <= PCI_BUSMAX; i++) {
                        ctx_id_entry_init_one(&ctxp[i], domain, ctx_root);
                }
        } else {
                ctx_id_entry_init_one(ctxp, domain, ctx_root);
        }
        dmar_flush_ctx_to_ram(unit, ctxp);
}

static int
dmar_flush_for_ctx_entry(struct dmar_unit *dmar, bool force)
{
        int error;

        /*
         * If the dmar declares Caching Mode as set, follow 11.5
         * "Caching Mode Consideration" and do the (global)
         * invalidation of the negative TLB entries.
         */
        if ((dmar->hw_cap & DMAR_CAP_CM) == 0 && !force)
                return (0);
        if (dmar->qi_enabled) {
                dmar_qi_invalidate_ctx_glob_locked(dmar);
                if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force)
                        dmar_qi_invalidate_iotlb_glob_locked(dmar);
                return (0);
        }
        error = dmar_inv_ctx_glob(dmar);
        if (error == 0 && ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force))
                error = dmar_inv_iotlb_glob(dmar);
        return (error);
}

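/*
 * Map the RMRR regions reported by the BIOS for this device into the
 * domain, so that DMA to those regions keeps working once the
 * translation is enabled.  Entries mapped successfully are owned by
 * the context and released on its destruction.
 */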
static int
domain_init_rmrr(struct dmar_domain *domain, device_t dev, int bus,
    int slot, int func, int dev_domain, int dev_busno,
    const void *dev_path, int dev_path_len)
{
        struct iommu_map_entries_tailq rmrr_entries;
        struct iommu_map_entry *entry, *entry1;
        vm_page_t *ma;
        iommu_gaddr_t start, end;
        vm_pindex_t size, i;
        int error, error1;

        if (!dmar_rmrr_enable)
                return (0);

        error = 0;
        TAILQ_INIT(&rmrr_entries);
        dmar_dev_parse_rmrr(domain, dev_domain, dev_busno, dev_path,
            dev_path_len, &rmrr_entries);
        TAILQ_FOREACH_SAFE(entry, &rmrr_entries, dmamap_link, entry1) {
                /*
                 * The VT-d specification requires that the start of an
                 * RMRR entry is 4k-aligned.  Buggy BIOSes put anything
                 * into the start and end fields.  Truncate and round
                 * as necessary.
                 *
                 * We also allow overlapping RMRR entries, see
                 * iommu_gas_alloc_region().
                 */
                start = entry->start;
                end = entry->end;
                if (bootverbose)
                        printf("dmar%d ctx pci%d:%d:%d RMRR [%#jx, %#jx]\n",
                            domain->iodom.iommu->unit, bus, slot, func,
                            (uintmax_t)start, (uintmax_t)end);
                entry->start = trunc_page(start);
                entry->end = round_page(end);
                if (entry->start == entry->end) {
                        /* Workaround for some AMI (?) BIOSes */
                        if (bootverbose) {
                                if (dev != NULL)
                                        device_printf(dev, "");
                                printf("pci%d:%d:%d ", bus, slot, func);
                                printf("BIOS bug: dmar%d RMRR "
                                    "region (%jx, %jx) corrected\n",
                                    domain->iodom.iommu->unit, start, end);
                        }
                        entry->end += IOMMU_PAGE_SIZE * 0x20;
                }
                size = OFF_TO_IDX(entry->end - entry->start);
                ma = malloc(sizeof(vm_page_t) * size, M_TEMP, M_WAITOK);
                for (i = 0; i < size; i++) {
                        ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
                            VM_MEMATTR_DEFAULT);
                }
                error1 = iommu_gas_map_region(DOM2IODOM(domain), entry,
                    IOMMU_MAP_ENTRY_READ | IOMMU_MAP_ENTRY_WRITE,
                    IOMMU_MF_CANWAIT | IOMMU_MF_RMRR, ma);
                /*
                 * Successfully mapped RMRR entries are owned by the
                 * context RB tree.  Get rid of a failed entry, but do
                 * not stop the loop.  The rest of the parsed RMRR
                 * entries are loaded and removed on the context
                 * destruction.
                 */
                if (error1 == 0 && entry->end != entry->start) {
                        IOMMU_LOCK(domain->iodom.iommu);
                        domain->refs++; /* XXXKIB prevent free */
                        domain->iodom.flags |= IOMMU_DOMAIN_RMRR;
                        IOMMU_UNLOCK(domain->iodom.iommu);
                } else {
                        if (error1 != 0) {
                                if (dev != NULL)
                                        device_printf(dev, "");
                                printf("pci%d:%d:%d ", bus, slot, func);
                                printf(
                            "dmar%d failed to map RMRR region (%jx, %jx) %d\n",
                                    domain->iodom.iommu->unit, start, end,
                                    error1);
                                error = error1;
                        }
                        TAILQ_REMOVE(&rmrr_entries, entry, dmamap_link);
                        iommu_gas_free_entry(entry);
                }
                for (i = 0; i < size; i++)
                        vm_page_putfake(ma[i]);
                free(ma, M_TEMP);
        }
        return (error);
}

/*
 * PCI memory address space is shared between memory-mapped devices (MMIO) and
 * host memory (which may be remapped by an IOMMU).  Device accesses to an
 * address within a memory aperture in a PCIe root port will be treated as
 * peer-to-peer and not forwarded to an IOMMU.  To avoid this, reserve the
 * address space of the root port's memory apertures in the address space used
 * by the IOMMU for remapping.
 */
static int
dmar_reserve_pci_regions(struct dmar_domain *domain, device_t dev)
{
        struct iommu_domain *iodom;
        device_t root;
        uint32_t val;
        uint64_t base, limit;
        int error;

        iodom = DOM2IODOM(domain);

        root = pci_find_pcie_root_port(dev);
        if (root == NULL)
                return (0);

        /* Disable downstream memory */
        base = PCI_PPBMEMBASE(0, pci_read_config(root, PCIR_MEMBASE_1, 2));
        limit = PCI_PPBMEMLIMIT(0, pci_read_config(root, PCIR_MEMLIMIT_1, 2));
        error = iommu_gas_reserve_region_extend(iodom, base, limit + 1);
        if (bootverbose || error != 0)
                device_printf(dev, "DMAR reserve [%#jx-%#jx] (error %d)\n",
                    base, limit + 1, error);
        if (error != 0)
                return (error);

        /* Disable downstream prefetchable memory */
        val = pci_read_config(root, PCIR_PMBASEL_1, 2);
        if (val != 0 || pci_read_config(root, PCIR_PMLIMITL_1, 2) != 0) {
                if ((val & PCIM_BRPM_MASK) == PCIM_BRPM_64) {
                        base = PCI_PPBMEMBASE(
                            pci_read_config(root, PCIR_PMBASEH_1, 4),
                            val);
                        limit = PCI_PPBMEMLIMIT(
                            pci_read_config(root, PCIR_PMLIMITH_1, 4),
                            pci_read_config(root, PCIR_PMLIMITL_1, 2));
                } else {
                        base = PCI_PPBMEMBASE(0, val);
                        limit = PCI_PPBMEMLIMIT(0,
                            pci_read_config(root, PCIR_PMLIMITL_1, 2));
                }
                error = iommu_gas_reserve_region_extend(iodom, base,
                    limit + 1);
                if (bootverbose || error != 0)
                        device_printf(dev, "DMAR reserve [%#jx-%#jx] "
                            "(error %d)\n", base, limit + 1, error);
                if (error != 0)
                        return (error);
        }

        return (error);
}

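/*
 * Allocate a new domain on the unit: assign a domain id, pick the
 * address width, initialize the guest address space allocator, and
 * either set up the identity mapping or allocate a page table and
 * reserve the local APIC MSI range.
 */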
static struct dmar_domain *
dmar_domain_alloc(struct dmar_unit *dmar, bool id_mapped)
{
        struct iommu_domain *iodom;
        struct iommu_unit *unit;
        struct dmar_domain *domain;
        int error, id, mgaw;

        id = alloc_unr(dmar->domids);
        if (id == -1)
                return (NULL);
        domain = malloc(sizeof(*domain), M_DMAR_DOMAIN, M_WAITOK | M_ZERO);
        iodom = DOM2IODOM(domain);
        unit = DMAR2IOMMU(dmar);
        domain->domain = id;
        LIST_INIT(&iodom->contexts);
        iommu_domain_init(unit, iodom, &dmar_domain_map_ops);

        domain->dmar = dmar;

        /*
         * For now, use the maximal usable physical address of the
         * installed memory to calculate the mgaw for an id_mapped
         * domain.  It is useful for the identity mapping, and less so
         * for the virtualized bus address space.
         */
        domain->iodom.end = id_mapped ? ptoa(Maxmem) : BUS_SPACE_MAXADDR;
        mgaw = dmar_maxaddr2mgaw(dmar, domain->iodom.end, !id_mapped);
        error = domain_set_agaw(domain, mgaw);
        if (error != 0)
                goto fail;
        if (!id_mapped)
                /* Use all supported address space for remapping. */
                domain->iodom.end = 1ULL << (domain->agaw - 1);

        iommu_gas_init_domain(DOM2IODOM(domain));

        if (id_mapped) {
                if ((dmar->hw_ecap & DMAR_ECAP_PT) == 0) {
                        domain->pgtbl_obj = dmar_get_idmap_pgtbl(domain,
                            domain->iodom.end);
                }
                domain->iodom.flags |= IOMMU_DOMAIN_IDMAP;
        } else {
                error = dmar_domain_alloc_pgtbl(domain);
                if (error != 0)
                        goto fail;
                /* Disable local apic region access */
                error = iommu_gas_reserve_region(iodom, 0xfee00000,
                    0xfeefffff + 1, &iodom->msi_entry);
                if (error != 0)
                        goto fail;
        }
        return (domain);

fail:
        dmar_domain_destroy(domain);
        return (NULL);
}

static struct dmar_ctx *
dmar_ctx_alloc(struct dmar_domain *domain, uint16_t rid)
{
        struct dmar_ctx *ctx;

        ctx = malloc(sizeof(*ctx), M_DMAR_CTX, M_WAITOK | M_ZERO);
        ctx->context.domain = DOM2IODOM(domain);
        ctx->context.tag = malloc(sizeof(struct bus_dma_tag_iommu),
            M_DMAR_CTX, M_WAITOK | M_ZERO);
        ctx->context.rid = rid;
        ctx->context.refs = 1;
        return (ctx);
}

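/*
 * Linking a context into its domain takes a domain reference in
 * addition to bumping ctx_cnt, so a live domain always holds at least
 * as many references as linked contexts; dmar_ctx_link() asserts this.
 */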
static void
dmar_ctx_link(struct dmar_ctx *ctx)
{
        struct dmar_domain *domain;

        domain = CTX2DOM(ctx);
        IOMMU_ASSERT_LOCKED(domain->iodom.iommu);
        KASSERT(domain->refs >= domain->ctx_cnt,
            ("dom %p ref underflow %d %d", domain, domain->refs,
            domain->ctx_cnt));
        domain->refs++;
        domain->ctx_cnt++;
        LIST_INSERT_HEAD(&domain->iodom.contexts, &ctx->context, link);
}

static void
dmar_ctx_unlink(struct dmar_ctx *ctx)
{
        struct dmar_domain *domain;

        domain = CTX2DOM(ctx);
        IOMMU_ASSERT_LOCKED(domain->iodom.iommu);
        KASSERT(domain->refs > 0,
            ("domain %p ctx dtr refs %d", domain, domain->refs));
        KASSERT(domain->ctx_cnt >= domain->refs,
            ("domain %p ctx dtr refs %d ctx_cnt %d", domain,
            domain->refs, domain->ctx_cnt));
        domain->refs--;
        domain->ctx_cnt--;
        LIST_REMOVE(&ctx->context, link);
}

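/*
 * Release all resources of a domain that has no contexts and no
 * references left: the GAS entries, the page table, the domain id,
 * and the memory of the structure itself.
 */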
static void
dmar_domain_destroy(struct dmar_domain *domain)
{
        struct iommu_domain *iodom;
        struct dmar_unit *dmar;

        iodom = DOM2IODOM(domain);

        KASSERT(TAILQ_EMPTY(&domain->iodom.unload_entries),
            ("unfinished unloads %p", domain));
        KASSERT(LIST_EMPTY(&iodom->contexts),
            ("destroying dom %p with contexts", domain));
        KASSERT(domain->ctx_cnt == 0,
            ("destroying dom %p with ctx_cnt %d", domain, domain->ctx_cnt));
        KASSERT(domain->refs == 0,
            ("destroying dom %p with refs %d", domain, domain->refs));
        if ((domain->iodom.flags & IOMMU_DOMAIN_GAS_INITED) != 0) {
                DMAR_DOMAIN_LOCK(domain);
                iommu_gas_fini_domain(iodom);
                DMAR_DOMAIN_UNLOCK(domain);
        }
        if ((domain->iodom.flags & IOMMU_DOMAIN_PGTBL_INITED) != 0) {
                if (domain->pgtbl_obj != NULL)
                        DMAR_DOMAIN_PGLOCK(domain);
                dmar_domain_free_pgtbl(domain);
        }
        iommu_domain_fini(iodom);
        dmar = DOM2DMAR(domain);
        free_unr(dmar->domids, domain->domain);
        free(domain, M_DMAR_DOMAIN);
}

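/*
 * Find or create the context for the given request id.  The sleeping
 * allocations (context page, domain, RMRR mappings) are performed with
 * the dmar lock dropped, and the context list is rechecked afterwards
 * in case another thread created the context in the meantime.
 */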
static struct dmar_ctx *
dmar_get_ctx_for_dev1(struct dmar_unit *dmar, device_t dev, uint16_t rid,
    int dev_domain, int dev_busno, const void *dev_path, int dev_path_len,
    bool id_mapped, bool rmrr_init)
{
        struct dmar_domain *domain, *domain1;
        struct dmar_ctx *ctx, *ctx1;
        struct iommu_unit *unit __diagused;
        dmar_ctx_entry_t *ctxp;
        struct sf_buf *sf;
        int bus, slot, func, error;
        bool enable;

        if (dev != NULL) {
                bus = pci_get_bus(dev);
                slot = pci_get_slot(dev);
                func = pci_get_function(dev);
        } else {
                bus = PCI_RID2BUS(rid);
                slot = PCI_RID2SLOT(rid);
                func = PCI_RID2FUNC(rid);
        }
        enable = false;
        TD_PREP_PINNED_ASSERT;
        unit = DMAR2IOMMU(dmar);
        DMAR_LOCK(dmar);
        KASSERT(!iommu_is_buswide_ctx(unit, bus) || (slot == 0 && func == 0),
            ("iommu%d pci%d:%d:%d get_ctx for buswide", dmar->iommu.unit, bus,
            slot, func));
        ctx = dmar_find_ctx_locked(dmar, rid);
        error = 0;
        if (ctx == NULL) {
                /*
                 * Perform the allocations which require sleep or have
                 * a higher chance of succeeding if sleeping is
                 * allowed.
                 */
                DMAR_UNLOCK(dmar);
                dmar_ensure_ctx_page(dmar, PCI_RID2BUS(rid));
                domain1 = dmar_domain_alloc(dmar, id_mapped);
                if (domain1 == NULL) {
                        TD_PINNED_ASSERT;
                        return (NULL);
                }
                if (!id_mapped) {
                        error = domain_init_rmrr(domain1, dev, bus,
                            slot, func, dev_domain, dev_busno, dev_path,
                            dev_path_len);
                        if (error == 0 && dev != NULL)
                                error = dmar_reserve_pci_regions(domain1, dev);
                        if (error != 0) {
                                dmar_domain_destroy(domain1);
                                TD_PINNED_ASSERT;
                                return (NULL);
                        }
                }
                ctx1 = dmar_ctx_alloc(domain1, rid);
                ctxp = dmar_map_ctx_entry(ctx1, &sf);
                DMAR_LOCK(dmar);

                /*
                 * Recheck the contexts; another thread might have
                 * already allocated the needed one.
                 */
                ctx = dmar_find_ctx_locked(dmar, rid);
                if (ctx == NULL) {
                        domain = domain1;
                        ctx = ctx1;
                        dmar_ctx_link(ctx);
                        ctx->context.tag->owner = dev;
                        iommu_device_tag_init(CTX2IOCTX(ctx), dev);

                        /*
                         * This is the first activated context for the
                         * DMAR unit.  Enable the translation after
                         * everything is set up.
                         */
                        if (LIST_EMPTY(&dmar->domains))
                                enable = true;
                        LIST_INSERT_HEAD(&dmar->domains, domain, link);
                        ctx_id_entry_init(ctx, ctxp, false, bus);
                        if (dev != NULL) {
                                device_printf(dev,
                            "dmar%d pci%d:%d:%d:%d rid %x domain %d mgaw %d "
                                    "agaw %d %s-mapped\n",
                                    dmar->iommu.unit, dmar->segment, bus, slot,
                                    func, rid, domain->domain, domain->mgaw,
                                    domain->agaw, id_mapped ? "id" : "re");
                        }
                        iommu_unmap_pgtbl(sf);
                } else {
                        iommu_unmap_pgtbl(sf);
                        dmar_domain_destroy(domain1);
                        /* Nothing needs to be done to destroy ctx1. */
                        free(ctx1, M_DMAR_CTX);
                        domain = CTX2DOM(ctx);
                        ctx->context.refs++; /* tag referenced us */
                }
        } else {
                domain = CTX2DOM(ctx);
                if (ctx->context.tag->owner == NULL)
                        ctx->context.tag->owner = dev;
                ctx->context.refs++; /* tag referenced us */
        }

        error = dmar_flush_for_ctx_entry(dmar, enable);
        if (error != 0) {
                dmar_free_ctx_locked(dmar, ctx);
                TD_PINNED_ASSERT;
                return (NULL);
        }

        /*
         * The dmar lock was potentially dropped between the check for
         * the empty context list and now.  Recheck the state of
         * GCMD_TE to avoid an unneeded command.
         */
        if (enable && !rmrr_init && (dmar->hw_gcmd & DMAR_GCMD_TE) == 0) {
                error = dmar_disable_protected_regions(dmar);
                if (error != 0)
                        printf("dmar%d: Failed to disable protected regions\n",
                            dmar->iommu.unit);
                error = dmar_enable_translation(dmar);
                if (error == 0) {
                        if (bootverbose) {
                                printf("dmar%d: enabled translation\n",
                                    dmar->iommu.unit);
                        }
                } else {
                        printf("dmar%d: enabling translation failed, "
                            "error %d\n", dmar->iommu.unit, error);
                        dmar_free_ctx_locked(dmar, ctx);
                        TD_PINNED_ASSERT;
                        return (NULL);
                }
        }
        DMAR_UNLOCK(dmar);
        TD_PINNED_ASSERT;
        return (ctx);
}

struct dmar_ctx *
dmar_get_ctx_for_dev(struct dmar_unit *dmar, device_t dev, uint16_t rid,
    bool id_mapped, bool rmrr_init)
{
        int dev_domain, dev_path_len, dev_busno;

        dev_domain = pci_get_domain(dev);
        dev_path_len = dmar_dev_depth(dev);
        ACPI_DMAR_PCI_PATH dev_path[dev_path_len];
        dmar_dev_path(dev, &dev_busno, dev_path, dev_path_len);
        return (dmar_get_ctx_for_dev1(dmar, dev, rid, dev_domain, dev_busno,
            dev_path, dev_path_len, id_mapped, rmrr_init));
}

struct dmar_ctx *
dmar_get_ctx_for_devpath(struct dmar_unit *dmar, uint16_t rid,
    int dev_domain, int dev_busno,
    const void *dev_path, int dev_path_len,
    bool id_mapped, bool rmrr_init)
{

        return (dmar_get_ctx_for_dev1(dmar, NULL, rid, dev_domain, dev_busno,
            dev_path, dev_path_len, id_mapped, rmrr_init));
}

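/*
 * Move an existing context to another domain on the same dmar unit.
 * The context entry is rewritten in place and the caches are flushed;
 * the old domain loses the reference held by the context.
 */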
int
dmar_move_ctx_to_domain(struct dmar_domain *domain, struct dmar_ctx *ctx)
{
        struct dmar_unit *dmar;
        struct dmar_domain *old_domain;
        dmar_ctx_entry_t *ctxp;
        struct sf_buf *sf;
        int error;

        dmar = domain->dmar;
        old_domain = CTX2DOM(ctx);
        if (domain == old_domain)
                return (0);
        KASSERT(old_domain->iodom.iommu == domain->iodom.iommu,
            ("domain %p %u moving between dmars %u %u", domain,
            domain->domain, old_domain->iodom.iommu->unit,
            domain->iodom.iommu->unit));
        TD_PREP_PINNED_ASSERT;

        ctxp = dmar_map_ctx_entry(ctx, &sf);
        DMAR_LOCK(dmar);
        dmar_ctx_unlink(ctx);
        ctx->context.domain = &domain->iodom;
        dmar_ctx_link(ctx);
        ctx_id_entry_init(ctx, ctxp, true, PCI_BUSMAX + 100);
        iommu_unmap_pgtbl(sf);
        error = dmar_flush_for_ctx_entry(dmar, true);
        /* If the flush failed, rolling back would not work either. */
        printf("dmar%d rid %x domain %d->%d %s-mapped\n",
            dmar->iommu.unit, ctx->context.rid, old_domain->domain,
            domain->domain, (domain->iodom.flags & IOMMU_DOMAIN_IDMAP) != 0 ?
            "id" : "re");
        dmar_unref_domain_locked(dmar, old_domain);
        TD_PINNED_ASSERT;
        return (error);
}

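/*
 * Drop a reference on the domain.  The last reference removes the
 * domain from the unit's list, drains the pending unloads, and
 * destroys it.  Called with the dmar lock held; the lock is dropped
 * before returning.
 */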
static void
dmar_unref_domain_locked(struct dmar_unit *dmar, struct dmar_domain *domain)
{

        DMAR_ASSERT_LOCKED(dmar);
        KASSERT(domain->refs >= 1,
            ("dmar %d domain %p refs %u", dmar->iommu.unit, domain,
            domain->refs));
        KASSERT(domain->refs > domain->ctx_cnt,
            ("dmar %d domain %p refs %d ctx_cnt %d", dmar->iommu.unit, domain,
            domain->refs, domain->ctx_cnt));

        if (domain->refs > 1) {
                domain->refs--;
                DMAR_UNLOCK(dmar);
                return;
        }

        KASSERT((domain->iodom.flags & IOMMU_DOMAIN_RMRR) == 0,
            ("lost ref on RMRR domain %p", domain));

        LIST_REMOVE(domain, link);
        DMAR_UNLOCK(dmar);

        taskqueue_drain(dmar->iommu.delayed_taskqueue,
            &domain->iodom.unload_task);
        dmar_domain_destroy(domain);
}

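/*
 * Drop a reference on the context.  When the last reference goes away,
 * clear the context entry, flush the caches, unlink the context from
 * its domain, and drop the domain reference held by the context.
 * Called with the dmar lock held; the lock is dropped before
 * returning.
 */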
static void
dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx)
{
        struct sf_buf *sf;
        dmar_ctx_entry_t *ctxp;
        struct dmar_domain *domain;

        DMAR_ASSERT_LOCKED(dmar);
        KASSERT(ctx->context.refs >= 1,
            ("dmar %p ctx %p refs %u", dmar, ctx, ctx->context.refs));

        /*
         * If our reference is not the last one, only the dereference
         * should be performed.
         */
        if (ctx->context.refs > 1) {
                ctx->context.refs--;
                DMAR_UNLOCK(dmar);
                return;
        }

        KASSERT((ctx->context.flags & IOMMU_CTX_DISABLED) == 0,
            ("lost ref on disabled ctx %p", ctx));

        /*
         * Otherwise, the context entry must be cleared before the
         * page table is destroyed.  Mapping the context entries page
         * could require sleep, so unlock the dmar.
         */
        DMAR_UNLOCK(dmar);
        TD_PREP_PINNED_ASSERT;
        ctxp = dmar_map_ctx_entry(ctx, &sf);
        DMAR_LOCK(dmar);
        KASSERT(ctx->context.refs >= 1,
            ("dmar %p ctx %p refs %u", dmar, ctx, ctx->context.refs));

        /*
         * Another thread might have referenced the context, in which
         * case again only the dereference should be performed.
         */
        if (ctx->context.refs > 1) {
                ctx->context.refs--;
                DMAR_UNLOCK(dmar);
                iommu_unmap_pgtbl(sf);
                TD_PINNED_ASSERT;
                return;
        }

        KASSERT((ctx->context.flags & IOMMU_CTX_DISABLED) == 0,
            ("lost ref on disabled ctx %p", ctx));

        /*
         * Clear the context pointer and flush the caches.
         * XXXKIB: cannot do this if any RMRR entries are still present.
         */
        dmar_pte_clear(&ctxp->ctx1);
        ctxp->ctx2 = 0;
        dmar_flush_ctx_to_ram(dmar, ctxp);
        dmar_inv_ctx_glob(dmar);
        if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0) {
                if (dmar->qi_enabled)
                        dmar_qi_invalidate_iotlb_glob_locked(dmar);
                else
                        dmar_inv_iotlb_glob(dmar);
        }
        iommu_unmap_pgtbl(sf);
        domain = CTX2DOM(ctx);
        dmar_ctx_unlink(ctx);
        free(ctx->context.tag, M_DMAR_CTX);
        free(ctx, M_DMAR_CTX);
        dmar_unref_domain_locked(dmar, domain);
        TD_PINNED_ASSERT;
}

/*
 * Look up the context for the given request id.  Called and returns
 * with the dmar lock held.
 */
struct dmar_ctx *
dmar_find_ctx_locked(struct dmar_unit *dmar, uint16_t rid)
{
        struct dmar_domain *domain;
        struct iommu_ctx *ctx;

        DMAR_ASSERT_LOCKED(dmar);

        LIST_FOREACH(domain, &dmar->domains, link) {
                LIST_FOREACH(ctx, &domain->iodom.contexts, link) {
                        if (ctx->rid == rid)
                                return (IOCTX2CTX(ctx));
                }
        }
        return (NULL);
}

/*
 * If the given value for "free" is true, then the caller must not be using
 * the entry's dmamap_link field.
 */
void
dmar_domain_unload_entry(struct iommu_map_entry *entry, bool free,
    bool cansleep)
{
        struct dmar_domain *domain;
        struct dmar_unit *unit;

        domain = IODOM2DOM(entry->domain);
        unit = DOM2DMAR(domain);

        /*
         * If "free" is false, then the IOTLB invalidation must be performed
         * synchronously.  Otherwise, the caller might free the entry before
         * dmar_qi_task() is finished processing it.
         */
        if (unit->qi_enabled) {
                if (free) {
                        DMAR_LOCK(unit);
                        iommu_qi_invalidate_locked(&domain->iodom, entry,
                            true);
                        DMAR_UNLOCK(unit);
                } else {
                        iommu_qi_invalidate_sync(&domain->iodom, entry->start,
                            entry->end - entry->start, cansleep);
                        iommu_domain_free_entry(entry, false);
                }
        } else {
                dmar_flush_iotlb_sync(domain, entry->start, entry->end -
                    entry->start);
                iommu_domain_free_entry(entry, free);
        }
}

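/*
 * Decide whether the queued invalidation for this entry should carry a
 * wait descriptor: always for the last entry in the batch, and
 * periodically (every iommu_qi_batch_coalesce entries) for the rest.
 */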
static bool
dmar_domain_unload_emit_wait(struct dmar_domain *domain,
    struct iommu_map_entry *entry)
{

        if (TAILQ_NEXT(entry, dmamap_link) == NULL)
                return (true);
        return (domain->batch_no++ % iommu_qi_batch_coalesce == 0);
}

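/*
 * Unmap the given list of entries from the domain.  Without queued
 * invalidation the IOTLB is flushed synchronously and the entries are
 * freed here; with QI the entries are handed to the invalidation queue
 * and are freed once the invalidation completes.
 */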
void
dmar_domain_unload(struct iommu_domain *iodom,
    struct iommu_map_entries_tailq *entries, bool cansleep)
{
        struct dmar_domain *domain;
        struct dmar_unit *unit;
        struct iommu_map_entry *entry, *entry1;
        int error __diagused;

        domain = IODOM2DOM(iodom);
        unit = DOM2DMAR(domain);

        TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) {
                KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0,
                    ("not mapped entry %p %p", domain, entry));
                error = iodom->ops->unmap(iodom, entry,
                    cansleep ? IOMMU_PGF_WAITOK : 0);
                KASSERT(error == 0, ("unmap %p error %d", domain, error));
                if (!unit->qi_enabled) {
                        dmar_flush_iotlb_sync(domain, entry->start,
                            entry->end - entry->start);
                        TAILQ_REMOVE(entries, entry, dmamap_link);
                        iommu_domain_free_entry(entry, true);
                }
        }
        if (TAILQ_EMPTY(entries))
                return;

        KASSERT(unit->qi_enabled, ("loaded entry left"));
        DMAR_LOCK(unit);
        while ((entry = TAILQ_FIRST(entries)) != NULL) {
                TAILQ_REMOVE(entries, entry, dmamap_link);
                iommu_qi_invalidate_locked(&domain->iodom, entry,
                    dmar_domain_unload_emit_wait(domain, entry));
        }
        DMAR_UNLOCK(unit);
}

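/*
 * Wrappers that adapt the generic iommu_unit/iommu_ctx interface to
 * the DMAR-specific implementations above.
 */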
struct iommu_ctx *
dmar_get_ctx(struct iommu_unit *iommu, device_t dev, uint16_t rid,
    bool id_mapped, bool rmrr_init)
{
        struct dmar_unit *dmar;
        struct dmar_ctx *ret;

        dmar = IOMMU2DMAR(iommu);
        ret = dmar_get_ctx_for_dev(dmar, dev, rid, id_mapped, rmrr_init);
        return (CTX2IOCTX(ret));
}

void
dmar_free_ctx_locked_method(struct iommu_unit *iommu,
    struct iommu_ctx *context)
{
        struct dmar_unit *dmar;
        struct dmar_ctx *ctx;

        dmar = IOMMU2DMAR(iommu);
        ctx = IOCTX2CTX(context);
        dmar_free_ctx_locked(dmar, ctx);
}