xref: /freebsd/sys/x86/iommu/intel_ctx.c (revision 23145534154c279e3e8cbcd17d155f7ee67d8aa9)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2013 The FreeBSD Foundation
5  *
6  * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
7  * under sponsorship from the FreeBSD Foundation.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/malloc.h>
34 #include <sys/bus.h>
35 #include <sys/interrupt.h>
36 #include <sys/kernel.h>
37 #include <sys/ktr.h>
38 #include <sys/limits.h>
39 #include <sys/lock.h>
40 #include <sys/memdesc.h>
41 #include <sys/mutex.h>
42 #include <sys/proc.h>
43 #include <sys/rwlock.h>
44 #include <sys/rman.h>
45 #include <sys/sysctl.h>
46 #include <sys/taskqueue.h>
47 #include <sys/tree.h>
48 #include <sys/uio.h>
49 #include <sys/vmem.h>
50 #include <vm/vm.h>
51 #include <vm/vm_extern.h>
52 #include <vm/vm_kern.h>
53 #include <vm/vm_object.h>
54 #include <vm/vm_page.h>
55 #include <vm/vm_pager.h>
56 #include <vm/vm_map.h>
57 #include <contrib/dev/acpica/include/acpi.h>
58 #include <contrib/dev/acpica/include/accommon.h>
59 #include <dev/pci/pcireg.h>
60 #include <dev/pci/pcivar.h>
61 #include <machine/atomic.h>
62 #include <machine/bus.h>
63 #include <machine/md_var.h>
64 #include <machine/specialreg.h>
65 #include <x86/include/busdma_impl.h>
66 #include <dev/iommu/busdma_iommu.h>
67 #include <x86/iommu/intel_reg.h>
68 #include <x86/iommu/x86_iommu.h>
69 #include <x86/iommu/intel_dmar.h>
70 
/*
 * Malloc types used in this file: M_DMAR_CTX accounts for the context
 * structures and their busdma tags, M_DMAR_DOMAIN for the domains.
 */
static MALLOC_DEFINE(M_DMAR_CTX, "dmar_ctx", "Intel DMAR Context");
static MALLOC_DEFINE(M_DMAR_DOMAIN, "dmar_dom", "Intel DMAR Domain");

/*
 * Forward declarations of the teardown helpers, which are referenced
 * before their definitions further down in this file.
 */
static void dmar_unref_domain_locked(struct dmar_unit *dmar,
    struct dmar_domain *domain);
static void dmar_domain_destroy(struct dmar_domain *domain);

static void dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx);
static void dmar_free_ctx(struct dmar_ctx *ctx);
80 
/*
 * Make sure that the context-table page for the given bus exists in
 * dmar->ctx_obj (at page index 1 + bus) and that the root entry for the
 * bus, located in page 0 of the same object, points to it.
 */
static void
dmar_ensure_ctx_page(struct dmar_unit *dmar, int bus)
{
	struct sf_buf *sf;
	dmar_root_entry_t *re;
	vm_page_t ctxm;

	/*
	 * Allocated context page must be linked.
	 */
	ctxm = iommu_pgalloc(dmar->ctx_obj, 1 + bus, IOMMU_PGF_NOALLOC);
	if (ctxm != NULL)
		return;

	/*
	 * Page not present, allocate and link.  Note that other
	 * thread might execute this sequence in parallel.  This
	 * should be safe, because the context entries written by both
	 * threads are equal.
	 */
	TD_PREP_PINNED_ASSERT;
	ctxm = iommu_pgalloc(dmar->ctx_obj, 1 + bus, IOMMU_PGF_ZERO |
	    IOMMU_PGF_WAITOK);
	/* Map the root table (page 0) and step to the entry of this bus. */
	re = iommu_map_pgtbl(dmar->ctx_obj, 0, IOMMU_PGF_NOALLOC, &sf);
	re += bus;
	/* Publish the new context page: present bit plus its physical address. */
	dmar_pte_store(&re->r1, DMAR_ROOT_R1_P | (DMAR_ROOT_R1_CTP_MASK &
	    VM_PAGE_TO_PHYS(ctxm)));
	dmar_flush_root_to_ram(dmar, re);
	iommu_unmap_pgtbl(sf);
	TD_PINNED_ASSERT;
}
112 
113 static dmar_ctx_entry_t *
114 dmar_map_ctx_entry(struct dmar_ctx *ctx, struct sf_buf **sfp)
115 {
116 	struct dmar_unit *dmar;
117 	dmar_ctx_entry_t *ctxp;
118 
119 	dmar = CTX2DMAR(ctx);
120 
121 	ctxp = iommu_map_pgtbl(dmar->ctx_obj, 1 + PCI_RID2BUS(ctx->context.rid),
122 	    IOMMU_PGF_NOALLOC | IOMMU_PGF_WAITOK, sfp);
123 	ctxp += ctx->context.rid & 0xff;
124 	return (ctxp);
125 }
126 
127 static void
128 device_tag_init(struct dmar_ctx *ctx, device_t dev)
129 {
130 	struct dmar_domain *domain;
131 	bus_addr_t maxaddr;
132 
133 	domain = CTX2DOM(ctx);
134 	maxaddr = MIN(domain->iodom.end, BUS_SPACE_MAXADDR);
135 	ctx->context.tag->common.impl = &bus_dma_iommu_impl;
136 	ctx->context.tag->common.boundary = 0;
137 	ctx->context.tag->common.lowaddr = maxaddr;
138 	ctx->context.tag->common.highaddr = maxaddr;
139 	ctx->context.tag->common.maxsize = maxaddr;
140 	ctx->context.tag->common.nsegments = BUS_SPACE_UNRESTRICTED;
141 	ctx->context.tag->common.maxsegsz = maxaddr;
142 	ctx->context.tag->ctx = CTX2IOCTX(ctx);
143 	ctx->context.tag->owner = dev;
144 }
145 
146 static void
147 ctx_id_entry_init_one(dmar_ctx_entry_t *ctxp, struct dmar_domain *domain,
148     vm_page_t ctx_root)
149 {
150 	/*
151 	 * For update due to move, the store is not atomic.  It is
152 	 * possible that DMAR read upper doubleword, while low
153 	 * doubleword is not yet updated.  The domain id is stored in
154 	 * the upper doubleword, while the table pointer in the lower.
155 	 *
156 	 * There is no good solution, for the same reason it is wrong
157 	 * to clear P bit in the ctx entry for update.
158 	 */
159 	dmar_pte_store1(&ctxp->ctx2, DMAR_CTX2_DID(domain->domain) |
160 	    domain->awlvl);
161 	if (ctx_root == NULL) {
162 		dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_PASS | DMAR_CTX1_P);
163 	} else {
164 		dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_UNTR |
165 		    (DMAR_CTX1_ASR_MASK & VM_PAGE_TO_PHYS(ctx_root)) |
166 		    DMAR_CTX1_P);
167 	}
168 }
169 
170 static void
171 ctx_id_entry_init(struct dmar_ctx *ctx, dmar_ctx_entry_t *ctxp, bool move,
172     int busno)
173 {
174 	struct dmar_unit *unit;
175 	struct dmar_domain *domain;
176 	vm_page_t ctx_root;
177 	int i;
178 
179 	domain = CTX2DOM(ctx);
180 	unit = DOM2DMAR(domain);
181 	KASSERT(move || (ctxp->ctx1 == 0 && ctxp->ctx2 == 0),
182 	    ("dmar%d: initialized ctx entry %d:%d:%d 0x%jx 0x%jx",
183 	    unit->iommu.unit, busno, pci_get_slot(ctx->context.tag->owner),
184 	    pci_get_function(ctx->context.tag->owner),
185 	    ctxp->ctx1, ctxp->ctx2));
186 
187 	if ((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) != 0 &&
188 	    (unit->hw_ecap & DMAR_ECAP_PT) != 0) {
189 		KASSERT(domain->pgtbl_obj == NULL,
190 		    ("ctx %p non-null pgtbl_obj", ctx));
191 		ctx_root = NULL;
192 	} else {
193 		ctx_root = iommu_pgalloc(domain->pgtbl_obj, 0,
194 		    IOMMU_PGF_NOALLOC);
195 	}
196 
197 	if (iommu_is_buswide_ctx(DMAR2IOMMU(unit), busno)) {
198 		MPASS(!move);
199 		for (i = 0; i <= PCI_BUSMAX; i++) {
200 			ctx_id_entry_init_one(&ctxp[i], domain, ctx_root);
201 		}
202 	} else {
203 		ctx_id_entry_init_one(ctxp, domain, ctx_root);
204 	}
205 	dmar_flush_ctx_to_ram(unit, ctxp);
206 }
207 
208 static int
209 dmar_flush_for_ctx_entry(struct dmar_unit *dmar, bool force)
210 {
211 	int error;
212 
213 	/*
214 	 * If dmar declares Caching Mode as Set, follow 11.5 "Caching
215 	 * Mode Consideration" and do the (global) invalidation of the
216 	 * negative TLB entries.
217 	 */
218 	if ((dmar->hw_cap & DMAR_CAP_CM) == 0 && !force)
219 		return (0);
220 	if (dmar->qi_enabled) {
221 		dmar_qi_invalidate_ctx_glob_locked(dmar);
222 		if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force)
223 			dmar_qi_invalidate_iotlb_glob_locked(dmar);
224 		return (0);
225 	}
226 	error = dmar_inv_ctx_glob(dmar);
227 	if (error == 0 && ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force))
228 		error = dmar_inv_iotlb_glob(dmar);
229 	return (error);
230 }
231 
232 static int
233 domain_init_rmrr(struct dmar_domain *domain, device_t dev, int bus,
234     int slot, int func, int dev_domain, int dev_busno,
235     const void *dev_path, int dev_path_len)
236 {
237 	struct iommu_map_entries_tailq rmrr_entries;
238 	struct iommu_map_entry *entry, *entry1;
239 	vm_page_t *ma;
240 	iommu_gaddr_t start, end;
241 	vm_pindex_t size, i;
242 	int error, error1;
243 
244 	if (!dmar_rmrr_enable)
245 		return (0);
246 
247 	error = 0;
248 	TAILQ_INIT(&rmrr_entries);
249 	dmar_dev_parse_rmrr(domain, dev_domain, dev_busno, dev_path,
250 	    dev_path_len, &rmrr_entries);
251 	TAILQ_FOREACH_SAFE(entry, &rmrr_entries, dmamap_link, entry1) {
252 		/*
253 		 * VT-d specification requires that the start of an
254 		 * RMRR entry is 4k-aligned.  Buggy BIOSes put
255 		 * anything into the start and end fields.  Truncate
256 		 * and round as neccesary.
257 		 *
258 		 * We also allow the overlapping RMRR entries, see
259 		 * iommu_gas_alloc_region().
260 		 */
261 		start = entry->start;
262 		end = entry->end;
263 		if (bootverbose)
264 			printf("dmar%d ctx pci%d:%d:%d RMRR [%#jx, %#jx]\n",
265 			    domain->iodom.iommu->unit, bus, slot, func,
266 			    (uintmax_t)start, (uintmax_t)end);
267 		entry->start = trunc_page(start);
268 		entry->end = round_page(end);
269 		if (entry->start == entry->end) {
270 			/* Workaround for some AMI (?) BIOSes */
271 			if (bootverbose) {
272 				if (dev != NULL)
273 					device_printf(dev, "");
274 				printf("pci%d:%d:%d ", bus, slot, func);
275 				printf("BIOS bug: dmar%d RMRR "
276 				    "region (%jx, %jx) corrected\n",
277 				    domain->iodom.iommu->unit, start, end);
278 			}
279 			entry->end += IOMMU_PAGE_SIZE * 0x20;
280 		}
281 		size = OFF_TO_IDX(entry->end - entry->start);
282 		ma = malloc(sizeof(vm_page_t) * size, M_TEMP, M_WAITOK);
283 		for (i = 0; i < size; i++) {
284 			ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
285 			    VM_MEMATTR_DEFAULT);
286 		}
287 		error1 = iommu_gas_map_region(DOM2IODOM(domain), entry,
288 		    IOMMU_MAP_ENTRY_READ | IOMMU_MAP_ENTRY_WRITE,
289 		    IOMMU_MF_CANWAIT | IOMMU_MF_RMRR, ma);
290 		/*
291 		 * Non-failed RMRR entries are owned by context rb
292 		 * tree.  Get rid of the failed entry, but do not stop
293 		 * the loop.  Rest of the parsed RMRR entries are
294 		 * loaded and removed on the context destruction.
295 		 */
296 		if (error1 == 0 && entry->end != entry->start) {
297 			IOMMU_LOCK(domain->iodom.iommu);
298 			domain->refs++; /* XXXKIB prevent free */
299 			domain->iodom.flags |= IOMMU_DOMAIN_RMRR;
300 			IOMMU_UNLOCK(domain->iodom.iommu);
301 		} else {
302 			if (error1 != 0) {
303 				if (dev != NULL)
304 					device_printf(dev, "");
305 				printf("pci%d:%d:%d ", bus, slot, func);
306 				printf(
307 			    "dmar%d failed to map RMRR region (%jx, %jx) %d\n",
308 				    domain->iodom.iommu->unit, start, end,
309 				    error1);
310 				error = error1;
311 			}
312 			TAILQ_REMOVE(&rmrr_entries, entry, dmamap_link);
313 			iommu_gas_free_entry(entry);
314 		}
315 		for (i = 0; i < size; i++)
316 			vm_page_putfake(ma[i]);
317 		free(ma, M_TEMP);
318 	}
319 	return (error);
320 }
321 
322 /*
323  * PCI memory address space is shared between memory-mapped devices (MMIO) and
324  * host memory (which may be remapped by an IOMMU).  Device accesses to an
325  * address within a memory aperture in a PCIe root port will be treated as
326  * peer-to-peer and not forwarded to an IOMMU.  To avoid this, reserve the
327  * address space of the root port's memory apertures in the address space used
328  * by the IOMMU for remapping.
329  */
330 static int
331 dmar_reserve_pci_regions(struct dmar_domain *domain, device_t dev)
332 {
333 	struct iommu_domain *iodom;
334 	device_t root;
335 	uint32_t val;
336 	uint64_t base, limit;
337 	int error;
338 
339 	iodom = DOM2IODOM(domain);
340 
341 	root = pci_find_pcie_root_port(dev);
342 	if (root == NULL)
343 		return (0);
344 
345 	/* Disable downstream memory */
346 	base = PCI_PPBMEMBASE(0, pci_read_config(root, PCIR_MEMBASE_1, 2));
347 	limit = PCI_PPBMEMLIMIT(0, pci_read_config(root, PCIR_MEMLIMIT_1, 2));
348 	error = iommu_gas_reserve_region_extend(iodom, base, limit + 1);
349 	if (bootverbose || error != 0)
350 		device_printf(dev, "DMAR reserve [%#jx-%#jx] (error %d)\n",
351 		    base, limit + 1, error);
352 	if (error != 0)
353 		return (error);
354 
355 	/* Disable downstream prefetchable memory */
356 	val = pci_read_config(root, PCIR_PMBASEL_1, 2);
357 	if (val != 0 || pci_read_config(root, PCIR_PMLIMITL_1, 2) != 0) {
358 		if ((val & PCIM_BRPM_MASK) == PCIM_BRPM_64) {
359 			base = PCI_PPBMEMBASE(
360 			    pci_read_config(root, PCIR_PMBASEH_1, 4),
361 			    val);
362 			limit = PCI_PPBMEMLIMIT(
363 			    pci_read_config(root, PCIR_PMLIMITH_1, 4),
364 			    pci_read_config(root, PCIR_PMLIMITL_1, 2));
365 		} else {
366 			base = PCI_PPBMEMBASE(0, val);
367 			limit = PCI_PPBMEMLIMIT(0,
368 			    pci_read_config(root, PCIR_PMLIMITL_1, 2));
369 		}
370 		error = iommu_gas_reserve_region_extend(iodom, base,
371 		    limit + 1);
372 		if (bootverbose || error != 0)
373 			device_printf(dev, "DMAR reserve [%#jx-%#jx] "
374 			    "(error %d)\n", base, limit + 1, error);
375 		if (error != 0)
376 			return (error);
377 	}
378 
379 	return (error);
380 }
381 
/*
 * Allocate and initialize a new domain on the unit.
 *
 * A domain id is taken from dmar->domids.  For an id_mapped domain the
 * address space ends at the top of installed memory and, unless the
 * hardware supports pass-through, an identity-mapping page table is
 * obtained.  Otherwise a fresh page table is allocated and the local
 * APIC range is reserved in the domain's address space.
 *
 * Returns the new domain, or NULL if no domain id is available or one
 * of the initialization steps failed.
 */
static struct dmar_domain *
dmar_domain_alloc(struct dmar_unit *dmar, bool id_mapped)
{
	struct iommu_domain *iodom;
	struct iommu_unit *unit;
	struct dmar_domain *domain;
	int error, id, mgaw;

	id = alloc_unr(dmar->domids);
	if (id == -1)
		return (NULL);
	domain = malloc(sizeof(*domain), M_DMAR_DOMAIN, M_WAITOK | M_ZERO);
	iodom = DOM2IODOM(domain);
	unit = DMAR2IOMMU(dmar);
	domain->domain = id;
	LIST_INIT(&domain->contexts);
	iommu_domain_init(unit, iodom, &dmar_domain_map_ops);

	domain->dmar = dmar;

	/*
	 * For now, use the maximal usable physical address of the
	 * installed memory to calculate the mgaw on id_mapped domain.
	 * It is useful for the identity mapping, and less so for the
	 * virtualized bus address space.
	 */
	domain->iodom.end = id_mapped ? ptoa(Maxmem) : BUS_SPACE_MAXADDR;
	mgaw = dmar_maxaddr2mgaw(dmar, domain->iodom.end, !id_mapped);
	error = domain_set_agaw(domain, mgaw);
	if (error != 0)
		goto fail;
	if (!id_mapped)
		/* Use all supported address space for remapping. */
		domain->iodom.end = 1ULL << (domain->agaw - 1);

	iommu_gas_init_domain(DOM2IODOM(domain));

	if (id_mapped) {
		/*
		 * With hardware pass-through no page table is needed,
		 * pgtbl_obj stays NULL in that case.
		 */
		if ((dmar->hw_ecap & DMAR_ECAP_PT) == 0) {
			domain->pgtbl_obj = domain_get_idmap_pgtbl(domain,
			    domain->iodom.end);
		}
		domain->iodom.flags |= IOMMU_DOMAIN_IDMAP;
	} else {
		error = dmar_domain_alloc_pgtbl(domain);
		if (error != 0)
			goto fail;
		/* Disable local apic region access */
		error = iommu_gas_reserve_region(iodom, 0xfee00000,
		    0xfeefffff + 1, &iodom->msi_entry);
		if (error != 0)
			goto fail;
	}
	return (domain);

fail:
	/* The destructor also returns the domain id to dmar->domids. */
	dmar_domain_destroy(domain);
	return (NULL);
}
441 
442 static struct dmar_ctx *
443 dmar_ctx_alloc(struct dmar_domain *domain, uint16_t rid)
444 {
445 	struct dmar_ctx *ctx;
446 
447 	ctx = malloc(sizeof(*ctx), M_DMAR_CTX, M_WAITOK | M_ZERO);
448 	ctx->context.domain = DOM2IODOM(domain);
449 	ctx->context.tag = malloc(sizeof(struct bus_dma_tag_iommu),
450 	    M_DMAR_CTX, M_WAITOK | M_ZERO);
451 	ctx->context.rid = rid;
452 	ctx->refs = 1;
453 	return (ctx);
454 }
455 
456 static void
457 dmar_ctx_link(struct dmar_ctx *ctx)
458 {
459 	struct dmar_domain *domain;
460 
461 	domain = CTX2DOM(ctx);
462 	IOMMU_ASSERT_LOCKED(domain->iodom.iommu);
463 	KASSERT(domain->refs >= domain->ctx_cnt,
464 	    ("dom %p ref underflow %d %d", domain, domain->refs,
465 	    domain->ctx_cnt));
466 	domain->refs++;
467 	domain->ctx_cnt++;
468 	LIST_INSERT_HEAD(&domain->contexts, ctx, link);
469 }
470 
/*
 * Detach the context from its domain: drop the reference and the
 * context count that dmar_ctx_link() took and remove the context from
 * the domain's list.  The iommu lock must be held.
 */
static void
dmar_ctx_unlink(struct dmar_ctx *ctx)
{
	struct dmar_domain *domain;

	domain = CTX2DOM(ctx);
	IOMMU_ASSERT_LOCKED(domain->iodom.iommu);
	KASSERT(domain->refs > 0,
	    ("domain %p ctx dtr refs %d", domain, domain->refs));
	/*
	 * NOTE(review): this asserts ctx_cnt >= refs, while
	 * dmar_ctx_link() asserts refs >= ctx_cnt and
	 * dmar_unref_domain_locked() asserts refs > ctx_cnt.  Taken
	 * together they only hold when refs == ctx_cnt here; confirm
	 * the intended direction of this check.
	 */
	KASSERT(domain->ctx_cnt >= domain->refs,
	    ("domain %p ctx dtr refs %d ctx_cnt %d", domain,
	    domain->refs, domain->ctx_cnt));
	domain->refs--;
	domain->ctx_cnt--;
	LIST_REMOVE(ctx, link);
}
487 
/*
 * Release all resources of a domain that has no contexts, no
 * references and no pending unloads: the address space allocator state,
 * the page table, the generic iommu domain state, the domain id and
 * finally the domain structure itself.
 */
static void
dmar_domain_destroy(struct dmar_domain *domain)
{
	struct iommu_domain *iodom;
	struct dmar_unit *dmar;

	iodom = DOM2IODOM(domain);

	KASSERT(TAILQ_EMPTY(&domain->iodom.unload_entries),
	    ("unfinished unloads %p", domain));
	KASSERT(LIST_EMPTY(&domain->contexts),
	    ("destroying dom %p with contexts", domain));
	KASSERT(domain->ctx_cnt == 0,
	    ("destroying dom %p with ctx_cnt %d", domain, domain->ctx_cnt));
	KASSERT(domain->refs == 0,
	    ("destroying dom %p with refs %d", domain, domain->refs));
	/*
	 * The flags tell how far initialization got, so a partially
	 * constructed domain can be torn down as well.
	 */
	if ((domain->iodom.flags & IOMMU_DOMAIN_GAS_INITED) != 0) {
		DMAR_DOMAIN_LOCK(domain);
		iommu_gas_fini_domain(iodom);
		DMAR_DOMAIN_UNLOCK(domain);
	}
	if ((domain->iodom.flags & IOMMU_DOMAIN_PGTBL_INITED) != 0) {
		/*
		 * NOTE(review): the page table lock is taken here and not
		 * released in this function; presumably
		 * dmar_domain_free_pgtbl() drops it - confirm.
		 */
		if (domain->pgtbl_obj != NULL)
			DMAR_DOMAIN_PGLOCK(domain);
		dmar_domain_free_pgtbl(domain);
	}
	iommu_domain_fini(iodom);
	dmar = DOM2DMAR(domain);
	free_unr(dmar->domids, domain->domain);
	free(domain, M_DMAR_DOMAIN);
}
519 
520 static struct dmar_ctx *
521 dmar_get_ctx_for_dev1(struct dmar_unit *dmar, device_t dev, uint16_t rid,
522     int dev_domain, int dev_busno, const void *dev_path, int dev_path_len,
523     bool id_mapped, bool rmrr_init)
524 {
525 	struct dmar_domain *domain, *domain1;
526 	struct dmar_ctx *ctx, *ctx1;
527 	struct iommu_unit *unit __diagused;
528 	dmar_ctx_entry_t *ctxp;
529 	struct sf_buf *sf;
530 	int bus, slot, func, error;
531 	bool enable;
532 
533 	if (dev != NULL) {
534 		bus = pci_get_bus(dev);
535 		slot = pci_get_slot(dev);
536 		func = pci_get_function(dev);
537 	} else {
538 		bus = PCI_RID2BUS(rid);
539 		slot = PCI_RID2SLOT(rid);
540 		func = PCI_RID2FUNC(rid);
541 	}
542 	enable = false;
543 	TD_PREP_PINNED_ASSERT;
544 	unit = DMAR2IOMMU(dmar);
545 	DMAR_LOCK(dmar);
546 	KASSERT(!iommu_is_buswide_ctx(unit, bus) || (slot == 0 && func == 0),
547 	    ("iommu%d pci%d:%d:%d get_ctx for buswide", dmar->iommu.unit, bus,
548 	    slot, func));
549 	ctx = dmar_find_ctx_locked(dmar, rid);
550 	error = 0;
551 	if (ctx == NULL) {
552 		/*
553 		 * Perform the allocations which require sleep or have
554 		 * higher chance to succeed if the sleep is allowed.
555 		 */
556 		DMAR_UNLOCK(dmar);
557 		dmar_ensure_ctx_page(dmar, PCI_RID2BUS(rid));
558 		domain1 = dmar_domain_alloc(dmar, id_mapped);
559 		if (domain1 == NULL) {
560 			TD_PINNED_ASSERT;
561 			return (NULL);
562 		}
563 		if (!id_mapped) {
564 			error = domain_init_rmrr(domain1, dev, bus,
565 			    slot, func, dev_domain, dev_busno, dev_path,
566 			    dev_path_len);
567 			if (error == 0 && dev != NULL)
568 				error = dmar_reserve_pci_regions(domain1, dev);
569 			if (error != 0) {
570 				dmar_domain_destroy(domain1);
571 				TD_PINNED_ASSERT;
572 				return (NULL);
573 			}
574 		}
575 		ctx1 = dmar_ctx_alloc(domain1, rid);
576 		ctxp = dmar_map_ctx_entry(ctx1, &sf);
577 		DMAR_LOCK(dmar);
578 
579 		/*
580 		 * Recheck the contexts, other thread might have
581 		 * already allocated needed one.
582 		 */
583 		ctx = dmar_find_ctx_locked(dmar, rid);
584 		if (ctx == NULL) {
585 			domain = domain1;
586 			ctx = ctx1;
587 			dmar_ctx_link(ctx);
588 			ctx->context.tag->owner = dev;
589 			device_tag_init(ctx, dev);
590 
591 			/*
592 			 * This is the first activated context for the
593 			 * DMAR unit.  Enable the translation after
594 			 * everything is set up.
595 			 */
596 			if (LIST_EMPTY(&dmar->domains))
597 				enable = true;
598 			LIST_INSERT_HEAD(&dmar->domains, domain, link);
599 			ctx_id_entry_init(ctx, ctxp, false, bus);
600 			if (dev != NULL) {
601 				device_printf(dev,
602 			    "dmar%d pci%d:%d:%d:%d rid %x domain %d mgaw %d "
603 				    "agaw %d %s-mapped\n",
604 				    dmar->iommu.unit, dmar->segment, bus, slot,
605 				    func, rid, domain->domain, domain->mgaw,
606 				    domain->agaw, id_mapped ? "id" : "re");
607 			}
608 			iommu_unmap_pgtbl(sf);
609 		} else {
610 			iommu_unmap_pgtbl(sf);
611 			dmar_domain_destroy(domain1);
612 			/* Nothing needs to be done to destroy ctx1. */
613 			free(ctx1, M_DMAR_CTX);
614 			domain = CTX2DOM(ctx);
615 			ctx->refs++; /* tag referenced us */
616 		}
617 	} else {
618 		domain = CTX2DOM(ctx);
619 		if (ctx->context.tag->owner == NULL)
620 			ctx->context.tag->owner = dev;
621 		ctx->refs++; /* tag referenced us */
622 	}
623 
624 	error = dmar_flush_for_ctx_entry(dmar, enable);
625 	if (error != 0) {
626 		dmar_free_ctx_locked(dmar, ctx);
627 		TD_PINNED_ASSERT;
628 		return (NULL);
629 	}
630 
631 	/*
632 	 * The dmar lock was potentially dropped between check for the
633 	 * empty context list and now.  Recheck the state of GCMD_TE
634 	 * to avoid unneeded command.
635 	 */
636 	if (enable && !rmrr_init && (dmar->hw_gcmd & DMAR_GCMD_TE) == 0) {
637 		error = dmar_disable_protected_regions(dmar);
638 		if (error != 0)
639 			printf("dmar%d: Failed to disable protected regions\n",
640 			    dmar->iommu.unit);
641 		error = dmar_enable_translation(dmar);
642 		if (error == 0) {
643 			if (bootverbose) {
644 				printf("dmar%d: enabled translation\n",
645 				    dmar->iommu.unit);
646 			}
647 		} else {
648 			printf("dmar%d: enabling translation failed, "
649 			    "error %d\n", dmar->iommu.unit, error);
650 			dmar_free_ctx_locked(dmar, ctx);
651 			TD_PINNED_ASSERT;
652 			return (NULL);
653 		}
654 	}
655 	DMAR_UNLOCK(dmar);
656 	TD_PINNED_ASSERT;
657 	return (ctx);
658 }
659 
660 struct dmar_ctx *
661 dmar_get_ctx_for_dev(struct dmar_unit *dmar, device_t dev, uint16_t rid,
662     bool id_mapped, bool rmrr_init)
663 {
664 	int dev_domain, dev_path_len, dev_busno;
665 
666 	dev_domain = pci_get_domain(dev);
667 	dev_path_len = dmar_dev_depth(dev);
668 	ACPI_DMAR_PCI_PATH dev_path[dev_path_len];
669 	dmar_dev_path(dev, &dev_busno, dev_path, dev_path_len);
670 	return (dmar_get_ctx_for_dev1(dmar, dev, rid, dev_domain, dev_busno,
671 	    dev_path, dev_path_len, id_mapped, rmrr_init));
672 }
673 
674 struct dmar_ctx *
675 dmar_get_ctx_for_devpath(struct dmar_unit *dmar, uint16_t rid,
676     int dev_domain, int dev_busno,
677     const void *dev_path, int dev_path_len,
678     bool id_mapped, bool rmrr_init)
679 {
680 
681 	return (dmar_get_ctx_for_dev1(dmar, NULL, rid, dev_domain, dev_busno,
682 	    dev_path, dev_path_len, id_mapped, rmrr_init));
683 }
684 
/*
 * Move the context from its current domain to the given one and
 * rewrite its hardware context entry accordingly.
 *
 * Returns 0 if the context already belongs to the domain, otherwise the
 * result of the invalidation that follows the entry update.  The move
 * itself is not rolled back when the invalidation fails.  The dmar lock
 * is taken here and released by dmar_unref_domain_locked().
 */
int
dmar_move_ctx_to_domain(struct dmar_domain *domain, struct dmar_ctx *ctx)
{
	struct dmar_unit *dmar;
	struct dmar_domain *old_domain;
	dmar_ctx_entry_t *ctxp;
	struct sf_buf *sf;
	int error;

	dmar = domain->dmar;
	old_domain = CTX2DOM(ctx);
	if (domain == old_domain)
		return (0);
	KASSERT(old_domain->iodom.iommu == domain->iodom.iommu,
	    ("domain %p %u moving between dmars %u %u", domain,
	    domain->domain, old_domain->iodom.iommu->unit,
	    domain->iodom.iommu->unit));
	TD_PREP_PINNED_ASSERT;

	/* Map the entry before taking the lock, the mapping may sleep. */
	ctxp = dmar_map_ctx_entry(ctx, &sf);
	DMAR_LOCK(dmar);
	dmar_ctx_unlink(ctx);
	ctx->context.domain = &domain->iodom;
	dmar_ctx_link(ctx);
	/*
	 * NOTE(review): PCI_BUSMAX + 100 is not a valid bus number;
	 * presumably it is passed so that the bus-wide check in
	 * ctx_id_entry_init() never matches for a move - confirm.
	 */
	ctx_id_entry_init(ctx, ctxp, true, PCI_BUSMAX + 100);
	iommu_unmap_pgtbl(sf);
	error = dmar_flush_for_ctx_entry(dmar, true);
	/* If flush failed, rolling back would not work as well. */
	printf("dmar%d rid %x domain %d->%d %s-mapped\n",
	    dmar->iommu.unit, ctx->context.rid, old_domain->domain,
	    domain->domain, (domain->iodom.flags & IOMMU_DOMAIN_IDMAP) != 0 ?
	    "id" : "re");
	/* Drops a reference on the old domain and releases the dmar lock. */
	dmar_unref_domain_locked(dmar, old_domain);
	TD_PINNED_ASSERT;
	return (error);
}
721 
/*
 * Drop one reference on the domain.  Must be called with the dmar lock
 * held; the lock is released on every path before returning.
 *
 * When the last reference goes away the domain is removed from the
 * unit's domain list, its pending unload task is drained and the domain
 * is destroyed.
 */
static void
dmar_unref_domain_locked(struct dmar_unit *dmar, struct dmar_domain *domain)
{

	DMAR_ASSERT_LOCKED(dmar);
	KASSERT(domain->refs >= 1,
	    ("dmar %d domain %p refs %u", dmar->iommu.unit, domain,
	    domain->refs));
	KASSERT(domain->refs > domain->ctx_cnt,
	    ("dmar %d domain %p refs %d ctx_cnt %d", dmar->iommu.unit, domain,
	    domain->refs, domain->ctx_cnt));

	/* Not the last reference: just drop it. */
	if (domain->refs > 1) {
		domain->refs--;
		DMAR_UNLOCK(dmar);
		return;
	}

	KASSERT((domain->iodom.flags & IOMMU_DOMAIN_RMRR) == 0,
	    ("lost ref on RMRR domain %p", domain));

	LIST_REMOVE(domain, link);
	DMAR_UNLOCK(dmar);

	/* The drain and the destruction are done with the lock dropped. */
	taskqueue_drain(dmar->iommu.delayed_taskqueue,
	    &domain->iodom.unload_task);
	dmar_domain_destroy(domain);
}
750 
/*
 * Drop one reference on the context.  Must be called with the dmar lock
 * held; the lock is released on every path before returning.
 *
 * When the last reference is dropped, the hardware context entry is
 * cleared, the caches are invalidated, the context and its busdma tag
 * are freed, and the reference on the domain is released.
 */
static void
dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx)
{
	struct sf_buf *sf;
	dmar_ctx_entry_t *ctxp;
	struct dmar_domain *domain;

	DMAR_ASSERT_LOCKED(dmar);
	KASSERT(ctx->refs >= 1,
	    ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs));

	/*
	 * If our reference is not last, only the dereference should
	 * be performed.
	 */
	if (ctx->refs > 1) {
		ctx->refs--;
		DMAR_UNLOCK(dmar);
		return;
	}

	KASSERT((ctx->context.flags & IOMMU_CTX_DISABLED) == 0,
	    ("lost ref on disabled ctx %p", ctx));

	/*
	 * Otherwise, the context entry must be cleared before the
	 * page table is destroyed.  The mapping of the context
	 * entries page could require sleep, unlock the dmar.
	 */
	DMAR_UNLOCK(dmar);
	TD_PREP_PINNED_ASSERT;
	ctxp = dmar_map_ctx_entry(ctx, &sf);
	DMAR_LOCK(dmar);
	KASSERT(ctx->refs >= 1,
	    ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs));

	/*
	 * Other thread might have referenced the context, in which
	 * case again only the dereference should be performed.
	 */
	if (ctx->refs > 1) {
		ctx->refs--;
		DMAR_UNLOCK(dmar);
		iommu_unmap_pgtbl(sf);
		TD_PINNED_ASSERT;
		return;
	}

	KASSERT((ctx->context.flags & IOMMU_CTX_DISABLED) == 0,
	    ("lost ref on disabled ctx %p", ctx));

	/*
	 * Clear the context pointer and flush the caches.
	 * XXXKIB: cannot do this if any RMRR entries are still present.
	 */
	dmar_pte_clear(&ctxp->ctx1);
	ctxp->ctx2 = 0;
	dmar_flush_ctx_to_ram(dmar, ctxp);
	/*
	 * NOTE(review): the register-based context invalidation is used
	 * here even when qi_enabled is set, unlike in
	 * dmar_flush_for_ctx_entry() - confirm this is intended.
	 */
	dmar_inv_ctx_glob(dmar);
	if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0) {
		if (dmar->qi_enabled)
			dmar_qi_invalidate_iotlb_glob_locked(dmar);
		else
			dmar_inv_iotlb_glob(dmar);
	}
	iommu_unmap_pgtbl(sf);
	/* Remember the domain, ctx is freed before the domain is unreferenced. */
	domain = CTX2DOM(ctx);
	dmar_ctx_unlink(ctx);
	free(ctx->context.tag, M_DMAR_CTX);
	free(ctx, M_DMAR_CTX);
	/* Releases the dmar lock. */
	dmar_unref_domain_locked(dmar, domain);
	TD_PINNED_ASSERT;
}
824 
/*
 * Drop one reference on the context.  Unlocked variant: takes the dmar
 * lock of the context's unit and lets dmar_free_ctx_locked() do the
 * work.
 */
static void
dmar_free_ctx(struct dmar_ctx *ctx)
{
	struct dmar_unit *unit;

	unit = CTX2DMAR(ctx);
	DMAR_LOCK(unit);
	/* The callee releases the lock on all of its paths. */
	dmar_free_ctx_locked(unit, ctx);
}
834 
835 /*
836  * Returns with the domain locked.
837  */
838 struct dmar_ctx *
839 dmar_find_ctx_locked(struct dmar_unit *dmar, uint16_t rid)
840 {
841 	struct dmar_domain *domain;
842 	struct dmar_ctx *ctx;
843 
844 	DMAR_ASSERT_LOCKED(dmar);
845 
846 	LIST_FOREACH(domain, &dmar->domains, link) {
847 		LIST_FOREACH(ctx, &domain->contexts, link) {
848 			if (ctx->context.rid == rid)
849 				return (ctx);
850 		}
851 	}
852 	return (NULL);
853 }
854 
855 void
856 dmar_domain_free_entry(struct iommu_map_entry *entry, bool free)
857 {
858 	if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
859 		iommu_gas_free_region(entry);
860 	else
861 		iommu_gas_free_space(entry);
862 	if (free)
863 		iommu_gas_free_entry(entry);
864 	else
865 		entry->flags = 0;
866 }
867 
868 /*
869  * If the given value for "free" is true, then the caller must not be using
870  * the entry's dmamap_link field.
871  */
872 void
873 dmar_domain_unload_entry(struct iommu_map_entry *entry, bool free,
874     bool cansleep)
875 {
876 	struct dmar_domain *domain;
877 	struct dmar_unit *unit;
878 
879 	domain = IODOM2DOM(entry->domain);
880 	unit = DOM2DMAR(domain);
881 
882 	/*
883 	 * If "free" is false, then the IOTLB invalidation must be performed
884 	 * synchronously.  Otherwise, the caller might free the entry before
885 	 * dmar_qi_task() is finished processing it.
886 	 */
887 	if (unit->qi_enabled) {
888 		if (free) {
889 			DMAR_LOCK(unit);
890 			iommu_qi_invalidate_locked(&domain->iodom, entry,
891 			    true);
892 			DMAR_UNLOCK(unit);
893 		} else {
894 			iommu_qi_invalidate_sync(&domain->iodom, entry->start,
895 			    entry->end - entry->start, cansleep);
896 			dmar_domain_free_entry(entry, false);
897 		}
898 	} else {
899 		domain_flush_iotlb_sync(domain, entry->start, entry->end -
900 		    entry->start);
901 		dmar_domain_free_entry(entry, free);
902 	}
903 }
904 
905 static bool
906 dmar_domain_unload_emit_wait(struct dmar_domain *domain,
907     struct iommu_map_entry *entry)
908 {
909 
910 	if (TAILQ_NEXT(entry, dmamap_link) == NULL)
911 		return (true);
912 	return (domain->batch_no++ % dmar_batch_coalesce == 0);
913 }
914 
/*
 * Unmap all entries on the list from the domain and invalidate the
 * IOTLB for them.
 *
 * Without queued invalidation each entry is flushed synchronously,
 * taken off the list and freed right away.  With queued invalidation
 * the entries are first unmapped and then, under the dmar lock, taken
 * off the list and passed to iommu_qi_invalidate_locked().
 * The list is empty on return.
 */
void
dmar_domain_unload(struct iommu_domain *iodom,
    struct iommu_map_entries_tailq *entries, bool cansleep)
{
	struct dmar_domain *domain;
	struct dmar_unit *unit;
	struct iommu_map_entry *entry, *entry1;
	int error __diagused;

	domain = IODOM2DOM(iodom);
	unit = DOM2DMAR(domain);

	TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) {
		KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0,
		    ("not mapped entry %p %p", domain, entry));
		error = iodom->ops->unmap(iodom, entry->start, entry->end -
		    entry->start, cansleep ? IOMMU_PGF_WAITOK : 0);
		KASSERT(error == 0, ("unmap %p error %d", domain, error));
		if (!unit->qi_enabled) {
			domain_flush_iotlb_sync(domain, entry->start,
			    entry->end - entry->start);
			TAILQ_REMOVE(entries, entry, dmamap_link);
			dmar_domain_free_entry(entry, true);
		}
	}
	/* Nothing is left when the synchronous path handled every entry. */
	if (TAILQ_EMPTY(entries))
		return;

	KASSERT(unit->qi_enabled, ("loaded entry left"));
	DMAR_LOCK(unit);
	while ((entry = TAILQ_FIRST(entries)) != NULL) {
		TAILQ_REMOVE(entries, entry, dmamap_link);
		/* The helper decides whether this request also waits. */
		iommu_qi_invalidate_locked(&domain->iodom, entry,
		    dmar_domain_unload_emit_wait(domain, entry));
	}
	DMAR_UNLOCK(unit);
}
952 
953 struct iommu_ctx *
954 dmar_get_ctx(struct iommu_unit *iommu, device_t dev, uint16_t rid,
955     bool id_mapped, bool rmrr_init)
956 {
957 	struct dmar_unit *dmar;
958 	struct dmar_ctx *ret;
959 
960 	dmar = IOMMU2DMAR(iommu);
961 	ret = dmar_get_ctx_for_dev(dmar, dev, rid, id_mapped, rmrr_init);
962 	return (CTX2IOCTX(ret));
963 }
964 
/*
 * Generic-iommu entry point for dropping a context reference with the
 * lock held: converts both arguments to their DMAR types and calls
 * dmar_free_ctx_locked().
 */
void
dmar_free_ctx_locked_method(struct iommu_unit *iommu,
    struct iommu_ctx *context)
{

	dmar_free_ctx_locked(IOMMU2DMAR(iommu), IOCTX2CTX(context));
}
976 
/*
 * Generic-iommu entry point for dropping a context reference without
 * the lock held: converts the context to its DMAR type and calls
 * dmar_free_ctx().
 */
void
dmar_free_ctx_method(struct iommu_ctx *context)
{

	dmar_free_ctx(IOCTX2CTX(context));
}
985