xref: /freebsd/sys/x86/iommu/intel_ctx.c (revision f5b7695d2d5abd735064870ad43f4b9c723940c1)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2013 The FreeBSD Foundation
5  * All rights reserved.
6  *
7  * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
8  * under sponsorship from the FreeBSD Foundation.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/malloc.h>
38 #include <sys/bus.h>
39 #include <sys/interrupt.h>
40 #include <sys/kernel.h>
41 #include <sys/ktr.h>
42 #include <sys/limits.h>
43 #include <sys/lock.h>
44 #include <sys/memdesc.h>
45 #include <sys/mutex.h>
46 #include <sys/proc.h>
47 #include <sys/rwlock.h>
48 #include <sys/rman.h>
49 #include <sys/sysctl.h>
50 #include <sys/taskqueue.h>
51 #include <sys/tree.h>
52 #include <sys/uio.h>
53 #include <sys/vmem.h>
54 #include <vm/vm.h>
55 #include <vm/vm_extern.h>
56 #include <vm/vm_kern.h>
57 #include <vm/vm_object.h>
58 #include <vm/vm_page.h>
59 #include <vm/vm_pager.h>
60 #include <vm/vm_map.h>
61 #include <machine/atomic.h>
62 #include <machine/bus.h>
63 #include <machine/md_var.h>
64 #include <machine/specialreg.h>
65 #include <contrib/dev/acpica/include/acpi.h>
66 #include <contrib/dev/acpica/include/accommon.h>
67 #include <x86/include/busdma_impl.h>
68 #include <x86/iommu/intel_reg.h>
69 #include <x86/iommu/busdma_dmar.h>
70 #include <dev/pci/pcireg.h>
71 #include <x86/iommu/intel_dmar.h>
72 #include <dev/pci/pcivar.h>
73 
74 static MALLOC_DEFINE(M_DMAR_CTX, "dmar_ctx", "Intel DMAR Context");
75 static MALLOC_DEFINE(M_DMAR_DOMAIN, "dmar_dom", "Intel DMAR Domain");
76 
77 static void dmar_domain_unload_task(void *arg, int pending);
78 static void dmar_unref_domain_locked(struct dmar_unit *dmar,
79     struct dmar_domain *domain);
80 static void dmar_domain_destroy(struct dmar_domain *domain);
81 
82 static void
83 dmar_ensure_ctx_page(struct dmar_unit *dmar, int bus)
84 {
85 	struct sf_buf *sf;
86 	dmar_root_entry_t *re;
87 	vm_page_t ctxm;
88 
89 	/*
90 	 * Allocated context page must be linked.
91 	 */
92 	ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, DMAR_PGF_NOALLOC);
93 	if (ctxm != NULL)
94 		return;
95 
96 	/*
97 	 * Page not present, allocate and link.  Note that other
98 	 * thread might execute this sequence in parallel.  This
99 	 * should be safe, because the context entries written by both
100 	 * threads are equal.
101 	 */
102 	TD_PREP_PINNED_ASSERT;
103 	ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, DMAR_PGF_ZERO |
104 	    DMAR_PGF_WAITOK);
105 	re = dmar_map_pgtbl(dmar->ctx_obj, 0, DMAR_PGF_NOALLOC, &sf);
106 	re += bus;
107 	dmar_pte_store(&re->r1, DMAR_ROOT_R1_P | (DMAR_ROOT_R1_CTP_MASK &
108 	    VM_PAGE_TO_PHYS(ctxm)));
109 	dmar_flush_root_to_ram(dmar, re);
110 	dmar_unmap_pgtbl(sf);
111 	TD_PINNED_ASSERT;
112 }
113 
114 static dmar_ctx_entry_t *
115 dmar_map_ctx_entry(struct dmar_ctx *ctx, struct sf_buf **sfp)
116 {
117 	dmar_ctx_entry_t *ctxp;
118 
119 	ctxp = dmar_map_pgtbl(ctx->domain->dmar->ctx_obj, 1 +
120 	    PCI_RID2BUS(ctx->rid), DMAR_PGF_NOALLOC | DMAR_PGF_WAITOK, sfp);
121 	ctxp += ctx->rid & 0xff;
122 	return (ctxp);
123 }
124 
125 static void
126 ctx_tag_init(struct dmar_ctx *ctx, device_t dev)
127 {
128 	bus_addr_t maxaddr;
129 
130 	maxaddr = MIN(ctx->domain->end, BUS_SPACE_MAXADDR);
131 	ctx->ctx_tag.common.ref_count = 1; /* Prevent free */
132 	ctx->ctx_tag.common.impl = &bus_dma_dmar_impl;
133 	ctx->ctx_tag.common.boundary = 0;
134 	ctx->ctx_tag.common.lowaddr = maxaddr;
135 	ctx->ctx_tag.common.highaddr = maxaddr;
136 	ctx->ctx_tag.common.maxsize = maxaddr;
137 	ctx->ctx_tag.common.nsegments = BUS_SPACE_UNRESTRICTED;
138 	ctx->ctx_tag.common.maxsegsz = maxaddr;
139 	ctx->ctx_tag.ctx = ctx;
140 	ctx->ctx_tag.owner = dev;
141 }
142 
143 static void
144 ctx_id_entry_init_one(dmar_ctx_entry_t *ctxp, struct dmar_domain *domain,
145     vm_page_t ctx_root)
146 {
147 	/*
148 	 * For update due to move, the store is not atomic.  It is
149 	 * possible that DMAR read upper doubleword, while low
150 	 * doubleword is not yet updated.  The domain id is stored in
151 	 * the upper doubleword, while the table pointer in the lower.
152 	 *
153 	 * There is no good solution, for the same reason it is wrong
154 	 * to clear P bit in the ctx entry for update.
155 	 */
156 	dmar_pte_store1(&ctxp->ctx2, DMAR_CTX2_DID(domain->domain) |
157 	    domain->awlvl);
158 	if (ctx_root == NULL) {
159 		dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_PASS | DMAR_CTX1_P);
160 	} else {
161 		dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_UNTR |
162 		    (DMAR_CTX1_ASR_MASK & VM_PAGE_TO_PHYS(ctx_root)) |
163 		    DMAR_CTX1_P);
164 	}
165 }
166 
167 static void
168 ctx_id_entry_init(struct dmar_ctx *ctx, dmar_ctx_entry_t *ctxp, bool move,
169     int busno)
170 {
171 	struct dmar_unit *unit;
172 	struct dmar_domain *domain;
173 	vm_page_t ctx_root;
174 	int i;
175 
176 	domain = ctx->domain;
177 	unit = domain->dmar;
178 	KASSERT(move || (ctxp->ctx1 == 0 && ctxp->ctx2 == 0),
179 	    ("dmar%d: initialized ctx entry %d:%d:%d 0x%jx 0x%jx",
180 	    unit->unit, busno, pci_get_slot(ctx->ctx_tag.owner),
181 	    pci_get_function(ctx->ctx_tag.owner),
182 	    ctxp->ctx1, ctxp->ctx2));
183 
184 	if ((domain->flags & DMAR_DOMAIN_IDMAP) != 0 &&
185 	    (unit->hw_ecap & DMAR_ECAP_PT) != 0) {
186 		KASSERT(domain->pgtbl_obj == NULL,
187 		    ("ctx %p non-null pgtbl_obj", ctx));
188 		ctx_root = NULL;
189 	} else {
190 		ctx_root = dmar_pgalloc(domain->pgtbl_obj, 0, DMAR_PGF_NOALLOC);
191 	}
192 
193 	if (dmar_is_buswide_ctx(unit, busno)) {
194 		MPASS(!move);
195 		for (i = 0; i <= PCI_BUSMAX; i++) {
196 			ctx_id_entry_init_one(&ctxp[i], domain, ctx_root);
197 		}
198 	} else {
199 		ctx_id_entry_init_one(ctxp, domain, ctx_root);
200 	}
201 	dmar_flush_ctx_to_ram(unit, ctxp);
202 }
203 
204 static int
205 dmar_flush_for_ctx_entry(struct dmar_unit *dmar, bool force)
206 {
207 	int error;
208 
209 	/*
210 	 * If dmar declares Caching Mode as Set, follow 11.5 "Caching
211 	 * Mode Consideration" and do the (global) invalidation of the
212 	 * negative TLB entries.
213 	 */
214 	if ((dmar->hw_cap & DMAR_CAP_CM) == 0 && !force)
215 		return (0);
216 	if (dmar->qi_enabled) {
217 		dmar_qi_invalidate_ctx_glob_locked(dmar);
218 		if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force)
219 			dmar_qi_invalidate_iotlb_glob_locked(dmar);
220 		return (0);
221 	}
222 	error = dmar_inv_ctx_glob(dmar);
223 	if (error == 0 && ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force))
224 		error = dmar_inv_iotlb_glob(dmar);
225 	return (error);
226 }
227 
228 static int
229 domain_init_rmrr(struct dmar_domain *domain, device_t dev, int bus,
230     int slot, int func, int dev_domain, int dev_busno,
231     const void *dev_path, int dev_path_len)
232 {
233 	struct dmar_map_entries_tailq rmrr_entries;
234 	struct dmar_map_entry *entry, *entry1;
235 	vm_page_t *ma;
236 	dmar_gaddr_t start, end;
237 	vm_pindex_t size, i;
238 	int error, error1;
239 
240 	error = 0;
241 	TAILQ_INIT(&rmrr_entries);
242 	dmar_dev_parse_rmrr(domain, dev_domain, dev_busno, dev_path,
243 	    dev_path_len, &rmrr_entries);
244 	TAILQ_FOREACH_SAFE(entry, &rmrr_entries, unroll_link, entry1) {
245 		/*
246 		 * VT-d specification requires that the start of an
247 		 * RMRR entry is 4k-aligned.  Buggy BIOSes put
248 		 * anything into the start and end fields.  Truncate
249 		 * and round as neccesary.
250 		 *
251 		 * We also allow the overlapping RMRR entries, see
252 		 * dmar_gas_alloc_region().
253 		 */
254 		start = entry->start;
255 		end = entry->end;
256 		if (bootverbose)
257 			printf("dmar%d ctx pci%d:%d:%d RMRR [%#jx, %#jx]\n",
258 			    domain->dmar->unit, bus, slot, func,
259 			    (uintmax_t)start, (uintmax_t)end);
260 		entry->start = trunc_page(start);
261 		entry->end = round_page(end);
262 		if (entry->start == entry->end) {
263 			/* Workaround for some AMI (?) BIOSes */
264 			if (bootverbose) {
265 				if (dev != NULL)
266 					device_printf(dev, "");
267 				printf("pci%d:%d:%d ", bus, slot, func);
268 				printf("BIOS bug: dmar%d RMRR "
269 				    "region (%jx, %jx) corrected\n",
270 				    domain->dmar->unit, start, end);
271 			}
272 			entry->end += DMAR_PAGE_SIZE * 0x20;
273 		}
274 		size = OFF_TO_IDX(entry->end - entry->start);
275 		ma = malloc(sizeof(vm_page_t) * size, M_TEMP, M_WAITOK);
276 		for (i = 0; i < size; i++) {
277 			ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
278 			    VM_MEMATTR_DEFAULT);
279 		}
280 		error1 = dmar_gas_map_region(domain, entry,
281 		    DMAR_MAP_ENTRY_READ | DMAR_MAP_ENTRY_WRITE,
282 		    DMAR_GM_CANWAIT | DMAR_GM_RMRR, ma);
283 		/*
284 		 * Non-failed RMRR entries are owned by context rb
285 		 * tree.  Get rid of the failed entry, but do not stop
286 		 * the loop.  Rest of the parsed RMRR entries are
287 		 * loaded and removed on the context destruction.
288 		 */
289 		if (error1 == 0 && entry->end != entry->start) {
290 			DMAR_LOCK(domain->dmar);
291 			domain->refs++; /* XXXKIB prevent free */
292 			domain->flags |= DMAR_DOMAIN_RMRR;
293 			DMAR_UNLOCK(domain->dmar);
294 		} else {
295 			if (error1 != 0) {
296 				if (dev != NULL)
297 					device_printf(dev, "");
298 				printf("pci%d:%d:%d ", bus, slot, func);
299 				printf(
300 			    "dmar%d failed to map RMRR region (%jx, %jx) %d\n",
301 				    domain->dmar->unit, start, end,
302 				    error1);
303 				error = error1;
304 			}
305 			TAILQ_REMOVE(&rmrr_entries, entry, unroll_link);
306 			dmar_gas_free_entry(domain, entry);
307 		}
308 		for (i = 0; i < size; i++)
309 			vm_page_putfake(ma[i]);
310 		free(ma, M_TEMP);
311 	}
312 	return (error);
313 }
314 
315 static struct dmar_domain *
316 dmar_domain_alloc(struct dmar_unit *dmar, bool id_mapped)
317 {
318 	struct dmar_domain *domain;
319 	int error, id, mgaw;
320 
321 	id = alloc_unr(dmar->domids);
322 	if (id == -1)
323 		return (NULL);
324 	domain = malloc(sizeof(*domain), M_DMAR_DOMAIN, M_WAITOK | M_ZERO);
325 	domain->domain = id;
326 	LIST_INIT(&domain->contexts);
327 	RB_INIT(&domain->rb_root);
328 	TAILQ_INIT(&domain->unload_entries);
329 	TASK_INIT(&domain->unload_task, 0, dmar_domain_unload_task, domain);
330 	mtx_init(&domain->lock, "dmardom", NULL, MTX_DEF);
331 	domain->dmar = dmar;
332 
333 	/*
334 	 * For now, use the maximal usable physical address of the
335 	 * installed memory to calculate the mgaw on id_mapped domain.
336 	 * It is useful for the identity mapping, and less so for the
337 	 * virtualized bus address space.
338 	 */
339 	domain->end = id_mapped ? ptoa(Maxmem) : BUS_SPACE_MAXADDR;
340 	mgaw = dmar_maxaddr2mgaw(dmar, domain->end, !id_mapped);
341 	error = domain_set_agaw(domain, mgaw);
342 	if (error != 0)
343 		goto fail;
344 	if (!id_mapped)
345 		/* Use all supported address space for remapping. */
346 		domain->end = 1ULL << (domain->agaw - 1);
347 
348 	dmar_gas_init_domain(domain);
349 
350 	if (id_mapped) {
351 		if ((dmar->hw_ecap & DMAR_ECAP_PT) == 0) {
352 			domain->pgtbl_obj = domain_get_idmap_pgtbl(domain,
353 			    domain->end);
354 		}
355 		domain->flags |= DMAR_DOMAIN_IDMAP;
356 	} else {
357 		error = domain_alloc_pgtbl(domain);
358 		if (error != 0)
359 			goto fail;
360 		/* Disable local apic region access */
361 		error = dmar_gas_reserve_region(domain, 0xfee00000,
362 		    0xfeefffff + 1);
363 		if (error != 0)
364 			goto fail;
365 	}
366 	return (domain);
367 
368 fail:
369 	dmar_domain_destroy(domain);
370 	return (NULL);
371 }
372 
373 static struct dmar_ctx *
374 dmar_ctx_alloc(struct dmar_domain *domain, uint16_t rid)
375 {
376 	struct dmar_ctx *ctx;
377 
378 	ctx = malloc(sizeof(*ctx), M_DMAR_CTX, M_WAITOK | M_ZERO);
379 	ctx->domain = domain;
380 	ctx->rid = rid;
381 	ctx->refs = 1;
382 	return (ctx);
383 }
384 
385 static void
386 dmar_ctx_link(struct dmar_ctx *ctx)
387 {
388 	struct dmar_domain *domain;
389 
390 	domain = ctx->domain;
391 	DMAR_ASSERT_LOCKED(domain->dmar);
392 	KASSERT(domain->refs >= domain->ctx_cnt,
393 	    ("dom %p ref underflow %d %d", domain, domain->refs,
394 	    domain->ctx_cnt));
395 	domain->refs++;
396 	domain->ctx_cnt++;
397 	LIST_INSERT_HEAD(&domain->contexts, ctx, link);
398 }
399 
400 static void
401 dmar_ctx_unlink(struct dmar_ctx *ctx)
402 {
403 	struct dmar_domain *domain;
404 
405 	domain = ctx->domain;
406 	DMAR_ASSERT_LOCKED(domain->dmar);
407 	KASSERT(domain->refs > 0,
408 	    ("domain %p ctx dtr refs %d", domain, domain->refs));
409 	KASSERT(domain->ctx_cnt >= domain->refs,
410 	    ("domain %p ctx dtr refs %d ctx_cnt %d", domain,
411 	    domain->refs, domain->ctx_cnt));
412 	domain->refs--;
413 	domain->ctx_cnt--;
414 	LIST_REMOVE(ctx, link);
415 }
416 
417 static void
418 dmar_domain_destroy(struct dmar_domain *domain)
419 {
420 
421 	KASSERT(TAILQ_EMPTY(&domain->unload_entries),
422 	    ("unfinished unloads %p", domain));
423 	KASSERT(LIST_EMPTY(&domain->contexts),
424 	    ("destroying dom %p with contexts", domain));
425 	KASSERT(domain->ctx_cnt == 0,
426 	    ("destroying dom %p with ctx_cnt %d", domain, domain->ctx_cnt));
427 	KASSERT(domain->refs == 0,
428 	    ("destroying dom %p with refs %d", domain, domain->refs));
429 	if ((domain->flags & DMAR_DOMAIN_GAS_INITED) != 0) {
430 		DMAR_DOMAIN_LOCK(domain);
431 		dmar_gas_fini_domain(domain);
432 		DMAR_DOMAIN_UNLOCK(domain);
433 	}
434 	if ((domain->flags & DMAR_DOMAIN_PGTBL_INITED) != 0) {
435 		if (domain->pgtbl_obj != NULL)
436 			DMAR_DOMAIN_PGLOCK(domain);
437 		domain_free_pgtbl(domain);
438 	}
439 	mtx_destroy(&domain->lock);
440 	free_unr(domain->dmar->domids, domain->domain);
441 	free(domain, M_DMAR_DOMAIN);
442 }
443 
444 static struct dmar_ctx *
445 dmar_get_ctx_for_dev1(struct dmar_unit *dmar, device_t dev, uint16_t rid,
446     int dev_domain, int dev_busno, const void *dev_path, int dev_path_len,
447     bool id_mapped, bool rmrr_init)
448 {
449 	struct dmar_domain *domain, *domain1;
450 	struct dmar_ctx *ctx, *ctx1;
451 	dmar_ctx_entry_t *ctxp;
452 	struct sf_buf *sf;
453 	int bus, slot, func, error;
454 	bool enable;
455 
456 	if (dev != NULL) {
457 		bus = pci_get_bus(dev);
458 		slot = pci_get_slot(dev);
459 		func = pci_get_function(dev);
460 	} else {
461 		bus = PCI_RID2BUS(rid);
462 		slot = PCI_RID2SLOT(rid);
463 		func = PCI_RID2FUNC(rid);
464 	}
465 	enable = false;
466 	TD_PREP_PINNED_ASSERT;
467 	DMAR_LOCK(dmar);
468 	KASSERT(!dmar_is_buswide_ctx(dmar, bus) || (slot == 0 && func == 0),
469 	    ("dmar%d pci%d:%d:%d get_ctx for buswide", dmar->unit, bus,
470 	    slot, func));
471 	ctx = dmar_find_ctx_locked(dmar, rid);
472 	error = 0;
473 	if (ctx == NULL) {
474 		/*
475 		 * Perform the allocations which require sleep or have
476 		 * higher chance to succeed if the sleep is allowed.
477 		 */
478 		DMAR_UNLOCK(dmar);
479 		dmar_ensure_ctx_page(dmar, PCI_RID2BUS(rid));
480 		domain1 = dmar_domain_alloc(dmar, id_mapped);
481 		if (domain1 == NULL) {
482 			TD_PINNED_ASSERT;
483 			return (NULL);
484 		}
485 		if (!id_mapped) {
486 			error = domain_init_rmrr(domain1, dev, bus,
487 			    slot, func, dev_domain, dev_busno, dev_path,
488 			    dev_path_len);
489 			if (error != 0) {
490 				dmar_domain_destroy(domain1);
491 				TD_PINNED_ASSERT;
492 				return (NULL);
493 			}
494 		}
495 		ctx1 = dmar_ctx_alloc(domain1, rid);
496 		ctxp = dmar_map_ctx_entry(ctx1, &sf);
497 		DMAR_LOCK(dmar);
498 
499 		/*
500 		 * Recheck the contexts, other thread might have
501 		 * already allocated needed one.
502 		 */
503 		ctx = dmar_find_ctx_locked(dmar, rid);
504 		if (ctx == NULL) {
505 			domain = domain1;
506 			ctx = ctx1;
507 			dmar_ctx_link(ctx);
508 			ctx->ctx_tag.owner = dev;
509 			ctx_tag_init(ctx, dev);
510 
511 			/*
512 			 * This is the first activated context for the
513 			 * DMAR unit.  Enable the translation after
514 			 * everything is set up.
515 			 */
516 			if (LIST_EMPTY(&dmar->domains))
517 				enable = true;
518 			LIST_INSERT_HEAD(&dmar->domains, domain, link);
519 			ctx_id_entry_init(ctx, ctxp, false, bus);
520 			if (dev != NULL) {
521 				device_printf(dev,
522 			    "dmar%d pci%d:%d:%d:%d rid %x domain %d mgaw %d "
523 				    "agaw %d %s-mapped\n",
524 				    dmar->unit, dmar->segment, bus, slot,
525 				    func, rid, domain->domain, domain->mgaw,
526 				    domain->agaw, id_mapped ? "id" : "re");
527 			}
528 			dmar_unmap_pgtbl(sf);
529 		} else {
530 			dmar_unmap_pgtbl(sf);
531 			dmar_domain_destroy(domain1);
532 			/* Nothing needs to be done to destroy ctx1. */
533 			free(ctx1, M_DMAR_CTX);
534 			domain = ctx->domain;
535 			ctx->refs++; /* tag referenced us */
536 		}
537 	} else {
538 		domain = ctx->domain;
539 		if (ctx->ctx_tag.owner == NULL)
540 			ctx->ctx_tag.owner = dev;
541 		ctx->refs++; /* tag referenced us */
542 	}
543 
544 	error = dmar_flush_for_ctx_entry(dmar, enable);
545 	if (error != 0) {
546 		dmar_free_ctx_locked(dmar, ctx);
547 		TD_PINNED_ASSERT;
548 		return (NULL);
549 	}
550 
551 	/*
552 	 * The dmar lock was potentially dropped between check for the
553 	 * empty context list and now.  Recheck the state of GCMD_TE
554 	 * to avoid unneeded command.
555 	 */
556 	if (enable && !rmrr_init && (dmar->hw_gcmd & DMAR_GCMD_TE) == 0) {
557 		error = dmar_enable_translation(dmar);
558 		if (error == 0) {
559 			if (bootverbose) {
560 				printf("dmar%d: enabled translation\n",
561 				    dmar->unit);
562 			}
563 		} else {
564 			printf("dmar%d: enabling translation failed, "
565 			    "error %d\n", dmar->unit, error);
566 			dmar_free_ctx_locked(dmar, ctx);
567 			TD_PINNED_ASSERT;
568 			return (NULL);
569 		}
570 	}
571 	DMAR_UNLOCK(dmar);
572 	TD_PINNED_ASSERT;
573 	return (ctx);
574 }
575 
576 struct dmar_ctx *
577 dmar_get_ctx_for_dev(struct dmar_unit *dmar, device_t dev, uint16_t rid,
578     bool id_mapped, bool rmrr_init)
579 {
580 	int dev_domain, dev_path_len, dev_busno;
581 
582 	dev_domain = pci_get_domain(dev);
583 	dev_path_len = dmar_dev_depth(dev);
584 	ACPI_DMAR_PCI_PATH dev_path[dev_path_len];
585 	dmar_dev_path(dev, &dev_busno, dev_path, dev_path_len);
586 	return (dmar_get_ctx_for_dev1(dmar, dev, rid, dev_domain, dev_busno,
587 	    dev_path, dev_path_len, id_mapped, rmrr_init));
588 }
589 
590 struct dmar_ctx *
591 dmar_get_ctx_for_devpath(struct dmar_unit *dmar, uint16_t rid,
592     int dev_domain, int dev_busno,
593     const void *dev_path, int dev_path_len,
594     bool id_mapped, bool rmrr_init)
595 {
596 
597 	return (dmar_get_ctx_for_dev1(dmar, NULL, rid, dev_domain, dev_busno,
598 	    dev_path, dev_path_len, id_mapped, rmrr_init));
599 }
600 
601 int
602 dmar_move_ctx_to_domain(struct dmar_domain *domain, struct dmar_ctx *ctx)
603 {
604 	struct dmar_unit *dmar;
605 	struct dmar_domain *old_domain;
606 	dmar_ctx_entry_t *ctxp;
607 	struct sf_buf *sf;
608 	int error;
609 
610 	dmar = domain->dmar;
611 	old_domain = ctx->domain;
612 	if (domain == old_domain)
613 		return (0);
614 	KASSERT(old_domain->dmar == dmar,
615 	    ("domain %p %u moving between dmars %u %u", domain,
616 	    domain->domain, old_domain->dmar->unit, domain->dmar->unit));
617 	TD_PREP_PINNED_ASSERT;
618 
619 	ctxp = dmar_map_ctx_entry(ctx, &sf);
620 	DMAR_LOCK(dmar);
621 	dmar_ctx_unlink(ctx);
622 	ctx->domain = domain;
623 	dmar_ctx_link(ctx);
624 	ctx_id_entry_init(ctx, ctxp, true, PCI_BUSMAX + 100);
625 	dmar_unmap_pgtbl(sf);
626 	error = dmar_flush_for_ctx_entry(dmar, true);
627 	/* If flush failed, rolling back would not work as well. */
628 	printf("dmar%d rid %x domain %d->%d %s-mapped\n",
629 	    dmar->unit, ctx->rid, old_domain->domain, domain->domain,
630 	    (domain->flags & DMAR_DOMAIN_IDMAP) != 0 ? "id" : "re");
631 	dmar_unref_domain_locked(dmar, old_domain);
632 	TD_PINNED_ASSERT;
633 	return (error);
634 }
635 
636 static void
637 dmar_unref_domain_locked(struct dmar_unit *dmar, struct dmar_domain *domain)
638 {
639 
640 	DMAR_ASSERT_LOCKED(dmar);
641 	KASSERT(domain->refs >= 1,
642 	    ("dmar %d domain %p refs %u", dmar->unit, domain, domain->refs));
643 	KASSERT(domain->refs > domain->ctx_cnt,
644 	    ("dmar %d domain %p refs %d ctx_cnt %d", dmar->unit, domain,
645 	    domain->refs, domain->ctx_cnt));
646 
647 	if (domain->refs > 1) {
648 		domain->refs--;
649 		DMAR_UNLOCK(dmar);
650 		return;
651 	}
652 
653 	KASSERT((domain->flags & DMAR_DOMAIN_RMRR) == 0,
654 	    ("lost ref on RMRR domain %p", domain));
655 
656 	LIST_REMOVE(domain, link);
657 	DMAR_UNLOCK(dmar);
658 
659 	taskqueue_drain(dmar->delayed_taskqueue, &domain->unload_task);
660 	dmar_domain_destroy(domain);
661 }
662 
663 void
664 dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx)
665 {
666 	struct sf_buf *sf;
667 	dmar_ctx_entry_t *ctxp;
668 	struct dmar_domain *domain;
669 
670 	DMAR_ASSERT_LOCKED(dmar);
671 	KASSERT(ctx->refs >= 1,
672 	    ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs));
673 
674 	/*
675 	 * If our reference is not last, only the dereference should
676 	 * be performed.
677 	 */
678 	if (ctx->refs > 1) {
679 		ctx->refs--;
680 		DMAR_UNLOCK(dmar);
681 		return;
682 	}
683 
684 	KASSERT((ctx->flags & DMAR_CTX_DISABLED) == 0,
685 	    ("lost ref on disabled ctx %p", ctx));
686 
687 	/*
688 	 * Otherwise, the context entry must be cleared before the
689 	 * page table is destroyed.  The mapping of the context
690 	 * entries page could require sleep, unlock the dmar.
691 	 */
692 	DMAR_UNLOCK(dmar);
693 	TD_PREP_PINNED_ASSERT;
694 	ctxp = dmar_map_ctx_entry(ctx, &sf);
695 	DMAR_LOCK(dmar);
696 	KASSERT(ctx->refs >= 1,
697 	    ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs));
698 
699 	/*
700 	 * Other thread might have referenced the context, in which
701 	 * case again only the dereference should be performed.
702 	 */
703 	if (ctx->refs > 1) {
704 		ctx->refs--;
705 		DMAR_UNLOCK(dmar);
706 		dmar_unmap_pgtbl(sf);
707 		TD_PINNED_ASSERT;
708 		return;
709 	}
710 
711 	KASSERT((ctx->flags & DMAR_CTX_DISABLED) == 0,
712 	    ("lost ref on disabled ctx %p", ctx));
713 
714 	/*
715 	 * Clear the context pointer and flush the caches.
716 	 * XXXKIB: cannot do this if any RMRR entries are still present.
717 	 */
718 	dmar_pte_clear(&ctxp->ctx1);
719 	ctxp->ctx2 = 0;
720 	dmar_flush_ctx_to_ram(dmar, ctxp);
721 	dmar_inv_ctx_glob(dmar);
722 	if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0) {
723 		if (dmar->qi_enabled)
724 			dmar_qi_invalidate_iotlb_glob_locked(dmar);
725 		else
726 			dmar_inv_iotlb_glob(dmar);
727 	}
728 	dmar_unmap_pgtbl(sf);
729 	domain = ctx->domain;
730 	dmar_ctx_unlink(ctx);
731 	free(ctx, M_DMAR_CTX);
732 	dmar_unref_domain_locked(dmar, domain);
733 	TD_PINNED_ASSERT;
734 }
735 
736 void
737 dmar_free_ctx(struct dmar_ctx *ctx)
738 {
739 	struct dmar_unit *dmar;
740 
741 	dmar = ctx->domain->dmar;
742 	DMAR_LOCK(dmar);
743 	dmar_free_ctx_locked(dmar, ctx);
744 }
745 
746 /*
747  * Returns with the domain locked.
748  */
749 struct dmar_ctx *
750 dmar_find_ctx_locked(struct dmar_unit *dmar, uint16_t rid)
751 {
752 	struct dmar_domain *domain;
753 	struct dmar_ctx *ctx;
754 
755 	DMAR_ASSERT_LOCKED(dmar);
756 
757 	LIST_FOREACH(domain, &dmar->domains, link) {
758 		LIST_FOREACH(ctx, &domain->contexts, link) {
759 			if (ctx->rid == rid)
760 				return (ctx);
761 		}
762 	}
763 	return (NULL);
764 }
765 
766 void
767 dmar_domain_free_entry(struct dmar_map_entry *entry, bool free)
768 {
769 	struct dmar_domain *domain;
770 
771 	domain = entry->domain;
772 	DMAR_DOMAIN_LOCK(domain);
773 	if ((entry->flags & DMAR_MAP_ENTRY_RMRR) != 0)
774 		dmar_gas_free_region(domain, entry);
775 	else
776 		dmar_gas_free_space(domain, entry);
777 	DMAR_DOMAIN_UNLOCK(domain);
778 	if (free)
779 		dmar_gas_free_entry(domain, entry);
780 	else
781 		entry->flags = 0;
782 }
783 
784 void
785 dmar_domain_unload_entry(struct dmar_map_entry *entry, bool free)
786 {
787 	struct dmar_unit *unit;
788 
789 	unit = entry->domain->dmar;
790 	if (unit->qi_enabled) {
791 		DMAR_LOCK(unit);
792 		dmar_qi_invalidate_locked(entry->domain, entry->start,
793 		    entry->end - entry->start, &entry->gseq, true);
794 		if (!free)
795 			entry->flags |= DMAR_MAP_ENTRY_QI_NF;
796 		TAILQ_INSERT_TAIL(&unit->tlb_flush_entries, entry, dmamap_link);
797 		DMAR_UNLOCK(unit);
798 	} else {
799 		domain_flush_iotlb_sync(entry->domain, entry->start,
800 		    entry->end - entry->start);
801 		dmar_domain_free_entry(entry, free);
802 	}
803 }
804 
805 static bool
806 dmar_domain_unload_emit_wait(struct dmar_domain *domain,
807     struct dmar_map_entry *entry)
808 {
809 
810 	if (TAILQ_NEXT(entry, dmamap_link) == NULL)
811 		return (true);
812 	return (domain->batch_no++ % dmar_batch_coalesce == 0);
813 }
814 
815 void
816 dmar_domain_unload(struct dmar_domain *domain,
817     struct dmar_map_entries_tailq *entries, bool cansleep)
818 {
819 	struct dmar_unit *unit;
820 	struct dmar_map_entry *entry, *entry1;
821 	int error;
822 
823 	unit = domain->dmar;
824 
825 	TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) {
826 		KASSERT((entry->flags & DMAR_MAP_ENTRY_MAP) != 0,
827 		    ("not mapped entry %p %p", domain, entry));
828 		error = domain_unmap_buf(domain, entry->start, entry->end -
829 		    entry->start, cansleep ? DMAR_PGF_WAITOK : 0);
830 		KASSERT(error == 0, ("unmap %p error %d", domain, error));
831 		if (!unit->qi_enabled) {
832 			domain_flush_iotlb_sync(domain, entry->start,
833 			    entry->end - entry->start);
834 			TAILQ_REMOVE(entries, entry, dmamap_link);
835 			dmar_domain_free_entry(entry, true);
836 		}
837 	}
838 	if (TAILQ_EMPTY(entries))
839 		return;
840 
841 	KASSERT(unit->qi_enabled, ("loaded entry left"));
842 	DMAR_LOCK(unit);
843 	TAILQ_FOREACH(entry, entries, dmamap_link) {
844 		dmar_qi_invalidate_locked(domain, entry->start, entry->end -
845 		    entry->start, &entry->gseq,
846 		    dmar_domain_unload_emit_wait(domain, entry));
847 	}
848 	TAILQ_CONCAT(&unit->tlb_flush_entries, entries, dmamap_link);
849 	DMAR_UNLOCK(unit);
850 }
851 
852 static void
853 dmar_domain_unload_task(void *arg, int pending)
854 {
855 	struct dmar_domain *domain;
856 	struct dmar_map_entries_tailq entries;
857 
858 	domain = arg;
859 	TAILQ_INIT(&entries);
860 
861 	for (;;) {
862 		DMAR_DOMAIN_LOCK(domain);
863 		TAILQ_SWAP(&domain->unload_entries, &entries, dmar_map_entry,
864 		    dmamap_link);
865 		DMAR_DOMAIN_UNLOCK(domain);
866 		if (TAILQ_EMPTY(&entries))
867 			break;
868 		dmar_domain_unload(domain, &entries, true);
869 	}
870 }
871