xref: /freebsd/sys/x86/iommu/intel_ctx.c (revision 4f52dfbb8d6c4d446500c5b097e3806ec219fbd4)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2013 The FreeBSD Foundation
5  * All rights reserved.
6  *
7  * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
8  * under sponsorship from the FreeBSD Foundation.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/malloc.h>
38 #include <sys/bus.h>
39 #include <sys/interrupt.h>
40 #include <sys/kernel.h>
41 #include <sys/ktr.h>
42 #include <sys/limits.h>
43 #include <sys/lock.h>
44 #include <sys/memdesc.h>
45 #include <sys/mutex.h>
46 #include <sys/proc.h>
47 #include <sys/rwlock.h>
48 #include <sys/rman.h>
49 #include <sys/sysctl.h>
50 #include <sys/taskqueue.h>
51 #include <sys/tree.h>
52 #include <sys/uio.h>
53 #include <sys/vmem.h>
54 #include <vm/vm.h>
55 #include <vm/vm_extern.h>
56 #include <vm/vm_kern.h>
57 #include <vm/vm_object.h>
58 #include <vm/vm_page.h>
59 #include <vm/vm_pager.h>
60 #include <vm/vm_map.h>
61 #include <machine/atomic.h>
62 #include <machine/bus.h>
63 #include <machine/md_var.h>
64 #include <machine/specialreg.h>
65 #include <x86/include/busdma_impl.h>
66 #include <x86/iommu/intel_reg.h>
67 #include <x86/iommu/busdma_dmar.h>
68 #include <x86/iommu/intel_dmar.h>
69 #include <dev/pci/pcireg.h>
70 #include <dev/pci/pcivar.h>
71 
72 static MALLOC_DEFINE(M_DMAR_CTX, "dmar_ctx", "Intel DMAR Context");
73 static MALLOC_DEFINE(M_DMAR_DOMAIN, "dmar_dom", "Intel DMAR Domain");
74 
75 static void dmar_domain_unload_task(void *arg, int pending);
76 static void dmar_unref_domain_locked(struct dmar_unit *dmar,
77     struct dmar_domain *domain);
78 static void dmar_domain_destroy(struct dmar_domain *domain);
79 
80 static void
81 dmar_ensure_ctx_page(struct dmar_unit *dmar, int bus)
82 {
83 	struct sf_buf *sf;
84 	dmar_root_entry_t *re;
85 	vm_page_t ctxm;
86 
87 	/*
88 	 * Allocated context page must be linked.
89 	 */
90 	ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, DMAR_PGF_NOALLOC);
91 	if (ctxm != NULL)
92 		return;
93 
94 	/*
95 	 * Page not present, allocate and link.  Note that other
96 	 * thread might execute this sequence in parallel.  This
97 	 * should be safe, because the context entries written by both
98 	 * threads are equal.
99 	 */
100 	TD_PREP_PINNED_ASSERT;
101 	ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, DMAR_PGF_ZERO |
102 	    DMAR_PGF_WAITOK);
103 	re = dmar_map_pgtbl(dmar->ctx_obj, 0, DMAR_PGF_NOALLOC, &sf);
104 	re += bus;
105 	dmar_pte_store(&re->r1, DMAR_ROOT_R1_P | (DMAR_ROOT_R1_CTP_MASK &
106 	    VM_PAGE_TO_PHYS(ctxm)));
107 	dmar_flush_root_to_ram(dmar, re);
108 	dmar_unmap_pgtbl(sf);
109 	TD_PINNED_ASSERT;
110 }
111 
112 static dmar_ctx_entry_t *
113 dmar_map_ctx_entry(struct dmar_ctx *ctx, struct sf_buf **sfp)
114 {
115 	dmar_ctx_entry_t *ctxp;
116 
117 	ctxp = dmar_map_pgtbl(ctx->domain->dmar->ctx_obj, 1 +
118 	    PCI_RID2BUS(ctx->rid), DMAR_PGF_NOALLOC | DMAR_PGF_WAITOK, sfp);
119 	ctxp += ctx->rid & 0xff;
120 	return (ctxp);
121 }
122 
123 static void
124 ctx_tag_init(struct dmar_ctx *ctx, device_t dev)
125 {
126 	bus_addr_t maxaddr;
127 
128 	maxaddr = MIN(ctx->domain->end, BUS_SPACE_MAXADDR);
129 	ctx->ctx_tag.common.ref_count = 1; /* Prevent free */
130 	ctx->ctx_tag.common.impl = &bus_dma_dmar_impl;
131 	ctx->ctx_tag.common.boundary = PCI_DMA_BOUNDARY;
132 	ctx->ctx_tag.common.lowaddr = maxaddr;
133 	ctx->ctx_tag.common.highaddr = maxaddr;
134 	ctx->ctx_tag.common.maxsize = maxaddr;
135 	ctx->ctx_tag.common.nsegments = BUS_SPACE_UNRESTRICTED;
136 	ctx->ctx_tag.common.maxsegsz = maxaddr;
137 	ctx->ctx_tag.ctx = ctx;
138 	ctx->ctx_tag.owner = dev;
139 }
140 
141 static void
142 ctx_id_entry_init(struct dmar_ctx *ctx, dmar_ctx_entry_t *ctxp, bool move)
143 {
144 	struct dmar_unit *unit;
145 	struct dmar_domain *domain;
146 	vm_page_t ctx_root;
147 
148 	domain = ctx->domain;
149 	unit = domain->dmar;
150 	KASSERT(move || (ctxp->ctx1 == 0 && ctxp->ctx2 == 0),
151 	    ("dmar%d: initialized ctx entry %d:%d:%d 0x%jx 0x%jx",
152 	    unit->unit, pci_get_bus(ctx->ctx_tag.owner),
153 	    pci_get_slot(ctx->ctx_tag.owner),
154 	    pci_get_function(ctx->ctx_tag.owner),
155 	    ctxp->ctx1, ctxp->ctx2));
156 	/*
157 	 * For update due to move, the store is not atomic.  It is
158 	 * possible that DMAR read upper doubleword, while low
159 	 * doubleword is not yet updated.  The domain id is stored in
160 	 * the upper doubleword, while the table pointer in the lower.
161 	 *
162 	 * There is no good solution, for the same reason it is wrong
163 	 * to clear P bit in the ctx entry for update.
164 	 */
165 	dmar_pte_store1(&ctxp->ctx2, DMAR_CTX2_DID(domain->domain) |
166 	    domain->awlvl);
167 	if ((domain->flags & DMAR_DOMAIN_IDMAP) != 0 &&
168 	    (unit->hw_ecap & DMAR_ECAP_PT) != 0) {
169 		KASSERT(domain->pgtbl_obj == NULL,
170 		    ("ctx %p non-null pgtbl_obj", ctx));
171 		dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_PASS | DMAR_CTX1_P);
172 	} else {
173 		ctx_root = dmar_pgalloc(domain->pgtbl_obj, 0, DMAR_PGF_NOALLOC);
174 		dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_UNTR |
175 		    (DMAR_CTX1_ASR_MASK & VM_PAGE_TO_PHYS(ctx_root)) |
176 		    DMAR_CTX1_P);
177 	}
178 	dmar_flush_ctx_to_ram(unit, ctxp);
179 }
180 
181 static int
182 dmar_flush_for_ctx_entry(struct dmar_unit *dmar, bool force)
183 {
184 	int error;
185 
186 	/*
187 	 * If dmar declares Caching Mode as Set, follow 11.5 "Caching
188 	 * Mode Consideration" and do the (global) invalidation of the
189 	 * negative TLB entries.
190 	 */
191 	if ((dmar->hw_cap & DMAR_CAP_CM) == 0 && !force)
192 		return (0);
193 	if (dmar->qi_enabled) {
194 		dmar_qi_invalidate_ctx_glob_locked(dmar);
195 		if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force)
196 			dmar_qi_invalidate_iotlb_glob_locked(dmar);
197 		return (0);
198 	}
199 	error = dmar_inv_ctx_glob(dmar);
200 	if (error == 0 && ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force))
201 		error = dmar_inv_iotlb_glob(dmar);
202 	return (error);
203 }
204 
205 static int
206 domain_init_rmrr(struct dmar_domain *domain, device_t dev)
207 {
208 	struct dmar_map_entries_tailq rmrr_entries;
209 	struct dmar_map_entry *entry, *entry1;
210 	vm_page_t *ma;
211 	dmar_gaddr_t start, end;
212 	vm_pindex_t size, i;
213 	int error, error1;
214 
215 	error = 0;
216 	TAILQ_INIT(&rmrr_entries);
217 	dmar_dev_parse_rmrr(domain, dev, &rmrr_entries);
218 	TAILQ_FOREACH_SAFE(entry, &rmrr_entries, unroll_link, entry1) {
219 		/*
220 		 * VT-d specification requires that the start of an
221 		 * RMRR entry is 4k-aligned.  Buggy BIOSes put
222 		 * anything into the start and end fields.  Truncate
223 		 * and round as neccesary.
224 		 *
225 		 * We also allow the overlapping RMRR entries, see
226 		 * dmar_gas_alloc_region().
227 		 */
228 		start = entry->start;
229 		end = entry->end;
230 		entry->start = trunc_page(start);
231 		entry->end = round_page(end);
232 		if (entry->start == entry->end) {
233 			/* Workaround for some AMI (?) BIOSes */
234 			if (bootverbose) {
235 				device_printf(dev, "BIOS bug: dmar%d RMRR "
236 				    "region (%jx, %jx) corrected\n",
237 				    domain->dmar->unit, start, end);
238 			}
239 			entry->end += DMAR_PAGE_SIZE * 0x20;
240 		}
241 		size = OFF_TO_IDX(entry->end - entry->start);
242 		ma = malloc(sizeof(vm_page_t) * size, M_TEMP, M_WAITOK);
243 		for (i = 0; i < size; i++) {
244 			ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
245 			    VM_MEMATTR_DEFAULT);
246 		}
247 		error1 = dmar_gas_map_region(domain, entry,
248 		    DMAR_MAP_ENTRY_READ | DMAR_MAP_ENTRY_WRITE,
249 		    DMAR_GM_CANWAIT, ma);
250 		/*
251 		 * Non-failed RMRR entries are owned by context rb
252 		 * tree.  Get rid of the failed entry, but do not stop
253 		 * the loop.  Rest of the parsed RMRR entries are
254 		 * loaded and removed on the context destruction.
255 		 */
256 		if (error1 == 0 && entry->end != entry->start) {
257 			DMAR_LOCK(domain->dmar);
258 			domain->refs++; /* XXXKIB prevent free */
259 			domain->flags |= DMAR_DOMAIN_RMRR;
260 			DMAR_UNLOCK(domain->dmar);
261 		} else {
262 			if (error1 != 0) {
263 				device_printf(dev,
264 			    "dmar%d failed to map RMRR region (%jx, %jx) %d\n",
265 				    domain->dmar->unit, start, end, error1);
266 				error = error1;
267 			}
268 			TAILQ_REMOVE(&rmrr_entries, entry, unroll_link);
269 			dmar_gas_free_entry(domain, entry);
270 		}
271 		for (i = 0; i < size; i++)
272 			vm_page_putfake(ma[i]);
273 		free(ma, M_TEMP);
274 	}
275 	return (error);
276 }
277 
278 static struct dmar_domain *
279 dmar_domain_alloc(struct dmar_unit *dmar, bool id_mapped)
280 {
281 	struct dmar_domain *domain;
282 	int error, id, mgaw;
283 
284 	id = alloc_unr(dmar->domids);
285 	if (id == -1)
286 		return (NULL);
287 	domain = malloc(sizeof(*domain), M_DMAR_DOMAIN, M_WAITOK | M_ZERO);
288 	domain->domain = id;
289 	LIST_INIT(&domain->contexts);
290 	RB_INIT(&domain->rb_root);
291 	TAILQ_INIT(&domain->unload_entries);
292 	TASK_INIT(&domain->unload_task, 0, dmar_domain_unload_task, domain);
293 	mtx_init(&domain->lock, "dmardom", NULL, MTX_DEF);
294 	domain->dmar = dmar;
295 
296 	/*
297 	 * For now, use the maximal usable physical address of the
298 	 * installed memory to calculate the mgaw on id_mapped domain.
299 	 * It is useful for the identity mapping, and less so for the
300 	 * virtualized bus address space.
301 	 */
302 	domain->end = id_mapped ? ptoa(Maxmem) : BUS_SPACE_MAXADDR;
303 	mgaw = dmar_maxaddr2mgaw(dmar, domain->end, !id_mapped);
304 	error = domain_set_agaw(domain, mgaw);
305 	if (error != 0)
306 		goto fail;
307 	if (!id_mapped)
308 		/* Use all supported address space for remapping. */
309 		domain->end = 1ULL << (domain->agaw - 1);
310 
311 	dmar_gas_init_domain(domain);
312 
313 	if (id_mapped) {
314 		if ((dmar->hw_ecap & DMAR_ECAP_PT) == 0) {
315 			domain->pgtbl_obj = domain_get_idmap_pgtbl(domain,
316 			    domain->end);
317 		}
318 		domain->flags |= DMAR_DOMAIN_IDMAP;
319 	} else {
320 		error = domain_alloc_pgtbl(domain);
321 		if (error != 0)
322 			goto fail;
323 		/* Disable local apic region access */
324 		error = dmar_gas_reserve_region(domain, 0xfee00000,
325 		    0xfeefffff + 1);
326 		if (error != 0)
327 			goto fail;
328 	}
329 	return (domain);
330 
331 fail:
332 	dmar_domain_destroy(domain);
333 	return (NULL);
334 }
335 
336 static struct dmar_ctx *
337 dmar_ctx_alloc(struct dmar_domain *domain, uint16_t rid)
338 {
339 	struct dmar_ctx *ctx;
340 
341 	ctx = malloc(sizeof(*ctx), M_DMAR_CTX, M_WAITOK | M_ZERO);
342 	ctx->domain = domain;
343 	ctx->rid = rid;
344 	ctx->refs = 1;
345 	return (ctx);
346 }
347 
348 static void
349 dmar_ctx_link(struct dmar_ctx *ctx)
350 {
351 	struct dmar_domain *domain;
352 
353 	domain = ctx->domain;
354 	DMAR_ASSERT_LOCKED(domain->dmar);
355 	KASSERT(domain->refs >= domain->ctx_cnt,
356 	    ("dom %p ref underflow %d %d", domain, domain->refs,
357 	    domain->ctx_cnt));
358 	domain->refs++;
359 	domain->ctx_cnt++;
360 	LIST_INSERT_HEAD(&domain->contexts, ctx, link);
361 }
362 
363 static void
364 dmar_ctx_unlink(struct dmar_ctx *ctx)
365 {
366 	struct dmar_domain *domain;
367 
368 	domain = ctx->domain;
369 	DMAR_ASSERT_LOCKED(domain->dmar);
370 	KASSERT(domain->refs > 0,
371 	    ("domain %p ctx dtr refs %d", domain, domain->refs));
372 	KASSERT(domain->ctx_cnt >= domain->refs,
373 	    ("domain %p ctx dtr refs %d ctx_cnt %d", domain,
374 	    domain->refs, domain->ctx_cnt));
375 	domain->refs--;
376 	domain->ctx_cnt--;
377 	LIST_REMOVE(ctx, link);
378 }
379 
380 static void
381 dmar_domain_destroy(struct dmar_domain *domain)
382 {
383 
384 	KASSERT(TAILQ_EMPTY(&domain->unload_entries),
385 	    ("unfinished unloads %p", domain));
386 	KASSERT(LIST_EMPTY(&domain->contexts),
387 	    ("destroying dom %p with contexts", domain));
388 	KASSERT(domain->ctx_cnt == 0,
389 	    ("destroying dom %p with ctx_cnt %d", domain, domain->ctx_cnt));
390 	KASSERT(domain->refs == 0,
391 	    ("destroying dom %p with refs %d", domain, domain->refs));
392 	if ((domain->flags & DMAR_DOMAIN_GAS_INITED) != 0) {
393 		DMAR_DOMAIN_LOCK(domain);
394 		dmar_gas_fini_domain(domain);
395 		DMAR_DOMAIN_UNLOCK(domain);
396 	}
397 	if ((domain->flags & DMAR_DOMAIN_PGTBL_INITED) != 0) {
398 		if (domain->pgtbl_obj != NULL)
399 			DMAR_DOMAIN_PGLOCK(domain);
400 		domain_free_pgtbl(domain);
401 	}
402 	mtx_destroy(&domain->lock);
403 	free_unr(domain->dmar->domids, domain->domain);
404 	free(domain, M_DMAR_DOMAIN);
405 }
406 
407 struct dmar_ctx *
408 dmar_get_ctx_for_dev(struct dmar_unit *dmar, device_t dev, uint16_t rid,
409     bool id_mapped, bool rmrr_init)
410 {
411 	struct dmar_domain *domain, *domain1;
412 	struct dmar_ctx *ctx, *ctx1;
413 	dmar_ctx_entry_t *ctxp;
414 	struct sf_buf *sf;
415 	int bus, slot, func, error;
416 	bool enable;
417 
418 	bus = pci_get_bus(dev);
419 	slot = pci_get_slot(dev);
420 	func = pci_get_function(dev);
421 	enable = false;
422 	TD_PREP_PINNED_ASSERT;
423 	DMAR_LOCK(dmar);
424 	ctx = dmar_find_ctx_locked(dmar, rid);
425 	error = 0;
426 	if (ctx == NULL) {
427 		/*
428 		 * Perform the allocations which require sleep or have
429 		 * higher chance to succeed if the sleep is allowed.
430 		 */
431 		DMAR_UNLOCK(dmar);
432 		dmar_ensure_ctx_page(dmar, PCI_RID2BUS(rid));
433 		domain1 = dmar_domain_alloc(dmar, id_mapped);
434 		if (domain1 == NULL) {
435 			TD_PINNED_ASSERT;
436 			return (NULL);
437 		}
438 		if (!id_mapped) {
439 			error = domain_init_rmrr(domain1, dev);
440 			if (error != 0) {
441 				dmar_domain_destroy(domain1);
442 				TD_PINNED_ASSERT;
443 				return (NULL);
444 			}
445 		}
446 		ctx1 = dmar_ctx_alloc(domain1, rid);
447 		ctxp = dmar_map_ctx_entry(ctx1, &sf);
448 		DMAR_LOCK(dmar);
449 
450 		/*
451 		 * Recheck the contexts, other thread might have
452 		 * already allocated needed one.
453 		 */
454 		ctx = dmar_find_ctx_locked(dmar, rid);
455 		if (ctx == NULL) {
456 			domain = domain1;
457 			ctx = ctx1;
458 			dmar_ctx_link(ctx);
459 			ctx->ctx_tag.owner = dev;
460 			ctx_tag_init(ctx, dev);
461 
462 			/*
463 			 * This is the first activated context for the
464 			 * DMAR unit.  Enable the translation after
465 			 * everything is set up.
466 			 */
467 			if (LIST_EMPTY(&dmar->domains))
468 				enable = true;
469 			LIST_INSERT_HEAD(&dmar->domains, domain, link);
470 			ctx_id_entry_init(ctx, ctxp, false);
471 			device_printf(dev,
472 			    "dmar%d pci%d:%d:%d:%d rid %x domain %d mgaw %d "
473 			    "agaw %d %s-mapped\n",
474 			    dmar->unit, dmar->segment, bus, slot,
475 			    func, rid, domain->domain, domain->mgaw,
476 			    domain->agaw, id_mapped ? "id" : "re");
477 			dmar_unmap_pgtbl(sf);
478 		} else {
479 			dmar_unmap_pgtbl(sf);
480 			dmar_domain_destroy(domain1);
481 			/* Nothing needs to be done to destroy ctx1. */
482 			free(ctx1, M_DMAR_CTX);
483 			domain = ctx->domain;
484 			ctx->refs++; /* tag referenced us */
485 		}
486 	} else {
487 		domain = ctx->domain;
488 		ctx->refs++; /* tag referenced us */
489 	}
490 
491 	error = dmar_flush_for_ctx_entry(dmar, enable);
492 	if (error != 0) {
493 		dmar_free_ctx_locked(dmar, ctx);
494 		TD_PINNED_ASSERT;
495 		return (NULL);
496 	}
497 
498 	/*
499 	 * The dmar lock was potentially dropped between check for the
500 	 * empty context list and now.  Recheck the state of GCMD_TE
501 	 * to avoid unneeded command.
502 	 */
503 	if (enable && !rmrr_init && (dmar->hw_gcmd & DMAR_GCMD_TE) == 0) {
504 		error = dmar_enable_translation(dmar);
505 		if (error != 0) {
506 			dmar_free_ctx_locked(dmar, ctx);
507 			TD_PINNED_ASSERT;
508 			return (NULL);
509 		}
510 	}
511 	DMAR_UNLOCK(dmar);
512 	TD_PINNED_ASSERT;
513 	return (ctx);
514 }
515 
516 int
517 dmar_move_ctx_to_domain(struct dmar_domain *domain, struct dmar_ctx *ctx)
518 {
519 	struct dmar_unit *dmar;
520 	struct dmar_domain *old_domain;
521 	dmar_ctx_entry_t *ctxp;
522 	struct sf_buf *sf;
523 	int error;
524 
525 	dmar = domain->dmar;
526 	old_domain = ctx->domain;
527 	if (domain == old_domain)
528 		return (0);
529 	KASSERT(old_domain->dmar == dmar,
530 	    ("domain %p %u moving between dmars %u %u", domain,
531 	    domain->domain, old_domain->dmar->unit, domain->dmar->unit));
532 	TD_PREP_PINNED_ASSERT;
533 
534 	ctxp = dmar_map_ctx_entry(ctx, &sf);
535 	DMAR_LOCK(dmar);
536 	dmar_ctx_unlink(ctx);
537 	ctx->domain = domain;
538 	dmar_ctx_link(ctx);
539 	ctx_id_entry_init(ctx, ctxp, true);
540 	dmar_unmap_pgtbl(sf);
541 	error = dmar_flush_for_ctx_entry(dmar, true);
542 	/* If flush failed, rolling back would not work as well. */
543 	printf("dmar%d rid %x domain %d->%d %s-mapped\n",
544 	    dmar->unit, ctx->rid, old_domain->domain, domain->domain,
545 	    (domain->flags & DMAR_DOMAIN_IDMAP) != 0 ? "id" : "re");
546 	dmar_unref_domain_locked(dmar, old_domain);
547 	TD_PINNED_ASSERT;
548 	return (error);
549 }
550 
551 static void
552 dmar_unref_domain_locked(struct dmar_unit *dmar, struct dmar_domain *domain)
553 {
554 
555 	DMAR_ASSERT_LOCKED(dmar);
556 	KASSERT(domain->refs >= 1,
557 	    ("dmar %d domain %p refs %u", dmar->unit, domain, domain->refs));
558 	KASSERT(domain->refs > domain->ctx_cnt,
559 	    ("dmar %d domain %p refs %d ctx_cnt %d", dmar->unit, domain,
560 	    domain->refs, domain->ctx_cnt));
561 
562 	if (domain->refs > 1) {
563 		domain->refs--;
564 		DMAR_UNLOCK(dmar);
565 		return;
566 	}
567 
568 	KASSERT((domain->flags & DMAR_DOMAIN_RMRR) == 0,
569 	    ("lost ref on RMRR domain %p", domain));
570 
571 	LIST_REMOVE(domain, link);
572 	DMAR_UNLOCK(dmar);
573 
574 	taskqueue_drain(dmar->delayed_taskqueue, &domain->unload_task);
575 	dmar_domain_destroy(domain);
576 }
577 
578 void
579 dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx)
580 {
581 	struct sf_buf *sf;
582 	dmar_ctx_entry_t *ctxp;
583 	struct dmar_domain *domain;
584 
585 	DMAR_ASSERT_LOCKED(dmar);
586 	KASSERT(ctx->refs >= 1,
587 	    ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs));
588 
589 	/*
590 	 * If our reference is not last, only the dereference should
591 	 * be performed.
592 	 */
593 	if (ctx->refs > 1) {
594 		ctx->refs--;
595 		DMAR_UNLOCK(dmar);
596 		return;
597 	}
598 
599 	KASSERT((ctx->flags & DMAR_CTX_DISABLED) == 0,
600 	    ("lost ref on disabled ctx %p", ctx));
601 
602 	/*
603 	 * Otherwise, the context entry must be cleared before the
604 	 * page table is destroyed.  The mapping of the context
605 	 * entries page could require sleep, unlock the dmar.
606 	 */
607 	DMAR_UNLOCK(dmar);
608 	TD_PREP_PINNED_ASSERT;
609 	ctxp = dmar_map_ctx_entry(ctx, &sf);
610 	DMAR_LOCK(dmar);
611 	KASSERT(ctx->refs >= 1,
612 	    ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs));
613 
614 	/*
615 	 * Other thread might have referenced the context, in which
616 	 * case again only the dereference should be performed.
617 	 */
618 	if (ctx->refs > 1) {
619 		ctx->refs--;
620 		DMAR_UNLOCK(dmar);
621 		dmar_unmap_pgtbl(sf);
622 		TD_PINNED_ASSERT;
623 		return;
624 	}
625 
626 	KASSERT((ctx->flags & DMAR_CTX_DISABLED) == 0,
627 	    ("lost ref on disabled ctx %p", ctx));
628 
629 	/*
630 	 * Clear the context pointer and flush the caches.
631 	 * XXXKIB: cannot do this if any RMRR entries are still present.
632 	 */
633 	dmar_pte_clear(&ctxp->ctx1);
634 	ctxp->ctx2 = 0;
635 	dmar_flush_ctx_to_ram(dmar, ctxp);
636 	dmar_inv_ctx_glob(dmar);
637 	if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0) {
638 		if (dmar->qi_enabled)
639 			dmar_qi_invalidate_iotlb_glob_locked(dmar);
640 		else
641 			dmar_inv_iotlb_glob(dmar);
642 	}
643 	dmar_unmap_pgtbl(sf);
644 	domain = ctx->domain;
645 	dmar_ctx_unlink(ctx);
646 	free(ctx, M_DMAR_CTX);
647 	dmar_unref_domain_locked(dmar, domain);
648 	TD_PINNED_ASSERT;
649 }
650 
651 void
652 dmar_free_ctx(struct dmar_ctx *ctx)
653 {
654 	struct dmar_unit *dmar;
655 
656 	dmar = ctx->domain->dmar;
657 	DMAR_LOCK(dmar);
658 	dmar_free_ctx_locked(dmar, ctx);
659 }
660 
661 /*
662  * Returns with the domain locked.
663  */
664 struct dmar_ctx *
665 dmar_find_ctx_locked(struct dmar_unit *dmar, uint16_t rid)
666 {
667 	struct dmar_domain *domain;
668 	struct dmar_ctx *ctx;
669 
670 	DMAR_ASSERT_LOCKED(dmar);
671 
672 	LIST_FOREACH(domain, &dmar->domains, link) {
673 		LIST_FOREACH(ctx, &domain->contexts, link) {
674 			if (ctx->rid == rid)
675 				return (ctx);
676 		}
677 	}
678 	return (NULL);
679 }
680 
681 void
682 dmar_domain_free_entry(struct dmar_map_entry *entry, bool free)
683 {
684 	struct dmar_domain *domain;
685 
686 	domain = entry->domain;
687 	DMAR_DOMAIN_LOCK(domain);
688 	if ((entry->flags & DMAR_MAP_ENTRY_RMRR) != 0)
689 		dmar_gas_free_region(domain, entry);
690 	else
691 		dmar_gas_free_space(domain, entry);
692 	DMAR_DOMAIN_UNLOCK(domain);
693 	if (free)
694 		dmar_gas_free_entry(domain, entry);
695 	else
696 		entry->flags = 0;
697 }
698 
699 void
700 dmar_domain_unload_entry(struct dmar_map_entry *entry, bool free)
701 {
702 	struct dmar_unit *unit;
703 
704 	unit = entry->domain->dmar;
705 	if (unit->qi_enabled) {
706 		DMAR_LOCK(unit);
707 		dmar_qi_invalidate_locked(entry->domain, entry->start,
708 		    entry->end - entry->start, &entry->gseq, true);
709 		if (!free)
710 			entry->flags |= DMAR_MAP_ENTRY_QI_NF;
711 		TAILQ_INSERT_TAIL(&unit->tlb_flush_entries, entry, dmamap_link);
712 		DMAR_UNLOCK(unit);
713 	} else {
714 		domain_flush_iotlb_sync(entry->domain, entry->start,
715 		    entry->end - entry->start);
716 		dmar_domain_free_entry(entry, free);
717 	}
718 }
719 
720 static bool
721 dmar_domain_unload_emit_wait(struct dmar_domain *domain,
722     struct dmar_map_entry *entry)
723 {
724 
725 	if (TAILQ_NEXT(entry, dmamap_link) == NULL)
726 		return (true);
727 	return (domain->batch_no++ % dmar_batch_coalesce == 0);
728 }
729 
730 void
731 dmar_domain_unload(struct dmar_domain *domain,
732     struct dmar_map_entries_tailq *entries, bool cansleep)
733 {
734 	struct dmar_unit *unit;
735 	struct dmar_map_entry *entry, *entry1;
736 	int error;
737 
738 	unit = domain->dmar;
739 
740 	TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) {
741 		KASSERT((entry->flags & DMAR_MAP_ENTRY_MAP) != 0,
742 		    ("not mapped entry %p %p", domain, entry));
743 		error = domain_unmap_buf(domain, entry->start, entry->end -
744 		    entry->start, cansleep ? DMAR_PGF_WAITOK : 0);
745 		KASSERT(error == 0, ("unmap %p error %d", domain, error));
746 		if (!unit->qi_enabled) {
747 			domain_flush_iotlb_sync(domain, entry->start,
748 			    entry->end - entry->start);
749 			TAILQ_REMOVE(entries, entry, dmamap_link);
750 			dmar_domain_free_entry(entry, true);
751 		}
752 	}
753 	if (TAILQ_EMPTY(entries))
754 		return;
755 
756 	KASSERT(unit->qi_enabled, ("loaded entry left"));
757 	DMAR_LOCK(unit);
758 	TAILQ_FOREACH(entry, entries, dmamap_link) {
759 		dmar_qi_invalidate_locked(domain, entry->start, entry->end -
760 		    entry->start, &entry->gseq,
761 		    dmar_domain_unload_emit_wait(domain, entry));
762 	}
763 	TAILQ_CONCAT(&unit->tlb_flush_entries, entries, dmamap_link);
764 	DMAR_UNLOCK(unit);
765 }
766 
767 static void
768 dmar_domain_unload_task(void *arg, int pending)
769 {
770 	struct dmar_domain *domain;
771 	struct dmar_map_entries_tailq entries;
772 
773 	domain = arg;
774 	TAILQ_INIT(&entries);
775 
776 	for (;;) {
777 		DMAR_DOMAIN_LOCK(domain);
778 		TAILQ_SWAP(&domain->unload_entries, &entries, dmar_map_entry,
779 		    dmamap_link);
780 		DMAR_DOMAIN_UNLOCK(domain);
781 		if (TAILQ_EMPTY(&entries))
782 			break;
783 		dmar_domain_unload(domain, &entries, true);
784 	}
785 }
786