xref: /freebsd/sys/x86/iommu/intel_ctx.c (revision 0183e0151669735d62584fbba9125ed90716af5e)
1 /*-
2  * Copyright (c) 2013 The FreeBSD Foundation
3  * All rights reserved.
4  *
5  * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
6  * under sponsorship from the FreeBSD Foundation.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/malloc.h>
36 #include <sys/bus.h>
37 #include <sys/interrupt.h>
38 #include <sys/kernel.h>
39 #include <sys/ktr.h>
40 #include <sys/limits.h>
41 #include <sys/lock.h>
42 #include <sys/memdesc.h>
43 #include <sys/mutex.h>
44 #include <sys/proc.h>
45 #include <sys/rwlock.h>
46 #include <sys/rman.h>
47 #include <sys/sysctl.h>
48 #include <sys/taskqueue.h>
49 #include <sys/tree.h>
50 #include <sys/uio.h>
51 #include <sys/vmem.h>
52 #include <vm/vm.h>
53 #include <vm/vm_extern.h>
54 #include <vm/vm_kern.h>
55 #include <vm/vm_object.h>
56 #include <vm/vm_page.h>
57 #include <vm/vm_pager.h>
58 #include <vm/vm_map.h>
59 #include <machine/atomic.h>
60 #include <machine/bus.h>
61 #include <machine/md_var.h>
62 #include <machine/specialreg.h>
63 #include <x86/include/busdma_impl.h>
64 #include <x86/iommu/intel_reg.h>
65 #include <x86/iommu/busdma_dmar.h>
66 #include <x86/iommu/intel_dmar.h>
67 #include <dev/pci/pcireg.h>
68 #include <dev/pci/pcivar.h>
69 
/*
 * malloc(9) types for the two kinds of objects allocated in this file:
 * per-device contexts (struct dmar_ctx) and translation domains
 * (struct dmar_domain).
 */
static MALLOC_DEFINE(M_DMAR_CTX, "dmar_ctx", "Intel DMAR Context");
static MALLOC_DEFINE(M_DMAR_DOMAIN, "dmar_dom", "Intel DMAR Domain");

/*
 * Forward declarations for static functions that are referenced before
 * their definitions further down in this file.
 */
static void dmar_domain_unload_task(void *arg, int pending);
static void dmar_unref_domain_locked(struct dmar_unit *dmar,
    struct dmar_domain *domain);
static void dmar_domain_destroy(struct dmar_domain *domain);
77 
78 static void
79 dmar_ensure_ctx_page(struct dmar_unit *dmar, int bus)
80 {
81 	struct sf_buf *sf;
82 	dmar_root_entry_t *re;
83 	vm_page_t ctxm;
84 
85 	/*
86 	 * Allocated context page must be linked.
87 	 */
88 	ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, DMAR_PGF_NOALLOC);
89 	if (ctxm != NULL)
90 		return;
91 
92 	/*
93 	 * Page not present, allocate and link.  Note that other
94 	 * thread might execute this sequence in parallel.  This
95 	 * should be safe, because the context entries written by both
96 	 * threads are equal.
97 	 */
98 	TD_PREP_PINNED_ASSERT;
99 	ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, DMAR_PGF_ZERO |
100 	    DMAR_PGF_WAITOK);
101 	re = dmar_map_pgtbl(dmar->ctx_obj, 0, DMAR_PGF_NOALLOC, &sf);
102 	re += bus;
103 	dmar_pte_store(&re->r1, DMAR_ROOT_R1_P | (DMAR_ROOT_R1_CTP_MASK &
104 	    VM_PAGE_TO_PHYS(ctxm)));
105 	dmar_flush_root_to_ram(dmar, re);
106 	dmar_unmap_pgtbl(sf);
107 	TD_PINNED_ASSERT;
108 }
109 
110 static dmar_ctx_entry_t *
111 dmar_map_ctx_entry(struct dmar_ctx *ctx, struct sf_buf **sfp)
112 {
113 	dmar_ctx_entry_t *ctxp;
114 
115 	ctxp = dmar_map_pgtbl(ctx->domain->dmar->ctx_obj, 1 +
116 	    PCI_RID2BUS(ctx->rid), DMAR_PGF_NOALLOC | DMAR_PGF_WAITOK, sfp);
117 	ctxp += ctx->rid & 0xff;
118 	return (ctxp);
119 }
120 
121 static void
122 ctx_tag_init(struct dmar_ctx *ctx, device_t dev)
123 {
124 	bus_addr_t maxaddr;
125 
126 	maxaddr = MIN(ctx->domain->end, BUS_SPACE_MAXADDR);
127 	ctx->ctx_tag.common.ref_count = 1; /* Prevent free */
128 	ctx->ctx_tag.common.impl = &bus_dma_dmar_impl;
129 	ctx->ctx_tag.common.boundary = PCI_DMA_BOUNDARY;
130 	ctx->ctx_tag.common.lowaddr = maxaddr;
131 	ctx->ctx_tag.common.highaddr = maxaddr;
132 	ctx->ctx_tag.common.maxsize = maxaddr;
133 	ctx->ctx_tag.common.nsegments = BUS_SPACE_UNRESTRICTED;
134 	ctx->ctx_tag.common.maxsegsz = maxaddr;
135 	ctx->ctx_tag.ctx = ctx;
136 	ctx->ctx_tag.owner = dev;
137 }
138 
139 static void
140 ctx_id_entry_init(struct dmar_ctx *ctx, dmar_ctx_entry_t *ctxp, bool move)
141 {
142 	struct dmar_unit *unit;
143 	struct dmar_domain *domain;
144 	vm_page_t ctx_root;
145 
146 	domain = ctx->domain;
147 	unit = domain->dmar;
148 	KASSERT(move || (ctxp->ctx1 == 0 && ctxp->ctx2 == 0),
149 	    ("dmar%d: initialized ctx entry %d:%d:%d 0x%jx 0x%jx",
150 	    unit->unit, pci_get_bus(ctx->ctx_tag.owner),
151 	    pci_get_slot(ctx->ctx_tag.owner),
152 	    pci_get_function(ctx->ctx_tag.owner),
153 	    ctxp->ctx1, ctxp->ctx2));
154 	/*
155 	 * For update due to move, the store is not atomic.  It is
156 	 * possible that DMAR read upper doubleword, while low
157 	 * doubleword is not yet updated.  The domain id is stored in
158 	 * the upper doubleword, while the table pointer in the lower.
159 	 *
160 	 * There is no good solution, for the same reason it is wrong
161 	 * to clear P bit in the ctx entry for update.
162 	 */
163 	dmar_pte_store1(&ctxp->ctx2, DMAR_CTX2_DID(domain->domain) |
164 	    domain->awlvl);
165 	if ((domain->flags & DMAR_DOMAIN_IDMAP) != 0 &&
166 	    (unit->hw_ecap & DMAR_ECAP_PT) != 0) {
167 		KASSERT(domain->pgtbl_obj == NULL,
168 		    ("ctx %p non-null pgtbl_obj", ctx));
169 		dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_PASS | DMAR_CTX1_P);
170 	} else {
171 		ctx_root = dmar_pgalloc(domain->pgtbl_obj, 0, DMAR_PGF_NOALLOC);
172 		dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_UNTR |
173 		    (DMAR_CTX1_ASR_MASK & VM_PAGE_TO_PHYS(ctx_root)) |
174 		    DMAR_CTX1_P);
175 	}
176 	dmar_flush_ctx_to_ram(unit, ctxp);
177 }
178 
179 static int
180 dmar_flush_for_ctx_entry(struct dmar_unit *dmar, bool force)
181 {
182 	int error;
183 
184 	/*
185 	 * If dmar declares Caching Mode as Set, follow 11.5 "Caching
186 	 * Mode Consideration" and do the (global) invalidation of the
187 	 * negative TLB entries.
188 	 */
189 	if ((dmar->hw_cap & DMAR_CAP_CM) == 0 && !force)
190 		return (0);
191 	if (dmar->qi_enabled) {
192 		dmar_qi_invalidate_ctx_glob_locked(dmar);
193 		if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force)
194 			dmar_qi_invalidate_iotlb_glob_locked(dmar);
195 		return (0);
196 	}
197 	error = dmar_inv_ctx_glob(dmar);
198 	if (error == 0 && ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force))
199 		error = dmar_inv_iotlb_glob(dmar);
200 	return (error);
201 }
202 
203 static int
204 domain_init_rmrr(struct dmar_domain *domain, device_t dev)
205 {
206 	struct dmar_map_entries_tailq rmrr_entries;
207 	struct dmar_map_entry *entry, *entry1;
208 	vm_page_t *ma;
209 	dmar_gaddr_t start, end;
210 	vm_pindex_t size, i;
211 	int error, error1;
212 
213 	error = 0;
214 	TAILQ_INIT(&rmrr_entries);
215 	dmar_dev_parse_rmrr(domain, dev, &rmrr_entries);
216 	TAILQ_FOREACH_SAFE(entry, &rmrr_entries, unroll_link, entry1) {
217 		/*
218 		 * VT-d specification requires that the start of an
219 		 * RMRR entry is 4k-aligned.  Buggy BIOSes put
220 		 * anything into the start and end fields.  Truncate
221 		 * and round as neccesary.
222 		 *
223 		 * We also allow the overlapping RMRR entries, see
224 		 * dmar_gas_alloc_region().
225 		 */
226 		start = entry->start;
227 		end = entry->end;
228 		entry->start = trunc_page(start);
229 		entry->end = round_page(end);
230 		if (entry->start == entry->end) {
231 			/* Workaround for some AMI (?) BIOSes */
232 			if (bootverbose) {
233 				device_printf(dev, "BIOS bug: dmar%d RMRR "
234 				    "region (%jx, %jx) corrected\n",
235 				    domain->dmar->unit, start, end);
236 			}
237 			entry->end += DMAR_PAGE_SIZE * 0x20;
238 		}
239 		size = OFF_TO_IDX(entry->end - entry->start);
240 		ma = malloc(sizeof(vm_page_t) * size, M_TEMP, M_WAITOK);
241 		for (i = 0; i < size; i++) {
242 			ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
243 			    VM_MEMATTR_DEFAULT);
244 		}
245 		error1 = dmar_gas_map_region(domain, entry,
246 		    DMAR_MAP_ENTRY_READ | DMAR_MAP_ENTRY_WRITE,
247 		    DMAR_GM_CANWAIT, ma);
248 		/*
249 		 * Non-failed RMRR entries are owned by context rb
250 		 * tree.  Get rid of the failed entry, but do not stop
251 		 * the loop.  Rest of the parsed RMRR entries are
252 		 * loaded and removed on the context destruction.
253 		 */
254 		if (error1 == 0 && entry->end != entry->start) {
255 			DMAR_LOCK(domain->dmar);
256 			domain->refs++; /* XXXKIB prevent free */
257 			domain->flags |= DMAR_DOMAIN_RMRR;
258 			DMAR_UNLOCK(domain->dmar);
259 		} else {
260 			if (error1 != 0) {
261 				device_printf(dev,
262 			    "dmar%d failed to map RMRR region (%jx, %jx) %d\n",
263 				    domain->dmar->unit, start, end, error1);
264 				error = error1;
265 			}
266 			TAILQ_REMOVE(&rmrr_entries, entry, unroll_link);
267 			dmar_gas_free_entry(domain, entry);
268 		}
269 		for (i = 0; i < size; i++)
270 			vm_page_putfake(ma[i]);
271 		free(ma, M_TEMP);
272 	}
273 	return (error);
274 }
275 
276 static struct dmar_domain *
277 dmar_domain_alloc(struct dmar_unit *dmar, bool id_mapped)
278 {
279 	struct dmar_domain *domain;
280 	int error, id, mgaw;
281 
282 	id = alloc_unr(dmar->domids);
283 	if (id == -1)
284 		return (NULL);
285 	domain = malloc(sizeof(*domain), M_DMAR_DOMAIN, M_WAITOK | M_ZERO);
286 	domain->domain = id;
287 	LIST_INIT(&domain->contexts);
288 	RB_INIT(&domain->rb_root);
289 	TAILQ_INIT(&domain->unload_entries);
290 	TASK_INIT(&domain->unload_task, 0, dmar_domain_unload_task, domain);
291 	mtx_init(&domain->lock, "dmardom", NULL, MTX_DEF);
292 	domain->dmar = dmar;
293 
294 	/*
295 	 * For now, use the maximal usable physical address of the
296 	 * installed memory to calculate the mgaw on id_mapped domain.
297 	 * It is useful for the identity mapping, and less so for the
298 	 * virtualized bus address space.
299 	 */
300 	domain->end = id_mapped ? ptoa(Maxmem) : BUS_SPACE_MAXADDR;
301 	mgaw = dmar_maxaddr2mgaw(dmar, domain->end, !id_mapped);
302 	error = domain_set_agaw(domain, mgaw);
303 	if (error != 0)
304 		goto fail;
305 	if (!id_mapped)
306 		/* Use all supported address space for remapping. */
307 		domain->end = 1ULL << (domain->agaw - 1);
308 
309 	dmar_gas_init_domain(domain);
310 
311 	if (id_mapped) {
312 		if ((dmar->hw_ecap & DMAR_ECAP_PT) == 0) {
313 			domain->pgtbl_obj = domain_get_idmap_pgtbl(domain,
314 			    domain->end);
315 		}
316 		domain->flags |= DMAR_DOMAIN_IDMAP;
317 	} else {
318 		error = domain_alloc_pgtbl(domain);
319 		if (error != 0)
320 			goto fail;
321 		/* Disable local apic region access */
322 		error = dmar_gas_reserve_region(domain, 0xfee00000,
323 		    0xfeefffff + 1);
324 		if (error != 0)
325 			goto fail;
326 	}
327 	return (domain);
328 
329 fail:
330 	dmar_domain_destroy(domain);
331 	return (NULL);
332 }
333 
334 static struct dmar_ctx *
335 dmar_ctx_alloc(struct dmar_domain *domain, uint16_t rid)
336 {
337 	struct dmar_ctx *ctx;
338 
339 	ctx = malloc(sizeof(*ctx), M_DMAR_CTX, M_WAITOK | M_ZERO);
340 	ctx->domain = domain;
341 	ctx->rid = rid;
342 	ctx->refs = 1;
343 	return (ctx);
344 }
345 
346 static void
347 dmar_ctx_link(struct dmar_ctx *ctx)
348 {
349 	struct dmar_domain *domain;
350 
351 	domain = ctx->domain;
352 	DMAR_ASSERT_LOCKED(domain->dmar);
353 	KASSERT(domain->refs >= domain->ctx_cnt,
354 	    ("dom %p ref underflow %d %d", domain, domain->refs,
355 	    domain->ctx_cnt));
356 	domain->refs++;
357 	domain->ctx_cnt++;
358 	LIST_INSERT_HEAD(&domain->contexts, ctx, link);
359 }
360 
361 static void
362 dmar_ctx_unlink(struct dmar_ctx *ctx)
363 {
364 	struct dmar_domain *domain;
365 
366 	domain = ctx->domain;
367 	DMAR_ASSERT_LOCKED(domain->dmar);
368 	KASSERT(domain->refs > 0,
369 	    ("domain %p ctx dtr refs %d", domain, domain->refs));
370 	KASSERT(domain->ctx_cnt >= domain->refs,
371 	    ("domain %p ctx dtr refs %d ctx_cnt %d", domain,
372 	    domain->refs, domain->ctx_cnt));
373 	domain->refs--;
374 	domain->ctx_cnt--;
375 	LIST_REMOVE(ctx, link);
376 }
377 
378 static void
379 dmar_domain_destroy(struct dmar_domain *domain)
380 {
381 
382 	KASSERT(TAILQ_EMPTY(&domain->unload_entries),
383 	    ("unfinished unloads %p", domain));
384 	KASSERT(LIST_EMPTY(&domain->contexts),
385 	    ("destroying dom %p with contexts", domain));
386 	KASSERT(domain->ctx_cnt == 0,
387 	    ("destroying dom %p with ctx_cnt %d", domain, domain->ctx_cnt));
388 	KASSERT(domain->refs == 0,
389 	    ("destroying dom %p with refs %d", domain, domain->refs));
390 	if ((domain->flags & DMAR_DOMAIN_GAS_INITED) != 0) {
391 		DMAR_DOMAIN_LOCK(domain);
392 		dmar_gas_fini_domain(domain);
393 		DMAR_DOMAIN_UNLOCK(domain);
394 	}
395 	if ((domain->flags & DMAR_DOMAIN_PGTBL_INITED) != 0) {
396 		if (domain->pgtbl_obj != NULL)
397 			DMAR_DOMAIN_PGLOCK(domain);
398 		domain_free_pgtbl(domain);
399 	}
400 	mtx_destroy(&domain->lock);
401 	free_unr(domain->dmar->domids, domain->domain);
402 	free(domain, M_DMAR_DOMAIN);
403 }
404 
/*
 * Find or create the context for the request id rid on the given DMAR
 * unit and return it with one reference added for the caller.
 *
 * If no context exists yet, a new domain (identity-mapped when
 * id_mapped is true) and a new context are created, the hardware
 * context entry is written and the domain is put on dmar->domains.
 * When this is the first domain of the unit, translation is enabled
 * unless rmrr_init is true or DMAR_GCMD_TE is already set.
 *
 * Returns NULL when the domain cannot be allocated, RMRR setup fails,
 * or the cache flush / translation enable fails.  The dmar lock is not
 * held on return.
 */
struct dmar_ctx *
dmar_get_ctx_for_dev(struct dmar_unit *dmar, device_t dev, uint16_t rid,
    bool id_mapped, bool rmrr_init)
{
	struct dmar_domain *domain, *domain1;
	struct dmar_ctx *ctx, *ctx1;
	dmar_ctx_entry_t *ctxp;
	struct sf_buf *sf;
	int bus, slot, func, error;
	bool enable;

	/* bus/slot/func are only used for the informational printf below. */
	bus = pci_get_bus(dev);
	slot = pci_get_slot(dev);
	func = pci_get_function(dev);
	enable = false;
	TD_PREP_PINNED_ASSERT;
	DMAR_LOCK(dmar);
	ctx = dmar_find_ctx_locked(dmar, rid);
	error = 0;
	if (ctx == NULL) {
		/*
		 * Perform the allocations which require sleep or have
		 * higher chance to succeed if the sleep is allowed.
		 */
		DMAR_UNLOCK(dmar);
		dmar_ensure_ctx_page(dmar, PCI_RID2BUS(rid));
		domain1 = dmar_domain_alloc(dmar, id_mapped);
		if (domain1 == NULL) {
			TD_PINNED_ASSERT;
			return (NULL);
		}
		/* RMRR regions are only mapped for remapped domains. */
		if (!id_mapped) {
			error = domain_init_rmrr(domain1, dev);
			if (error != 0) {
				dmar_domain_destroy(domain1);
				TD_PINNED_ASSERT;
				return (NULL);
			}
		}
		ctx1 = dmar_ctx_alloc(domain1, rid);
		/* Map the context entry before retaking the dmar lock. */
		ctxp = dmar_map_ctx_entry(ctx1, &sf);
		DMAR_LOCK(dmar);

		/*
		 * Recheck the contexts, other thread might have
		 * already allocated needed one.
		 */
		ctx = dmar_find_ctx_locked(dmar, rid);
		if (ctx == NULL) {
			domain = domain1;
			ctx = ctx1;
			dmar_ctx_link(ctx);
			/* ctx_tag_init() below stores the owner as well. */
			ctx->ctx_tag.owner = dev;
			ctx_tag_init(ctx, dev);

			/*
			 * This is the first activated context for the
			 * DMAR unit.  Enable the translation after
			 * everything is set up.
			 */
			if (LIST_EMPTY(&dmar->domains))
				enable = true;
			LIST_INSERT_HEAD(&dmar->domains, domain, link);
			ctx_id_entry_init(ctx, ctxp, false);
			device_printf(dev,
			    "dmar%d pci%d:%d:%d:%d rid %x domain %d mgaw %d "
			    "agaw %d %s-mapped\n",
			    dmar->unit, dmar->segment, bus, slot,
			    func, rid, domain->domain, domain->mgaw,
			    domain->agaw, id_mapped ? "id" : "re");
			dmar_unmap_pgtbl(sf);
		} else {
			/*
			 * Lost the race: discard the speculatively
			 * created domain and context and reference the
			 * context found instead.
			 */
			dmar_unmap_pgtbl(sf);
			dmar_domain_destroy(domain1);
			/* Nothing needs to be done to destroy ctx1. */
			free(ctx1, M_DMAR_CTX);
			domain = ctx->domain;
			ctx->refs++; /* tag referenced us */
		}
	} else {
		domain = ctx->domain;
		ctx->refs++; /* tag referenced us */
	}

	/*
	 * enable doubles as the force argument, so the flush is
	 * unconditional when the first domain was just installed.
	 * dmar_free_ctx_locked() drops the dmar lock, hence no explicit
	 * unlock on the error paths below.
	 */
	error = dmar_flush_for_ctx_entry(dmar, enable);
	if (error != 0) {
		dmar_free_ctx_locked(dmar, ctx);
		TD_PINNED_ASSERT;
		return (NULL);
	}

	/*
	 * The dmar lock was potentially dropped between check for the
	 * empty context list and now.  Recheck the state of GCMD_TE
	 * to avoid unneeded command.
	 */
	if (enable && !rmrr_init && (dmar->hw_gcmd & DMAR_GCMD_TE) == 0) {
		error = dmar_enable_translation(dmar);
		if (error != 0) {
			dmar_free_ctx_locked(dmar, ctx);
			TD_PINNED_ASSERT;
			return (NULL);
		}
	}
	DMAR_UNLOCK(dmar);
	TD_PINNED_ASSERT;
	return (ctx);
}
513 
514 int
515 dmar_move_ctx_to_domain(struct dmar_domain *domain, struct dmar_ctx *ctx)
516 {
517 	struct dmar_unit *dmar;
518 	struct dmar_domain *old_domain;
519 	dmar_ctx_entry_t *ctxp;
520 	struct sf_buf *sf;
521 	int error;
522 
523 	dmar = domain->dmar;
524 	old_domain = ctx->domain;
525 	if (domain == old_domain)
526 		return (0);
527 	KASSERT(old_domain->dmar == dmar,
528 	    ("domain %p %u moving between dmars %u %u", domain,
529 	    domain->domain, old_domain->dmar->unit, domain->dmar->unit));
530 	TD_PREP_PINNED_ASSERT;
531 
532 	ctxp = dmar_map_ctx_entry(ctx, &sf);
533 	DMAR_LOCK(dmar);
534 	dmar_ctx_unlink(ctx);
535 	ctx->domain = domain;
536 	dmar_ctx_link(ctx);
537 	ctx_id_entry_init(ctx, ctxp, true);
538 	dmar_unmap_pgtbl(sf);
539 	error = dmar_flush_for_ctx_entry(dmar, true);
540 	/* If flush failed, rolling back would not work as well. */
541 	printf("dmar%d rid %x domain %d->%d %s-mapped\n",
542 	    dmar->unit, ctx->rid, old_domain->domain, domain->domain,
543 	    (domain->flags & DMAR_DOMAIN_IDMAP) != 0 ? "id" : "re");
544 	dmar_unref_domain_locked(dmar, old_domain);
545 	TD_PINNED_ASSERT;
546 	return (error);
547 }
548 
549 static void
550 dmar_unref_domain_locked(struct dmar_unit *dmar, struct dmar_domain *domain)
551 {
552 
553 	DMAR_ASSERT_LOCKED(dmar);
554 	KASSERT(domain->refs >= 1,
555 	    ("dmar %d domain %p refs %u", dmar->unit, domain, domain->refs));
556 	KASSERT(domain->refs > domain->ctx_cnt,
557 	    ("dmar %d domain %p refs %d ctx_cnt %d", dmar->unit, domain,
558 	    domain->refs, domain->ctx_cnt));
559 
560 	if (domain->refs > 1) {
561 		domain->refs--;
562 		DMAR_UNLOCK(dmar);
563 		return;
564 	}
565 
566 	KASSERT((domain->flags & DMAR_DOMAIN_RMRR) == 0,
567 	    ("lost ref on RMRR domain %p", domain));
568 
569 	LIST_REMOVE(domain, link);
570 	DMAR_UNLOCK(dmar);
571 
572 	taskqueue_drain(dmar->delayed_taskqueue, &domain->unload_task);
573 	dmar_domain_destroy(domain);
574 }
575 
576 void
577 dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx)
578 {
579 	struct sf_buf *sf;
580 	dmar_ctx_entry_t *ctxp;
581 	struct dmar_domain *domain;
582 
583 	DMAR_ASSERT_LOCKED(dmar);
584 	KASSERT(ctx->refs >= 1,
585 	    ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs));
586 
587 	/*
588 	 * If our reference is not last, only the dereference should
589 	 * be performed.
590 	 */
591 	if (ctx->refs > 1) {
592 		ctx->refs--;
593 		DMAR_UNLOCK(dmar);
594 		return;
595 	}
596 
597 	KASSERT((ctx->flags & DMAR_CTX_DISABLED) == 0,
598 	    ("lost ref on disabled ctx %p", ctx));
599 
600 	/*
601 	 * Otherwise, the context entry must be cleared before the
602 	 * page table is destroyed.  The mapping of the context
603 	 * entries page could require sleep, unlock the dmar.
604 	 */
605 	DMAR_UNLOCK(dmar);
606 	TD_PREP_PINNED_ASSERT;
607 	ctxp = dmar_map_ctx_entry(ctx, &sf);
608 	DMAR_LOCK(dmar);
609 	KASSERT(ctx->refs >= 1,
610 	    ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs));
611 
612 	/*
613 	 * Other thread might have referenced the context, in which
614 	 * case again only the dereference should be performed.
615 	 */
616 	if (ctx->refs > 1) {
617 		ctx->refs--;
618 		DMAR_UNLOCK(dmar);
619 		dmar_unmap_pgtbl(sf);
620 		TD_PINNED_ASSERT;
621 		return;
622 	}
623 
624 	KASSERT((ctx->flags & DMAR_CTX_DISABLED) == 0,
625 	    ("lost ref on disabled ctx %p", ctx));
626 
627 	/*
628 	 * Clear the context pointer and flush the caches.
629 	 * XXXKIB: cannot do this if any RMRR entries are still present.
630 	 */
631 	dmar_pte_clear(&ctxp->ctx1);
632 	ctxp->ctx2 = 0;
633 	dmar_flush_ctx_to_ram(dmar, ctxp);
634 	dmar_inv_ctx_glob(dmar);
635 	if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0) {
636 		if (dmar->qi_enabled)
637 			dmar_qi_invalidate_iotlb_glob_locked(dmar);
638 		else
639 			dmar_inv_iotlb_glob(dmar);
640 	}
641 	dmar_unmap_pgtbl(sf);
642 	domain = ctx->domain;
643 	dmar_ctx_unlink(ctx);
644 	free(ctx, M_DMAR_CTX);
645 	dmar_unref_domain_locked(dmar, domain);
646 	TD_PINNED_ASSERT;
647 }
648 
649 void
650 dmar_free_ctx(struct dmar_ctx *ctx)
651 {
652 	struct dmar_unit *dmar;
653 
654 	dmar = ctx->domain->dmar;
655 	DMAR_LOCK(dmar);
656 	dmar_free_ctx_locked(dmar, ctx);
657 }
658 
659 /*
660  * Returns with the domain locked.
661  */
662 struct dmar_ctx *
663 dmar_find_ctx_locked(struct dmar_unit *dmar, uint16_t rid)
664 {
665 	struct dmar_domain *domain;
666 	struct dmar_ctx *ctx;
667 
668 	DMAR_ASSERT_LOCKED(dmar);
669 
670 	LIST_FOREACH(domain, &dmar->domains, link) {
671 		LIST_FOREACH(ctx, &domain->contexts, link) {
672 			if (ctx->rid == rid)
673 				return (ctx);
674 		}
675 	}
676 	return (NULL);
677 }
678 
679 void
680 dmar_domain_free_entry(struct dmar_map_entry *entry, bool free)
681 {
682 	struct dmar_domain *domain;
683 
684 	domain = entry->domain;
685 	DMAR_DOMAIN_LOCK(domain);
686 	if ((entry->flags & DMAR_MAP_ENTRY_RMRR) != 0)
687 		dmar_gas_free_region(domain, entry);
688 	else
689 		dmar_gas_free_space(domain, entry);
690 	DMAR_DOMAIN_UNLOCK(domain);
691 	if (free)
692 		dmar_gas_free_entry(domain, entry);
693 	else
694 		entry->flags = 0;
695 }
696 
697 void
698 dmar_domain_unload_entry(struct dmar_map_entry *entry, bool free)
699 {
700 	struct dmar_unit *unit;
701 
702 	unit = entry->domain->dmar;
703 	if (unit->qi_enabled) {
704 		DMAR_LOCK(unit);
705 		dmar_qi_invalidate_locked(entry->domain, entry->start,
706 		    entry->end - entry->start, &entry->gseq);
707 		if (!free)
708 			entry->flags |= DMAR_MAP_ENTRY_QI_NF;
709 		TAILQ_INSERT_TAIL(&unit->tlb_flush_entries, entry, dmamap_link);
710 		DMAR_UNLOCK(unit);
711 	} else {
712 		domain_flush_iotlb_sync(entry->domain, entry->start,
713 		    entry->end - entry->start);
714 		dmar_domain_free_entry(entry, free);
715 	}
716 }
717 
718 static struct dmar_qi_genseq *
719 dmar_domain_unload_gseq(struct dmar_domain *domain,
720     struct dmar_map_entry *entry, struct dmar_qi_genseq *gseq)
721 {
722 
723 	if (TAILQ_NEXT(entry, dmamap_link) != NULL)
724 		return (NULL);
725 	if (domain->batch_no++ % dmar_batch_coalesce != 0)
726 		return (NULL);
727 	return (gseq);
728 }
729 
730 void
731 dmar_domain_unload(struct dmar_domain *domain,
732     struct dmar_map_entries_tailq *entries, bool cansleep)
733 {
734 	struct dmar_unit *unit;
735 	struct dmar_map_entry *entry, *entry1;
736 	struct dmar_qi_genseq gseq;
737 	int error;
738 
739 	unit = domain->dmar;
740 
741 	TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) {
742 		KASSERT((entry->flags & DMAR_MAP_ENTRY_MAP) != 0,
743 		    ("not mapped entry %p %p", domain, entry));
744 		error = domain_unmap_buf(domain, entry->start, entry->end -
745 		    entry->start, cansleep ? DMAR_PGF_WAITOK : 0);
746 		KASSERT(error == 0, ("unmap %p error %d", domain, error));
747 		if (!unit->qi_enabled) {
748 			domain_flush_iotlb_sync(domain, entry->start,
749 			    entry->end - entry->start);
750 			TAILQ_REMOVE(entries, entry, dmamap_link);
751 			dmar_domain_free_entry(entry, true);
752 		}
753 	}
754 	if (TAILQ_EMPTY(entries))
755 		return;
756 
757 	KASSERT(unit->qi_enabled, ("loaded entry left"));
758 	DMAR_LOCK(unit);
759 	TAILQ_FOREACH(entry, entries, dmamap_link) {
760 		entry->gseq.gen = 0;
761 		entry->gseq.seq = 0;
762 		dmar_qi_invalidate_locked(domain, entry->start, entry->end -
763 		    entry->start, dmar_domain_unload_gseq(domain, entry,
764 		    &gseq));
765 	}
766 	TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) {
767 		entry->gseq = gseq;
768 		TAILQ_REMOVE(entries, entry, dmamap_link);
769 		TAILQ_INSERT_TAIL(&unit->tlb_flush_entries, entry, dmamap_link);
770 	}
771 	DMAR_UNLOCK(unit);
772 }
773 
774 static void
775 dmar_domain_unload_task(void *arg, int pending)
776 {
777 	struct dmar_domain *domain;
778 	struct dmar_map_entries_tailq entries;
779 
780 	domain = arg;
781 	TAILQ_INIT(&entries);
782 
783 	for (;;) {
784 		DMAR_DOMAIN_LOCK(domain);
785 		TAILQ_SWAP(&domain->unload_entries, &entries, dmar_map_entry,
786 		    dmamap_link);
787 		DMAR_DOMAIN_UNLOCK(domain);
788 		if (TAILQ_EMPTY(&entries))
789 			break;
790 		dmar_domain_unload(domain, &entries, true);
791 	}
792 }
793