xref: /freebsd/sys/x86/iommu/intel_ctx.c (revision 61898cde69374d5a9994e2074605bc4101aff72d)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2013 The FreeBSD Foundation
5  * All rights reserved.
6  *
7  * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
8  * under sponsorship from the FreeBSD Foundation.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/malloc.h>
38 #include <sys/bus.h>
39 #include <sys/interrupt.h>
40 #include <sys/kernel.h>
41 #include <sys/ktr.h>
42 #include <sys/limits.h>
43 #include <sys/lock.h>
44 #include <sys/memdesc.h>
45 #include <sys/mutex.h>
46 #include <sys/proc.h>
47 #include <sys/rwlock.h>
48 #include <sys/rman.h>
49 #include <sys/sysctl.h>
50 #include <sys/taskqueue.h>
51 #include <sys/tree.h>
52 #include <sys/uio.h>
53 #include <sys/vmem.h>
54 #include <vm/vm.h>
55 #include <vm/vm_extern.h>
56 #include <vm/vm_kern.h>
57 #include <vm/vm_object.h>
58 #include <vm/vm_page.h>
59 #include <vm/vm_pager.h>
60 #include <vm/vm_map.h>
61 #include <contrib/dev/acpica/include/acpi.h>
62 #include <contrib/dev/acpica/include/accommon.h>
63 #include <dev/pci/pcireg.h>
64 #include <dev/pci/pcivar.h>
65 #include <machine/atomic.h>
66 #include <machine/bus.h>
67 #include <machine/md_var.h>
68 #include <machine/specialreg.h>
69 #include <x86/include/busdma_impl.h>
70 #include <dev/iommu/busdma_iommu.h>
71 #include <x86/iommu/intel_reg.h>
72 #include <x86/iommu/intel_dmar.h>
73 
74 static MALLOC_DEFINE(M_DMAR_CTX, "dmar_ctx", "Intel DMAR Context");
75 static MALLOC_DEFINE(M_DMAR_DOMAIN, "dmar_dom", "Intel DMAR Domain");
76 
77 static void dmar_domain_unload_task(void *arg, int pending);
78 static void dmar_unref_domain_locked(struct dmar_unit *dmar,
79     struct dmar_domain *domain);
80 static void dmar_domain_destroy(struct dmar_domain *domain);
81 
82 static void
83 dmar_ensure_ctx_page(struct dmar_unit *dmar, int bus)
84 {
85 	struct sf_buf *sf;
86 	dmar_root_entry_t *re;
87 	vm_page_t ctxm;
88 
89 	/*
90 	 * Allocated context page must be linked.
91 	 */
92 	ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, IOMMU_PGF_NOALLOC);
93 	if (ctxm != NULL)
94 		return;
95 
96 	/*
97 	 * Page not present, allocate and link.  Note that other
98 	 * thread might execute this sequence in parallel.  This
99 	 * should be safe, because the context entries written by both
100 	 * threads are equal.
101 	 */
102 	TD_PREP_PINNED_ASSERT;
103 	ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, IOMMU_PGF_ZERO |
104 	    IOMMU_PGF_WAITOK);
105 	re = dmar_map_pgtbl(dmar->ctx_obj, 0, IOMMU_PGF_NOALLOC, &sf);
106 	re += bus;
107 	dmar_pte_store(&re->r1, DMAR_ROOT_R1_P | (DMAR_ROOT_R1_CTP_MASK &
108 	    VM_PAGE_TO_PHYS(ctxm)));
109 	dmar_flush_root_to_ram(dmar, re);
110 	dmar_unmap_pgtbl(sf);
111 	TD_PINNED_ASSERT;
112 }
113 
114 static dmar_ctx_entry_t *
115 dmar_map_ctx_entry(struct dmar_ctx *ctx, struct sf_buf **sfp)
116 {
117 	struct dmar_unit *dmar;
118 	dmar_ctx_entry_t *ctxp;
119 
120 	dmar = (struct dmar_unit *)ctx->context.domain->iommu;
121 
122 	ctxp = dmar_map_pgtbl(dmar->ctx_obj, 1 +
123 	    PCI_RID2BUS(ctx->rid), IOMMU_PGF_NOALLOC | IOMMU_PGF_WAITOK, sfp);
124 	ctxp += ctx->rid & 0xff;
125 	return (ctxp);
126 }
127 
128 static void
129 device_tag_init(struct dmar_ctx *ctx, device_t dev)
130 {
131 	struct dmar_domain *domain;
132 	bus_addr_t maxaddr;
133 
134 	domain = (struct dmar_domain *)ctx->context.domain;
135 	maxaddr = MIN(domain->iodom.end, BUS_SPACE_MAXADDR);
136 	ctx->context.tag->common.ref_count = 1; /* Prevent free */
137 	ctx->context.tag->common.impl = &bus_dma_iommu_impl;
138 	ctx->context.tag->common.boundary = 0;
139 	ctx->context.tag->common.lowaddr = maxaddr;
140 	ctx->context.tag->common.highaddr = maxaddr;
141 	ctx->context.tag->common.maxsize = maxaddr;
142 	ctx->context.tag->common.nsegments = BUS_SPACE_UNRESTRICTED;
143 	ctx->context.tag->common.maxsegsz = maxaddr;
144 	ctx->context.tag->ctx = (struct iommu_ctx *)ctx;
145 	ctx->context.tag->owner = dev;
146 }
147 
148 static void
149 ctx_id_entry_init_one(dmar_ctx_entry_t *ctxp, struct dmar_domain *domain,
150     vm_page_t ctx_root)
151 {
152 	/*
153 	 * For update due to move, the store is not atomic.  It is
154 	 * possible that DMAR read upper doubleword, while low
155 	 * doubleword is not yet updated.  The domain id is stored in
156 	 * the upper doubleword, while the table pointer in the lower.
157 	 *
158 	 * There is no good solution, for the same reason it is wrong
159 	 * to clear P bit in the ctx entry for update.
160 	 */
161 	dmar_pte_store1(&ctxp->ctx2, DMAR_CTX2_DID(domain->domain) |
162 	    domain->awlvl);
163 	if (ctx_root == NULL) {
164 		dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_PASS | DMAR_CTX1_P);
165 	} else {
166 		dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_UNTR |
167 		    (DMAR_CTX1_ASR_MASK & VM_PAGE_TO_PHYS(ctx_root)) |
168 		    DMAR_CTX1_P);
169 	}
170 }
171 
172 static void
173 ctx_id_entry_init(struct dmar_ctx *ctx, dmar_ctx_entry_t *ctxp, bool move,
174     int busno)
175 {
176 	struct dmar_unit *unit;
177 	struct dmar_domain *domain;
178 	vm_page_t ctx_root;
179 	int i;
180 
181 	domain = (struct dmar_domain *)ctx->context.domain;
182 	unit = (struct dmar_unit *)domain->iodom.iommu;
183 	KASSERT(move || (ctxp->ctx1 == 0 && ctxp->ctx2 == 0),
184 	    ("dmar%d: initialized ctx entry %d:%d:%d 0x%jx 0x%jx",
185 	    unit->iommu.unit, busno, pci_get_slot(ctx->context.tag->owner),
186 	    pci_get_function(ctx->context.tag->owner),
187 	    ctxp->ctx1, ctxp->ctx2));
188 
189 	if ((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) != 0 &&
190 	    (unit->hw_ecap & DMAR_ECAP_PT) != 0) {
191 		KASSERT(domain->pgtbl_obj == NULL,
192 		    ("ctx %p non-null pgtbl_obj", ctx));
193 		ctx_root = NULL;
194 	} else {
195 		ctx_root = dmar_pgalloc(domain->pgtbl_obj, 0,
196 		    IOMMU_PGF_NOALLOC);
197 	}
198 
199 	if (iommu_is_buswide_ctx((struct iommu_unit *)unit, busno)) {
200 		MPASS(!move);
201 		for (i = 0; i <= PCI_BUSMAX; i++) {
202 			ctx_id_entry_init_one(&ctxp[i], domain, ctx_root);
203 		}
204 	} else {
205 		ctx_id_entry_init_one(ctxp, domain, ctx_root);
206 	}
207 	dmar_flush_ctx_to_ram(unit, ctxp);
208 }
209 
210 static int
211 dmar_flush_for_ctx_entry(struct dmar_unit *dmar, bool force)
212 {
213 	int error;
214 
215 	/*
216 	 * If dmar declares Caching Mode as Set, follow 11.5 "Caching
217 	 * Mode Consideration" and do the (global) invalidation of the
218 	 * negative TLB entries.
219 	 */
220 	if ((dmar->hw_cap & DMAR_CAP_CM) == 0 && !force)
221 		return (0);
222 	if (dmar->qi_enabled) {
223 		dmar_qi_invalidate_ctx_glob_locked(dmar);
224 		if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force)
225 			dmar_qi_invalidate_iotlb_glob_locked(dmar);
226 		return (0);
227 	}
228 	error = dmar_inv_ctx_glob(dmar);
229 	if (error == 0 && ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force))
230 		error = dmar_inv_iotlb_glob(dmar);
231 	return (error);
232 }
233 
234 static int
235 domain_init_rmrr(struct dmar_domain *domain, device_t dev, int bus,
236     int slot, int func, int dev_domain, int dev_busno,
237     const void *dev_path, int dev_path_len)
238 {
239 	struct iommu_map_entries_tailq rmrr_entries;
240 	struct iommu_map_entry *entry, *entry1;
241 	vm_page_t *ma;
242 	iommu_gaddr_t start, end;
243 	vm_pindex_t size, i;
244 	int error, error1;
245 
246 	error = 0;
247 	TAILQ_INIT(&rmrr_entries);
248 	dmar_dev_parse_rmrr(domain, dev_domain, dev_busno, dev_path,
249 	    dev_path_len, &rmrr_entries);
250 	TAILQ_FOREACH_SAFE(entry, &rmrr_entries, unroll_link, entry1) {
251 		/*
252 		 * VT-d specification requires that the start of an
253 		 * RMRR entry is 4k-aligned.  Buggy BIOSes put
254 		 * anything into the start and end fields.  Truncate
255 		 * and round as neccesary.
256 		 *
257 		 * We also allow the overlapping RMRR entries, see
258 		 * iommu_gas_alloc_region().
259 		 */
260 		start = entry->start;
261 		end = entry->end;
262 		if (bootverbose)
263 			printf("dmar%d ctx pci%d:%d:%d RMRR [%#jx, %#jx]\n",
264 			    domain->iodom.iommu->unit, bus, slot, func,
265 			    (uintmax_t)start, (uintmax_t)end);
266 		entry->start = trunc_page(start);
267 		entry->end = round_page(end);
268 		if (entry->start == entry->end) {
269 			/* Workaround for some AMI (?) BIOSes */
270 			if (bootverbose) {
271 				if (dev != NULL)
272 					device_printf(dev, "");
273 				printf("pci%d:%d:%d ", bus, slot, func);
274 				printf("BIOS bug: dmar%d RMRR "
275 				    "region (%jx, %jx) corrected\n",
276 				    domain->iodom.iommu->unit, start, end);
277 			}
278 			entry->end += DMAR_PAGE_SIZE * 0x20;
279 		}
280 		size = OFF_TO_IDX(entry->end - entry->start);
281 		ma = malloc(sizeof(vm_page_t) * size, M_TEMP, M_WAITOK);
282 		for (i = 0; i < size; i++) {
283 			ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
284 			    VM_MEMATTR_DEFAULT);
285 		}
286 		error1 = iommu_gas_map_region((struct iommu_domain *)domain,
287 		    entry,
288 		    IOMMU_MAP_ENTRY_READ | IOMMU_MAP_ENTRY_WRITE,
289 		    IOMMU_MF_CANWAIT | IOMMU_MF_RMRR, ma);
290 		/*
291 		 * Non-failed RMRR entries are owned by context rb
292 		 * tree.  Get rid of the failed entry, but do not stop
293 		 * the loop.  Rest of the parsed RMRR entries are
294 		 * loaded and removed on the context destruction.
295 		 */
296 		if (error1 == 0 && entry->end != entry->start) {
297 			IOMMU_LOCK(domain->iodom.iommu);
298 			domain->refs++; /* XXXKIB prevent free */
299 			domain->iodom.flags |= IOMMU_DOMAIN_RMRR;
300 			IOMMU_UNLOCK(domain->iodom.iommu);
301 		} else {
302 			if (error1 != 0) {
303 				if (dev != NULL)
304 					device_printf(dev, "");
305 				printf("pci%d:%d:%d ", bus, slot, func);
306 				printf(
307 			    "dmar%d failed to map RMRR region (%jx, %jx) %d\n",
308 				    domain->iodom.iommu->unit, start, end,
309 				    error1);
310 				error = error1;
311 			}
312 			TAILQ_REMOVE(&rmrr_entries, entry, unroll_link);
313 			iommu_gas_free_entry((struct iommu_domain *)domain,
314 			    entry);
315 		}
316 		for (i = 0; i < size; i++)
317 			vm_page_putfake(ma[i]);
318 		free(ma, M_TEMP);
319 	}
320 	return (error);
321 }
322 
323 static struct dmar_domain *
324 dmar_domain_alloc(struct dmar_unit *dmar, bool id_mapped)
325 {
326 	struct iommu_domain *iodom;
327 	struct dmar_domain *domain;
328 	int error, id, mgaw;
329 
330 	id = alloc_unr(dmar->domids);
331 	if (id == -1)
332 		return (NULL);
333 	domain = malloc(sizeof(*domain), M_DMAR_DOMAIN, M_WAITOK | M_ZERO);
334 	iodom = (struct iommu_domain *)domain;
335 	domain->domain = id;
336 	LIST_INIT(&domain->contexts);
337 	RB_INIT(&domain->iodom.rb_root);
338 	TAILQ_INIT(&domain->iodom.unload_entries);
339 	TASK_INIT(&domain->iodom.unload_task, 0, dmar_domain_unload_task,
340 	    domain);
341 	mtx_init(&domain->iodom.lock, "dmardom", NULL, MTX_DEF);
342 	domain->dmar = dmar;
343 	domain->iodom.iommu = &dmar->iommu;
344 	domain_pgtbl_init(domain);
345 
346 	/*
347 	 * For now, use the maximal usable physical address of the
348 	 * installed memory to calculate the mgaw on id_mapped domain.
349 	 * It is useful for the identity mapping, and less so for the
350 	 * virtualized bus address space.
351 	 */
352 	domain->iodom.end = id_mapped ? ptoa(Maxmem) : BUS_SPACE_MAXADDR;
353 	mgaw = dmar_maxaddr2mgaw(dmar, domain->iodom.end, !id_mapped);
354 	error = domain_set_agaw(domain, mgaw);
355 	if (error != 0)
356 		goto fail;
357 	if (!id_mapped)
358 		/* Use all supported address space for remapping. */
359 		domain->iodom.end = 1ULL << (domain->agaw - 1);
360 
361 	iommu_gas_init_domain((struct iommu_domain *)domain);
362 
363 	if (id_mapped) {
364 		if ((dmar->hw_ecap & DMAR_ECAP_PT) == 0) {
365 			domain->pgtbl_obj = domain_get_idmap_pgtbl(domain,
366 			    domain->iodom.end);
367 		}
368 		domain->iodom.flags |= IOMMU_DOMAIN_IDMAP;
369 	} else {
370 		error = domain_alloc_pgtbl(domain);
371 		if (error != 0)
372 			goto fail;
373 		/* Disable local apic region access */
374 		error = iommu_gas_reserve_region(iodom, 0xfee00000,
375 		    0xfeefffff + 1);
376 		if (error != 0)
377 			goto fail;
378 	}
379 	return (domain);
380 
381 fail:
382 	dmar_domain_destroy(domain);
383 	return (NULL);
384 }
385 
386 static struct dmar_ctx *
387 dmar_ctx_alloc(struct dmar_domain *domain, uint16_t rid)
388 {
389 	struct dmar_ctx *ctx;
390 
391 	ctx = malloc(sizeof(*ctx), M_DMAR_CTX, M_WAITOK | M_ZERO);
392 	ctx->context.domain = (struct iommu_domain *)domain;
393 	ctx->context.tag = malloc(sizeof(struct bus_dma_tag_iommu),
394 	    M_DMAR_CTX, M_WAITOK | M_ZERO);
395 	ctx->rid = rid;
396 	ctx->refs = 1;
397 	return (ctx);
398 }
399 
400 static void
401 dmar_ctx_link(struct dmar_ctx *ctx)
402 {
403 	struct dmar_domain *domain;
404 
405 	domain = (struct dmar_domain *)ctx->context.domain;
406 	IOMMU_ASSERT_LOCKED(domain->iodom.iommu);
407 	KASSERT(domain->refs >= domain->ctx_cnt,
408 	    ("dom %p ref underflow %d %d", domain, domain->refs,
409 	    domain->ctx_cnt));
410 	domain->refs++;
411 	domain->ctx_cnt++;
412 	LIST_INSERT_HEAD(&domain->contexts, ctx, link);
413 }
414 
415 static void
416 dmar_ctx_unlink(struct dmar_ctx *ctx)
417 {
418 	struct dmar_domain *domain;
419 
420 	domain = (struct dmar_domain *)ctx->context.domain;
421 	IOMMU_ASSERT_LOCKED(domain->iodom.iommu);
422 	KASSERT(domain->refs > 0,
423 	    ("domain %p ctx dtr refs %d", domain, domain->refs));
424 	KASSERT(domain->ctx_cnt >= domain->refs,
425 	    ("domain %p ctx dtr refs %d ctx_cnt %d", domain,
426 	    domain->refs, domain->ctx_cnt));
427 	domain->refs--;
428 	domain->ctx_cnt--;
429 	LIST_REMOVE(ctx, link);
430 }
431 
432 static void
433 dmar_domain_destroy(struct dmar_domain *domain)
434 {
435 	struct dmar_unit *dmar;
436 
437 	KASSERT(TAILQ_EMPTY(&domain->iodom.unload_entries),
438 	    ("unfinished unloads %p", domain));
439 	KASSERT(LIST_EMPTY(&domain->contexts),
440 	    ("destroying dom %p with contexts", domain));
441 	KASSERT(domain->ctx_cnt == 0,
442 	    ("destroying dom %p with ctx_cnt %d", domain, domain->ctx_cnt));
443 	KASSERT(domain->refs == 0,
444 	    ("destroying dom %p with refs %d", domain, domain->refs));
445 	if ((domain->iodom.flags & IOMMU_DOMAIN_GAS_INITED) != 0) {
446 		DMAR_DOMAIN_LOCK(domain);
447 		iommu_gas_fini_domain((struct iommu_domain *)domain);
448 		DMAR_DOMAIN_UNLOCK(domain);
449 	}
450 	if ((domain->iodom.flags & IOMMU_DOMAIN_PGTBL_INITED) != 0) {
451 		if (domain->pgtbl_obj != NULL)
452 			DMAR_DOMAIN_PGLOCK(domain);
453 		domain_free_pgtbl(domain);
454 	}
455 	mtx_destroy(&domain->iodom.lock);
456 	dmar = (struct dmar_unit *)domain->iodom.iommu;
457 	free_unr(dmar->domids, domain->domain);
458 	free(domain, M_DMAR_DOMAIN);
459 }
460 
461 static struct dmar_ctx *
462 dmar_get_ctx_for_dev1(struct dmar_unit *dmar, device_t dev, uint16_t rid,
463     int dev_domain, int dev_busno, const void *dev_path, int dev_path_len,
464     bool id_mapped, bool rmrr_init)
465 {
466 	struct dmar_domain *domain, *domain1;
467 	struct dmar_ctx *ctx, *ctx1;
468 	struct iommu_unit *unit;
469 	dmar_ctx_entry_t *ctxp;
470 	struct sf_buf *sf;
471 	int bus, slot, func, error;
472 	bool enable;
473 
474 	if (dev != NULL) {
475 		bus = pci_get_bus(dev);
476 		slot = pci_get_slot(dev);
477 		func = pci_get_function(dev);
478 	} else {
479 		bus = PCI_RID2BUS(rid);
480 		slot = PCI_RID2SLOT(rid);
481 		func = PCI_RID2FUNC(rid);
482 	}
483 	enable = false;
484 	TD_PREP_PINNED_ASSERT;
485 	unit = (struct iommu_unit *)dmar;
486 	DMAR_LOCK(dmar);
487 	KASSERT(!iommu_is_buswide_ctx(unit, bus) || (slot == 0 && func == 0),
488 	    ("iommu%d pci%d:%d:%d get_ctx for buswide", dmar->iommu.unit, bus,
489 	    slot, func));
490 	ctx = dmar_find_ctx_locked(dmar, rid);
491 	error = 0;
492 	if (ctx == NULL) {
493 		/*
494 		 * Perform the allocations which require sleep or have
495 		 * higher chance to succeed if the sleep is allowed.
496 		 */
497 		DMAR_UNLOCK(dmar);
498 		dmar_ensure_ctx_page(dmar, PCI_RID2BUS(rid));
499 		domain1 = dmar_domain_alloc(dmar, id_mapped);
500 		if (domain1 == NULL) {
501 			TD_PINNED_ASSERT;
502 			return (NULL);
503 		}
504 		if (!id_mapped) {
505 			error = domain_init_rmrr(domain1, dev, bus,
506 			    slot, func, dev_domain, dev_busno, dev_path,
507 			    dev_path_len);
508 			if (error != 0) {
509 				dmar_domain_destroy(domain1);
510 				TD_PINNED_ASSERT;
511 				return (NULL);
512 			}
513 		}
514 		ctx1 = dmar_ctx_alloc(domain1, rid);
515 		ctxp = dmar_map_ctx_entry(ctx1, &sf);
516 		DMAR_LOCK(dmar);
517 
518 		/*
519 		 * Recheck the contexts, other thread might have
520 		 * already allocated needed one.
521 		 */
522 		ctx = dmar_find_ctx_locked(dmar, rid);
523 		if (ctx == NULL) {
524 			domain = domain1;
525 			ctx = ctx1;
526 			dmar_ctx_link(ctx);
527 			ctx->context.tag->owner = dev;
528 			device_tag_init(ctx, dev);
529 
530 			/*
531 			 * This is the first activated context for the
532 			 * DMAR unit.  Enable the translation after
533 			 * everything is set up.
534 			 */
535 			if (LIST_EMPTY(&dmar->domains))
536 				enable = true;
537 			LIST_INSERT_HEAD(&dmar->domains, domain, link);
538 			ctx_id_entry_init(ctx, ctxp, false, bus);
539 			if (dev != NULL) {
540 				device_printf(dev,
541 			    "dmar%d pci%d:%d:%d:%d rid %x domain %d mgaw %d "
542 				    "agaw %d %s-mapped\n",
543 				    dmar->iommu.unit, dmar->segment, bus, slot,
544 				    func, rid, domain->domain, domain->mgaw,
545 				    domain->agaw, id_mapped ? "id" : "re");
546 			}
547 			dmar_unmap_pgtbl(sf);
548 		} else {
549 			dmar_unmap_pgtbl(sf);
550 			dmar_domain_destroy(domain1);
551 			/* Nothing needs to be done to destroy ctx1. */
552 			free(ctx1, M_DMAR_CTX);
553 			domain = (struct dmar_domain *)ctx->context.domain;
554 			ctx->refs++; /* tag referenced us */
555 		}
556 	} else {
557 		domain = (struct dmar_domain *)ctx->context.domain;
558 		if (ctx->context.tag->owner == NULL)
559 			ctx->context.tag->owner = dev;
560 		ctx->refs++; /* tag referenced us */
561 	}
562 
563 	error = dmar_flush_for_ctx_entry(dmar, enable);
564 	if (error != 0) {
565 		dmar_free_ctx_locked(dmar, ctx);
566 		TD_PINNED_ASSERT;
567 		return (NULL);
568 	}
569 
570 	/*
571 	 * The dmar lock was potentially dropped between check for the
572 	 * empty context list and now.  Recheck the state of GCMD_TE
573 	 * to avoid unneeded command.
574 	 */
575 	if (enable && !rmrr_init && (dmar->hw_gcmd & DMAR_GCMD_TE) == 0) {
576 		error = dmar_enable_translation(dmar);
577 		if (error == 0) {
578 			if (bootverbose) {
579 				printf("dmar%d: enabled translation\n",
580 				    dmar->iommu.unit);
581 			}
582 		} else {
583 			printf("dmar%d: enabling translation failed, "
584 			    "error %d\n", dmar->iommu.unit, error);
585 			dmar_free_ctx_locked(dmar, ctx);
586 			TD_PINNED_ASSERT;
587 			return (NULL);
588 		}
589 	}
590 	DMAR_UNLOCK(dmar);
591 	TD_PINNED_ASSERT;
592 	return (ctx);
593 }
594 
595 struct dmar_ctx *
596 dmar_get_ctx_for_dev(struct dmar_unit *dmar, device_t dev, uint16_t rid,
597     bool id_mapped, bool rmrr_init)
598 {
599 	int dev_domain, dev_path_len, dev_busno;
600 
601 	dev_domain = pci_get_domain(dev);
602 	dev_path_len = dmar_dev_depth(dev);
603 	ACPI_DMAR_PCI_PATH dev_path[dev_path_len];
604 	dmar_dev_path(dev, &dev_busno, dev_path, dev_path_len);
605 	return (dmar_get_ctx_for_dev1(dmar, dev, rid, dev_domain, dev_busno,
606 	    dev_path, dev_path_len, id_mapped, rmrr_init));
607 }
608 
609 struct dmar_ctx *
610 dmar_get_ctx_for_devpath(struct dmar_unit *dmar, uint16_t rid,
611     int dev_domain, int dev_busno,
612     const void *dev_path, int dev_path_len,
613     bool id_mapped, bool rmrr_init)
614 {
615 
616 	return (dmar_get_ctx_for_dev1(dmar, NULL, rid, dev_domain, dev_busno,
617 	    dev_path, dev_path_len, id_mapped, rmrr_init));
618 }
619 
620 int
621 dmar_move_ctx_to_domain(struct dmar_domain *domain, struct dmar_ctx *ctx)
622 {
623 	struct dmar_unit *dmar;
624 	struct dmar_domain *old_domain;
625 	dmar_ctx_entry_t *ctxp;
626 	struct sf_buf *sf;
627 	int error;
628 
629 	dmar = domain->dmar;
630 	old_domain = (struct dmar_domain *)ctx->context.domain;
631 	if (domain == old_domain)
632 		return (0);
633 	KASSERT(old_domain->iodom.iommu == domain->iodom.iommu,
634 	    ("domain %p %u moving between dmars %u %u", domain,
635 	    domain->domain, old_domain->iodom.iommu->unit,
636 	    domain->iodom.iommu->unit));
637 	TD_PREP_PINNED_ASSERT;
638 
639 	ctxp = dmar_map_ctx_entry(ctx, &sf);
640 	DMAR_LOCK(dmar);
641 	dmar_ctx_unlink(ctx);
642 	ctx->context.domain = &domain->iodom;
643 	dmar_ctx_link(ctx);
644 	ctx_id_entry_init(ctx, ctxp, true, PCI_BUSMAX + 100);
645 	dmar_unmap_pgtbl(sf);
646 	error = dmar_flush_for_ctx_entry(dmar, true);
647 	/* If flush failed, rolling back would not work as well. */
648 	printf("dmar%d rid %x domain %d->%d %s-mapped\n",
649 	    dmar->iommu.unit, ctx->rid, old_domain->domain, domain->domain,
650 	    (domain->iodom.flags & IOMMU_DOMAIN_IDMAP) != 0 ? "id" : "re");
651 	dmar_unref_domain_locked(dmar, old_domain);
652 	TD_PINNED_ASSERT;
653 	return (error);
654 }
655 
656 static void
657 dmar_unref_domain_locked(struct dmar_unit *dmar, struct dmar_domain *domain)
658 {
659 
660 	DMAR_ASSERT_LOCKED(dmar);
661 	KASSERT(domain->refs >= 1,
662 	    ("dmar %d domain %p refs %u", dmar->iommu.unit, domain,
663 	    domain->refs));
664 	KASSERT(domain->refs > domain->ctx_cnt,
665 	    ("dmar %d domain %p refs %d ctx_cnt %d", dmar->iommu.unit, domain,
666 	    domain->refs, domain->ctx_cnt));
667 
668 	if (domain->refs > 1) {
669 		domain->refs--;
670 		DMAR_UNLOCK(dmar);
671 		return;
672 	}
673 
674 	KASSERT((domain->iodom.flags & IOMMU_DOMAIN_RMRR) == 0,
675 	    ("lost ref on RMRR domain %p", domain));
676 
677 	LIST_REMOVE(domain, link);
678 	DMAR_UNLOCK(dmar);
679 
680 	taskqueue_drain(dmar->iommu.delayed_taskqueue,
681 	    &domain->iodom.unload_task);
682 	dmar_domain_destroy(domain);
683 }
684 
685 void
686 dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx)
687 {
688 	struct sf_buf *sf;
689 	dmar_ctx_entry_t *ctxp;
690 	struct dmar_domain *domain;
691 
692 	DMAR_ASSERT_LOCKED(dmar);
693 	KASSERT(ctx->refs >= 1,
694 	    ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs));
695 
696 	/*
697 	 * If our reference is not last, only the dereference should
698 	 * be performed.
699 	 */
700 	if (ctx->refs > 1) {
701 		ctx->refs--;
702 		DMAR_UNLOCK(dmar);
703 		return;
704 	}
705 
706 	KASSERT((ctx->context.flags & IOMMU_CTX_DISABLED) == 0,
707 	    ("lost ref on disabled ctx %p", ctx));
708 
709 	/*
710 	 * Otherwise, the context entry must be cleared before the
711 	 * page table is destroyed.  The mapping of the context
712 	 * entries page could require sleep, unlock the dmar.
713 	 */
714 	DMAR_UNLOCK(dmar);
715 	TD_PREP_PINNED_ASSERT;
716 	ctxp = dmar_map_ctx_entry(ctx, &sf);
717 	DMAR_LOCK(dmar);
718 	KASSERT(ctx->refs >= 1,
719 	    ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs));
720 
721 	/*
722 	 * Other thread might have referenced the context, in which
723 	 * case again only the dereference should be performed.
724 	 */
725 	if (ctx->refs > 1) {
726 		ctx->refs--;
727 		DMAR_UNLOCK(dmar);
728 		dmar_unmap_pgtbl(sf);
729 		TD_PINNED_ASSERT;
730 		return;
731 	}
732 
733 	KASSERT((ctx->context.flags & IOMMU_CTX_DISABLED) == 0,
734 	    ("lost ref on disabled ctx %p", ctx));
735 
736 	/*
737 	 * Clear the context pointer and flush the caches.
738 	 * XXXKIB: cannot do this if any RMRR entries are still present.
739 	 */
740 	dmar_pte_clear(&ctxp->ctx1);
741 	ctxp->ctx2 = 0;
742 	dmar_flush_ctx_to_ram(dmar, ctxp);
743 	dmar_inv_ctx_glob(dmar);
744 	if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0) {
745 		if (dmar->qi_enabled)
746 			dmar_qi_invalidate_iotlb_glob_locked(dmar);
747 		else
748 			dmar_inv_iotlb_glob(dmar);
749 	}
750 	dmar_unmap_pgtbl(sf);
751 	domain = (struct dmar_domain *)ctx->context.domain;
752 	dmar_ctx_unlink(ctx);
753 	free(ctx->context.tag, M_DMAR_CTX);
754 	free(ctx, M_DMAR_CTX);
755 	dmar_unref_domain_locked(dmar, domain);
756 	TD_PINNED_ASSERT;
757 }
758 
759 void
760 dmar_free_ctx(struct dmar_ctx *ctx)
761 {
762 	struct dmar_unit *dmar;
763 
764 	dmar = (struct dmar_unit *)ctx->context.domain->iommu;
765 	DMAR_LOCK(dmar);
766 	dmar_free_ctx_locked(dmar, ctx);
767 }
768 
769 /*
770  * Returns with the domain locked.
771  */
772 struct dmar_ctx *
773 dmar_find_ctx_locked(struct dmar_unit *dmar, uint16_t rid)
774 {
775 	struct dmar_domain *domain;
776 	struct dmar_ctx *ctx;
777 
778 	DMAR_ASSERT_LOCKED(dmar);
779 
780 	LIST_FOREACH(domain, &dmar->domains, link) {
781 		LIST_FOREACH(ctx, &domain->contexts, link) {
782 			if (ctx->rid == rid)
783 				return (ctx);
784 		}
785 	}
786 	return (NULL);
787 }
788 
789 void
790 dmar_domain_free_entry(struct iommu_map_entry *entry, bool free)
791 {
792 	struct iommu_domain *domain;
793 
794 	domain = entry->domain;
795 	IOMMU_DOMAIN_LOCK(domain);
796 	if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
797 		iommu_gas_free_region(domain, entry);
798 	else
799 		iommu_gas_free_space(domain, entry);
800 	IOMMU_DOMAIN_UNLOCK(domain);
801 	if (free)
802 		iommu_gas_free_entry(domain, entry);
803 	else
804 		entry->flags = 0;
805 }
806 
807 void
808 dmar_domain_unload_entry(struct iommu_map_entry *entry, bool free)
809 {
810 	struct dmar_domain *domain;
811 	struct dmar_unit *unit;
812 
813 	domain = (struct dmar_domain *)entry->domain;
814 	unit = (struct dmar_unit *)domain->iodom.iommu;
815 	if (unit->qi_enabled) {
816 		DMAR_LOCK(unit);
817 		dmar_qi_invalidate_locked((struct dmar_domain *)entry->domain,
818 		    entry->start, entry->end - entry->start, &entry->gseq,
819 		    true);
820 		if (!free)
821 			entry->flags |= IOMMU_MAP_ENTRY_QI_NF;
822 		TAILQ_INSERT_TAIL(&unit->tlb_flush_entries, entry, dmamap_link);
823 		DMAR_UNLOCK(unit);
824 	} else {
825 		domain_flush_iotlb_sync((struct dmar_domain *)entry->domain,
826 		    entry->start, entry->end - entry->start);
827 		dmar_domain_free_entry(entry, free);
828 	}
829 }
830 
831 static bool
832 dmar_domain_unload_emit_wait(struct dmar_domain *domain,
833     struct iommu_map_entry *entry)
834 {
835 
836 	if (TAILQ_NEXT(entry, dmamap_link) == NULL)
837 		return (true);
838 	return (domain->batch_no++ % dmar_batch_coalesce == 0);
839 }
840 
841 void
842 dmar_domain_unload(struct dmar_domain *domain,
843     struct iommu_map_entries_tailq *entries, bool cansleep)
844 {
845 	struct dmar_unit *unit;
846 	struct iommu_domain *iodom;
847 	struct iommu_map_entry *entry, *entry1;
848 	int error;
849 
850 	iodom = (struct iommu_domain *)domain;
851 	unit = (struct dmar_unit *)domain->iodom.iommu;
852 
853 	TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) {
854 		KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0,
855 		    ("not mapped entry %p %p", domain, entry));
856 		error = iodom->ops->unmap(iodom, entry->start, entry->end -
857 		    entry->start, cansleep ? IOMMU_PGF_WAITOK : 0);
858 		KASSERT(error == 0, ("unmap %p error %d", domain, error));
859 		if (!unit->qi_enabled) {
860 			domain_flush_iotlb_sync(domain, entry->start,
861 			    entry->end - entry->start);
862 			TAILQ_REMOVE(entries, entry, dmamap_link);
863 			dmar_domain_free_entry(entry, true);
864 		}
865 	}
866 	if (TAILQ_EMPTY(entries))
867 		return;
868 
869 	KASSERT(unit->qi_enabled, ("loaded entry left"));
870 	DMAR_LOCK(unit);
871 	TAILQ_FOREACH(entry, entries, dmamap_link) {
872 		dmar_qi_invalidate_locked(domain, entry->start, entry->end -
873 		    entry->start, &entry->gseq,
874 		    dmar_domain_unload_emit_wait(domain, entry));
875 	}
876 	TAILQ_CONCAT(&unit->tlb_flush_entries, entries, dmamap_link);
877 	DMAR_UNLOCK(unit);
878 }
879 
880 static void
881 dmar_domain_unload_task(void *arg, int pending)
882 {
883 	struct dmar_domain *domain;
884 	struct iommu_map_entries_tailq entries;
885 
886 	domain = arg;
887 	TAILQ_INIT(&entries);
888 
889 	for (;;) {
890 		DMAR_DOMAIN_LOCK(domain);
891 		TAILQ_SWAP(&domain->iodom.unload_entries, &entries,
892 		    iommu_map_entry, dmamap_link);
893 		DMAR_DOMAIN_UNLOCK(domain);
894 		if (TAILQ_EMPTY(&entries))
895 			break;
896 		dmar_domain_unload(domain, &entries, true);
897 	}
898 }
899 
900 struct iommu_ctx *
901 iommu_get_ctx(struct iommu_unit *iommu, device_t dev, uint16_t rid,
902     bool id_mapped, bool rmrr_init)
903 {
904 	struct dmar_unit *dmar;
905 	struct dmar_ctx *ret;
906 
907 	dmar = (struct dmar_unit *)iommu;
908 
909 	ret = dmar_get_ctx_for_dev(dmar, dev, rid, id_mapped, rmrr_init);
910 
911 	return ((struct iommu_ctx *)ret);
912 }
913 
/*
 * Generic iommu interface: drop a context reference with the unit
 * lock held.  Thin wrapper around dmar_free_ctx_locked().
 */
void
iommu_free_ctx_locked(struct iommu_unit *iommu, struct iommu_ctx *context)
{

	dmar_free_ctx_locked((struct dmar_unit *)iommu,
	    (struct dmar_ctx *)context);
}
925 
/*
 * Generic iommu interface: drop a context reference.  Thin wrapper
 * around dmar_free_ctx(), which looks up the owning unit and takes
 * its lock itself.
 */
void
iommu_free_ctx(struct iommu_ctx *context)
{
	struct dmar_ctx *ctx;

	ctx = (struct dmar_ctx *)context;

	dmar_free_ctx(ctx);
}
937 
938 void
939 iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free)
940 {
941 
942 	dmar_domain_unload_entry(entry, free);
943 }
944 
945 void
946 iommu_domain_unload(struct iommu_domain *iodom,
947     struct iommu_map_entries_tailq *entries, bool cansleep)
948 {
949 	struct dmar_domain *domain;
950 
951 	domain = (struct dmar_domain *)iodom;
952 
953 	dmar_domain_unload(domain, entries, cansleep);
954 }
955