xref: /freebsd/sys/dev/iommu/busdma_iommu.c (revision c745a6818bcbf33cf7f59641c925d19b3f98cea8)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2013 The FreeBSD Foundation
5  *
6  * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
7  * under sponsorship from the FreeBSD Foundation.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/domainset.h>
34 #include <sys/malloc.h>
35 #include <sys/bus.h>
36 #include <sys/conf.h>
37 #include <sys/interrupt.h>
38 #include <sys/kernel.h>
39 #include <sys/ktr.h>
40 #include <sys/lock.h>
41 #include <sys/proc.h>
42 #include <sys/memdesc.h>
43 #include <sys/msan.h>
44 #include <sys/mutex.h>
45 #include <sys/sysctl.h>
46 #include <sys/rman.h>
47 #include <sys/taskqueue.h>
48 #include <sys/tree.h>
49 #include <sys/uio.h>
50 #include <sys/vmem.h>
51 #include <dev/pci/pcireg.h>
52 #include <dev/pci/pcivar.h>
53 #include <vm/vm.h>
54 #include <vm/vm_extern.h>
55 #include <vm/vm_kern.h>
56 #include <vm/vm_object.h>
57 #include <vm/vm_page.h>
58 #include <vm/vm_map.h>
59 #include <dev/iommu/iommu.h>
60 #include <machine/atomic.h>
61 #include <machine/bus.h>
62 #include <machine/md_var.h>
63 #include <machine/iommu.h>
64 #include <dev/iommu/busdma_iommu.h>
65 
66 /*
67  * busdma_iommu.c, the implementation of the busdma(9) interface using
68  * IOMMU units from Intel VT-d.
69  */
70 
71 static bool
72 iommu_bus_dma_is_dev_disabled(int domain, int bus, int slot, int func)
73 {
74 	char str[128], *env;
75 	int default_bounce;
76 	bool ret;
77 	static const char bounce_str[] = "bounce";
78 	static const char iommu_str[] = "iommu";
79 	static const char dmar_str[] = "dmar"; /* compatibility */
80 
81 	default_bounce = 0;
82 	env = kern_getenv("hw.busdma.default");
83 	if (env != NULL) {
84 		if (strcmp(env, bounce_str) == 0)
85 			default_bounce = 1;
86 		else if (strcmp(env, iommu_str) == 0 ||
87 		    strcmp(env, dmar_str) == 0)
88 			default_bounce = 0;
89 		freeenv(env);
90 	}
91 
92 	snprintf(str, sizeof(str), "hw.busdma.pci%d.%d.%d.%d",
93 	    domain, bus, slot, func);
94 	env = kern_getenv(str);
95 	if (env == NULL)
96 		return (default_bounce != 0);
97 	if (strcmp(env, bounce_str) == 0)
98 		ret = true;
99 	else if (strcmp(env, iommu_str) == 0 ||
100 	    strcmp(env, dmar_str) == 0)
101 		ret = false;
102 	else
103 		ret = default_bounce != 0;
104 	freeenv(env);
105 	return (ret);
106 }
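
/*
 * Example loader.conf(5) settings for the tunables consumed above
 * (the device address is hypothetical):
 *
 *	hw.busdma.default="bounce"	# default all devices to bounce
 *	hw.busdma.pci0.6.0.0="iommu"	# but keep pci0:6:0:0 translated
 */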
107 
108 /*
109  * Given original device, find the requester ID that will be seen by
110  * the IOMMU unit and used for page table lookup.  PCI bridges may take
111  * ownership of transactions from downstream devices, so it may not be
112  * the same as the BSF of the target device.  In those cases, all
113  * devices downstream of the bridge must share a single mapping
114  * domain, and must collectively be assigned to use either IOMMU or
115  * bounce mapping.
116  */
117 int
118 iommu_get_requester(device_t dev, device_t *requesterp, uint16_t *rid)
119 {
120 	devclass_t pci_class;
121 	device_t l, pci, pcib, pcip, pcibp, requester;
122 	int cap_offset;
123 	uint16_t pcie_flags;
124 	bool bridge_is_pcie;
125 
126 	pci_class = devclass_find("pci");
127 	l = requester = dev;
128 
129 	pci = device_get_parent(dev);
130 	if (pci == NULL || device_get_devclass(pci) != pci_class) {
131 		*rid = 0;	/* XXXKIB: Could be ACPI HID */
132 		*requesterp = NULL;
133 		return (ENOTTY);
134 	}
135 
136 	*rid = pci_get_rid(dev);
137 
138 	/*
139 	 * Walk the bridge hierarchy from the target device to the
140 	 * host port to find the translating bridge nearest the IOMMU
141 	 * unit.
142 	 */
143 	for (;;) {
144 		pci = device_get_parent(l);
145 		if (pci == NULL) {
146 			if (bootverbose) {
147 				printf(
148 			"iommu_get_requester(%s): NULL parent for %s\n",
149 				    device_get_name(dev), device_get_name(l));
150 			}
151 			*rid = 0;
152 			*requesterp = NULL;
153 			return (ENXIO);
154 		}
155 		if (device_get_devclass(pci) != pci_class) {
156 			if (bootverbose) {
157 				printf(
158 			"iommu_get_requester(%s): non-pci parent %s for %s\n",
159 				    device_get_name(dev), device_get_name(pci),
160 				    device_get_name(l));
161 			}
162 			*rid = 0;
163 			*requesterp = NULL;
164 			return (ENXIO);
165 		}
166 
167 		pcib = device_get_parent(pci);
168 		if (pcib == NULL) {
169 			if (bootverbose) {
170 				printf(
171 			"iommu_get_requester(%s): NULL bridge for %s\n",
172 				    device_get_name(dev), device_get_name(pci));
173 			}
174 			*rid = 0;
175 			*requesterp = NULL;
176 			return (ENXIO);
177 		}
178 
179 		/*
180 		 * The parent of our "bridge" isn't another PCI bus,
181 		 * so pcib isn't a PCI->PCI bridge but rather a host
182 		 * port, and the requester ID won't be translated
183 		 * further.
184 		 */
185 		pcip = device_get_parent(pcib);
186 		if (device_get_devclass(pcip) != pci_class)
187 			break;
188 		pcibp = device_get_parent(pcip);
189 
190 		if (pci_find_cap(l, PCIY_EXPRESS, &cap_offset) == 0) {
191 			/*
192 			 * Do not stop the loop even if the target
193 			 * device is PCIe, because it is possible (but
194 			 * unlikely) to have a PCI->PCIe bridge
195 			 * somewhere in the hierarchy.
196 			 */
197 			l = pcib;
198 		} else {
199 			/*
200 			 * Device is not PCIe, it cannot be seen as a
201 			 * requester by IOMMU unit.  Check whether the
202 			 * bridge is PCIe.
203 			 */
204 			bridge_is_pcie = pci_find_cap(pcib, PCIY_EXPRESS,
205 			    &cap_offset) == 0;
206 			requester = pcib;
207 
208 			/*
209 			 * Check for a buggy PCIe/PCI bridge that
210 			 * doesn't report the express capability.  If
211 			 * the bridge above it is express but isn't a
212 			 * PCI bridge, then we know pcib is actually a
213 			 * PCIe/PCI bridge.
214 			 */
215 			if (!bridge_is_pcie && pci_find_cap(pcibp,
216 			    PCIY_EXPRESS, &cap_offset) == 0) {
217 				pcie_flags = pci_read_config(pcibp,
218 				    cap_offset + PCIER_FLAGS, 2);
219 				if ((pcie_flags & PCIEM_FLAGS_TYPE) !=
220 				    PCIEM_TYPE_PCI_BRIDGE)
221 					bridge_is_pcie = true;
222 			}
223 
224 			if (bridge_is_pcie) {
225 				/*
226 				 * The current device is not PCIe, but
227 				 * the bridge above it is.  This is a
228 				 * PCIe->PCI bridge.  Assume that the
229 				 * requester ID will be the secondary
230 				 * bus number with slot and function
231 				 * set to zero.
232 				 *
233 				 * XXX: Doesn't handle the case where
234 				 * the bridge is PCIe->PCI-X, and the
235 				 * bridge will only take ownership of
236 				 * requests in some cases.  We should
237 				 * provide context entries with the
238 				 * same page tables for taken and
239 				 * non-taken transactions.
240 				 */
241 				*rid = PCI_RID(pci_get_bus(l), 0, 0);
242 				l = pcibp;
243 			} else {
244 				/*
245 				 * Neither the device nor the bridge
246 				 * above it are PCIe.  This is a
247 				 * conventional PCI->PCI bridge, which
248 				 * will use the bridge's BSF as the
249 				 * requester ID.
250 				 */
251 				*rid = pci_get_rid(pcib);
252 				l = pcib;
253 			}
254 		}
255 	}
256 	*requesterp = requester;
257 	return (0);
258 }
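
/*
 * Illustrative requester ID arithmetic for a hypothetical topology: a
 * conventional PCI device behind a PCIe->PCI bridge whose secondary
 * bus is 5 is seen by the IOMMU as PCI_RID(5, 0, 0), i.e. bus 5 in
 * bits 15:8 with slot and function cleared, instead of the device's
 * own bus/slot/function.
 */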
259 
260 struct iommu_ctx *
261 iommu_instantiate_ctx(struct iommu_unit *unit, device_t dev, bool rmrr)
262 {
263 	device_t requester;
264 	struct iommu_ctx *ctx;
265 	int error;
266 	bool disabled;
267 	uint16_t rid;
268 
269 	error = iommu_get_requester(dev, &requester, &rid);
270 	if (error != 0)
271 		return (NULL);
272 
273 	/*
274 	 * If the user requested that the IOMMU be disabled for the
275 	 * device, we cannot disable the whole IOMMU unit, since other
276 	 * devices on the same unit may still require translation.
277 	 * Instead, provide the identity mapping for the device
278 	 * context.
279 	 */
280 	disabled = iommu_bus_dma_is_dev_disabled(pci_get_domain(requester),
281 	    pci_get_bus(requester), pci_get_slot(requester),
282 	    pci_get_function(requester));
283 	ctx = iommu_get_ctx(unit, requester, rid, disabled, rmrr);
284 	if (ctx == NULL)
285 		return (NULL);
286 	if (disabled) {
287 		/*
288 		 * Keep the first reference on context, release the
289 		 * later refs.
290 		 */
291 		IOMMU_LOCK(unit);
292 		if ((ctx->flags & IOMMU_CTX_DISABLED) == 0) {
293 			ctx->flags |= IOMMU_CTX_DISABLED;
294 			IOMMU_UNLOCK(unit);
295 		} else {
296 			iommu_free_ctx_locked(unit, ctx);
297 		}
298 		ctx = NULL;
299 	}
300 	return (ctx);
301 }
302 
303 struct iommu_ctx *
304 iommu_get_dev_ctx(device_t dev)
305 {
306 	struct iommu_unit *unit;
307 
308 	unit = iommu_find(dev, bootverbose);
309 	/* Not in the scope of any IOMMU? */
310 	if (unit == NULL)
311 		return (NULL);
312 	if (!unit->dma_enabled)
313 		return (NULL);
314 
315 	iommu_unit_pre_instantiate_ctx(unit);
316 	return (iommu_instantiate_ctx(unit, dev, false));
317 }
318 
319 bus_dma_tag_t
320 iommu_get_dma_tag(device_t dev, device_t child)
321 {
322 	struct iommu_ctx *ctx;
323 	bus_dma_tag_t res;
324 
325 	ctx = iommu_get_dev_ctx(child);
326 	if (ctx == NULL)
327 		return (NULL);
328 
329 	res = (bus_dma_tag_t)ctx->tag;
330 	return (res);
331 }
332 
333 bool
334 bus_dma_iommu_set_buswide(device_t dev)
335 {
336 	struct iommu_unit *unit;
337 	device_t parent;
338 	u_int busno, slot, func;
339 
340 	parent = device_get_parent(dev);
341 	if (device_get_devclass(parent) != devclass_find("pci"))
342 		return (false);
343 	unit = iommu_find(dev, bootverbose);
344 	if (unit == NULL)
345 		return (false);
346 	busno = pci_get_bus(dev);
347 	slot = pci_get_slot(dev);
348 	func = pci_get_function(dev);
349 	if (slot != 0 || func != 0) {
350 		if (bootverbose) {
351 			device_printf(dev,
352 			    "iommu%d pci%d:%d:%d requested buswide busdma\n",
353 			    unit->unit, busno, slot, func);
354 		}
355 		return (false);
356 	}
357 	iommu_set_buswide_ctx(unit, busno);
358 	return (true);
359 }
360 
361 void
362 iommu_set_buswide_ctx(struct iommu_unit *unit, u_int busno)
363 {
364 
365 	MPASS(busno <= PCI_BUSMAX);
366 	IOMMU_LOCK(unit);
367 	unit->buswide_ctxs[busno / NBBY / sizeof(uint32_t)] |=
368 	    1 << (busno % (NBBY * sizeof(uint32_t)));
369 	IOMMU_UNLOCK(unit);
370 }
371 
372 bool
373 iommu_is_buswide_ctx(struct iommu_unit *unit, u_int busno)
374 {
375 
376 	MPASS(busno <= PCI_BUSMAX);
377 	return ((unit->buswide_ctxs[busno / NBBY / sizeof(uint32_t)] &
378 	    (1U << (busno % (NBBY * sizeof(uint32_t))))) != 0);
379 }
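
/*
 * The buswide_ctxs[] bitmap holds one bit per PCI bus number.  For
 * example, busno 74 (0x4a) maps to word 74 / 32 = 2 and bit
 * 74 % 32 = 10, so iommu_set_buswide_ctx() sets bit 10 of
 * buswide_ctxs[2] and iommu_is_buswide_ctx() tests the same bit.
 */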
380 
381 static MALLOC_DEFINE(M_IOMMU_DMAMAP, "iommu_dmamap", "IOMMU DMA Map");
382 
383 static void iommu_bus_schedule_dmamap(struct iommu_unit *unit,
384     struct bus_dmamap_iommu *map);
385 
386 static int
387 iommu_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
388     bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
389     bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags,
390     bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat)
391 {
392 	struct bus_dma_tag_iommu *newtag, *oldtag;
393 	int error;
394 
395 	*dmat = NULL;
396 	error = common_bus_dma_tag_create(parent != NULL ?
397 	    &((struct bus_dma_tag_iommu *)parent)->common : NULL, alignment,
398 	    boundary, lowaddr, highaddr, maxsize, nsegments, maxsegsz, flags,
399 	    lockfunc, lockfuncarg, sizeof(struct bus_dma_tag_iommu),
400 	    (void **)&newtag);
401 	if (error != 0)
402 		goto out;
403 
404 	oldtag = (struct bus_dma_tag_iommu *)parent;
405 	newtag->common.impl = &bus_dma_iommu_impl;
406 	newtag->ctx = oldtag->ctx;
407 	newtag->owner = oldtag->owner;
408 
409 	*dmat = (bus_dma_tag_t)newtag;
410 out:
411 	CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
412 	    __func__, newtag, (newtag != NULL ? newtag->common.flags : 0),
413 	    error);
414 	return (error);
415 }
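
/*
 * Driver-visible usage sketch (hypothetical sizes and names; consult
 * bus_dma_tag_create(9) for the authoritative public KPI): a driver
 * whose parent bus handed out the IOMMU-backed tag reaches the
 * implementation above when it creates its own tag, roughly as in
 *
 *	error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0,
 *	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
 *	    MY_MAXSIZE, MY_NSEGS, MY_MAXSEGSZ, 0,
 *	    busdma_lock_mutex, &sc->sc_mtx, &sc->sc_dmat);
 *
 * The new tag inherits ctx and owner from the parent iommu tag.
 */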
416 
417 static int
418 iommu_bus_dma_tag_set_domain(bus_dma_tag_t dmat)
419 {
420 
421 	return (0);
422 }
423 
424 static int
425 iommu_bus_dma_tag_destroy(bus_dma_tag_t dmat1)
426 {
427 	struct bus_dma_tag_iommu *dmat;
428 	struct iommu_unit *iommu;
429 	struct iommu_ctx *ctx;
430 	int error;
431 
432 	error = 0;
433 	dmat = (struct bus_dma_tag_iommu *)dmat1;
434 
435 	if (dmat != NULL) {
436 		if (dmat->map_count != 0) {
437 			error = EBUSY;
438 			goto out;
439 		}
440 		ctx = dmat->ctx;
441 		if (dmat == ctx->tag) {
442 			iommu = ctx->domain->iommu;
443 			IOMMU_LOCK(iommu);
444 			iommu_free_ctx_locked(iommu, dmat->ctx);
445 		}
446 		free(dmat->segments, M_IOMMU_DMAMAP);
447 		free(dmat, M_DEVBUF);
448 	}
449 out:
450 	CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat, error);
451 	return (error);
452 }
453 
454 static bool
455 iommu_bus_dma_id_mapped(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen)
456 {
457 
458 	return (false);
459 }
460 
461 static int
462 iommu_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
463 {
464 	struct bus_dma_tag_iommu *tag;
465 	struct bus_dmamap_iommu *map;
466 
467 	tag = (struct bus_dma_tag_iommu *)dmat;
468 	map = malloc_domainset(sizeof(*map), M_IOMMU_DMAMAP,
469 	    DOMAINSET_PREF(tag->common.domain), M_NOWAIT | M_ZERO);
470 	if (map == NULL) {
471 		*mapp = NULL;
472 		return (ENOMEM);
473 	}
474 	if (tag->segments == NULL) {
475 		tag->segments = malloc_domainset(sizeof(bus_dma_segment_t) *
476 		    tag->common.nsegments, M_IOMMU_DMAMAP,
477 		    DOMAINSET_PREF(tag->common.domain), M_NOWAIT);
478 		if (tag->segments == NULL) {
479 			free(map, M_IOMMU_DMAMAP);
480 			*mapp = NULL;
481 			return (ENOMEM);
482 		}
483 	}
484 	IOMMU_DMAMAP_INIT(map);
485 	TAILQ_INIT(&map->map_entries);
486 	map->tag = tag;
487 	map->locked = true;
488 	map->cansleep = false;
489 	tag->map_count++;
490 	*mapp = (bus_dmamap_t)map;
491 
492 	return (0);
493 }
494 
495 static int
496 iommu_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map1)
497 {
498 	struct bus_dma_tag_iommu *tag;
499 	struct bus_dmamap_iommu *map;
500 
501 	tag = (struct bus_dma_tag_iommu *)dmat;
502 	map = (struct bus_dmamap_iommu *)map1;
503 	if (map != NULL) {
504 		IOMMU_DMAMAP_LOCK(map);
505 		if (!TAILQ_EMPTY(&map->map_entries)) {
506 			IOMMU_DMAMAP_UNLOCK(map);
507 			return (EBUSY);
508 		}
509 		IOMMU_DMAMAP_DESTROY(map);
510 		free(map, M_IOMMU_DMAMAP);
511 	}
512 	tag->map_count--;
513 	return (0);
514 }
515 
516 
517 static int
518 iommu_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
519     bus_dmamap_t *mapp)
520 {
521 	struct bus_dma_tag_iommu *tag;
522 	struct bus_dmamap_iommu *map;
523 	int error, mflags;
524 	vm_memattr_t attr;
525 
526 	error = iommu_bus_dmamap_create(dmat, flags, mapp);
527 	if (error != 0)
528 		return (error);
529 
530 	mflags = (flags & BUS_DMA_NOWAIT) != 0 ? M_NOWAIT : M_WAITOK;
531 	mflags |= (flags & BUS_DMA_ZERO) != 0 ? M_ZERO : 0;
532 	attr = (flags & BUS_DMA_NOCACHE) != 0 ? VM_MEMATTR_UNCACHEABLE :
533 	    VM_MEMATTR_DEFAULT;
534 
535 	tag = (struct bus_dma_tag_iommu *)dmat;
536 	map = (struct bus_dmamap_iommu *)*mapp;
537 
538 	if (tag->common.maxsize < PAGE_SIZE &&
539 	    tag->common.alignment <= tag->common.maxsize &&
540 	    attr == VM_MEMATTR_DEFAULT) {
541 		*vaddr = malloc_domainset(tag->common.maxsize, M_DEVBUF,
542 		    DOMAINSET_PREF(tag->common.domain), mflags);
543 		map->flags |= BUS_DMAMAP_IOMMU_MALLOC;
544 	} else {
545 		*vaddr = kmem_alloc_attr_domainset(
546 		    DOMAINSET_PREF(tag->common.domain), tag->common.maxsize,
547 		    mflags, 0ul, BUS_SPACE_MAXADDR, attr);
548 		map->flags |= BUS_DMAMAP_IOMMU_KMEM_ALLOC;
549 	}
550 	if (*vaddr == NULL) {
551 		iommu_bus_dmamap_destroy(dmat, *mapp);
552 		*mapp = NULL;
553 		return (ENOMEM);
554 	}
555 	return (0);
556 }
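
/*
 * For example (assuming 4 KB pages), a tag with maxsize 512,
 * alignment 256 and the default memory attribute is served by
 * malloc_domainset() and marked BUS_DMAMAP_IOMMU_MALLOC, while
 * maxsize 8192, alignment larger than maxsize, or BUS_DMA_NOCACHE
 * falls through to kmem_alloc_attr_domainset() and
 * BUS_DMAMAP_IOMMU_KMEM_ALLOC.
 */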
557 
558 static void
559 iommu_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map1)
560 {
561 	struct bus_dma_tag_iommu *tag;
562 	struct bus_dmamap_iommu *map;
563 
564 	tag = (struct bus_dma_tag_iommu *)dmat;
565 	map = (struct bus_dmamap_iommu *)map1;
566 
567 	if ((map->flags & BUS_DMAMAP_IOMMU_MALLOC) != 0) {
568 		free(vaddr, M_DEVBUF);
569 		map->flags &= ~BUS_DMAMAP_IOMMU_MALLOC;
570 	} else {
571 		KASSERT((map->flags & BUS_DMAMAP_IOMMU_KMEM_ALLOC) != 0,
572 		    ("iommu_bus_dmamem_free for non alloced map %p", map));
573 		kmem_free(vaddr, tag->common.maxsize);
574 		map->flags &= ~BUS_DMAMAP_IOMMU_KMEM_ALLOC;
575 	}
576 
577 	iommu_bus_dmamap_destroy(dmat, map1);
578 }
579 
580 static int
581 iommu_bus_dmamap_load_something1(struct bus_dma_tag_iommu *tag,
582     struct bus_dmamap_iommu *map, vm_page_t *ma, int offset, bus_size_t buflen,
583     int flags, bus_dma_segment_t *segs, int *segp,
584     struct iommu_map_entries_tailq *entries)
585 {
586 	struct iommu_ctx *ctx;
587 	struct iommu_domain *domain;
588 	struct iommu_map_entry *entry;
589 	bus_size_t buflen1;
590 	int error, e_flags, idx, gas_flags, seg;
591 
592 	KASSERT(offset < IOMMU_PAGE_SIZE, ("offset %d", offset));
593 	if (segs == NULL)
594 		segs = tag->segments;
595 	ctx = tag->ctx;
596 	domain = ctx->domain;
597 	e_flags = IOMMU_MAP_ENTRY_READ |
598 	    ((flags & BUS_DMA_NOWRITE) == 0 ? IOMMU_MAP_ENTRY_WRITE : 0);
599 	seg = *segp;
600 	error = 0;
601 	idx = 0;
602 	while (buflen > 0) {
603 		seg++;
604 		if (seg >= tag->common.nsegments) {
605 			error = EFBIG;
606 			break;
607 		}
608 		buflen1 = buflen > tag->common.maxsegsz ?
609 		    tag->common.maxsegsz : buflen;
610 
611 		/*
612 		 * (Too) optimistically allow a split if there is more
613 		 * than one segment left.
614 		 */
615 		gas_flags = map->cansleep ? IOMMU_MF_CANWAIT : 0;
616 		if (seg + 1 < tag->common.nsegments)
617 			gas_flags |= IOMMU_MF_CANSPLIT;
618 
619 		error = iommu_gas_map(domain, &tag->common, buflen1,
620 		    offset, e_flags, gas_flags, ma + idx, &entry);
621 		if (error != 0)
622 			break;
623 		/* Update buflen1 in case the buffer was split. */
624 		if (buflen1 > entry->end - entry->start - offset)
625 			buflen1 = entry->end - entry->start - offset;
626 
627 		KASSERT(vm_addr_align_ok(entry->start + offset,
628 		    tag->common.alignment),
629 		    ("alignment failed: ctx %p start 0x%jx offset %x "
630 		    "align 0x%jx", ctx, (uintmax_t)entry->start, offset,
631 		    (uintmax_t)tag->common.alignment));
632 		KASSERT(entry->end <= tag->common.lowaddr ||
633 		    entry->start >= tag->common.highaddr,
634 		    ("entry placement failed: ctx %p start 0x%jx end 0x%jx "
635 		    "lowaddr 0x%jx highaddr 0x%jx", ctx,
636 		    (uintmax_t)entry->start, (uintmax_t)entry->end,
637 		    (uintmax_t)tag->common.lowaddr,
638 		    (uintmax_t)tag->common.highaddr));
639 		KASSERT(vm_addr_bound_ok(entry->start + offset, buflen1,
640 		    tag->common.boundary),
641 		    ("boundary failed: ctx %p start 0x%jx end 0x%jx "
642 		    "boundary 0x%jx", ctx, (uintmax_t)entry->start,
643 		    (uintmax_t)entry->end, (uintmax_t)tag->common.boundary));
644 		KASSERT(buflen1 <= tag->common.maxsegsz,
645 		    ("segment too large: ctx %p start 0x%jx end 0x%jx "
646 		    "buflen1 0x%jx maxsegsz 0x%jx", ctx,
647 		    (uintmax_t)entry->start, (uintmax_t)entry->end,
648 		    (uintmax_t)buflen1, (uintmax_t)tag->common.maxsegsz));
649 
650 		KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0,
651 		    ("entry %p missing IOMMU_MAP_ENTRY_MAP", entry));
652 		TAILQ_INSERT_TAIL(entries, entry, dmamap_link);
653 
654 		segs[seg].ds_addr = entry->start + offset;
655 		segs[seg].ds_len = buflen1;
656 
657 		idx += OFF_TO_IDX(offset + buflen1);
658 		offset += buflen1;
659 		offset &= IOMMU_PAGE_MASK;
660 		buflen -= buflen1;
661 	}
662 	if (error == 0)
663 		*segp = seg;
664 	return (error);
665 }
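
/*
 * Worked example for the segmentation loop above (hypothetical
 * numbers): with maxsegsz of 4 KB, nsegments of 4 and a 10 KB
 * buffer, iommu_gas_map() is asked for 4 KB, 4 KB and 2 KB in turn,
 * producing three map entries and three busdma segments, assuming no
 * entry had to be split; exhausting nsegments before buflen reaches
 * zero fails the load with EFBIG.
 */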
666 
667 static int
668 iommu_bus_dmamap_load_something(struct bus_dma_tag_iommu *tag,
669     struct bus_dmamap_iommu *map, vm_page_t *ma, int offset, bus_size_t buflen,
670     int flags, bus_dma_segment_t *segs, int *segp)
671 {
672 	struct iommu_ctx *ctx;
673 	struct iommu_domain *domain;
674 	struct iommu_map_entries_tailq entries;
675 	int error;
676 
677 	ctx = tag->ctx;
678 	domain = ctx->domain;
679 	atomic_add_long(&ctx->loads, 1);
680 
681 	TAILQ_INIT(&entries);
682 	error = iommu_bus_dmamap_load_something1(tag, map, ma, offset,
683 	    buflen, flags, segs, segp, &entries);
684 	if (error == 0) {
685 		IOMMU_DMAMAP_LOCK(map);
686 		TAILQ_CONCAT(&map->map_entries, &entries, dmamap_link);
687 		IOMMU_DMAMAP_UNLOCK(map);
688 	} else if (!TAILQ_EMPTY(&entries)) {
689 		/*
690 		 * The busdma interface does not allow us to report
691 		 * partial buffer load, so unfortunately we have to
692 		 * revert all work done.
693 		 */
694 		IOMMU_DOMAIN_LOCK(domain);
695 		TAILQ_CONCAT(&domain->unload_entries, &entries, dmamap_link);
696 		IOMMU_DOMAIN_UNLOCK(domain);
697 		taskqueue_enqueue(domain->iommu->delayed_taskqueue,
698 		    &domain->unload_task);
699 	}
700 
701 	if (error == ENOMEM && (flags & BUS_DMA_NOWAIT) == 0 &&
702 	    !map->cansleep)
703 		error = EINPROGRESS;
704 	if (error == EINPROGRESS)
705 		iommu_bus_schedule_dmamap(domain->iommu, map);
706 	return (error);
707 }
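
/*
 * From the driver's perspective the EINPROGRESS path is the standard
 * busdma(9) deferred load: the load returns EINPROGRESS, and the
 * callback recorded by iommu_bus_dmamap_waitok() is invoked later
 * from the unit's taskqueue once iommu_bus_task_dmamap() retries the
 * load with sleeping allowed.
 */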
708 
709 static int
710 iommu_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map1,
711     struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
712     bus_dma_segment_t *segs, int *segp)
713 {
714 	struct bus_dma_tag_iommu *tag;
715 	struct bus_dmamap_iommu *map;
716 
717 	tag = (struct bus_dma_tag_iommu *)dmat;
718 	map = (struct bus_dmamap_iommu *)map1;
719 	return (iommu_bus_dmamap_load_something(tag, map, ma, ma_offs, tlen,
720 	    flags, segs, segp));
721 }
722 
723 static int
724 iommu_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map1,
725     vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
726     int *segp)
727 {
728 	struct bus_dma_tag_iommu *tag;
729 	struct bus_dmamap_iommu *map;
730 	vm_page_t *ma, fma;
731 	vm_paddr_t pstart, pend, paddr;
732 	int error, i, ma_cnt, mflags, offset;
733 
734 	tag = (struct bus_dma_tag_iommu *)dmat;
735 	map = (struct bus_dmamap_iommu *)map1;
736 	pstart = trunc_page(buf);
737 	pend = round_page(buf + buflen);
738 	offset = buf & PAGE_MASK;
739 	ma_cnt = OFF_TO_IDX(pend - pstart);
740 	mflags = map->cansleep ? M_WAITOK : M_NOWAIT;
741 	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, mflags);
742 	if (ma == NULL)
743 		return (ENOMEM);
744 	fma = NULL;
745 	for (i = 0; i < ma_cnt; i++) {
746 		paddr = pstart + ptoa(i);
747 		ma[i] = PHYS_TO_VM_PAGE(paddr);
748 		if (ma[i] == NULL || VM_PAGE_TO_PHYS(ma[i]) != paddr) {
749 			/*
750 			 * If PHYS_TO_VM_PAGE() returned NULL or the
751 			 * vm_page was not initialized, we'll use a
752 			 * fake page.
753 			 */
754 			if (fma == NULL) {
755 				fma = malloc(sizeof(struct vm_page) * ma_cnt,
756 				    M_DEVBUF, M_ZERO | mflags);
757 				if (fma == NULL) {
758 					free(ma, M_DEVBUF);
759 					return (ENOMEM);
760 				}
761 			}
762 			vm_page_initfake(&fma[i], pstart + ptoa(i),
763 			    VM_MEMATTR_DEFAULT);
764 			ma[i] = &fma[i];
765 		}
766 	}
767 	error = iommu_bus_dmamap_load_something(tag, map, ma, offset, buflen,
768 	    flags, segs, segp);
769 	free(fma, M_DEVBUF);
770 	free(ma, M_DEVBUF);
771 	return (error);
772 }
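
/*
 * For instance (hypothetical addresses, 4 KB pages), buf 0x12345678
 * with buflen 0x1000 yields pstart 0x12345000, pend 0x12347000,
 * offset 0x678 and ma_cnt 2, so two vm_page pointers (real or fake)
 * describe the contiguous physical range passed to
 * iommu_bus_dmamap_load_something().
 */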
773 
774 static int
775 iommu_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map1, void *buf,
776     bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
777     int *segp)
778 {
779 	struct bus_dma_tag_iommu *tag;
780 	struct bus_dmamap_iommu *map;
781 	vm_page_t *ma, fma;
782 	vm_paddr_t pstart, pend, paddr;
783 	int error, i, ma_cnt, mflags, offset;
784 
785 	tag = (struct bus_dma_tag_iommu *)dmat;
786 	map = (struct bus_dmamap_iommu *)map1;
787 	pstart = trunc_page((vm_offset_t)buf);
788 	pend = round_page((vm_offset_t)buf + buflen);
789 	offset = (vm_offset_t)buf & PAGE_MASK;
790 	ma_cnt = OFF_TO_IDX(pend - pstart);
791 	mflags = map->cansleep ? M_WAITOK : M_NOWAIT;
792 	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, mflags);
793 	if (ma == NULL)
794 		return (ENOMEM);
795 	fma = NULL;
796 	for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) {
797 		if (pmap == kernel_pmap)
798 			paddr = pmap_kextract(pstart);
799 		else
800 			paddr = pmap_extract(pmap, pstart);
801 		ma[i] = PHYS_TO_VM_PAGE(paddr);
802 		if (ma[i] == NULL || VM_PAGE_TO_PHYS(ma[i]) != paddr) {
803 			/*
804 			 * If PHYS_TO_VM_PAGE() returned NULL or the
805 			 * vm_page was not initialized, we'll use a
806 			 * fake page.
807 			 */
808 			if (fma == NULL) {
809 				fma = malloc(sizeof(struct vm_page) * ma_cnt,
810 				    M_DEVBUF, M_ZERO | mflags);
811 				if (fma == NULL) {
812 					free(ma, M_DEVBUF);
813 					return (ENOMEM);
814 				}
815 			}
816 			vm_page_initfake(&fma[i], paddr, VM_MEMATTR_DEFAULT);
817 			ma[i] = &fma[i];
818 		}
819 	}
820 	error = iommu_bus_dmamap_load_something(tag, map, ma, offset, buflen,
821 	    flags, segs, segp);
822 	free(ma, M_DEVBUF);
823 	free(fma, M_DEVBUF);
824 	return (error);
825 }
826 
827 static void
828 iommu_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map1,
829     struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg)
830 {
831 	struct bus_dmamap_iommu *map;
832 
833 	if (map1 == NULL)
834 		return;
835 	map = (struct bus_dmamap_iommu *)map1;
836 	map->mem = *mem;
837 	map->tag = (struct bus_dma_tag_iommu *)dmat;
838 	map->callback = callback;
839 	map->callback_arg = callback_arg;
840 }
841 
842 static bus_dma_segment_t *
843 iommu_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map1,
844     bus_dma_segment_t *segs, int nsegs, int error)
845 {
846 	struct bus_dma_tag_iommu *tag;
847 	struct bus_dmamap_iommu *map;
848 
849 	tag = (struct bus_dma_tag_iommu *)dmat;
850 	map = (struct bus_dmamap_iommu *)map1;
851 
852 	if (!map->locked) {
853 		KASSERT(map->cansleep,
854 		    ("map not locked and not sleepable context %p", map));
855 
856 		/*
857 		 * We are called from the delayed context.  Relock the
858 		 * driver.
859 		 */
860 		(tag->common.lockfunc)(tag->common.lockfuncarg, BUS_DMA_LOCK);
861 		map->locked = true;
862 	}
863 
864 	if (segs == NULL)
865 		segs = tag->segments;
866 	return (segs);
867 }
868 
869 /*
870  * The limitations of the busdma KPI force the iommu to perform the
871  * actual unload, which removes the map entries from the page tables,
872  * from the delayed context on i386, since mapping a page table page
873  * might require a sleep to be successful.  The unfortunate
874  * consequence is that DMA requests can still be served for some time
875  * after the bus_dmamap_unload() call has returned.
876  *
877  * On amd64, we assume that sf allocation cannot fail.
878  */
879 static void
880 iommu_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map1)
881 {
882 	struct bus_dma_tag_iommu *tag;
883 	struct bus_dmamap_iommu *map;
884 	struct iommu_ctx *ctx;
885 	struct iommu_domain *domain;
886 	struct iommu_map_entries_tailq entries;
887 
888 	tag = (struct bus_dma_tag_iommu *)dmat;
889 	map = (struct bus_dmamap_iommu *)map1;
890 	ctx = tag->ctx;
891 	domain = ctx->domain;
892 	atomic_add_long(&ctx->unloads, 1);
893 
894 	TAILQ_INIT(&entries);
895 	IOMMU_DMAMAP_LOCK(map);
896 	TAILQ_CONCAT(&entries, &map->map_entries, dmamap_link);
897 	IOMMU_DMAMAP_UNLOCK(map);
898 #if defined(IOMMU_DOMAIN_UNLOAD_SLEEP)
899 	IOMMU_DOMAIN_LOCK(domain);
900 	TAILQ_CONCAT(&domain->unload_entries, &entries, dmamap_link);
901 	IOMMU_DOMAIN_UNLOCK(domain);
902 	taskqueue_enqueue(domain->iommu->delayed_taskqueue,
903 	    &domain->unload_task);
904 #else
905 	THREAD_NO_SLEEPING();
906 	iommu_domain_unload(domain, &entries, false);
907 	THREAD_SLEEPING_OK();
908 	KASSERT(TAILQ_EMPTY(&entries), ("lazy iommu_ctx_unload %p", ctx));
909 #endif
910 }
911 
912 static void
913 iommu_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map1,
914     bus_dmasync_op_t op)
915 {
916 	struct bus_dmamap_iommu *map __unused;
917 
918 	map = (struct bus_dmamap_iommu *)map1;
919 	kmsan_bus_dmamap_sync(&map->kmsan_mem, op);
920 }
921 
922 #ifdef KMSAN
923 static void
924 iommu_bus_dmamap_load_kmsan(bus_dmamap_t map1, struct memdesc *mem)
925 {
926 	struct bus_dmamap_iommu *map;
927 
928 	map = (struct bus_dmamap_iommu *)map1;
929 	if (map == NULL)
930 		return;
931 	memcpy(&map->kmsan_mem, mem, sizeof(struct memdesc));
932 }
933 #endif
934 
935 struct bus_dma_impl bus_dma_iommu_impl = {
936 	.tag_create = iommu_bus_dma_tag_create,
937 	.tag_destroy = iommu_bus_dma_tag_destroy,
938 	.tag_set_domain = iommu_bus_dma_tag_set_domain,
939 	.id_mapped = iommu_bus_dma_id_mapped,
940 	.map_create = iommu_bus_dmamap_create,
941 	.map_destroy = iommu_bus_dmamap_destroy,
942 	.mem_alloc = iommu_bus_dmamem_alloc,
943 	.mem_free = iommu_bus_dmamem_free,
944 	.load_phys = iommu_bus_dmamap_load_phys,
945 	.load_buffer = iommu_bus_dmamap_load_buffer,
946 	.load_ma = iommu_bus_dmamap_load_ma,
947 	.map_waitok = iommu_bus_dmamap_waitok,
948 	.map_complete = iommu_bus_dmamap_complete,
949 	.map_unload = iommu_bus_dmamap_unload,
950 	.map_sync = iommu_bus_dmamap_sync,
951 #ifdef KMSAN
952 	.load_kmsan = iommu_bus_dmamap_load_kmsan,
953 #endif
954 };
955 
956 static void
957 iommu_bus_task_dmamap(void *arg, int pending)
958 {
959 	struct bus_dma_tag_iommu *tag;
960 	struct bus_dmamap_iommu *map;
961 	struct iommu_unit *unit;
962 
963 	unit = arg;
964 	IOMMU_LOCK(unit);
965 	while ((map = TAILQ_FIRST(&unit->delayed_maps)) != NULL) {
966 		TAILQ_REMOVE(&unit->delayed_maps, map, delay_link);
967 		IOMMU_UNLOCK(unit);
968 		tag = map->tag;
969 		map->cansleep = true;
970 		map->locked = false;
971 		bus_dmamap_load_mem((bus_dma_tag_t)tag, (bus_dmamap_t)map,
972 		    &map->mem, map->callback, map->callback_arg,
973 		    BUS_DMA_WAITOK);
974 		map->cansleep = false;
975 		if (map->locked) {
976 			(tag->common.lockfunc)(tag->common.lockfuncarg,
977 			    BUS_DMA_UNLOCK);
978 		} else
979 			map->locked = true;
980 		map->cansleep = false;
981 		IOMMU_LOCK(unit);
982 	}
983 	IOMMU_UNLOCK(unit);
984 }
985 
986 static void
987 iommu_bus_schedule_dmamap(struct iommu_unit *unit, struct bus_dmamap_iommu *map)
988 {
989 
990 	map->locked = false;
991 	IOMMU_LOCK(unit);
992 	TAILQ_INSERT_TAIL(&unit->delayed_maps, map, delay_link);
993 	IOMMU_UNLOCK(unit);
994 	taskqueue_enqueue(unit->delayed_taskqueue, &unit->dmamap_load_task);
995 }
996 
997 int
998 iommu_init_busdma(struct iommu_unit *unit)
999 {
1000 	int error;
1001 
1002 	unit->dma_enabled = 0;
1003 	error = TUNABLE_INT_FETCH("hw.iommu.dma", &unit->dma_enabled);
1004 	if (error == 0) /* compatibility */
1005 		TUNABLE_INT_FETCH("hw.dmar.dma", &unit->dma_enabled);
1006 	SYSCTL_ADD_INT(&unit->sysctl_ctx,
1007 	    SYSCTL_CHILDREN(device_get_sysctl_tree(unit->dev)),
1008 	    OID_AUTO, "dma", CTLFLAG_RD, &unit->dma_enabled, 0,
1009 	    "DMA ops enabled");
1010 	TAILQ_INIT(&unit->delayed_maps);
1011 	TASK_INIT(&unit->dmamap_load_task, 0, iommu_bus_task_dmamap, unit);
1012 	unit->delayed_taskqueue = taskqueue_create("iommu", M_WAITOK,
1013 	    taskqueue_thread_enqueue, &unit->delayed_taskqueue);
1014 	taskqueue_start_threads(&unit->delayed_taskqueue, 1, PI_DISK,
1015 	    "iommu%d busdma taskq", unit->unit);
1016 	return (0);
1017 }
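
/*
 * DMA translation by a unit's busdma implementation can be disabled
 * from loader.conf(5), e.g. hw.iommu.dma=0 (or the legacy
 * hw.dmar.dma=0), and the resulting state is exported read-only as
 * the "dma" sysctl under the unit's device sysctl tree.
 */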
1018 
1019 void
1020 iommu_fini_busdma(struct iommu_unit *unit)
1021 {
1022 
1023 	if (unit->delayed_taskqueue == NULL)
1024 		return;
1025 
1026 	taskqueue_drain(unit->delayed_taskqueue, &unit->dmamap_load_task);
1027 	taskqueue_free(unit->delayed_taskqueue);
1028 	unit->delayed_taskqueue = NULL;
1029 }
1030 
1031 int
1032 bus_dma_iommu_load_ident(bus_dma_tag_t dmat, bus_dmamap_t map1,
1033     vm_paddr_t start, vm_size_t length, int flags)
1034 {
1035 	struct bus_dma_tag_common *tc;
1036 	struct bus_dma_tag_iommu *tag;
1037 	struct bus_dmamap_iommu *map;
1038 	struct iommu_ctx *ctx;
1039 	struct iommu_domain *domain;
1040 	struct iommu_map_entry *entry;
1041 	vm_page_t *ma;
1042 	vm_size_t i;
1043 	int error;
1044 	bool waitok;
1045 
1046 	MPASS((start & PAGE_MASK) == 0);
1047 	MPASS((length & PAGE_MASK) == 0);
1048 	MPASS(length > 0);
1049 	MPASS(start + length >= start);
1050 	MPASS((flags & ~(BUS_DMA_NOWAIT | BUS_DMA_NOWRITE)) == 0);
1051 
1052 	tc = (struct bus_dma_tag_common *)dmat;
1053 	if (tc->impl != &bus_dma_iommu_impl)
1054 		return (0);
1055 
1056 	tag = (struct bus_dma_tag_iommu *)dmat;
1057 	ctx = tag->ctx;
1058 	domain = ctx->domain;
1059 	map = (struct bus_dmamap_iommu *)map1;
1060 	waitok = (flags & BUS_DMA_NOWAIT) != 0;
1061 
1062 	entry = iommu_gas_alloc_entry(domain, waitok ? 0 : IOMMU_PGF_WAITOK);
1063 	if (entry == NULL)
1064 		return (ENOMEM);
1065 	entry->start = start;
1066 	entry->end = start + length;
1067 	ma = malloc(sizeof(vm_page_t) * atop(length), M_TEMP, waitok ?
1068 	    M_WAITOK : M_NOWAIT);
1069 	if (ma == NULL) {
1070 		iommu_gas_free_entry(entry);
1071 		return (ENOMEM);
1072 	}
1073 	for (i = 0; i < atop(length); i++) {
1074 		ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
1075 		    VM_MEMATTR_DEFAULT);
1076 	}
1077 	error = iommu_gas_map_region(domain, entry, IOMMU_MAP_ENTRY_READ |
1078 	    ((flags & BUS_DMA_NOWRITE) ? 0 : IOMMU_MAP_ENTRY_WRITE) |
1079 	    IOMMU_MAP_ENTRY_MAP, waitok ? IOMMU_MF_CANWAIT : 0, ma);
1080 	if (error == 0) {
1081 		IOMMU_DMAMAP_LOCK(map);
1082 		TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
1083 		IOMMU_DMAMAP_UNLOCK(map);
1084 	} else {
1085 		iommu_gas_free_entry(entry);
1086 	}
1087 	for (i = 0; i < atop(length); i++)
1088 		vm_page_putfake(ma[i]);
1089 	free(ma, M_TEMP);
1090 	return (error);
1091 }
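
/*
 * Usage sketch with hypothetical addresses and softc fields: a driver
 * that needs a page-aligned physical window mapped 1:1 through the
 * IOMMU could issue
 *
 *	error = bus_dma_iommu_load_ident(sc->sc_dmat, sc->sc_map,
 *	    0x80000000, 4 * PAGE_SIZE, BUS_DMA_NOWAIT);
 *
 * which links an identity entry into the map; the entry is released
 * by the normal bus_dmamap_unload() path on that map.
 */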
1092 
1093 static void
1094 iommu_domain_unload_task(void *arg, int pending)
1095 {
1096 	struct iommu_domain *domain;
1097 	struct iommu_map_entries_tailq entries;
1098 
1099 	domain = arg;
1100 	TAILQ_INIT(&entries);
1101 
1102 	for (;;) {
1103 		IOMMU_DOMAIN_LOCK(domain);
1104 		TAILQ_SWAP(&domain->unload_entries, &entries,
1105 		    iommu_map_entry, dmamap_link);
1106 		IOMMU_DOMAIN_UNLOCK(domain);
1107 		if (TAILQ_EMPTY(&entries))
1108 			break;
1109 		iommu_domain_unload(domain, &entries, true);
1110 	}
1111 }
1112 
1113 void
1114 iommu_domain_init(struct iommu_unit *unit, struct iommu_domain *domain,
1115     const struct iommu_domain_map_ops *ops)
1116 {
1117 
1118 	domain->ops = ops;
1119 	domain->iommu = unit;
1120 
1121 	TASK_INIT(&domain->unload_task, 0, iommu_domain_unload_task, domain);
1122 	RB_INIT(&domain->rb_root);
1123 	TAILQ_INIT(&domain->unload_entries);
1124 	mtx_init(&domain->lock, "iodom", NULL, MTX_DEF);
1125 }
1126 
1127 void
1128 iommu_domain_fini(struct iommu_domain *domain)
1129 {
1130 
1131 	mtx_destroy(&domain->lock);
1132 }
1133