/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2013 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#define	RB_AUGMENT_CHECK(entry) iommu_gas_augment_entry(entry)

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/rman.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/uma.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/iommu/iommu.h>
#include <dev/iommu/iommu_gas.h>
#include <dev/iommu/iommu_msi.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <machine/iommu.h>
#include <dev/iommu/busdma_iommu.h>

/*
 * Guest Address Space management.
 */

static uma_zone_t iommu_map_entry_zone;

#ifdef INVARIANTS
static int iommu_check_free;
#endif

static void
intel_gas_init(void)
{

	iommu_map_entry_zone = uma_zcreate("IOMMU_MAP_ENTRY",
	    sizeof(struct iommu_map_entry), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NODUMP);
}
SYSINIT(intel_gas, SI_SUB_DRIVERS, SI_ORDER_FIRST, intel_gas_init, NULL);

struct iommu_map_entry *
iommu_gas_alloc_entry(struct iommu_domain *domain, u_int flags)
{
	struct iommu_map_entry *res;

	KASSERT((flags & ~(IOMMU_PGF_WAITOK)) == 0,
	    ("unsupported flags %x", flags));

	res = uma_zalloc(iommu_map_entry_zone, ((flags & IOMMU_PGF_WAITOK) !=
	    0 ? M_WAITOK : M_NOWAIT) | M_ZERO);
	if (res != NULL && domain != NULL) {
		res->domain = domain;
		atomic_add_int(&domain->entries_cnt, 1);
	}
	return (res);
}

void
iommu_gas_free_entry(struct iommu_map_entry *entry)
{
	struct iommu_domain *domain;

	domain = entry->domain;
	if (domain != NULL)
		atomic_subtract_int(&domain->entries_cnt, 1);
	uma_zfree(iommu_map_entry_zone, entry);
}

static int
iommu_gas_cmp_entries(struct iommu_map_entry *a, struct iommu_map_entry *b)
{

	/* The last entry has zero size, hence the <= comparisons. */
	KASSERT(a->start <= a->end, ("inverted entry %p (%jx, %jx)",
	    a, (uintmax_t)a->start, (uintmax_t)a->end));
	KASSERT(b->start <= b->end, ("inverted entry %p (%jx, %jx)",
	    b, (uintmax_t)b->start, (uintmax_t)b->end));
	KASSERT(a->end <= b->start || b->end <= a->start ||
	    a->end == a->start || b->end == b->start,
	    ("overlapping entries %p (%jx, %jx) %p (%jx, %jx)",
	    a, (uintmax_t)a->start, (uintmax_t)a->end,
	    b, (uintmax_t)b->start, (uintmax_t)b->end));

	if (a->end < b->end)
		return (-1);
	else if (b->end < a->end)
		return (1);
	return (0);
}

/*
 * Update augmentation data based on data from children.
 * Return true if and only if the update changes the augmentation data.
 */
static bool
iommu_gas_augment_entry(struct iommu_map_entry *entry)
{
	struct iommu_map_entry *child;
	iommu_gaddr_t bound, delta, free_down;

	free_down = 0;
	bound = entry->start;
	if ((child = RB_LEFT(entry, rb_entry)) != NULL) {
		free_down = MAX(child->free_down, bound - child->last);
		bound = child->first;
	}
	delta = bound - entry->first;
	entry->first = bound;
	bound = entry->end;
	if ((child = RB_RIGHT(entry, rb_entry)) != NULL) {
		free_down = MAX(free_down, child->free_down);
		free_down = MAX(free_down, child->first - bound);
		bound = child->last;
	}
	delta += entry->last - bound;
	if (delta == 0)
		delta = entry->free_down - free_down;
	entry->last = bound;
	entry->free_down = free_down;

	/*
	 * Return true either if the value of last-first changed,
	 * or if free_down changed.
	 */
	return (delta != 0);
}
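
/*
 * An illustrative sketch of the augmentation data (the figures are made
 * up): take entries A = [0x1000, 0x2000), B = [0x5000, 0x6000) and
 * C = [0x9000, 0xa000), arranged with B at the root and A, C as its
 * children.  Then for B, first = 0x1000 (lowest start in the subtree),
 * last = 0xa000 (highest end in the subtree), and free_down = 0x3000,
 * the largest gap inside the subtree span (both 0x2000..0x5000 and
 * 0x6000..0x9000 are 0x3000 bytes).  iommu_gas_find_space() relies on
 * free_down to prune subtrees that cannot contain a big-enough hole.
 */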

RB_GENERATE(iommu_gas_entries_tree, iommu_map_entry, rb_entry,
    iommu_gas_cmp_entries);

#ifdef INVARIANTS
static void
iommu_gas_check_free(struct iommu_domain *domain)
{
	struct iommu_map_entry *entry, *l, *r;
	iommu_gaddr_t v;

	RB_FOREACH(entry, iommu_gas_entries_tree, &domain->rb_root) {
		KASSERT(domain == entry->domain,
		    ("mismatched free domain %p entry %p entry->domain %p",
		    domain, entry, entry->domain));
		l = RB_LEFT(entry, rb_entry);
		r = RB_RIGHT(entry, rb_entry);
		v = 0;
		if (l != NULL) {
			v = MAX(v, l->free_down);
			v = MAX(v, entry->start - l->last);
		}
		if (r != NULL) {
			v = MAX(v, r->free_down);
			v = MAX(v, r->first - entry->end);
		}
		MPASS(entry->free_down == v);
	}
}
#endif

static bool
iommu_gas_rb_insert(struct iommu_domain *domain, struct iommu_map_entry *entry)
{
	struct iommu_map_entry *found;

	found = RB_INSERT(iommu_gas_entries_tree, &domain->rb_root, entry);
	return (found == NULL);
}

static void
iommu_gas_rb_remove(struct iommu_domain *domain, struct iommu_map_entry *entry)
{

	RB_REMOVE(iommu_gas_entries_tree, &domain->rb_root, entry);
}

struct iommu_domain *
iommu_get_ctx_domain(struct iommu_ctx *ctx)
{

	return (ctx->domain);
}

void
iommu_gas_init_domain(struct iommu_domain *domain)
{
	struct iommu_map_entry *begin, *end;

	begin = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
	end = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);

	IOMMU_DOMAIN_LOCK(domain);
	KASSERT(domain->entries_cnt == 2, ("dirty domain %p", domain));
	KASSERT(RB_EMPTY(&domain->rb_root),
	    ("non-empty entries %p", domain));

	/*
	 * The end entry must be inserted first because it has a zero-length gap
	 * between start and end.  Initially, all augmentation data for a new
	 * entry is zero.  Function iommu_gas_augment_entry will compute no
	 * change in the value of (start-end) and no change in the value of
	 * free_down, so it will return false to suggest that nothing changed in
	 * the entry.  Thus, if the end entry were inserted second, that false
	 * return would keep the augmentation information from being propagated
	 * to the begin entry at the tree root.  So the end entry is inserted
	 * first.
	 */
	end->start = domain->end;
	end->end = domain->end;
	end->flags = IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED;
	iommu_gas_rb_insert(domain, end);

	begin->start = 0;
	begin->end = IOMMU_PAGE_SIZE;
	begin->flags = IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED;
	iommu_gas_rb_insert(domain, begin);

	domain->first_place = begin;
	domain->last_place = end;
	domain->flags |= IOMMU_DOMAIN_GAS_INITED;
	IOMMU_DOMAIN_UNLOCK(domain);
}
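
/*
 * After initialization the tree holds only the two placeholders (a
 * sketch):
 *
 *	begin: [0, IOMMU_PAGE_SIZE)        PLACE | UNMAPPED
 *	end:   [domain->end, domain->end)  PLACE | UNMAPPED
 *
 * so all allocatable space lies in the single gap between them, page 0
 * is never handed out, and allocations always end below domain->end.
 */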

void
iommu_gas_fini_domain(struct iommu_domain *domain)
{
	struct iommu_map_entry *entry, *entry1;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);
	KASSERT(domain->entries_cnt == 2,
	    ("domain still in use %p", domain));

	entry = RB_MIN(iommu_gas_entries_tree, &domain->rb_root);
	KASSERT(entry->start == 0, ("start entry start %p", domain));
	KASSERT(entry->end == IOMMU_PAGE_SIZE, ("start entry end %p", domain));
	KASSERT(entry->flags ==
	    (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED),
	    ("start entry flags %p", domain));
	RB_REMOVE(iommu_gas_entries_tree, &domain->rb_root, entry);
	iommu_gas_free_entry(entry);

	entry = RB_MAX(iommu_gas_entries_tree, &domain->rb_root);
	KASSERT(entry->start == domain->end, ("end entry start %p", domain));
	KASSERT(entry->end == domain->end, ("end entry end %p", domain));
	KASSERT(entry->flags ==
	    (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED),
	    ("end entry flags %p", domain));
	RB_REMOVE(iommu_gas_entries_tree, &domain->rb_root, entry);
	iommu_gas_free_entry(entry);

	RB_FOREACH_SAFE(entry, iommu_gas_entries_tree, &domain->rb_root,
	    entry1) {
		KASSERT((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0,
		    ("non-RMRR entry left %p", domain));
		RB_REMOVE(iommu_gas_entries_tree, &domain->rb_root,
		    entry);
		iommu_gas_free_entry(entry);
	}
}

struct iommu_gas_match_args {
	struct iommu_domain *domain;
	iommu_gaddr_t size;
	int offset;
	const struct bus_dma_tag_common *common;
	u_int gas_flags;
	struct iommu_map_entry *entry;
};

/*
 * The interval [beg, end) is a free interval between two iommu_map_entries.
 * Addresses can be allocated only in the range [lbound, ubound). Try to
 * allocate space in the free interval, subject to the conditions expressed by
 * a, and return 'true' if and only if the allocation attempt succeeds.
 */
static bool
iommu_gas_match_one(struct iommu_gas_match_args *a, iommu_gaddr_t beg,
    iommu_gaddr_t end, iommu_gaddr_t lbound, iommu_gaddr_t ubound)
{
	struct iommu_map_entry *entry;
	iommu_gaddr_t first, size, start;
	bool found __diagused;
	int offset;

	/*
	 * prev->end is always aligned on the page size, which
	 * makes entry->start page-aligned too.
	 *
	 * Create IOMMU_PAGE_SIZE gaps before and after the new entry
	 * to ensure that out-of-bounds accesses fault.
	 */
	beg = MAX(beg + IOMMU_PAGE_SIZE, lbound);
	start = roundup2(beg, a->common->alignment);
	if (start < beg)
		return (false);
	end = MIN(end - IOMMU_PAGE_SIZE, ubound);
	offset = a->offset;
	size = a->size;
	if (start + offset + size > end)
		return (false);

	/* Check for and try to skip past boundary crossing. */
	if (!vm_addr_bound_ok(start + offset, size, a->common->boundary)) {
		/*
		 * The region [start + offset, start + offset + size) crosses
		 * the boundary.  Check whether there is enough space after
		 * the next boundary following beg.
		 */
		first = start;
		beg = roundup2(start + offset + 1, a->common->boundary);
		start = roundup2(beg, a->common->alignment);

		if (start + offset + size > end ||
		    !vm_addr_bound_ok(start + offset, size,
		    a->common->boundary)) {
			/*
			 * Either there is not enough space to align at the
			 * requested boundary, or the boundary is smaller than
			 * the size, but splitting is allowed.  We already
			 * checked that start + size does not overlap ubound.
			 *
			 * XXXKIB.  It is possible that beg is exactly at the
			 * start of the next entry, in which case we do not
			 * have a gap.  Ignore for now.
			 */
			if ((a->gas_flags & IOMMU_MF_CANSPLIT) == 0)
				return (false);
			size = beg - first - offset;
			start = first;
		}
	}
	entry = a->entry;
	entry->start = start;
	entry->end = start + roundup2(size + offset, IOMMU_PAGE_SIZE);
	entry->flags = IOMMU_MAP_ENTRY_MAP;
	found = iommu_gas_rb_insert(a->domain, entry);
	KASSERT(found, ("found dup %p start %jx size %jx",
	    a->domain, (uintmax_t)start, (uintmax_t)size));
	return (true);
}
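
/*
 * A worked example of the arithmetic above (a sketch; the numbers are
 * made up and assume 4K IOMMU pages): for a free interval
 * [beg, end) = [0x3000, 0x10000) with lbound = 0, alignment = 0x4000,
 * offset = 0x100 and size = 0x2000, the guard page moves beg up to
 * 0x4000, roundup2() leaves start at 0x4000, and the new entry becomes
 * [0x4000, 0x4000 + roundup2(0x2100, 0x1000)) = [0x4000, 0x7000),
 * leaving at least one unmapped page on both sides of the mapping.
 */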

/* Find the next entry that might abut a big-enough range. */
static struct iommu_map_entry *
iommu_gas_next(struct iommu_map_entry *curr, iommu_gaddr_t min_free)
{
	struct iommu_map_entry *next;

	if ((next = RB_RIGHT(curr, rb_entry)) != NULL &&
	    next->free_down >= min_free) {
		/* Find next entry in right subtree. */
		do
			curr = next;
		while ((next = RB_LEFT(curr, rb_entry)) != NULL &&
		    next->free_down >= min_free);
	} else {
		/* Find next entry in a left-parent ancestor. */
		while ((next = RB_PARENT(curr, rb_entry)) != NULL &&
		    curr == RB_RIGHT(next, rb_entry))
			curr = next;
		curr = next;
	}
	return (curr);
}
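
/*
 * In other words, iommu_gas_next() is the usual binary-tree successor
 * walk (roughly what RB_NEXT() would return), except that subtrees
 * whose free_down is below min_free are skipped outright, since no gap
 * inside them can possibly hold the requested allocation.
 */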

static int
iommu_gas_find_space(struct iommu_gas_match_args *a)
{
	struct iommu_domain *domain;
	struct iommu_map_entry *curr, *first;
	iommu_gaddr_t addr, min_free;

	IOMMU_DOMAIN_ASSERT_LOCKED(a->domain);
	KASSERT(a->entry->flags == 0,
	    ("dirty entry %p %p", a->domain, a->entry));

	/*
	 * If the subtree doesn't have free space for the requested allocation
	 * plus two guard pages, skip it.
	 */
	min_free = 2 * IOMMU_PAGE_SIZE +
	    roundup2(a->size + a->offset, IOMMU_PAGE_SIZE);

	/*
	 * Find the first entry in the lower region that could abut a big-enough
	 * range.
	 */
	curr = RB_ROOT(&a->domain->rb_root);
	first = NULL;
	while (curr != NULL && curr->free_down >= min_free) {
		first = curr;
		curr = RB_LEFT(curr, rb_entry);
	}

	/*
	 * Walk the big-enough ranges until one satisfies the alignment
	 * requirements or violates the lowaddr address requirement.
	 */
	addr = a->common->lowaddr + 1;
	for (curr = first; curr != NULL;
	    curr = iommu_gas_next(curr, min_free)) {
		if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, first->last, curr->start,
		    0, addr))
			return (0);
		if (curr->end >= addr) {
			/* All remaining ranges >= addr */
			break;
		}
		if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, curr->end, first->first,
		    0, addr))
			return (0);
	}

	/*
	 * To resume the search at the start of the upper region, first climb to
	 * the nearest ancestor that spans highaddr.  Then find the last entry
	 * before highaddr that could abut a big-enough range.
	 */
	addr = a->common->highaddr;
	while (curr != NULL && curr->last < addr)
		curr = RB_PARENT(curr, rb_entry);
	first = NULL;
	while (curr != NULL && curr->free_down >= min_free) {
		if (addr < curr->end)
			curr = RB_LEFT(curr, rb_entry);
		else {
			first = curr;
			curr = RB_RIGHT(curr, rb_entry);
		}
	}

	/*
	 * Walk the remaining big-enough ranges until one satisfies alignment
	 * requirements.
	 */
	domain = a->domain;
	for (curr = first; curr != NULL;
	    curr = iommu_gas_next(curr, min_free)) {
		if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, first->last, curr->start,
		    addr + 1, domain->end))
			return (0);
		if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, curr->end, first->first,
		    addr + 1, domain->end))
			return (0);
	}

	return (ENOMEM);
}
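
/*
 * The search is split into two passes because the tag's exclusion zone
 * (lowaddr, highaddr] must never be allocated from: the first loop only
 * accepts space at or below lowaddr, and the second resumes above
 * highaddr.  For example (a sketch, values made up), a tag with
 * lowaddr = 0xffffffff and highaddr = ~0 confines allocations in a
 * 64-bit domain to the low 4G.
 */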

static int
iommu_gas_alloc_region(struct iommu_domain *domain, struct iommu_map_entry *entry,
    u_int flags)
{
	struct iommu_map_entry *next, *prev;
	bool found __diagused;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);

	if ((entry->start & IOMMU_PAGE_MASK) != 0 ||
	    (entry->end & IOMMU_PAGE_MASK) != 0)
		return (EINVAL);
	if (entry->start >= entry->end)
		return (EINVAL);
	if (entry->end >= domain->end)
		return (EINVAL);

	next = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, entry);
	KASSERT(next != NULL, ("next must be non-null %p %jx", domain,
	    (uintmax_t)entry->start));
	prev = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, next);
	/* prev could be NULL */

	/*
	 * Adapt to broken BIOSes which specify overlapping RMRR
	 * entries.
	 *
	 * XXXKIB: this does not handle the case when the prev or next
	 * entries are completely covered by the current one, which
	 * extends both ways.
	 */
	if (prev != NULL && prev->end > entry->start &&
	    (prev->flags & IOMMU_MAP_ENTRY_PLACE) == 0) {
		if ((flags & IOMMU_MF_RMRR) == 0 ||
		    (prev->flags & IOMMU_MAP_ENTRY_RMRR) == 0)
			return (EBUSY);
		entry->start = prev->end;
	}
	if (next->start < entry->end &&
	    (next->flags & IOMMU_MAP_ENTRY_PLACE) == 0) {
		if ((flags & IOMMU_MF_RMRR) == 0 ||
		    (next->flags & IOMMU_MAP_ENTRY_RMRR) == 0)
			return (EBUSY);
		entry->end = next->start;
	}
	if (entry->end == entry->start)
		return (0);

	if (prev != NULL && prev->end > entry->start) {
		/* This assumes that prev is the placeholder entry. */
		iommu_gas_rb_remove(domain, prev);
		prev = NULL;
	}
	if (next->start < entry->end) {
		iommu_gas_rb_remove(domain, next);
		next = NULL;
	}

	found = iommu_gas_rb_insert(domain, entry);
	KASSERT(found, ("found RMRR dup %p start %jx end %jx",
	    domain, (uintmax_t)entry->start, (uintmax_t)entry->end));
	if ((flags & IOMMU_MF_RMRR) != 0)
		entry->flags = IOMMU_MAP_ENTRY_RMRR;

#ifdef INVARIANTS
	struct iommu_map_entry *ip, *in;
	ip = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, entry);
	in = RB_NEXT(iommu_gas_entries_tree, &domain->rb_root, entry);
	KASSERT(prev == NULL || ip == prev,
	    ("RMRR %p (%jx %jx) prev %p (%jx %jx) ins prev %p (%jx %jx)",
	    entry, entry->start, entry->end, prev,
	    prev == NULL ? 0 : prev->start, prev == NULL ? 0 : prev->end,
	    ip, ip == NULL ? 0 : ip->start, ip == NULL ? 0 : ip->end));
	KASSERT(next == NULL || in == next,
	    ("RMRR %p (%jx %jx) next %p (%jx %jx) ins next %p (%jx %jx)",
	    entry, entry->start, entry->end, next,
	    next == NULL ? 0 : next->start, next == NULL ? 0 : next->end,
	    in, in == NULL ? 0 : in->start, in == NULL ? 0 : in->end));
#endif

	return (0);
}

void
iommu_gas_free_space(struct iommu_map_entry *entry)
{
	struct iommu_domain *domain;

	domain = entry->domain;
	KASSERT((entry->flags & (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_RMRR |
	    IOMMU_MAP_ENTRY_MAP)) == IOMMU_MAP_ENTRY_MAP,
	    ("permanent entry %p %p", domain, entry));

	IOMMU_DOMAIN_LOCK(domain);
	iommu_gas_rb_remove(domain, entry);
	entry->flags &= ~IOMMU_MAP_ENTRY_MAP;
#ifdef INVARIANTS
	if (iommu_check_free)
		iommu_gas_check_free(domain);
#endif
	IOMMU_DOMAIN_UNLOCK(domain);
}

void
iommu_gas_free_region(struct iommu_map_entry *entry)
{
	struct iommu_domain *domain;

	domain = entry->domain;
	KASSERT((entry->flags & (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_RMRR |
	    IOMMU_MAP_ENTRY_MAP)) == IOMMU_MAP_ENTRY_RMRR,
	    ("non-RMRR entry %p %p", domain, entry));

	IOMMU_DOMAIN_LOCK(domain);
	if (entry != domain->first_place &&
	    entry != domain->last_place)
		iommu_gas_rb_remove(domain, entry);
	entry->flags &= ~IOMMU_MAP_ENTRY_RMRR;
	IOMMU_DOMAIN_UNLOCK(domain);
}

static struct iommu_map_entry *
iommu_gas_remove_clip_left(struct iommu_domain *domain, iommu_gaddr_t start,
    iommu_gaddr_t end, struct iommu_map_entry **r)
{
	struct iommu_map_entry *entry, *res, fentry;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);
	MPASS(start <= end);
	MPASS(end <= domain->end);

	/*
	 * Find the entry which contains the supplied guest address
	 * start, or the first entry after start.  Since we asserted
	 * that start is below the domain end, the entry should exist.
	 * Then clip it if needed.
	 */
	fentry.start = start + 1;
	fentry.end = start + 1;
	entry = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, &fentry);

	if (entry->start >= start ||
	    (entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
		return (entry);

	res = *r;
	*r = NULL;
	*res = *entry;
	res->start = entry->end = start;
	RB_UPDATE_AUGMENT(entry, rb_entry);
	iommu_gas_rb_insert(domain, res);
	return (res);
}

static bool
iommu_gas_remove_clip_right(struct iommu_domain *domain,
    iommu_gaddr_t end, struct iommu_map_entry *entry,
    struct iommu_map_entry *r)
{
	if (entry->start >= end || (entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
		return (false);

	*r = *entry;
	r->end = entry->start = end;
	RB_UPDATE_AUGMENT(entry, rb_entry);
	iommu_gas_rb_insert(domain, r);
	return (true);
}

static void
iommu_gas_remove_unmap(struct iommu_domain *domain,
    struct iommu_map_entry *entry, struct iommu_map_entries_tailq *gcp)
{
	IOMMU_DOMAIN_ASSERT_LOCKED(domain);

	if ((entry->flags & (IOMMU_MAP_ENTRY_UNMAPPED |
	    IOMMU_MAP_ENTRY_REMOVING)) != 0)
		return;
	MPASS((entry->flags & IOMMU_MAP_ENTRY_PLACE) == 0);
	entry->flags |= IOMMU_MAP_ENTRY_REMOVING;
	TAILQ_INSERT_TAIL(gcp, entry, dmamap_link);
}

/*
 * Remove the specified range from the GAS of the domain.  Note that
 * the removal is not guaranteed to have occurred by the time the
 * function returns; it might be finalized some time later, when the
 * hardware reports that the (queued) IOTLB invalidation was performed.
 */
void
iommu_gas_remove(struct iommu_domain *domain, iommu_gaddr_t start,
    iommu_gaddr_t size)
{
	struct iommu_map_entry *entry, *nentry, *r1, *r2;
	struct iommu_map_entries_tailq gc;
	iommu_gaddr_t end;

	end = start + size;
	r1 = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
	r2 = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
	TAILQ_INIT(&gc);

	IOMMU_DOMAIN_LOCK(domain);

	nentry = iommu_gas_remove_clip_left(domain, start, end, &r1);
	RB_FOREACH_FROM(entry, iommu_gas_entries_tree, nentry) {
		if (entry->start >= end)
			break;
		KASSERT(start <= entry->start,
		    ("iommu_gas_remove entry (%#jx, %#jx) start %#jx",
		    entry->start, entry->end, start));
		if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
			continue;
		iommu_gas_remove_unmap(domain, entry, &gc);
	}
	if (iommu_gas_remove_clip_right(domain, end, entry, r2)) {
		iommu_gas_remove_unmap(domain, r2, &gc);
		r2 = NULL;
	}

#ifdef INVARIANTS
	RB_FOREACH(entry, iommu_gas_entries_tree, &domain->rb_root) {
		if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
			continue;
		KASSERT(entry->end <= start || entry->start >= end,
		    ("iommu_gas_remove leftover entry (%#jx, %#jx) range "
		    "(%#jx, %#jx)",
		    entry->start, entry->end, start, end));
	}
#endif

	IOMMU_DOMAIN_UNLOCK(domain);
	if (r1 != NULL)
		iommu_gas_free_entry(r1);
	if (r2 != NULL)
		iommu_gas_free_entry(r2);
	iommu_domain_unload(domain, &gc, true);
}

int
iommu_gas_map(struct iommu_domain *domain,
    const struct bus_dma_tag_common *common, iommu_gaddr_t size, int offset,
    u_int eflags, u_int flags, vm_page_t *ma, struct iommu_map_entry **res)
{
	struct iommu_gas_match_args a;
	struct iommu_map_entry *entry;
	int error;

	KASSERT((flags & ~(IOMMU_MF_CANWAIT | IOMMU_MF_CANSPLIT)) == 0,
	    ("invalid flags 0x%x", flags));

	a.domain = domain;
	a.size = size;
	a.offset = offset;
	a.common = common;
	a.gas_flags = flags;
	entry = iommu_gas_alloc_entry(domain,
	    (flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0);
	if (entry == NULL)
		return (ENOMEM);
	a.entry = entry;
	IOMMU_DOMAIN_LOCK(domain);
	error = iommu_gas_find_space(&a);
	if (error == ENOMEM) {
		IOMMU_DOMAIN_UNLOCK(domain);
		iommu_gas_free_entry(entry);
		return (error);
	}
#ifdef INVARIANTS
	if (iommu_check_free)
		iommu_gas_check_free(domain);
#endif
	KASSERT(error == 0,
	    ("unexpected error %d from iommu_gas_find_space", error));
	KASSERT(entry->end < domain->end, ("allocated GPA %jx, max GPA %jx",
	    (uintmax_t)entry->end, (uintmax_t)domain->end));
	entry->flags |= eflags;
	IOMMU_DOMAIN_UNLOCK(domain);

	error = domain->ops->map(domain, entry->start,
	    entry->end - entry->start, ma, eflags,
	    ((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0));
	if (error == ENOMEM) {
		iommu_domain_unload_entry(entry, true,
		    (flags & IOMMU_MF_CANWAIT) != 0);
		return (error);
	}
	KASSERT(error == 0,
	    ("unexpected error %d from domain_map_buf", error));

	*res = entry;
	return (0);
}
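
/*
 * A minimal usage sketch (illustrative only: the tag, page array and
 * page count below are hypothetical, and error handling is elided):
 *
 *	struct iommu_map_entry *e;
 *	int error;
 *
 *	error = iommu_gas_map(domain, &tag->common, ptoa(npages), 0,
 *	    IOMMU_MAP_ENTRY_READ | IOMMU_MAP_ENTRY_WRITE,
 *	    IOMMU_MF_CANWAIT, ma, &e);
 *
 * On success, e->start is the device-visible address of the mapping.
 * The caller owns the returned entry and releases it with
 * iommu_domain_unload_entry(), as the ENOMEM path above does, once the
 * mapping is no longer needed.
 */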

int
iommu_gas_map_region(struct iommu_domain *domain, struct iommu_map_entry *entry,
    u_int eflags, u_int flags, vm_page_t *ma)
{
	iommu_gaddr_t start;
	int error;

	KASSERT(entry->domain == domain,
	    ("mismatched domain %p entry %p entry->domain %p", domain,
	    entry, entry->domain));
	KASSERT(entry->flags == 0, ("used RMRR entry %p %p %x", domain,
	    entry, entry->flags));
	KASSERT((flags & ~(IOMMU_MF_CANWAIT | IOMMU_MF_RMRR)) == 0,
	    ("invalid flags 0x%x", flags));

	start = entry->start;
	IOMMU_DOMAIN_LOCK(domain);
	error = iommu_gas_alloc_region(domain, entry, flags);
	if (error != 0) {
		IOMMU_DOMAIN_UNLOCK(domain);
		return (error);
	}
	entry->flags |= eflags;
	IOMMU_DOMAIN_UNLOCK(domain);
	if (entry->end == entry->start)
		return (0);

	error = domain->ops->map(domain, entry->start,
	    entry->end - entry->start, ma + OFF_TO_IDX(start - entry->start),
	    eflags, ((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0));
	if (error == ENOMEM) {
		iommu_domain_unload_entry(entry, false,
		    (flags & IOMMU_MF_CANWAIT) != 0);
		return (error);
	}
	KASSERT(error == 0,
	    ("unexpected error %d from domain_map_buf", error));

	return (0);
}

static int
iommu_gas_reserve_region_locked(struct iommu_domain *domain,
    iommu_gaddr_t start, iommu_gaddr_t end, struct iommu_map_entry *entry)
{
	int error;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);

	entry->start = start;
	entry->end = end;
	error = iommu_gas_alloc_region(domain, entry, IOMMU_MF_CANWAIT);
	if (error == 0)
		entry->flags |= IOMMU_MAP_ENTRY_UNMAPPED;
	return (error);
}

int
iommu_gas_reserve_region(struct iommu_domain *domain, iommu_gaddr_t start,
    iommu_gaddr_t end, struct iommu_map_entry **entry0)
{
	struct iommu_map_entry *entry;
	int error;

	entry = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
	IOMMU_DOMAIN_LOCK(domain);
	error = iommu_gas_reserve_region_locked(domain, start, end, entry);
	IOMMU_DOMAIN_UNLOCK(domain);
	if (error != 0)
		iommu_gas_free_entry(entry);
	else if (entry0 != NULL)
		*entry0 = entry;
	return (error);
}

/*
 * As in iommu_gas_reserve_region, reserve [start, end), but allow for existing
 * entries.
 */
int
iommu_gas_reserve_region_extend(struct iommu_domain *domain,
    iommu_gaddr_t start, iommu_gaddr_t end)
{
	struct iommu_map_entry *entry, *next, *prev, key = {};
	iommu_gaddr_t entry_start, entry_end;
	int error;

	error = 0;
	entry = NULL;
	end = ummin(end, domain->end);
	while (start < end) {
		/* Preallocate an entry. */
		if (entry == NULL)
			entry = iommu_gas_alloc_entry(domain,
			    IOMMU_PGF_WAITOK);
		/* Calculate the free region from here to the next entry. */
		key.start = key.end = start;
		IOMMU_DOMAIN_LOCK(domain);
		next = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, &key);
		KASSERT(next != NULL, ("domain %p with end %#jx has no entry "
		    "after %#jx", domain, (uintmax_t)domain->end,
		    (uintmax_t)start));
		entry_end = ummin(end, next->start);
		prev = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, next);
		if (prev != NULL)
			entry_start = ummax(start, prev->end);
		else
			entry_start = start;
		start = next->end;
		/* Reserve the region if non-empty. */
		if (entry_start != entry_end) {
			error = iommu_gas_reserve_region_locked(domain,
			    entry_start, entry_end, entry);
			if (error != 0) {
				IOMMU_DOMAIN_UNLOCK(domain);
				break;
			}
			entry = NULL;
		}
		IOMMU_DOMAIN_UNLOCK(domain);
	}
	/* Release a preallocated entry if it was not used. */
	if (entry != NULL)
		iommu_gas_free_entry(entry);
	return (error);
}
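
/*
 * For instance (a sketch of the documented intent, addresses made up):
 * reserving [0x100000, 0x200000) over a domain that already has an
 * entry in the middle of that range is meant to reserve the free pieces
 * on either side of the existing entry, where the plain
 * iommu_gas_reserve_region() would instead fail with EBUSY.
 */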

void
iommu_unmap_msi(struct iommu_ctx *ctx)
{
	struct iommu_map_entry *entry;
	struct iommu_domain *domain;

	domain = ctx->domain;
	entry = domain->msi_entry;
	if (entry == NULL)
		return;

	domain->ops->unmap(domain, entry->start, entry->end -
	    entry->start, IOMMU_PGF_WAITOK);

	iommu_gas_free_space(entry);

	iommu_gas_free_entry(entry);

	domain->msi_entry = NULL;
	domain->msi_base = 0;
	domain->msi_phys = 0;
}

int
iommu_map_msi(struct iommu_ctx *ctx, iommu_gaddr_t size, int offset,
    u_int eflags, u_int flags, vm_page_t *ma)
{
	struct iommu_domain *domain;
	struct iommu_map_entry *entry;
	int error;

	error = 0;
	domain = ctx->domain;

	/* Check if there is already an MSI page allocated */
	IOMMU_DOMAIN_LOCK(domain);
	entry = domain->msi_entry;
	IOMMU_DOMAIN_UNLOCK(domain);

	if (entry == NULL) {
		error = iommu_gas_map(domain, &ctx->tag->common, size, offset,
		    eflags, flags, ma, &entry);
		IOMMU_DOMAIN_LOCK(domain);
		if (error == 0) {
			if (domain->msi_entry == NULL) {
				MPASS(domain->msi_base == 0);
				MPASS(domain->msi_phys == 0);

				domain->msi_entry = entry;
				domain->msi_base = entry->start;
				domain->msi_phys = VM_PAGE_TO_PHYS(ma[0]);
			} else {
				/*
				 * We lost the race and already have an
				 * MSI page allocated. Free the unneeded entry.
				 */
				iommu_gas_free_entry(entry);
			}
		} else if (domain->msi_entry != NULL) {
			/*
			 * The allocation failed, but another succeeded.
			 * Return success as there is a valid MSI page.
			 */
			error = 0;
		}
		IOMMU_DOMAIN_UNLOCK(domain);
	}

	return (error);
}

void
iommu_translate_msi(struct iommu_domain *domain, uint64_t *addr)
{

	*addr = (*addr - domain->msi_phys) + domain->msi_base;

	KASSERT(*addr >= domain->msi_entry->start,
	    ("%s: Address is below the MSI entry start address (%jx < %jx)",
	    __func__, (uintmax_t)*addr, (uintmax_t)domain->msi_entry->start));

	KASSERT(*addr + sizeof(*addr) <= domain->msi_entry->end,
	    ("%s: Address extends past the MSI entry end address (%jx, %jx)",
	    __func__, (uintmax_t)*addr, (uintmax_t)domain->msi_entry->end));
}
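
/*
 * A worked example (a sketch, addresses made up): if the MSI page was
 * allocated at msi_base = 0x100000 in the guest address space while the
 * physical doorbell sits at msi_phys = 0xfee00000, a requested address
 * of 0xfee00040 is rewritten to 0x100040, which the IOMMU translates
 * back to the physical page.
 */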

SYSCTL_NODE(_hw, OID_AUTO, iommu, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "");

#ifdef INVARIANTS
SYSCTL_INT(_hw_iommu, OID_AUTO, check_free, CTLFLAG_RWTUN,
    &iommu_check_free, 0,
    "Check the GPA RBtree for free_down validity");
#endif