/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#define RB_AUGMENT_CHECK(entry) iommu_gas_augment_entry(entry)

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/rman.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/uma.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/iommu/iommu.h>
#include <dev/iommu/iommu_gas.h>
#include <dev/iommu/iommu_msi.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <machine/iommu.h>
#include <dev/iommu/busdma_iommu.h>

/*
 * Guest Address Space management.
 */
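
/*
 * Allocated and reserved regions of a domain's guest address space are
 * kept in a red-black tree of iommu_map_entry structures (domain->rb_root),
 * ordered by the entries' end addresses.  The tree is augmented with the
 * first/last/free_down fields maintained by iommu_gas_augment_entry()
 * below, so free ranges can be found by walking the tree instead of
 * keeping a separate free list.
 */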

static uma_zone_t iommu_map_entry_zone;

#ifdef INVARIANTS
static int iommu_check_free;
#endif

static void
intel_gas_init(void)
{

        iommu_map_entry_zone = uma_zcreate("IOMMU_MAP_ENTRY",
            sizeof(struct iommu_map_entry), NULL, NULL,
            NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NODUMP);
}
SYSINIT(intel_gas, SI_SUB_DRIVERS, SI_ORDER_FIRST, intel_gas_init, NULL);

struct iommu_map_entry *
iommu_gas_alloc_entry(struct iommu_domain *domain, u_int flags)
{
        struct iommu_map_entry *res;

        KASSERT((flags & ~(IOMMU_PGF_WAITOK)) == 0,
            ("unsupported flags %x", flags));

        res = uma_zalloc(iommu_map_entry_zone, ((flags & IOMMU_PGF_WAITOK) !=
            0 ? M_WAITOK : M_NOWAIT) | M_ZERO);
        if (res != NULL) {
                SLIST_INIT(&res->pgtbl_free);
                if (domain != NULL) {
                        res->domain = domain;
                        atomic_add_int(&domain->entries_cnt, 1);
                }
        }
        return (res);
}

void
iommu_gas_free_entry(struct iommu_map_entry *entry)
{
        struct iommu_domain *domain;
        int n __unused;

        n = vm_page_free_pages_toq(&entry->pgtbl_free, false);
#if defined(__i386__) || defined(__amd64__)
        atomic_subtract_int(&iommu_tbl_pagecnt, n);
#endif
        domain = entry->domain;
        if (domain != NULL)
                atomic_subtract_int(&domain->entries_cnt, 1);
        uma_zfree(iommu_map_entry_zone, entry);
}

static int
iommu_gas_cmp_entries(struct iommu_map_entry *a, struct iommu_map_entry *b)
{

        /* First and last entries have zero size, so <= */
        KASSERT(a->start <= a->end, ("inverted entry %p (%jx, %jx)",
            a, (uintmax_t)a->start, (uintmax_t)a->end));
        KASSERT(b->start <= b->end, ("inverted entry %p (%jx, %jx)",
            b, (uintmax_t)b->start, (uintmax_t)b->end));
        KASSERT(((a->flags | b->flags) & IOMMU_MAP_ENTRY_FAKE) != 0 ||
            a->end <= b->start || b->end <= a->start ||
            a->end == a->start || b->end == b->start,
            ("overlapping entries %p (%jx, %jx) f %#x %p (%jx, %jx) f %#x"
            " domain %p %p",
            a, (uintmax_t)a->start, (uintmax_t)a->end, a->flags,
            b, (uintmax_t)b->start, (uintmax_t)b->end, b->flags,
            a->domain, b->domain));

        if (a->end < b->end)
                return (-1);
        else if (b->end < a->end)
                return (1);
        return (0);
}

/*
 * Update augmentation data based on data from children.
 * Return true if and only if the update changes the augmentation data.
 */
static bool
iommu_gas_augment_entry(struct iommu_map_entry *entry)
{
        struct iommu_map_entry *child;
        iommu_gaddr_t bound, delta, free_down;

        free_down = 0;
        bound = entry->start;
        if ((child = RB_LEFT(entry, rb_entry)) != NULL) {
                free_down = MAX(child->free_down, bound - child->last);
                bound = child->first;
        }
        delta = bound - entry->first;
        entry->first = bound;
        bound = entry->end;
        if ((child = RB_RIGHT(entry, rb_entry)) != NULL) {
                free_down = MAX(free_down, child->free_down);
                free_down = MAX(free_down, child->first - bound);
                bound = child->last;
        }
        delta += entry->last - bound;
        if (delta == 0)
                delta = entry->free_down - free_down;
        entry->last = bound;
        entry->free_down = free_down;

        /*
         * Return true if either the value of last - first or the value
         * of free_down changed.
         */
        return (delta != 0);
}
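
/*
 * To illustrate the augmentation fields: for the subtree rooted at an
 * entry, "first" is the smallest start and "last" the largest end among
 * the subtree's entries, and "free_down" is the largest gap between
 * adjacent entries inside that subtree.  For example (addresses are
 * only illustrative), a subtree holding [0x1000, 0x2000) and
 * [0x6000, 0x7000) has first == 0x1000, last == 0x7000 and
 * free_down == 0x4000, so a search can skip any subtree whose free_down
 * is smaller than the space it needs.
 */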

RB_GENERATE(iommu_gas_entries_tree, iommu_map_entry, rb_entry,
    iommu_gas_cmp_entries);

#ifdef INVARIANTS
static void
iommu_gas_check_free(struct iommu_domain *domain)
{
        struct iommu_map_entry *entry, *l, *r;
        iommu_gaddr_t v;

        RB_FOREACH(entry, iommu_gas_entries_tree, &domain->rb_root) {
                KASSERT(domain == entry->domain,
                    ("mismatched free domain %p entry %p entry->domain %p",
                    domain, entry, entry->domain));
                l = RB_LEFT(entry, rb_entry);
                r = RB_RIGHT(entry, rb_entry);
                v = 0;
                if (l != NULL) {
                        v = MAX(v, l->free_down);
                        v = MAX(v, entry->start - l->last);
                }
                if (r != NULL) {
                        v = MAX(v, r->free_down);
                        v = MAX(v, r->first - entry->end);
                }
                MPASS(entry->free_down == v);
        }
}
#endif

static void
iommu_gas_rb_remove(struct iommu_domain *domain, struct iommu_map_entry *entry)
{
        struct iommu_map_entry *nbr;

        /* Removing entry may open a new free gap before domain->start_gap. */
        if (entry->end <= domain->start_gap->end) {
                if (RB_RIGHT(entry, rb_entry) != NULL)
                        nbr = iommu_gas_entries_tree_RB_NEXT(entry);
                else if (RB_LEFT(entry, rb_entry) != NULL)
                        nbr = RB_LEFT(entry, rb_entry);
                else
                        nbr = RB_PARENT(entry, rb_entry);
                domain->start_gap = nbr;
        }
        RB_REMOVE(iommu_gas_entries_tree, &domain->rb_root, entry);
}

struct iommu_domain *
iommu_get_ctx_domain(struct iommu_ctx *ctx)
{

        return (ctx->domain);
}

void
iommu_gas_init_domain(struct iommu_domain *domain)
{
        struct iommu_map_entry *begin, *end;

        begin = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
        end = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);

        IOMMU_DOMAIN_LOCK(domain);
        KASSERT(domain->entries_cnt == 2, ("dirty domain %p", domain));
        KASSERT(RB_EMPTY(&domain->rb_root),
            ("non-empty entries %p", domain));

        end->start = domain->end;
        end->end = domain->end;
        end->flags = IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED;
        RB_INSERT(iommu_gas_entries_tree, &domain->rb_root, end);

        begin->start = 0;
        begin->end = 0;
        begin->flags = IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED;
        RB_INSERT_PREV(iommu_gas_entries_tree, &domain->rb_root, end, begin);
        iommu_gas_augment_entry(end);
        iommu_gas_augment_entry(begin);

        domain->start_gap = begin;
        domain->first_place = begin;
        domain->last_place = end;
        domain->flags |= IOMMU_DOMAIN_GAS_INITED;
        IOMMU_DOMAIN_UNLOCK(domain);
}
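
/*
 * After initialization the tree holds only the two zero-size placeholder
 * entries, so the whole guest address space is one free gap:
 *
 *      [begin: 0, 0) ........ free ........ [end: domain->end, domain->end)
 *
 * first_place and last_place pin these placeholders for the lifetime of
 * the domain, and start_gap caches where the first usable free range
 * begins.
 */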

void
iommu_gas_fini_domain(struct iommu_domain *domain)
{
        struct iommu_map_entry *entry;

        IOMMU_DOMAIN_ASSERT_LOCKED(domain);
        KASSERT(domain->entries_cnt == 2,
            ("domain still in use %p", domain));

        entry = RB_MIN(iommu_gas_entries_tree, &domain->rb_root);
        KASSERT(entry->start == 0, ("start entry start %p", domain));
        KASSERT(entry->end == 0, ("start entry end %p", domain));
        KASSERT(entry->flags ==
            (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED),
            ("start entry flags %p", domain));
        iommu_gas_rb_remove(domain, entry);
        iommu_gas_free_entry(entry);

        entry = RB_MAX(iommu_gas_entries_tree, &domain->rb_root);
        KASSERT(entry->start == domain->end, ("end entry start %p", domain));
        KASSERT(entry->end == domain->end, ("end entry end %p", domain));
        KASSERT(entry->flags ==
            (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED),
            ("end entry flags %p", domain));
        iommu_gas_rb_remove(domain, entry);
        iommu_gas_free_entry(entry);
}

struct iommu_gas_match_args {
        iommu_gaddr_t size;
        int offset;
        const struct bus_dma_tag_common *common;
        u_int gas_flags;
        struct iommu_map_entry *entry;
};

/*
 * The interval [beg, end) is a free interval between two iommu_map_entries.
 * Addresses can be allocated only in the range [lbound, ubound].  Try to
 * allocate space in the free interval, subject to the conditions expressed by
 * a, and return 'true' if and only if the allocation attempt succeeds.
 */
static bool
iommu_gas_match_one(struct iommu_gas_match_args *a, iommu_gaddr_t beg,
    iommu_gaddr_t end, iommu_gaddr_t lbound, iommu_gaddr_t ubound)
{
        struct iommu_map_entry *entry;
        iommu_gaddr_t first, size, start;
        int offset;

        /*
         * prev->end is always page aligned, which makes entry->start
         * page aligned as well.
         *
         * Keep IOMMU_PAGE_SIZE gaps before and after the new entry so
         * that out-of-bounds accesses fault instead of reaching a
         * neighboring mapping.
         */
        beg = MAX(beg + IOMMU_PAGE_SIZE, lbound);
        start = roundup2(beg, a->common->alignment);
        if (start < beg)
                return (false);
        if (end < IOMMU_PAGE_SIZE + 1)
                return (false);
        end = MIN(end - IOMMU_PAGE_SIZE - 1, ubound);
        offset = a->offset;
        size = a->size;
        if (start + offset + size - 1 > end)
                return (false);

        /* Check for and try to skip past boundary crossing. */
        if (!vm_addr_bound_ok(start + offset, size, a->common->boundary)) {
                /*
                 * The region from start + offset to start + offset + size
                 * crosses the boundary.  Check whether there is enough
                 * space after the next boundary following beg.
                 */
                first = start;
                beg = roundup2(start + offset + 1, a->common->boundary);
                start = roundup2(beg, a->common->alignment);

                if (start + offset + size - 1 > end ||
                    !vm_addr_bound_ok(start + offset, size,
                    a->common->boundary)) {
                        /*
                         * Not enough space to align at the requested
                         * boundary, or the boundary is smaller than the
                         * size, but the caller allowed a split.  We
                         * already checked that start + size does not
                         * overlap ubound.
                         *
                         * XXXKIB. It is possible that beg is exactly at
                         * the start of the next entry, in which case we
                         * do not have a gap.  Ignore for now.
                         */
                        if ((a->gas_flags & IOMMU_MF_CANSPLIT) == 0)
                                return (false);
                        size = beg - first - offset;
                        start = first;
                }
        }
        entry = a->entry;
        entry->start = start;
        entry->end = start + roundup2(size + offset, IOMMU_PAGE_SIZE);
        entry->flags = IOMMU_MAP_ENTRY_MAP;
        return (true);
}
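
/*
 * Sketch of a successful match within a free interval (not to scale):
 *
 *      [ prev ) .guard page. .align pad. [ new entry ) .guard page. [ next )
 *
 * The guard pages are never mapped, so stray DMA just past either end of
 * the new entry faults instead of touching a neighboring mapping.
 */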

/* Find the next entry that might abut a big-enough range. */
static struct iommu_map_entry *
iommu_gas_next(struct iommu_map_entry *curr, iommu_gaddr_t min_free)
{
        struct iommu_map_entry *next;

        if ((next = RB_RIGHT(curr, rb_entry)) != NULL &&
            next->free_down >= min_free) {
                /* Find next entry in right subtree. */
                do
                        curr = next;
                while ((next = RB_LEFT(curr, rb_entry)) != NULL &&
                    next->free_down >= min_free);
        } else {
                /* Find next entry in a left-parent ancestor. */
                while ((next = RB_PARENT(curr, rb_entry)) != NULL &&
                    curr == RB_RIGHT(next, rb_entry))
                        curr = next;
                curr = next;
        }
        return (curr);
}

/*
 * Address-ordered first-fit search of 'domain' for free space satisfying the
 * conditions of 'a'.  The space allocated is at least one page big, and is
 * bounded by guard pages to the left and right.  The allocated space for
 * 'domain' is described by an rb-tree of map entries at domain->rb_root, and
 * domain->start_gap points to a map entry less than or adjacent to the first
 * free-space of size at least 3 pages.
 */
static int
iommu_gas_find_space(struct iommu_domain *domain,
    struct iommu_gas_match_args *a)
{
        struct iommu_map_entry *curr, *first;
        iommu_gaddr_t addr, min_free;

        IOMMU_DOMAIN_ASSERT_LOCKED(domain);
        KASSERT(a->entry->flags == 0,
            ("dirty entry %p %p", domain, a->entry));

        /*
         * start_gap may point to an entry adjacent to gaps too small for any
         * new allocation.  In that case, advance start_gap to the first free
         * space big enough for a minimum allocation plus two guard pages.
         */
        min_free = 3 * IOMMU_PAGE_SIZE;
        first = domain->start_gap;
        while (first != NULL && first->free_down < min_free)
                first = RB_PARENT(first, rb_entry);
        for (curr = first; curr != NULL;
            curr = iommu_gas_next(curr, min_free)) {
                if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
                    first->last + min_free <= curr->start)
                        break;
                if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
                    curr->end + min_free <= first->first)
                        break;
        }
        domain->start_gap = curr;

        /*
         * If the subtree doesn't have free space for the requested allocation
         * plus two guard pages, skip it.
         */
        min_free = 2 * IOMMU_PAGE_SIZE +
            roundup2(a->size + a->offset, IOMMU_PAGE_SIZE);

        /* Climb to find a node in the subtree of big-enough ranges. */
        first = curr;
        while (first != NULL && first->free_down < min_free)
                first = RB_PARENT(first, rb_entry);

        /*
         * Walk the tree of big-enough ranges until one satisfies the
         * alignment requirements, or violates the lowaddr constraint.
         */
        addr = a->common->lowaddr;
        for (curr = first; curr != NULL;
            curr = iommu_gas_next(curr, min_free)) {
                if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
                    iommu_gas_match_one(a, first->last, curr->start,
                    0, addr)) {
                        RB_INSERT_PREV(iommu_gas_entries_tree,
                            &domain->rb_root, curr, a->entry);
                        return (0);
                }
                if (curr->end >= addr) {
                        /* All remaining ranges > addr */
                        break;
                }
                if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
                    iommu_gas_match_one(a, curr->end, first->first,
                    0, addr)) {
                        RB_INSERT_NEXT(iommu_gas_entries_tree,
                            &domain->rb_root, curr, a->entry);
                        return (0);
                }
        }

        /*
         * To resume the search at the start of the upper region, first climb to
         * the nearest ancestor that spans highaddr.  Then find the last entry
         * before highaddr that could abut a big-enough range.
         */
        addr = a->common->highaddr;
        while (curr != NULL && curr->last < addr)
                curr = RB_PARENT(curr, rb_entry);
        first = NULL;
        while (curr != NULL && curr->free_down >= min_free) {
                if (addr < curr->end)
                        curr = RB_LEFT(curr, rb_entry);
                else {
                        first = curr;
                        curr = RB_RIGHT(curr, rb_entry);
                }
        }

        /*
         * Walk the remaining big-enough ranges until one satisfies alignment
         * requirements.
         */
        for (curr = first; curr != NULL;
            curr = iommu_gas_next(curr, min_free)) {
                if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
                    iommu_gas_match_one(a, first->last, curr->start,
                    addr + 1, domain->end - 1)) {
                        RB_INSERT_PREV(iommu_gas_entries_tree,
                            &domain->rb_root, curr, a->entry);
                        return (0);
                }
                if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
                    iommu_gas_match_one(a, curr->end, first->first,
                    addr + 1, domain->end - 1)) {
                        RB_INSERT_NEXT(iommu_gas_entries_tree,
                            &domain->rb_root, curr, a->entry);
                        return (0);
                }
        }

        return (ENOMEM);
}
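
/*
 * Note on the two passes above: per the usual bus_dma(9) convention, the
 * device can reach addresses at or below lowaddr and addresses above
 * highaddr, while the window in between is excluded.  The first loop
 * therefore looks for space at or below lowaddr; only if that fails does
 * the search restart above highaddr.  For example, a tag with
 * lowaddr == BUS_SPACE_MAXADDR_32BIT keeps allocations under 4G unless
 * the low region is exhausted.
 */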

static int
iommu_gas_alloc_region(struct iommu_domain *domain, struct iommu_map_entry *entry,
    u_int flags)
{
        struct iommu_map_entry *next, *prev;

        IOMMU_DOMAIN_ASSERT_LOCKED(domain);

        if ((entry->start & IOMMU_PAGE_MASK) != 0 ||
            (entry->end & IOMMU_PAGE_MASK) != 0)
                return (EINVAL);
        if (entry->start >= entry->end)
                return (EINVAL);
        if (entry->end >= domain->end)
                return (EINVAL);

        entry->flags |= IOMMU_MAP_ENTRY_FAKE;
        next = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, entry);
        KASSERT(next != NULL, ("next must be non-null %p %jx", domain,
            (uintmax_t)entry->start));
        prev = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, next);
        /* prev could be NULL */
        entry->flags &= ~IOMMU_MAP_ENTRY_FAKE;

        /*
         * Adapt to broken BIOSes which specify overlapping RMRR
         * entries.
         *
         * XXXKIB: this does not handle the case where the prev or next
         * entry is completely covered by the current one, which
         * extends both ways.
         */
        if (prev != NULL && prev->end > entry->start &&
            (prev->flags & IOMMU_MAP_ENTRY_PLACE) == 0) {
                if ((flags & IOMMU_MF_RMRR) == 0 ||
                    (prev->flags & IOMMU_MAP_ENTRY_RMRR) == 0)
                        return (EBUSY);
                entry->start = prev->end;
        }
        if (next->start < entry->end &&
            (next->flags & IOMMU_MAP_ENTRY_PLACE) == 0) {
                if ((flags & IOMMU_MF_RMRR) == 0 ||
                    (next->flags & IOMMU_MAP_ENTRY_RMRR) == 0)
                        return (EBUSY);
                entry->end = next->start;
        }
        if (entry->end == entry->start)
                return (0);

        if (prev != NULL && prev->end > entry->start) {
                /* This assumes that prev is the placeholder entry. */
                iommu_gas_rb_remove(domain, prev);
                prev = NULL;
        }
        RB_INSERT_PREV(iommu_gas_entries_tree,
            &domain->rb_root, next, entry);
        if (next->start < entry->end) {
                iommu_gas_rb_remove(domain, next);
                next = NULL;
        }

        if ((flags & IOMMU_MF_RMRR) != 0)
                entry->flags = IOMMU_MAP_ENTRY_RMRR;

#ifdef INVARIANTS
        struct iommu_map_entry *ip, *in;
        ip = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, entry);
        in = RB_NEXT(iommu_gas_entries_tree, &domain->rb_root, entry);
        KASSERT(prev == NULL || ip == prev,
            ("RMRR %p (%jx %jx) prev %p (%jx %jx) ins prev %p (%jx %jx)",
            entry, entry->start, entry->end, prev,
            prev == NULL ? 0 : prev->start, prev == NULL ? 0 : prev->end,
            ip, ip == NULL ? 0 : ip->start, ip == NULL ? 0 : ip->end));
        KASSERT(next == NULL || in == next,
            ("RMRR %p (%jx %jx) next %p (%jx %jx) ins next %p (%jx %jx)",
            entry, entry->start, entry->end, next,
            next == NULL ? 0 : next->start, next == NULL ? 0 : next->end,
            in, in == NULL ? 0 : in->start, in == NULL ? 0 : in->end));
#endif

        return (0);
}
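
/*
 * Example of the RMRR overlap handling above (hypothetical addresses):
 * if the firmware reports an RMRR region [0xa0000, 0xc0000) while
 * [0xb0000, 0xd0000) is already inserted as an RMRR entry, the new
 * entry is clipped to [0xa0000, 0xb0000) instead of failing with EBUSY.
 * Overlaps with ordinary MAP entries are still rejected.
 */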

void
iommu_gas_free_space(struct iommu_map_entry *entry)
{
        struct iommu_domain *domain;

        domain = entry->domain;
        KASSERT((entry->flags & (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_RMRR |
            IOMMU_MAP_ENTRY_MAP)) == IOMMU_MAP_ENTRY_MAP,
            ("permanent entry %p %p", domain, entry));

        IOMMU_DOMAIN_LOCK(domain);
        iommu_gas_rb_remove(domain, entry);
        entry->flags &= ~IOMMU_MAP_ENTRY_MAP;
#ifdef INVARIANTS
        if (iommu_check_free)
                iommu_gas_check_free(domain);
#endif
        IOMMU_DOMAIN_UNLOCK(domain);
}

void
iommu_gas_free_region(struct iommu_map_entry *entry)
{
        struct iommu_domain *domain;

        domain = entry->domain;
        KASSERT((entry->flags & (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_RMRR |
            IOMMU_MAP_ENTRY_MAP)) == IOMMU_MAP_ENTRY_RMRR,
            ("non-RMRR entry %p %p", domain, entry));

        IOMMU_DOMAIN_LOCK(domain);
        if (entry != domain->first_place &&
            entry != domain->last_place)
                iommu_gas_rb_remove(domain, entry);
        entry->flags &= ~IOMMU_MAP_ENTRY_RMRR;
        IOMMU_DOMAIN_UNLOCK(domain);
}

static struct iommu_map_entry *
iommu_gas_remove_clip_left(struct iommu_domain *domain, iommu_gaddr_t start,
    iommu_gaddr_t end, struct iommu_map_entry **r)
{
        struct iommu_map_entry *entry, *res, fentry;

        IOMMU_DOMAIN_ASSERT_LOCKED(domain);
        MPASS(start <= end);
        MPASS(end <= domain->end);

        /*
         * Find an entry which contains the supplied guest address
         * 'start', or the first entry after it.  Since we asserted
         * that start is below the domain end, such an entry must
         * exist.  Then clip it if needed.
         */
        bzero(&fentry, sizeof(fentry));
        fentry.start = start + 1;
        fentry.end = start + 1;
        fentry.flags = IOMMU_MAP_ENTRY_FAKE;
        entry = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, &fentry);

        if (entry->start >= start ||
            (entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
                return (entry);

        res = *r;
        *r = NULL;
        *res = *entry;
        res->start = entry->end = start;
        RB_UPDATE_AUGMENT(entry, rb_entry);
        RB_INSERT_NEXT(iommu_gas_entries_tree,
            &domain->rb_root, entry, res);
        return (res);
}

static bool
iommu_gas_remove_clip_right(struct iommu_domain *domain,
    iommu_gaddr_t end, struct iommu_map_entry *entry,
    struct iommu_map_entry *r)
{
        if (entry->start >= end || (entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
                return (false);

        *r = *entry;
        r->end = entry->start = end;
        RB_UPDATE_AUGMENT(entry, rb_entry);
        RB_INSERT_PREV(iommu_gas_entries_tree,
            &domain->rb_root, entry, r);
        return (true);
}
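
/*
 * Both clip helpers split an existing entry at a boundary using an entry
 * preallocated by the caller.  For example (addresses only illustrative),
 * clipping [0x3000, 0x8000) at 0x6000 with the helper above leaves the
 * original entry as [0x6000, 0x8000) and inserts the copy as
 * [0x3000, 0x6000) just before it.  RMRR entries are never clipped.
 */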

static void
iommu_gas_remove_unmap(struct iommu_domain *domain,
    struct iommu_map_entry *entry, struct iommu_map_entries_tailq *gcp)
{
        IOMMU_DOMAIN_ASSERT_LOCKED(domain);

        if ((entry->flags & (IOMMU_MAP_ENTRY_UNMAPPED |
            IOMMU_MAP_ENTRY_RMRR |
            IOMMU_MAP_ENTRY_REMOVING)) != 0)
                return;
        MPASS((entry->flags & IOMMU_MAP_ENTRY_PLACE) == 0);
        entry->flags |= IOMMU_MAP_ENTRY_REMOVING;
        TAILQ_INSERT_TAIL(gcp, entry, dmamap_link);
}

static void
iommu_gas_remove_locked(struct iommu_domain *domain,
    iommu_gaddr_t start, iommu_gaddr_t size,
    struct iommu_map_entries_tailq *gc,
    struct iommu_map_entry **r1, struct iommu_map_entry **r2)
{
        struct iommu_map_entry *entry, *nentry;
        iommu_gaddr_t end;

        IOMMU_DOMAIN_ASSERT_LOCKED(domain);

        end = start + size;

        nentry = iommu_gas_remove_clip_left(domain, start, end, r1);
        RB_FOREACH_FROM(entry, iommu_gas_entries_tree, nentry) {
                if (entry->start >= end)
                        break;
                KASSERT(start <= entry->start,
                    ("iommu_gas_remove entry (%#jx, %#jx) start %#jx",
                    entry->start, entry->end, start));
                iommu_gas_remove_unmap(domain, entry, gc);
        }
        if (iommu_gas_remove_clip_right(domain, end, entry, *r2)) {
                iommu_gas_remove_unmap(domain, *r2, gc);
                *r2 = NULL;
        }

#ifdef INVARIANTS
        RB_FOREACH(entry, iommu_gas_entries_tree, &domain->rb_root) {
                if ((entry->flags & (IOMMU_MAP_ENTRY_RMRR |
                    IOMMU_MAP_ENTRY_PLACE)) != 0)
                        continue;
                KASSERT(entry->end <= start || entry->start >= end,
                    ("iommu_gas_remove leftover entry (%#jx, %#jx) range "
                    "(%#jx, %#jx)",
                    entry->start, entry->end, start, end));
        }
#endif
}

static void
iommu_gas_remove_init(struct iommu_domain *domain,
    struct iommu_map_entries_tailq *gc, struct iommu_map_entry **r1,
    struct iommu_map_entry **r2)
{
        TAILQ_INIT(gc);
        *r1 = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
        *r2 = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
}

static void
iommu_gas_remove_cleanup(struct iommu_domain *domain,
    struct iommu_map_entries_tailq *gc, struct iommu_map_entry **r1,
    struct iommu_map_entry **r2)
{
        if (*r1 != NULL) {
                iommu_gas_free_entry(*r1);
                *r1 = NULL;
        }
        if (*r2 != NULL) {
                iommu_gas_free_entry(*r2);
                *r2 = NULL;
        }
        iommu_domain_unload(domain, gc, true);
}

/*
 * Remove the specified range from the GAS of the domain.  Note that the
 * removal is not guaranteed to have occurred by the time the function
 * returns; it might be finalized some time later, when the hardware
 * reports that the (queued) IOTLB invalidation was performed.
 */
void
iommu_gas_remove(struct iommu_domain *domain, iommu_gaddr_t start,
    iommu_gaddr_t size)
{
        struct iommu_map_entry *r1, *r2;
        struct iommu_map_entries_tailq gc;

        iommu_gas_remove_init(domain, &gc, &r1, &r2);
        IOMMU_DOMAIN_LOCK(domain);
        iommu_gas_remove_locked(domain, start, size, &gc, &r1, &r2);
        IOMMU_DOMAIN_UNLOCK(domain);
        iommu_gas_remove_cleanup(domain, &gc, &r1, &r2);
}
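
/*
 * For example, a passthrough backend could release a 2 MB window it no
 * longer needs with
 *
 *      iommu_gas_remove(domain, 0x80000000, 2 * 1024 * 1024);
 *
 * (hypothetical values); the mapped entries overlapping that range are
 * queued for unmapping and freed once the IOTLB invalidation completes.
 */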

int
iommu_gas_map(struct iommu_domain *domain,
    const struct bus_dma_tag_common *common, iommu_gaddr_t size, int offset,
    u_int eflags, u_int flags, vm_page_t *ma, struct iommu_map_entry **res)
{
        struct iommu_gas_match_args a;
        struct iommu_map_entry *entry;
        int error;

        KASSERT((flags & ~(IOMMU_MF_CANWAIT | IOMMU_MF_CANSPLIT)) == 0,
            ("invalid flags 0x%x", flags));

        a.size = size;
        a.offset = offset;
        a.common = common;
        a.gas_flags = flags;
        entry = iommu_gas_alloc_entry(domain,
            (flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0);
        if (entry == NULL)
                return (ENOMEM);
        a.entry = entry;
        IOMMU_DOMAIN_LOCK(domain);
        error = iommu_gas_find_space(domain, &a);
        if (error == ENOMEM) {
                IOMMU_DOMAIN_UNLOCK(domain);
                iommu_gas_free_entry(entry);
                return (error);
        }
#ifdef INVARIANTS
        if (iommu_check_free)
                iommu_gas_check_free(domain);
#endif
        KASSERT(error == 0,
831 ("unexpected error %d from iommu_gas_find_entry", error));
        KASSERT(entry->end < domain->end, ("allocated GPA %jx, max GPA %jx",
            (uintmax_t)entry->end, (uintmax_t)domain->end));
        entry->flags |= eflags;
        IOMMU_DOMAIN_UNLOCK(domain);

        error = domain->ops->map(domain, entry, ma, eflags,
            ((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0));
        if (error == ENOMEM) {
                iommu_domain_unload_entry(entry, true,
                    (flags & IOMMU_MF_CANWAIT) != 0);
                return (error);
        }
        KASSERT(error == 0,
            ("unexpected error %d from domain_map_buf", error));

        *res = entry;
        return (0);
}
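
/*
 * Typical use (a sketch, not a verbatim caller): the IOMMU busdma code
 * allocates and maps a run of pages in one call, e.g.
 *
 *      error = iommu_gas_map(domain, &tag->common, size, offset,
 *          IOMMU_MAP_ENTRY_READ | IOMMU_MAP_ENTRY_WRITE,
 *          IOMMU_MF_CANWAIT | IOMMU_MF_CANSPLIT, ma, &entry);
 *
 * On success the caller owns 'entry' and typically releases it later
 * through iommu_domain_unload() or iommu_gas_free_space().
 */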

int
iommu_gas_map_region(struct iommu_domain *domain, struct iommu_map_entry *entry,
    u_int eflags, u_int flags, vm_page_t *ma)
{
        iommu_gaddr_t start;
        int error;

        KASSERT(entry->domain == domain,
            ("mismatched domain %p entry %p entry->domain %p", domain,
            entry, entry->domain));
        KASSERT(entry->flags == 0, ("used RMRR entry %p %p %x", domain,
            entry, entry->flags));
        KASSERT((flags & ~(IOMMU_MF_CANWAIT | IOMMU_MF_RMRR)) == 0,
            ("invalid flags 0x%x", flags));

        start = entry->start;
        IOMMU_DOMAIN_LOCK(domain);
        error = iommu_gas_alloc_region(domain, entry, flags);
        if (error != 0) {
                IOMMU_DOMAIN_UNLOCK(domain);
                return (error);
        }
        entry->flags |= eflags;
        IOMMU_DOMAIN_UNLOCK(domain);
        if (entry->end == entry->start)
                return (0);

        error = domain->ops->map(domain, entry,
            ma + OFF_TO_IDX(entry->start - start), eflags,
880 ((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0));
881 if (error == ENOMEM) {
882 iommu_domain_unload_entry(entry, false,
883 (flags & IOMMU_MF_CANWAIT) != 0);
884 return (error);
885 }
886 KASSERT(error == 0,
887 ("unexpected error %d from domain_map_buf", error));
888
889 return (0);
890 }

static int
iommu_gas_reserve_region_locked(struct iommu_domain *domain,
    iommu_gaddr_t start, iommu_gaddr_t end, struct iommu_map_entry *entry)
{
        int error;

        IOMMU_DOMAIN_ASSERT_LOCKED(domain);

        entry->start = start;
        entry->end = end;
        error = iommu_gas_alloc_region(domain, entry, IOMMU_MF_CANWAIT);
        if (error == 0)
                entry->flags |= IOMMU_MAP_ENTRY_UNMAPPED;
        return (error);
}

int
iommu_gas_reserve_region(struct iommu_domain *domain, iommu_gaddr_t start,
    iommu_gaddr_t end, struct iommu_map_entry **entry0)
{
        struct iommu_map_entry *entry;
        int error;

        entry = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
        IOMMU_DOMAIN_LOCK(domain);
        error = iommu_gas_reserve_region_locked(domain, start, end, entry);
        IOMMU_DOMAIN_UNLOCK(domain);
        if (error != 0)
                iommu_gas_free_entry(entry);
        else if (entry0 != NULL)
                *entry0 = entry;
        return (error);
}

/*
 * As in iommu_gas_reserve_region, reserve [start, end), but allow for existing
 * entries.
 */
int
iommu_gas_reserve_region_extend(struct iommu_domain *domain,
    iommu_gaddr_t start, iommu_gaddr_t end)
{
        struct iommu_map_entry *entry, *next, *prev, key = {};
        iommu_gaddr_t entry_start, entry_end;
        int error;

        error = 0;
        entry = NULL;
        end = ummin(end, domain->end);
        while (start < end) {
                /* Preallocate an entry. */
                if (entry == NULL)
                        entry = iommu_gas_alloc_entry(domain,
                            IOMMU_PGF_WAITOK);
                /* Calculate the free region from here to the next entry. */
                key.start = key.end = start;
                IOMMU_DOMAIN_LOCK(domain);
                next = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, &key);
                KASSERT(next != NULL, ("domain %p with end %#jx has no entry "
                    "after %#jx", domain, (uintmax_t)domain->end,
                    (uintmax_t)start));
                entry_end = ummin(end, next->start);
                prev = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, next);
                if (prev != NULL)
                        entry_start = ummax(start, prev->end);
                else
                        entry_start = start;
                start = next->end;
                /* Reserve the region if non-empty. */
                if (entry_start != entry_end) {
                        error = iommu_gas_reserve_region_locked(domain,
                            entry_start, entry_end, entry);
                        if (error != 0) {
                                IOMMU_DOMAIN_UNLOCK(domain);
                                break;
                        }
                        entry = NULL;
                }
                IOMMU_DOMAIN_UNLOCK(domain);
        }
        /* Release a preallocated entry if it was not used. */
        if (entry != NULL)
                iommu_gas_free_entry(entry);
        return (error);
}
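
/*
 * The loop above preallocates the map entry before taking the domain
 * lock because the M_WAITOK allocation may sleep; the entry is consumed
 * only when a non-empty gap is actually reserved, and is reused on the
 * next iteration otherwise.
 */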

void
iommu_unmap_msi(struct iommu_ctx *ctx)
{
        struct iommu_map_entry *entry;
        struct iommu_domain *domain;

        domain = ctx->domain;
        entry = domain->msi_entry;
        if (entry == NULL)
                return;

        domain->ops->unmap(domain, entry, IOMMU_PGF_WAITOK);

        iommu_gas_free_space(entry);

        iommu_gas_free_entry(entry);

        domain->msi_entry = NULL;
        domain->msi_base = 0;
        domain->msi_phys = 0;
}

int
iommu_map_msi(struct iommu_ctx *ctx, iommu_gaddr_t size, int offset,
    u_int eflags, u_int flags, vm_page_t *ma)
{
        struct iommu_domain *domain;
        struct iommu_map_entry *entry;
        int error;

        error = 0;
        domain = ctx->domain;

        /* Check if there is already an MSI page allocated */
        IOMMU_DOMAIN_LOCK(domain);
        entry = domain->msi_entry;
        IOMMU_DOMAIN_UNLOCK(domain);

        if (entry == NULL) {
                error = iommu_gas_map(domain, &ctx->tag->common, size, offset,
                    eflags, flags, ma, &entry);
                IOMMU_DOMAIN_LOCK(domain);
                if (error == 0) {
                        if (domain->msi_entry == NULL) {
                                MPASS(domain->msi_base == 0);
                                MPASS(domain->msi_phys == 0);

                                domain->msi_entry = entry;
                                domain->msi_base = entry->start;
                                domain->msi_phys = VM_PAGE_TO_PHYS(ma[0]);
                        } else {
                                /*
                                 * We lost the race and already have an
                                 * MSI page allocated. Free the unneeded entry.
                                 */
                                iommu_gas_free_entry(entry);
                        }
                } else if (domain->msi_entry != NULL) {
                        /*
                         * The allocation failed, but another succeeded.
                         * Return success as there is a valid MSI page.
                         */
                        error = 0;
                }
                IOMMU_DOMAIN_UNLOCK(domain);
        }

        return (error);
}

void
iommu_translate_msi(struct iommu_domain *domain, uint64_t *addr)
{

        *addr = (*addr - domain->msi_phys) + domain->msi_base;

        KASSERT(*addr >= domain->msi_entry->start,
            ("%s: Address is below the MSI entry start address (%jx < %jx)",
            __func__, (uintmax_t)*addr, (uintmax_t)domain->msi_entry->start));

        KASSERT(*addr + sizeof(*addr) <= domain->msi_entry->end,
            ("%s: Address is above the MSI entry end address (%jx > %jx)",
            __func__, (uintmax_t)*addr, (uintmax_t)domain->msi_entry->end));
}
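
/*
 * Example (hypothetical values): with msi_phys == 0xfee00000 and
 * msi_base == 0x100000, a device-programmed MSI address of 0xfee00040
 * is rewritten to 0x100040, i.e. the doorbell offset is preserved
 * inside the IOMMU-visible window that was mapped for it.
 */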

SYSCTL_NODE(_hw, OID_AUTO, iommu, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "");

#ifdef INVARIANTS
SYSCTL_INT(_hw_iommu, OID_AUTO, check_free, CTLFLAG_RWTUN,
    &iommu_check_free, 0,
    "Check the GPA RBtree for free_down and free_after validity");
#endif

#include "opt_ddb.h"
#ifdef DDB

#include <ddb/ddb.h>

static void
iommu_debug_dump_gas(struct iommu_domain *domain)
{
        struct iommu_map_entry *entry;

        db_printf("iommu_domain %p tree %p iommu %p fl %#x\n", domain,
            &domain->rb_root, domain->iommu, domain->flags);
        db_printf("iommu_domain %p tree %p\n", domain, &domain->rb_root);
        RB_FOREACH(entry, iommu_gas_entries_tree, &domain->rb_root) {
                db_printf(
            " e %p [%#jx %#jx] fl %#x first %#jx last %#jx free_down %#jx",
                    entry, (uintmax_t)entry->start, (uintmax_t)entry->end,
                    entry->flags,
                    (uintmax_t)entry->first, (uintmax_t)entry->last,
                    (uintmax_t)entry->free_down);
                if (entry == domain->start_gap)
                        db_printf(" start_gap");
                if (entry == domain->first_place)
                        db_printf(" first_place");
                if (entry == domain->last_place)
                        db_printf(" last_place");
                db_printf("\n");
        }
}

DB_SHOW_COMMAND(iommu_domain, iommu_domain_show)
{
        struct iommu_domain *domain;

        if (!have_addr) {
                db_printf("show iommu_domain addr\n");
                return;
        }

        domain = (void *)addr;
        iommu_debug_dump_gas(domain);
}

#endif
