/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2013 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#define	RB_AUGMENT_CHECK(entry) iommu_gas_augment_entry(entry)

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/rman.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/uma.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/iommu/iommu.h>
#include <dev/iommu/iommu_gas.h>
#include <dev/iommu/iommu_msi.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <machine/iommu.h>
#include <dev/iommu/busdma_iommu.h>

/*
 * Guest Address Space management.
 */

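/*
 * Map entries for a domain are kept in a red-black tree (domain->rb_root),
 * ordered by the entry end address.  Each entry is augmented with three
 * fields maintained by iommu_gas_augment_entry(): "first" and "last" are
 * the lowest start and highest end addresses in the entry's subtree, and
 * "free_down" is the size of the largest free gap between entries within
 * that subtree.  The find-space code below uses free_down to skip whole
 * subtrees that cannot possibly satisfy an allocation request.
 */
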
static uma_zone_t iommu_map_entry_zone;

#ifdef INVARIANTS
static int iommu_check_free;
#endif

static void
intel_gas_init(void)
{

	iommu_map_entry_zone = uma_zcreate("IOMMU_MAP_ENTRY",
	    sizeof(struct iommu_map_entry), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NODUMP);
}
SYSINIT(intel_gas, SI_SUB_DRIVERS, SI_ORDER_FIRST, intel_gas_init, NULL);

struct iommu_map_entry *
iommu_gas_alloc_entry(struct iommu_domain *domain, u_int flags)
{
	struct iommu_map_entry *res;

	KASSERT((flags & ~(IOMMU_PGF_WAITOK)) == 0,
	    ("unsupported flags %x", flags));

	res = uma_zalloc(iommu_map_entry_zone, ((flags & IOMMU_PGF_WAITOK) !=
	    0 ? M_WAITOK : M_NOWAIT) | M_ZERO);
	if (res != NULL && domain != NULL) {
		res->domain = domain;
		atomic_add_int(&domain->entries_cnt, 1);
	}
	return (res);
}

void
iommu_gas_free_entry(struct iommu_map_entry *entry)
{
	struct iommu_domain *domain;

	domain = entry->domain;
	if (domain != NULL)
		atomic_subtract_int(&domain->entries_cnt, 1);
	uma_zfree(iommu_map_entry_zone, entry);
}

static int
iommu_gas_cmp_entries(struct iommu_map_entry *a, struct iommu_map_entry *b)
{

	/* The last entry has zero size, so <= */
	KASSERT(a->start <= a->end, ("inverted entry %p (%jx, %jx)",
	    a, (uintmax_t)a->start, (uintmax_t)a->end));
	KASSERT(b->start <= b->end, ("inverted entry %p (%jx, %jx)",
	    b, (uintmax_t)b->start, (uintmax_t)b->end));
	KASSERT(a->end <= b->start || b->end <= a->start ||
	    a->end == a->start || b->end == b->start,
	    ("overlapping entries %p (%jx, %jx) %p (%jx, %jx)",
	    a, (uintmax_t)a->start, (uintmax_t)a->end,
	    b, (uintmax_t)b->start, (uintmax_t)b->end));

	if (a->end < b->end)
		return (-1);
	else if (b->end < a->end)
		return (1);
	return (0);
}

/*
 * Update augmentation data based on data from children.
 * Return true if and only if the update changes the augmentation data.
 */
static bool
iommu_gas_augment_entry(struct iommu_map_entry *entry)
{
	struct iommu_map_entry *child;
	iommu_gaddr_t bound, delta, free_down;

	free_down = 0;
	bound = entry->start;
	if ((child = RB_LEFT(entry, rb_entry)) != NULL) {
		free_down = MAX(child->free_down, bound - child->last);
		bound = child->first;
	}
	delta = bound - entry->first;
	entry->first = bound;
	bound = entry->end;
	if ((child = RB_RIGHT(entry, rb_entry)) != NULL) {
		free_down = MAX(free_down, child->free_down);
		free_down = MAX(free_down, child->first - bound);
		bound = child->last;
	}
	delta += entry->last - bound;
	if (delta == 0)
		delta = entry->free_down - free_down;
	entry->last = bound;
	entry->free_down = free_down;

	/*
	 * Return true either if the value of last-first changed,
	 * or if free_down changed.
	 */
	return (delta != 0);
}

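/*
 * Illustrative example of the augmentation data (the addresses are made up):
 * an entry covering [0x6000, 0x7000) whose left subtree spans
 * [0x1000, 0x3000) and whose right subtree spans [0xb000, 0xc000) gets
 *	first     = 0x1000  (lowest start in the subtree)
 *	last      = 0xc000  (highest end in the subtree)
 *	free_down = 0x4000  (the gap between 0x7000 and 0xb000 is the largest
 *			     free range anywhere below this entry)
 */
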
RB_GENERATE(iommu_gas_entries_tree, iommu_map_entry, rb_entry,
    iommu_gas_cmp_entries);

#ifdef INVARIANTS
static void
iommu_gas_check_free(struct iommu_domain *domain)
{
	struct iommu_map_entry *entry, *l, *r;
	iommu_gaddr_t v;

	RB_FOREACH(entry, iommu_gas_entries_tree, &domain->rb_root) {
		KASSERT(domain == entry->domain,
		    ("mismatched free domain %p entry %p entry->domain %p",
		    domain, entry, entry->domain));
		l = RB_LEFT(entry, rb_entry);
		r = RB_RIGHT(entry, rb_entry);
		v = 0;
		if (l != NULL) {
			v = MAX(v, l->free_down);
			v = MAX(v, entry->start - l->last);
		}
		if (r != NULL) {
			v = MAX(v, r->free_down);
			v = MAX(v, r->first - entry->end);
		}
		MPASS(entry->free_down == v);
	}
}
#endif

static void
iommu_gas_rb_remove(struct iommu_domain *domain, struct iommu_map_entry *entry)
{

	RB_REMOVE(iommu_gas_entries_tree, &domain->rb_root, entry);
}

struct iommu_domain *
iommu_get_ctx_domain(struct iommu_ctx *ctx)
{

	return (ctx->domain);
}

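/*
 * Set up a fresh domain: create the two placeholder entries that bound the
 * guest address space.  "begin" occupies [0, IOMMU_PAGE_SIZE) and "end" is
 * a zero-length entry at domain->end; both are marked IOMMU_MAP_ENTRY_PLACE
 * and remembered as first_place/last_place, so they stay in the tree for the
 * lifetime of the domain.  They act as sentinels for the gap search, which
 * keeps allocations away from the first page and from anything past
 * domain->end.
 */
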
void
iommu_gas_init_domain(struct iommu_domain *domain)
{
	struct iommu_map_entry *begin, *end;

	begin = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
	end = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);

	IOMMU_DOMAIN_LOCK(domain);
	KASSERT(domain->entries_cnt == 2, ("dirty domain %p", domain));
	KASSERT(RB_EMPTY(&domain->rb_root),
	    ("non-empty entries %p", domain));

	/*
	 * The end entry must be inserted first because it has a zero-length
	 * gap between start and end.  Initially, all augmentation data for a
	 * new entry is zero.  If the end entry were inserted second,
	 * iommu_gas_augment_entry() would compute no change in the value of
	 * (start - end) and no change in the value of free_down, and would
	 * return false to indicate that nothing changed in the entry.  That
	 * would stop the augmentation data from being propagated to the
	 * begin entry at the tree root.  So the end entry is inserted first.
	 */
	end->start = domain->end;
	end->end = domain->end;
	end->flags = IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED;
	RB_INSERT(iommu_gas_entries_tree, &domain->rb_root, end);

	begin->start = 0;
	begin->end = IOMMU_PAGE_SIZE;
	begin->flags = IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED;
	RB_INSERT_PREV(iommu_gas_entries_tree, &domain->rb_root, end, begin);

	domain->first_place = begin;
	domain->last_place = end;
	domain->flags |= IOMMU_DOMAIN_GAS_INITED;
	IOMMU_DOMAIN_UNLOCK(domain);
}

void
iommu_gas_fini_domain(struct iommu_domain *domain)
{
	struct iommu_map_entry *entry, *entry1;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);
	KASSERT(domain->entries_cnt == 2,
	    ("domain still in use %p", domain));

	entry = RB_MIN(iommu_gas_entries_tree, &domain->rb_root);
	KASSERT(entry->start == 0, ("start entry start %p", domain));
	KASSERT(entry->end == IOMMU_PAGE_SIZE, ("start entry end %p", domain));
	KASSERT(entry->flags ==
	    (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED),
	    ("start entry flags %p", domain));
	iommu_gas_rb_remove(domain, entry);
	iommu_gas_free_entry(entry);

	entry = RB_MAX(iommu_gas_entries_tree, &domain->rb_root);
	KASSERT(entry->start == domain->end, ("end entry start %p", domain));
	KASSERT(entry->end == domain->end, ("end entry end %p", domain));
	KASSERT(entry->flags ==
	    (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED),
	    ("end entry flags %p", domain));
	iommu_gas_rb_remove(domain, entry);
	iommu_gas_free_entry(entry);

	RB_FOREACH_SAFE(entry, iommu_gas_entries_tree, &domain->rb_root,
	    entry1) {
		KASSERT((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0,
		    ("non-RMRR entry left %p", domain));
		iommu_gas_rb_remove(domain, entry);
		iommu_gas_free_entry(entry);
	}
}

struct iommu_gas_match_args {
	struct iommu_domain *domain;
	iommu_gaddr_t size;
	int offset;
	const struct bus_dma_tag_common *common;
	u_int gas_flags;
	struct iommu_map_entry *entry;
};

/*
 * The interval [beg, end) is a free interval between two iommu_map_entries.
 * Addresses can be allocated only in the range [lbound, ubound).  Try to
 * allocate space in the free interval, subject to the conditions expressed
 * by a, and return 'true' if and only if the allocation attempt succeeds.
 */
static bool
iommu_gas_match_one(struct iommu_gas_match_args *a, iommu_gaddr_t beg,
    iommu_gaddr_t end, iommu_gaddr_t lbound, iommu_gaddr_t ubound)
{
	struct iommu_map_entry *entry;
	iommu_gaddr_t first, size, start;
	int offset;

	/*
	 * prev->end is always page-aligned, which makes entry->start
	 * page-aligned as well.
	 *
	 * Create IOMMU_PAGE_SIZE gaps before and after the new entry to
	 * ensure that out-of-bounds accesses fault.
	 */
	beg = MAX(beg + IOMMU_PAGE_SIZE, lbound);
	start = roundup2(beg, a->common->alignment);
	if (start < beg)
		return (false);
	end = MIN(end - IOMMU_PAGE_SIZE, ubound);
	offset = a->offset;
	size = a->size;
	if (start + offset + size > end)
		return (false);

	/* Check for and try to skip past boundary crossing. */
	if (!vm_addr_bound_ok(start + offset, size, a->common->boundary)) {
		/*
		 * The start + offset to start + offset + size region crosses
		 * the boundary.  Check if there is enough space after the
		 * next boundary after the beg.
		 */
		first = start;
		beg = roundup2(start + offset + 1, a->common->boundary);
		start = roundup2(beg, a->common->alignment);

		if (start + offset + size > end ||
		    !vm_addr_bound_ok(start + offset, size,
		    a->common->boundary)) {
			/*
			 * Not enough space to align at the requested
			 * boundary, or the boundary is smaller than the
			 * size, but splitting is allowed.  We already
			 * checked that start + size does not overlap ubound.
			 *
			 * XXXKIB.  It is possible that beg is exactly at the
			 * start of the next entry, in which case we do not
			 * have a gap.  Ignore for now.
			 */
			if ((a->gas_flags & IOMMU_MF_CANSPLIT) == 0)
				return (false);
			size = beg - first - offset;
			start = first;
		}
	}
	entry = a->entry;
	entry->start = start;
	entry->end = start + roundup2(size + offset, IOMMU_PAGE_SIZE);
	entry->flags = IOMMU_MAP_ENTRY_MAP;
	return (true);
}

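/*
 * Worked example for iommu_gas_match_one() (illustrative numbers, assuming
 * a 4K IOMMU page size): for a free interval [0x4000, 0x10000) with
 * lbound = 0, alignment = 0x2000, size = 0x3000 and offset = 0, the lower
 * guard page moves beg to 0x5000, alignment rounds start up to 0x6000, and
 * the upper guard page clips end to 0xf000.  Since 0x6000 + 0x3000 <=
 * 0xf000, the entry [0x6000, 0x9000) is carved out, leaving at least one
 * unmapped page on each side of the mapping.
 */
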
/* Find the next entry that might abut a big-enough range. */
static struct iommu_map_entry *
iommu_gas_next(struct iommu_map_entry *curr, iommu_gaddr_t min_free)
{
	struct iommu_map_entry *next;

	if ((next = RB_RIGHT(curr, rb_entry)) != NULL &&
	    next->free_down >= min_free) {
		/* Find next entry in right subtree. */
		do
			curr = next;
		while ((next = RB_LEFT(curr, rb_entry)) != NULL &&
		    next->free_down >= min_free);
	} else {
		/* Find next entry in a left-parent ancestor. */
		while ((next = RB_PARENT(curr, rb_entry)) != NULL &&
		    curr == RB_RIGHT(next, rb_entry))
			curr = next;
		curr = next;
	}
	return (curr);
}

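/*
 * Find free space for the request described by "a".  The search runs in two
 * passes over the gaps between existing entries, pruning any subtree whose
 * free_down shows it cannot hold the request plus two guard pages: first the
 * region below the tag's lowaddr limit is searched, and only if that fails
 * is the region above highaddr tried.  On success the preallocated a->entry
 * is filled in and linked into the tree and 0 is returned; otherwise ENOMEM.
 */
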
static int
iommu_gas_find_space(struct iommu_gas_match_args *a)
{
	struct iommu_domain *domain;
	struct iommu_map_entry *curr, *first;
	iommu_gaddr_t addr, min_free;

	IOMMU_DOMAIN_ASSERT_LOCKED(a->domain);
	KASSERT(a->entry->flags == 0,
	    ("dirty entry %p %p", a->domain, a->entry));

	/*
	 * If the subtree doesn't have free space for the requested allocation
	 * plus two guard pages, skip it.
	 */
	min_free = 2 * IOMMU_PAGE_SIZE +
	    roundup2(a->size + a->offset, IOMMU_PAGE_SIZE);

	/*
	 * Find the first entry in the lower region that could abut a
	 * big-enough range.
	 */
	domain = a->domain;
	curr = RB_ROOT(&domain->rb_root);
	first = NULL;
	while (curr != NULL && curr->free_down >= min_free) {
		first = curr;
		curr = RB_LEFT(curr, rb_entry);
	}

	/*
	 * Walk the big-enough ranges until one satisfies the alignment
	 * requirements, or violates the lowaddr address requirement.
	 */
	addr = a->common->lowaddr + 1;
	for (curr = first; curr != NULL;
	    curr = iommu_gas_next(curr, min_free)) {
		if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, first->last, curr->start,
		    0, addr)) {
			RB_INSERT_PREV(iommu_gas_entries_tree,
			    &domain->rb_root, curr, a->entry);
			return (0);
		}
		if (curr->end >= addr) {
			/* All remaining ranges >= addr */
			break;
		}
		if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, curr->end, first->first,
		    0, addr)) {
			RB_INSERT_NEXT(iommu_gas_entries_tree,
			    &domain->rb_root, curr, a->entry);
			return (0);
		}
	}

	/*
	 * To resume the search at the start of the upper region, first climb
	 * to the nearest ancestor that spans highaddr.  Then find the last
	 * entry before highaddr that could abut a big-enough range.
	 */
	addr = a->common->highaddr;
	while (curr != NULL && curr->last < addr)
		curr = RB_PARENT(curr, rb_entry);
	first = NULL;
	while (curr != NULL && curr->free_down >= min_free) {
		if (addr < curr->end)
			curr = RB_LEFT(curr, rb_entry);
		else {
			first = curr;
			curr = RB_RIGHT(curr, rb_entry);
		}
	}

	/*
	 * Walk the remaining big-enough ranges until one satisfies the
	 * alignment requirements.
	 */
	for (curr = first; curr != NULL;
	    curr = iommu_gas_next(curr, min_free)) {
		if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, first->last, curr->start,
		    addr + 1, domain->end)) {
			RB_INSERT_PREV(iommu_gas_entries_tree,
			    &domain->rb_root, curr, a->entry);
			return (0);
		}
		if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, curr->end, first->first,
		    addr + 1, domain->end)) {
			RB_INSERT_NEXT(iommu_gas_entries_tree,
			    &domain->rb_root, curr, a->entry);
			return (0);
		}
	}

	return (ENOMEM);
}

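/*
 * Insert an entry at the caller-chosen address range [entry->start,
 * entry->end) instead of searching for free space.  This is used for RMRR
 * and reserved regions.  The range must be page-aligned and lie inside the
 * domain; EBUSY is returned if it conflicts with an existing mapping,
 * except that with IOMMU_MF_RMRR the entry is trimmed to tolerate
 * overlapping RMRR entries reported by broken BIOSes.
 */
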
static int
iommu_gas_alloc_region(struct iommu_domain *domain,
    struct iommu_map_entry *entry, u_int flags)
{
	struct iommu_map_entry *next, *prev;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);

	if ((entry->start & IOMMU_PAGE_MASK) != 0 ||
	    (entry->end & IOMMU_PAGE_MASK) != 0)
		return (EINVAL);
	if (entry->start >= entry->end)
		return (EINVAL);
	if (entry->end >= domain->end)
		return (EINVAL);

	next = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, entry);
	KASSERT(next != NULL, ("next must be non-null %p %jx", domain,
	    (uintmax_t)entry->start));
	prev = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, next);
	/* prev could be NULL */

	/*
	 * Adapt to broken BIOSes which specify overlapping RMRR
	 * entries.
	 *
	 * XXXKIB: this does not handle the case when prev or next
	 * entries are completely covered by the current one, which
	 * extends both ways.
	 */
	if (prev != NULL && prev->end > entry->start &&
	    (prev->flags & IOMMU_MAP_ENTRY_PLACE) == 0) {
		if ((flags & IOMMU_MF_RMRR) == 0 ||
		    (prev->flags & IOMMU_MAP_ENTRY_RMRR) == 0)
			return (EBUSY);
		entry->start = prev->end;
	}
	if (next->start < entry->end &&
	    (next->flags & IOMMU_MAP_ENTRY_PLACE) == 0) {
		if ((flags & IOMMU_MF_RMRR) == 0 ||
		    (next->flags & IOMMU_MAP_ENTRY_RMRR) == 0)
			return (EBUSY);
		entry->end = next->start;
	}
	if (entry->end == entry->start)
		return (0);

	if (prev != NULL && prev->end > entry->start) {
		/* This assumes that prev is the placeholder entry. */
		iommu_gas_rb_remove(domain, prev);
		prev = NULL;
	}
	RB_INSERT_PREV(iommu_gas_entries_tree,
	    &domain->rb_root, next, entry);
	if (next->start < entry->end) {
		iommu_gas_rb_remove(domain, next);
		next = NULL;
	}

	if ((flags & IOMMU_MF_RMRR) != 0)
		entry->flags = IOMMU_MAP_ENTRY_RMRR;

#ifdef INVARIANTS
	struct iommu_map_entry *ip, *in;
	ip = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, entry);
	in = RB_NEXT(iommu_gas_entries_tree, &domain->rb_root, entry);
	KASSERT(prev == NULL || ip == prev,
	    ("RMRR %p (%jx %jx) prev %p (%jx %jx) ins prev %p (%jx %jx)",
	    entry, entry->start, entry->end, prev,
	    prev == NULL ? 0 : prev->start, prev == NULL ? 0 : prev->end,
	    ip, ip == NULL ? 0 : ip->start, ip == NULL ? 0 : ip->end));
	KASSERT(next == NULL || in == next,
	    ("RMRR %p (%jx %jx) next %p (%jx %jx) ins next %p (%jx %jx)",
	    entry, entry->start, entry->end, next,
	    next == NULL ? 0 : next->start, next == NULL ? 0 : next->end,
	    in, in == NULL ? 0 : in->start, in == NULL ? 0 : in->end));
#endif

	return (0);
}

void
iommu_gas_free_space(struct iommu_map_entry *entry)
{
	struct iommu_domain *domain;

	domain = entry->domain;
	KASSERT((entry->flags & (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_RMRR |
	    IOMMU_MAP_ENTRY_MAP)) == IOMMU_MAP_ENTRY_MAP,
	    ("permanent entry %p %p", domain, entry));

	IOMMU_DOMAIN_LOCK(domain);
	iommu_gas_rb_remove(domain, entry);
	entry->flags &= ~IOMMU_MAP_ENTRY_MAP;
#ifdef INVARIANTS
	if (iommu_check_free)
		iommu_gas_check_free(domain);
#endif
	IOMMU_DOMAIN_UNLOCK(domain);
}

void
iommu_gas_free_region(struct iommu_map_entry *entry)
{
	struct iommu_domain *domain;

	domain = entry->domain;
	KASSERT((entry->flags & (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_RMRR |
	    IOMMU_MAP_ENTRY_MAP)) == IOMMU_MAP_ENTRY_RMRR,
	    ("non-RMRR entry %p %p", domain, entry));

	IOMMU_DOMAIN_LOCK(domain);
	if (entry != domain->first_place &&
	    entry != domain->last_place)
		iommu_gas_rb_remove(domain, entry);
	entry->flags &= ~IOMMU_MAP_ENTRY_RMRR;
	IOMMU_DOMAIN_UNLOCK(domain);
}

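/*
 * Helpers for iommu_gas_remove(): when the range being removed cuts through
 * the middle of an existing entry, the entry is split so that the piece
 * outside the range survives in the tree.  The caller preallocates the
 * spare entries (r / r1 / r2 below), so the split never has to allocate
 * memory while the domain lock is held.
 */
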
static struct iommu_map_entry *
iommu_gas_remove_clip_left(struct iommu_domain *domain, iommu_gaddr_t start,
    iommu_gaddr_t end, struct iommu_map_entry **r)
{
	struct iommu_map_entry *entry, *res, fentry;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);
	MPASS(start <= end);
	MPASS(end <= domain->end);

	/*
	 * Find an entry which contains the supplied guest address start,
	 * or the first entry after the start.  Since we asserted that start
	 * is below the domain end, the entry should exist.  Then clip it if
	 * needed.
	 */
	fentry.start = start + 1;
	fentry.end = start + 1;
	entry = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, &fentry);

	if (entry->start >= start ||
	    (entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
		return (entry);

	res = *r;
	*r = NULL;
	*res = *entry;
	res->start = entry->end = start;
	RB_UPDATE_AUGMENT(entry, rb_entry);
	RB_INSERT_NEXT(iommu_gas_entries_tree,
	    &domain->rb_root, entry, res);
	return (res);
}

static bool
iommu_gas_remove_clip_right(struct iommu_domain *domain,
    iommu_gaddr_t end, struct iommu_map_entry *entry,
    struct iommu_map_entry *r)
{
	if (entry->start >= end || (entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
		return (false);

	*r = *entry;
	r->end = entry->start = end;
	RB_UPDATE_AUGMENT(entry, rb_entry);
	RB_INSERT_PREV(iommu_gas_entries_tree,
	    &domain->rb_root, entry, r);
	return (true);
}

static void
iommu_gas_remove_unmap(struct iommu_domain *domain,
    struct iommu_map_entry *entry, struct iommu_map_entries_tailq *gcp)
{
	IOMMU_DOMAIN_ASSERT_LOCKED(domain);

	if ((entry->flags & (IOMMU_MAP_ENTRY_UNMAPPED |
	    IOMMU_MAP_ENTRY_REMOVING)) != 0)
		return;
	MPASS((entry->flags & IOMMU_MAP_ENTRY_PLACE) == 0);
	entry->flags |= IOMMU_MAP_ENTRY_REMOVING;
	TAILQ_INSERT_TAIL(gcp, entry, dmamap_link);
}

/*
 * Remove the specified range from the GAS of the domain.  Note that the
 * removal is not guaranteed to have occurred by the time the function
 * returns; it might be finalized some time later, when the hardware reports
 * that the (queued) IOTLB invalidation was performed.
 */
void
iommu_gas_remove(struct iommu_domain *domain, iommu_gaddr_t start,
    iommu_gaddr_t size)
{
	struct iommu_map_entry *entry, *nentry, *r1, *r2;
	struct iommu_map_entries_tailq gc;
	iommu_gaddr_t end;

	end = start + size;
	r1 = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
	r2 = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
	TAILQ_INIT(&gc);

	IOMMU_DOMAIN_LOCK(domain);

	nentry = iommu_gas_remove_clip_left(domain, start, end, &r1);
	RB_FOREACH_FROM(entry, iommu_gas_entries_tree, nentry) {
		if (entry->start >= end)
			break;
		KASSERT(start <= entry->start,
		    ("iommu_gas_remove entry (%#jx, %#jx) start %#jx",
		    entry->start, entry->end, start));
		if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
			continue;
		iommu_gas_remove_unmap(domain, entry, &gc);
	}
	if (iommu_gas_remove_clip_right(domain, end, entry, r2)) {
		iommu_gas_remove_unmap(domain, r2, &gc);
		r2 = NULL;
	}

#ifdef INVARIANTS
	RB_FOREACH(entry, iommu_gas_entries_tree, &domain->rb_root) {
		if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
			continue;
		KASSERT(entry->end <= start || entry->start >= end,
		    ("iommu_gas_remove leftover entry (%#jx, %#jx) range "
		    "(%#jx, %#jx)",
		    entry->start, entry->end, start, end));
	}
#endif

	IOMMU_DOMAIN_UNLOCK(domain);
	if (r1 != NULL)
		iommu_gas_free_entry(r1);
	if (r2 != NULL)
		iommu_gas_free_entry(r2);
	iommu_domain_unload(domain, &gc, true);
}

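/*
 * Allocate space in the domain and map the page array "ma" into it.  A
 * rough usage sketch (illustrative only; the busdma code in busdma_iommu.c
 * is the main consumer of this path, and its error handling is more
 * involved):
 *
 *	struct iommu_map_entry *e;
 *	error = iommu_gas_map(domain, &tag->common, size, offset,
 *	    IOMMU_MAP_ENTRY_READ | IOMMU_MAP_ENTRY_WRITE,
 *	    IOMMU_MF_CANWAIT, ma, &e);
 *
 * On success the returned entry covers [e->start, e->end) and the range has
 * been installed into the domain page tables via domain->ops->map().
 */
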
int
iommu_gas_map(struct iommu_domain *domain,
    const struct bus_dma_tag_common *common, iommu_gaddr_t size, int offset,
    u_int eflags, u_int flags, vm_page_t *ma, struct iommu_map_entry **res)
{
	struct iommu_gas_match_args a;
	struct iommu_map_entry *entry;
	int error;

	KASSERT((flags & ~(IOMMU_MF_CANWAIT | IOMMU_MF_CANSPLIT)) == 0,
	    ("invalid flags 0x%x", flags));

	a.domain = domain;
	a.size = size;
	a.offset = offset;
	a.common = common;
	a.gas_flags = flags;
	entry = iommu_gas_alloc_entry(domain,
	    (flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0);
	if (entry == NULL)
		return (ENOMEM);
	a.entry = entry;
	IOMMU_DOMAIN_LOCK(domain);
	error = iommu_gas_find_space(&a);
	if (error == ENOMEM) {
		IOMMU_DOMAIN_UNLOCK(domain);
		iommu_gas_free_entry(entry);
		return (error);
	}
#ifdef INVARIANTS
	if (iommu_check_free)
		iommu_gas_check_free(domain);
#endif
	KASSERT(error == 0,
	    ("unexpected error %d from iommu_gas_find_space", error));
	KASSERT(entry->end < domain->end, ("allocated GPA %jx, max GPA %jx",
	    (uintmax_t)entry->end, (uintmax_t)domain->end));
	entry->flags |= eflags;
	IOMMU_DOMAIN_UNLOCK(domain);

	error = domain->ops->map(domain, entry->start,
	    entry->end - entry->start, ma, eflags,
	    ((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0));
	if (error == ENOMEM) {
		iommu_domain_unload_entry(entry, true,
		    (flags & IOMMU_MF_CANWAIT) != 0);
		return (error);
	}
	KASSERT(error == 0,
	    ("unexpected error %d from domain_map_buf", error));

	*res = entry;
	return (0);
}

int
iommu_gas_map_region(struct iommu_domain *domain,
    struct iommu_map_entry *entry, u_int eflags, u_int flags, vm_page_t *ma)
{
	iommu_gaddr_t start;
	int error;

	KASSERT(entry->domain == domain,
	    ("mismatched domain %p entry %p entry->domain %p", domain,
	    entry, entry->domain));
	KASSERT(entry->flags == 0, ("used RMRR entry %p %p %x", domain,
	    entry, entry->flags));
	KASSERT((flags & ~(IOMMU_MF_CANWAIT | IOMMU_MF_RMRR)) == 0,
	    ("invalid flags 0x%x", flags));

	start = entry->start;
	IOMMU_DOMAIN_LOCK(domain);
	error = iommu_gas_alloc_region(domain, entry, flags);
	if (error != 0) {
		IOMMU_DOMAIN_UNLOCK(domain);
		return (error);
	}
	entry->flags |= eflags;
	IOMMU_DOMAIN_UNLOCK(domain);
	if (entry->end == entry->start)
		return (0);

	error = domain->ops->map(domain, entry->start,
	    entry->end - entry->start, ma + OFF_TO_IDX(start - entry->start),
	    eflags, ((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0));
	if (error == ENOMEM) {
		iommu_domain_unload_entry(entry, false,
		    (flags & IOMMU_MF_CANWAIT) != 0);
		return (error);
	}
	KASSERT(error == 0,
	    ("unexpected error %d from domain_map_buf", error));

	return (0);
}

static int
iommu_gas_reserve_region_locked(struct iommu_domain *domain,
    iommu_gaddr_t start, iommu_gaddr_t end, struct iommu_map_entry *entry)
{
	int error;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);

	entry->start = start;
	entry->end = end;
	error = iommu_gas_alloc_region(domain, entry, IOMMU_MF_CANWAIT);
	if (error == 0)
		entry->flags |= IOMMU_MAP_ENTRY_UNMAPPED;
	return (error);
}

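/*
 * iommu_gas_reserve_region() marks [start, end) as reserved so that the
 * allocator never places a mapping there; it fails with EBUSY if anything
 * already overlaps the range.  The _extend() variant below tolerates
 * existing entries and reserves only the remaining free sub-ranges.
 */
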
int
iommu_gas_reserve_region(struct iommu_domain *domain, iommu_gaddr_t start,
    iommu_gaddr_t end, struct iommu_map_entry **entry0)
{
	struct iommu_map_entry *entry;
	int error;

	entry = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
	IOMMU_DOMAIN_LOCK(domain);
	error = iommu_gas_reserve_region_locked(domain, start, end, entry);
	IOMMU_DOMAIN_UNLOCK(domain);
	if (error != 0)
		iommu_gas_free_entry(entry);
	else if (entry0 != NULL)
		*entry0 = entry;
	return (error);
}

/*
 * As in iommu_gas_reserve_region(), reserve [start, end), but allow for
 * existing entries.
 */
int
iommu_gas_reserve_region_extend(struct iommu_domain *domain,
    iommu_gaddr_t start, iommu_gaddr_t end)
{
	struct iommu_map_entry *entry, *next, *prev, key = {};
	iommu_gaddr_t entry_start, entry_end;
	int error;

	error = 0;
	entry = NULL;
	end = ummin(end, domain->end);
	while (start < end) {
		/* Preallocate an entry. */
		if (entry == NULL)
			entry = iommu_gas_alloc_entry(domain,
			    IOMMU_PGF_WAITOK);
		/* Calculate the free region from here to the next entry. */
		key.start = key.end = start;
		IOMMU_DOMAIN_LOCK(domain);
		next = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root,
		    &key);
		KASSERT(next != NULL, ("domain %p with end %#jx has no entry "
		    "after %#jx", domain, (uintmax_t)domain->end,
		    (uintmax_t)start));
		entry_end = ummin(end, next->start);
		prev = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, next);
		if (prev != NULL)
			entry_start = ummax(start, prev->end);
		else
			entry_start = start;
		start = next->end;
		/* Reserve the region if non-empty. */
		if (entry_start != entry_end) {
			error = iommu_gas_reserve_region_locked(domain,
			    entry_start, entry_end, entry);
			if (error != 0) {
				IOMMU_DOMAIN_UNLOCK(domain);
				break;
			}
			entry = NULL;
		}
		IOMMU_DOMAIN_UNLOCK(domain);
	}
	/* Release a preallocated entry if it was not used. */
	if (entry != NULL)
		iommu_gas_free_entry(entry);
	return (error);
}

void
iommu_unmap_msi(struct iommu_ctx *ctx)
{
	struct iommu_map_entry *entry;
	struct iommu_domain *domain;

	domain = ctx->domain;
	entry = domain->msi_entry;
	if (entry == NULL)
		return;

	domain->ops->unmap(domain, entry->start, entry->end -
	    entry->start, IOMMU_PGF_WAITOK);

	iommu_gas_free_space(entry);

	iommu_gas_free_entry(entry);

	domain->msi_entry = NULL;
	domain->msi_base = 0;
	domain->msi_phys = 0;
}

int
iommu_map_msi(struct iommu_ctx *ctx, iommu_gaddr_t size, int offset,
    u_int eflags, u_int flags, vm_page_t *ma)
{
	struct iommu_domain *domain;
	struct iommu_map_entry *entry;
	int error;

	error = 0;
	domain = ctx->domain;

	/* Check if there is already an MSI page allocated. */
	IOMMU_DOMAIN_LOCK(domain);
	entry = domain->msi_entry;
	IOMMU_DOMAIN_UNLOCK(domain);

	if (entry == NULL) {
		error = iommu_gas_map(domain, &ctx->tag->common, size, offset,
		    eflags, flags, ma, &entry);
		IOMMU_DOMAIN_LOCK(domain);
		if (error == 0) {
			if (domain->msi_entry == NULL) {
				MPASS(domain->msi_base == 0);
				MPASS(domain->msi_phys == 0);

				domain->msi_entry = entry;
				domain->msi_base = entry->start;
				domain->msi_phys = VM_PAGE_TO_PHYS(ma[0]);
			} else {
				/*
				 * We lost the race and already have an
				 * MSI page allocated.  Free the unneeded
				 * entry.
				 */
				iommu_gas_free_entry(entry);
			}
		} else if (domain->msi_entry != NULL) {
			/*
			 * The allocation failed, but another thread
			 * succeeded.  Return success as there is a valid
			 * MSI page.
			 */
			error = 0;
		}
		IOMMU_DOMAIN_UNLOCK(domain);
	}

	return (error);
}

void
iommu_translate_msi(struct iommu_domain *domain, uint64_t *addr)
{

	*addr = (*addr - domain->msi_phys) + domain->msi_base;

	KASSERT(*addr >= domain->msi_entry->start,
	    ("%s: Address is below the MSI entry start address (%jx < %jx)",
	    __func__, (uintmax_t)*addr, (uintmax_t)domain->msi_entry->start));

	KASSERT(*addr + sizeof(*addr) <= domain->msi_entry->end,
	    ("%s: Address is above the MSI entry end address (%jx > %jx)",
	    __func__, (uintmax_t)*addr, (uintmax_t)domain->msi_entry->end));
}

SYSCTL_NODE(_hw, OID_AUTO, iommu, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "");

#ifdef INVARIANTS
SYSCTL_INT(_hw_iommu, OID_AUTO, check_free, CTLFLAG_RWTUN,
    &iommu_check_free, 0,
    "Check the GPA RBtree for free_down validity");
#endif