/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2013 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#define RB_AUGMENT_CHECK(entry) iommu_gas_augment_entry(entry)

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/rman.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/uma.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/iommu/iommu.h>
#include <dev/iommu/iommu_gas.h>
#include <dev/iommu/iommu_msi.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <machine/iommu.h>
#include <dev/iommu/busdma_iommu.h>

/*
 * Guest Address Space management.
 */

static uma_zone_t iommu_map_entry_zone;

#ifdef INVARIANTS
static int iommu_check_free;
#endif

static void
intel_gas_init(void)
{

	iommu_map_entry_zone = uma_zcreate("IOMMU_MAP_ENTRY",
	    sizeof(struct iommu_map_entry), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NODUMP);
}
SYSINIT(intel_gas, SI_SUB_DRIVERS, SI_ORDER_FIRST, intel_gas_init, NULL);

struct iommu_map_entry *
iommu_gas_alloc_entry(struct iommu_domain *domain, u_int flags)
{
	struct iommu_map_entry *res;

	KASSERT((flags & ~(IOMMU_PGF_WAITOK)) == 0,
	    ("unsupported flags %x", flags));

	res = uma_zalloc(iommu_map_entry_zone, ((flags & IOMMU_PGF_WAITOK) !=
	    0 ? M_WAITOK : M_NOWAIT) | M_ZERO);
	if (res != NULL && domain != NULL) {
		res->domain = domain;
		atomic_add_int(&domain->entries_cnt, 1);
	}
	return (res);
}

void
iommu_gas_free_entry(struct iommu_map_entry *entry)
{
	struct iommu_domain *domain;

	domain = entry->domain;
	if (domain != NULL)
		atomic_subtract_int(&domain->entries_cnt, 1);
	uma_zfree(iommu_map_entry_zone, entry);
}

static int
iommu_gas_cmp_entries(struct iommu_map_entry *a, struct iommu_map_entry *b)
{

	/* Last entry has zero size, so <= */
	KASSERT(a->start <= a->end, ("inverted entry %p (%jx, %jx)",
	    a, (uintmax_t)a->start, (uintmax_t)a->end));
	KASSERT(b->start <= b->end, ("inverted entry %p (%jx, %jx)",
	    b, (uintmax_t)b->start, (uintmax_t)b->end));
	KASSERT(a->end <= b->start || b->end <= a->start ||
	    a->end == a->start || b->end == b->start,
	    ("overlapping entries %p (%jx, %jx) %p (%jx, %jx)",
	    a, (uintmax_t)a->start, (uintmax_t)a->end,
	    b, (uintmax_t)b->start, (uintmax_t)b->end));

	if (a->end < b->end)
		return (-1);
	else if (b->end < a->end)
		return (1);
	return (0);
}

/*
 * Update augmentation data based on data from children.
 * Return true if and only if the update changes the augmentation data.
 */
static bool
iommu_gas_augment_entry(struct iommu_map_entry *entry)
{
	struct iommu_map_entry *child;
	iommu_gaddr_t bound, delta, free_down;

	free_down = 0;
	bound = entry->start;
	if ((child = RB_LEFT(entry, rb_entry)) != NULL) {
		free_down = MAX(child->free_down, bound - child->last);
		bound = child->first;
	}
	delta = bound - entry->first;
	entry->first = bound;
	bound = entry->end;
	if ((child = RB_RIGHT(entry, rb_entry)) != NULL) {
		free_down = MAX(free_down, child->free_down);
		free_down = MAX(free_down, child->first - bound);
		bound = child->last;
	}
	delta += entry->last - bound;
	if (delta == 0)
		delta = entry->free_down - free_down;
	entry->last = bound;
	entry->free_down = free_down;

	/*
	 * Return true either if the value of last-first changed,
	 * or if free_down changed.
	 */
	return (delta != 0);
}

RB_GENERATE(iommu_gas_entries_tree, iommu_map_entry, rb_entry,
    iommu_gas_cmp_entries);

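/*
 * Worked example of the augmentation above (editor's note, made-up
 * addresses): consider a node whose own entry is [0x5000, 0x6000), whose
 * left subtree spans entries from 0x1000 to 0x2000 with free_down == 0, and
 * whose right subtree spans entries from 0x9000 to 0xa000 with
 * free_down == 0.  The gap below the node is 0x5000 - 0x2000 = 0x3000 and
 * the gap above it is 0x9000 - 0x6000 = 0x3000, so the node ends up with
 * first == 0x1000, last == 0xa000 and free_down == 0x3000, the largest free
 * gap anywhere inside its subtree.
 */
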
#ifdef INVARIANTS
static void
iommu_gas_check_free(struct iommu_domain *domain)
{
	struct iommu_map_entry *entry, *l, *r;
	iommu_gaddr_t v;

	RB_FOREACH(entry, iommu_gas_entries_tree, &domain->rb_root) {
		KASSERT(domain == entry->domain,
		    ("mismatched free domain %p entry %p entry->domain %p",
		    domain, entry, entry->domain));
		l = RB_LEFT(entry, rb_entry);
		r = RB_RIGHT(entry, rb_entry);
		v = 0;
		if (l != NULL) {
			v = MAX(v, l->free_down);
			v = MAX(v, entry->start - l->last);
		}
		if (r != NULL) {
			v = MAX(v, r->free_down);
			v = MAX(v, r->first - entry->end);
		}
		MPASS(entry->free_down == v);
	}
}
#endif

static void
iommu_gas_rb_remove(struct iommu_domain *domain, struct iommu_map_entry *entry)
{
	struct iommu_map_entry *nbr;

	/* Removing entry may open a new free gap before domain->start_gap. */
	if (entry->end <= domain->start_gap->end) {
		if (RB_RIGHT(entry, rb_entry) != NULL)
			nbr = iommu_gas_entries_tree_RB_NEXT(entry);
		else if (RB_LEFT(entry, rb_entry) != NULL)
			nbr = RB_LEFT(entry, rb_entry);
		else
			nbr = RB_PARENT(entry, rb_entry);
		domain->start_gap = nbr;
	}
	RB_REMOVE(iommu_gas_entries_tree, &domain->rb_root, entry);
}

struct iommu_domain *
iommu_get_ctx_domain(struct iommu_ctx *ctx)
{

	return (ctx->domain);
}

void
iommu_gas_init_domain(struct iommu_domain *domain)
{
	struct iommu_map_entry *begin, *end;

	begin = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
	end = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);

	IOMMU_DOMAIN_LOCK(domain);
	KASSERT(domain->entries_cnt == 2, ("dirty domain %p", domain));
	KASSERT(RB_EMPTY(&domain->rb_root),
	    ("non-empty entries %p", domain));

	/*
	 * The end entry must be inserted first because it has a zero-length
	 * gap between start and end.  Initially, all augmentation data for a
	 * new entry is zero.  Function iommu_gas_augment_entry will compute
	 * no change in the value of (start-end) and no change in the value
	 * of free_down, so it will return false to suggest that nothing
	 * changed in the entry.  Thus, inserting the end entry second would
	 * prevent augmentation information from being propagated to the
	 * begin entry at the tree root.  So it is inserted first.
	 */
	end->start = domain->end;
	end->end = domain->end;
	end->flags = IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED;
	RB_INSERT(iommu_gas_entries_tree, &domain->rb_root, end);

	begin->start = 0;
	begin->end = IOMMU_PAGE_SIZE;
	begin->flags = IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED;
	RB_INSERT_PREV(iommu_gas_entries_tree, &domain->rb_root, end, begin);

	domain->start_gap = end;
	domain->first_place = begin;
	domain->last_place = end;
	domain->flags |= IOMMU_DOMAIN_GAS_INITED;
	IOMMU_DOMAIN_UNLOCK(domain);
}

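/*
 * Illustration (editor's note): after iommu_gas_init_domain() the tree holds
 * exactly two placeholder entries,
 *
 *	begin: [0, IOMMU_PAGE_SIZE)        PLACE | UNMAPPED
 *	end:   [domain->end, domain->end)  PLACE | UNMAPPED (zero length)
 *
 * so the allocatable space is the single gap between them, and
 * domain->start_gap initially points at the end entry.
 */
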
void
iommu_gas_fini_domain(struct iommu_domain *domain)
{
	struct iommu_map_entry *entry;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);
	KASSERT(domain->entries_cnt == 2,
	    ("domain still in use %p", domain));

	entry = RB_MIN(iommu_gas_entries_tree, &domain->rb_root);
	KASSERT(entry->start == 0, ("start entry start %p", domain));
	KASSERT(entry->end == IOMMU_PAGE_SIZE, ("start entry end %p", domain));
	KASSERT(entry->flags ==
	    (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED),
	    ("start entry flags %p", domain));
	iommu_gas_rb_remove(domain, entry);
	iommu_gas_free_entry(entry);

	entry = RB_MAX(iommu_gas_entries_tree, &domain->rb_root);
	KASSERT(entry->start == domain->end, ("end entry start %p", domain));
	KASSERT(entry->end == domain->end, ("end entry end %p", domain));
	KASSERT(entry->flags ==
	    (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED),
	    ("end entry flags %p", domain));
	iommu_gas_rb_remove(domain, entry);
	iommu_gas_free_entry(entry);
}

struct iommu_gas_match_args {
	iommu_gaddr_t size;
	int offset;
	const struct bus_dma_tag_common *common;
	u_int gas_flags;
	struct iommu_map_entry *entry;
};

/*
 * The interval [beg, end) is a free interval between two iommu_map_entries.
 * Addresses can be allocated only in the range [lbound, ubound].  Try to
 * allocate space in the free interval, subject to the conditions expressed
 * by a, and return 'true' if and only if the allocation attempt succeeds.
 */
static bool
iommu_gas_match_one(struct iommu_gas_match_args *a, iommu_gaddr_t beg,
    iommu_gaddr_t end, iommu_gaddr_t lbound, iommu_gaddr_t ubound)
{
	struct iommu_map_entry *entry;
	iommu_gaddr_t first, size, start;
	int offset;

	/*
	 * The prev->end is always aligned on the page size, which
	 * causes page alignment for the entry->start too.
	 *
	 * Create IOMMU_PAGE_SIZE gaps before and after the new entry
	 * to ensure that out-of-bounds accesses fault.
	 */
	beg = MAX(beg + IOMMU_PAGE_SIZE, lbound);
	start = roundup2(beg, a->common->alignment);
	if (start < beg)
		return (false);
	end = MIN(end - IOMMU_PAGE_SIZE - 1, ubound);
	offset = a->offset;
	size = a->size;
	if (start + offset + size - 1 > end)
		return (false);

	/* Check for and try to skip past boundary crossing. */
	if (!vm_addr_bound_ok(start + offset, size, a->common->boundary)) {
		/*
		 * The start + offset to start + offset + size region crosses
		 * the boundary.  Check if there is enough space after the
		 * next boundary after the beg.
		 */
		first = start;
		beg = roundup2(start + offset + 1, a->common->boundary);
		start = roundup2(beg, a->common->alignment);

		if (start + offset + size - 1 > end ||
		    !vm_addr_bound_ok(start + offset, size,
		    a->common->boundary)) {
			/*
			 * Not enough space to align at the requested
			 * boundary, or boundary is smaller than the size,
			 * but allowed to split.  We already checked that
			 * start + size does not overlap ubound.
			 *
			 * XXXKIB. It is possible that beg is exactly at the
			 * start of the next entry, then we do not have a
			 * gap.  Ignore for now.
			 */
			if ((a->gas_flags & IOMMU_MF_CANSPLIT) == 0)
				return (false);
			size = beg - first - offset;
			start = first;
		}
	}
	entry = a->entry;
	entry->start = start;
	entry->end = start + roundup2(size + offset, IOMMU_PAGE_SIZE);
	entry->flags = IOMMU_MAP_ENTRY_MAP;
	return (true);
}

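/*
 * Worked example (editor's note, made-up numbers, assuming a 4 KB
 * IOMMU_PAGE_SIZE): for a free interval [0x10000, 0x20000), lbound = 0,
 * a->size = 0x3000, a->offset = 0, alignment = 0x4000 and no boundary,
 * iommu_gas_match_one() first reserves the leading guard page, giving
 * beg = 0x11000, rounds up to the alignment, start = 0x14000, and checks
 * that start + size - 1 = 0x16fff still fits below the trimmed upper limit
 * 0x1efff.  The resulting entry is [0x14000, 0x17000), leaving at least one
 * unmapped page on each side of it.
 */
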
/* Find the next entry that might abut a big-enough range. */
static struct iommu_map_entry *
iommu_gas_next(struct iommu_map_entry *curr, iommu_gaddr_t min_free)
{
	struct iommu_map_entry *next;

	if ((next = RB_RIGHT(curr, rb_entry)) != NULL &&
	    next->free_down >= min_free) {
		/* Find next entry in right subtree. */
		do
			curr = next;
		while ((next = RB_LEFT(curr, rb_entry)) != NULL &&
		    next->free_down >= min_free);
	} else {
		/* Find next entry in a left-parent ancestor. */
		while ((next = RB_PARENT(curr, rb_entry)) != NULL &&
		    curr == RB_RIGHT(next, rb_entry))
			curr = next;
		curr = next;
	}
	return (curr);
}

/*
 * Address-ordered first-fit search of 'domain' for free space satisfying the
 * conditions of 'a'.  The space allocated is at least one page big, and is
 * bounded by guard pages to the left and right.  The allocated space for
 * 'domain' is described by an rb-tree of map entries at domain->rb_root, and
 * domain->start_gap points to a map entry less than or adjacent to the first
 * free-space of size at least 3 pages.
 */
static int
iommu_gas_find_space(struct iommu_domain *domain,
    struct iommu_gas_match_args *a)
{
	struct iommu_map_entry *curr, *first;
	iommu_gaddr_t addr, min_free;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);
	KASSERT(a->entry->flags == 0,
	    ("dirty entry %p %p", domain, a->entry));

	/*
	 * start_gap may point to an entry adjacent to gaps too small for any
	 * new allocation.  In that case, advance start_gap to the first free
	 * space big enough for a minimum allocation plus two guard pages.
	 */
	min_free = 3 * IOMMU_PAGE_SIZE;
	first = domain->start_gap;
	while (first != NULL && first->free_down < min_free)
		first = RB_PARENT(first, rb_entry);
	for (curr = first; curr != NULL;
	    curr = iommu_gas_next(curr, min_free)) {
		if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
		    first->last + min_free <= curr->start)
			break;
		if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
		    curr->end + min_free <= first->first)
			break;
	}
	domain->start_gap = curr;

	/*
	 * If the subtree doesn't have free space for the requested allocation
	 * plus two guard pages, skip it.
	 */
	min_free = 2 * IOMMU_PAGE_SIZE +
	    roundup2(a->size + a->offset, IOMMU_PAGE_SIZE);

	/* Climb to find a node in the subtree of big-enough ranges. */
	first = curr;
	while (first != NULL && first->free_down < min_free)
		first = RB_PARENT(first, rb_entry);

	/*
	 * Walk the big-enough ranges tree until one satisfies alignment
	 * requirements, or violates the lowaddr address requirement.
	 */
	addr = a->common->lowaddr;
	for (curr = first; curr != NULL;
	    curr = iommu_gas_next(curr, min_free)) {
		if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, first->last, curr->start,
		    0, addr)) {
			RB_INSERT_PREV(iommu_gas_entries_tree,
			    &domain->rb_root, curr, a->entry);
			return (0);
		}
		if (curr->end >= addr) {
			/* All remaining ranges > addr */
			break;
		}
		if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, curr->end, first->first,
		    0, addr)) {
			RB_INSERT_NEXT(iommu_gas_entries_tree,
			    &domain->rb_root, curr, a->entry);
			return (0);
		}
	}

	/*
	 * To resume the search at the start of the upper region, first climb
	 * to the nearest ancestor that spans highaddr.  Then find the last
	 * entry before highaddr that could abut a big-enough range.
	 */
	addr = a->common->highaddr;
	while (curr != NULL && curr->last < addr)
		curr = RB_PARENT(curr, rb_entry);
	first = NULL;
	while (curr != NULL && curr->free_down >= min_free) {
		if (addr < curr->end)
			curr = RB_LEFT(curr, rb_entry);
		else {
			first = curr;
			curr = RB_RIGHT(curr, rb_entry);
		}
	}

	/*
	 * Walk the remaining big-enough ranges until one satisfies alignment
	 * requirements.
	 */
	for (curr = first; curr != NULL;
	    curr = iommu_gas_next(curr, min_free)) {
		if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, first->last, curr->start,
		    addr + 1, domain->end - 1)) {
			RB_INSERT_PREV(iommu_gas_entries_tree,
			    &domain->rb_root, curr, a->entry);
			return (0);
		}
		if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, curr->end, first->first,
		    addr + 1, domain->end - 1)) {
			RB_INSERT_NEXT(iommu_gas_entries_tree,
			    &domain->rb_root, curr, a->entry);
			return (0);
		}
	}

	return (ENOMEM);
}

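/*
 * Example of the two-pass search above (editor's note, hypothetical limits):
 * a tag with lowaddr == 0xffffffff and highaddr == 0x1ffffffff causes
 * iommu_gas_find_space() to first look for a fit in [0, 0xffffffff]; if
 * nothing there satisfies the request, the second pass resumes above the
 * exclusion window and searches [0x200000000, domain->end - 1].  Addresses
 * inside (lowaddr, highaddr] are never returned.
 */
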
static int
iommu_gas_alloc_region(struct iommu_domain *domain, struct iommu_map_entry *entry,
    u_int flags)
{
	struct iommu_map_entry *next, *prev;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);

	if ((entry->start & IOMMU_PAGE_MASK) != 0 ||
	    (entry->end & IOMMU_PAGE_MASK) != 0)
		return (EINVAL);
	if (entry->start >= entry->end)
		return (EINVAL);
	if (entry->end >= domain->end)
		return (EINVAL);

	next = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, entry);
	KASSERT(next != NULL, ("next must be non-null %p %jx", domain,
	    (uintmax_t)entry->start));
	prev = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, next);
	/* prev could be NULL */

	/*
	 * Adapt to broken BIOSes which specify overlapping RMRR
	 * entries.
	 *
	 * XXXKIB: this does not handle the case when prev or next
	 * entries are completely covered by the current one, which
	 * extends both ways.
	 */
	if (prev != NULL && prev->end > entry->start &&
	    (prev->flags & IOMMU_MAP_ENTRY_PLACE) == 0) {
		if ((flags & IOMMU_MF_RMRR) == 0 ||
		    (prev->flags & IOMMU_MAP_ENTRY_RMRR) == 0)
			return (EBUSY);
		entry->start = prev->end;
	}
	if (next->start < entry->end &&
	    (next->flags & IOMMU_MAP_ENTRY_PLACE) == 0) {
		if ((flags & IOMMU_MF_RMRR) == 0 ||
		    (next->flags & IOMMU_MAP_ENTRY_RMRR) == 0)
			return (EBUSY);
		entry->end = next->start;
	}
	if (entry->end == entry->start)
		return (0);

	if (prev != NULL && prev->end > entry->start) {
		/* This assumes that prev is the placeholder entry. */
		iommu_gas_rb_remove(domain, prev);
		prev = NULL;
	}
	RB_INSERT_PREV(iommu_gas_entries_tree,
	    &domain->rb_root, next, entry);
	if (next->start < entry->end) {
		iommu_gas_rb_remove(domain, next);
		next = NULL;
	}

	if ((flags & IOMMU_MF_RMRR) != 0)
		entry->flags = IOMMU_MAP_ENTRY_RMRR;

#ifdef INVARIANTS
	struct iommu_map_entry *ip, *in;
	ip = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, entry);
	in = RB_NEXT(iommu_gas_entries_tree, &domain->rb_root, entry);
	KASSERT(prev == NULL || ip == prev,
	    ("RMRR %p (%jx %jx) prev %p (%jx %jx) ins prev %p (%jx %jx)",
	    entry, entry->start, entry->end, prev,
	    prev == NULL ? 0 : prev->start, prev == NULL ? 0 : prev->end,
	    ip, ip == NULL ? 0 : ip->start, ip == NULL ? 0 : ip->end));
	KASSERT(next == NULL || in == next,
	    ("RMRR %p (%jx %jx) next %p (%jx %jx) ins next %p (%jx %jx)",
	    entry, entry->start, entry->end, next,
	    next == NULL ? 0 : next->start, next == NULL ? 0 : next->end,
	    in, in == NULL ? 0 : in->start, in == NULL ? 0 : in->end));
#endif

	return (0);
}

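/*
 * Example of the RMRR-overlap handling above (editor's note, made-up
 * addresses): if the tree already holds an RMRR entry [0xa0000, 0xc0000) and
 * a new IOMMU_MF_RMRR request arrives for [0xb0000, 0xd0000), the request's
 * start is bumped to 0xc0000 and it is inserted as [0xc0000, 0xd0000).  The
 * same request without IOMMU_MF_RMRR, or one overlapping a non-RMRR mapped
 * entry, fails with EBUSY.
 */
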
void
iommu_gas_free_space(struct iommu_map_entry *entry)
{
	struct iommu_domain *domain;

	domain = entry->domain;
	KASSERT((entry->flags & (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_RMRR |
	    IOMMU_MAP_ENTRY_MAP)) == IOMMU_MAP_ENTRY_MAP,
	    ("permanent entry %p %p", domain, entry));

	IOMMU_DOMAIN_LOCK(domain);
	iommu_gas_rb_remove(domain, entry);
	entry->flags &= ~IOMMU_MAP_ENTRY_MAP;
#ifdef INVARIANTS
	if (iommu_check_free)
		iommu_gas_check_free(domain);
#endif
	IOMMU_DOMAIN_UNLOCK(domain);
}

void
iommu_gas_free_region(struct iommu_map_entry *entry)
{
	struct iommu_domain *domain;

	domain = entry->domain;
	KASSERT((entry->flags & (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_RMRR |
	    IOMMU_MAP_ENTRY_MAP)) == IOMMU_MAP_ENTRY_RMRR,
	    ("non-RMRR entry %p %p", domain, entry));

	IOMMU_DOMAIN_LOCK(domain);
	if (entry != domain->first_place &&
	    entry != domain->last_place)
		iommu_gas_rb_remove(domain, entry);
	entry->flags &= ~IOMMU_MAP_ENTRY_RMRR;
	IOMMU_DOMAIN_UNLOCK(domain);
}

static struct iommu_map_entry *
iommu_gas_remove_clip_left(struct iommu_domain *domain, iommu_gaddr_t start,
    iommu_gaddr_t end, struct iommu_map_entry **r)
{
	struct iommu_map_entry *entry, *res, fentry;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);
	MPASS(start <= end);
	MPASS(end <= domain->end);

	/*
	 * Find the entry that contains the supplied guest address 'start',
	 * or the first entry after it.  Since we asserted that start is
	 * below the domain end, such an entry must exist.  Then clip it if
	 * needed.
	 */
	fentry.start = start + 1;
	fentry.end = start + 1;
	entry = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, &fentry);

	if (entry->start >= start ||
	    (entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
		return (entry);

	res = *r;
	*r = NULL;
	*res = *entry;
	res->start = entry->end = start;
	RB_UPDATE_AUGMENT(entry, rb_entry);
	RB_INSERT_NEXT(iommu_gas_entries_tree,
	    &domain->rb_root, entry, res);
	return (res);
}

static bool
iommu_gas_remove_clip_right(struct iommu_domain *domain,
    iommu_gaddr_t end, struct iommu_map_entry *entry,
    struct iommu_map_entry *r)
{
	if (entry->start >= end || (entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
		return (false);

	*r = *entry;
	r->end = entry->start = end;
	RB_UPDATE_AUGMENT(entry, rb_entry);
	RB_INSERT_PREV(iommu_gas_entries_tree,
	    &domain->rb_root, entry, r);
	return (true);
}

static void
iommu_gas_remove_unmap(struct iommu_domain *domain,
    struct iommu_map_entry *entry, struct iommu_map_entries_tailq *gcp)
{
	IOMMU_DOMAIN_ASSERT_LOCKED(domain);

	if ((entry->flags & (IOMMU_MAP_ENTRY_UNMAPPED |
	    IOMMU_MAP_ENTRY_REMOVING)) != 0)
		return;
	MPASS((entry->flags & IOMMU_MAP_ENTRY_PLACE) == 0);
	entry->flags |= IOMMU_MAP_ENTRY_REMOVING;
	TAILQ_INSERT_TAIL(gcp, entry, dmamap_link);
}

/*
 * Remove the specified range from the GAS of the domain.  Note that the
 * removal is not guaranteed to have occurred by the time the function
 * returns; it might be finalized some time later, when the hardware reports
 * that the (queued) IOTLB invalidation was performed.
 */
void
iommu_gas_remove(struct iommu_domain *domain, iommu_gaddr_t start,
    iommu_gaddr_t size)
{
	struct iommu_map_entry *entry, *nentry, *r1, *r2;
	struct iommu_map_entries_tailq gc;
	iommu_gaddr_t end;

	end = start + size;
	r1 = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
	r2 = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
	TAILQ_INIT(&gc);

	IOMMU_DOMAIN_LOCK(domain);

	nentry = iommu_gas_remove_clip_left(domain, start, end, &r1);
	RB_FOREACH_FROM(entry, iommu_gas_entries_tree, nentry) {
		if (entry->start >= end)
			break;
		KASSERT(start <= entry->start,
		    ("iommu_gas_remove entry (%#jx, %#jx) start %#jx",
		    entry->start, entry->end, start));
		if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
			continue;
		iommu_gas_remove_unmap(domain, entry, &gc);
	}
	if (iommu_gas_remove_clip_right(domain, end, entry, r2)) {
		iommu_gas_remove_unmap(domain, r2, &gc);
		r2 = NULL;
	}

#ifdef INVARIANTS
	RB_FOREACH(entry, iommu_gas_entries_tree, &domain->rb_root) {
		if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
			continue;
		KASSERT(entry->end <= start || entry->start >= end,
		    ("iommu_gas_remove leftover entry (%#jx, %#jx) range "
		    "(%#jx, %#jx)",
		    entry->start, entry->end, start, end));
	}
#endif

	IOMMU_DOMAIN_UNLOCK(domain);
	if (r1 != NULL)
		iommu_gas_free_entry(r1);
	if (r2 != NULL)
		iommu_gas_free_entry(r2);
	iommu_domain_unload(domain, &gc, true);
}

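/*
 * Example of the clipping above (editor's note, made-up addresses): with a
 * mapped entry [0x8000, 0xc000) in the tree, iommu_gas_remove(domain,
 * 0xa000, 0x6000) splits it at 0xa000.  The original entry is trimmed to
 * [0x8000, 0xa000) and stays mapped, while the new entry covering
 * [0xa000, 0xc000) is queued on the garbage-collection list and handed to
 * iommu_domain_unload() for (possibly deferred) IOTLB invalidation.  RMRR
 * entries inside the range are left untouched.
 */
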
int
iommu_gas_map(struct iommu_domain *domain,
    const struct bus_dma_tag_common *common, iommu_gaddr_t size, int offset,
    u_int eflags, u_int flags, vm_page_t *ma, struct iommu_map_entry **res)
{
	struct iommu_gas_match_args a;
	struct iommu_map_entry *entry;
	int error;

	KASSERT((flags & ~(IOMMU_MF_CANWAIT | IOMMU_MF_CANSPLIT)) == 0,
	    ("invalid flags 0x%x", flags));

	a.size = size;
	a.offset = offset;
	a.common = common;
	a.gas_flags = flags;
	entry = iommu_gas_alloc_entry(domain,
	    (flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0);
	if (entry == NULL)
		return (ENOMEM);
	a.entry = entry;
	IOMMU_DOMAIN_LOCK(domain);
	error = iommu_gas_find_space(domain, &a);
	if (error == ENOMEM) {
		IOMMU_DOMAIN_UNLOCK(domain);
		iommu_gas_free_entry(entry);
		return (error);
	}
#ifdef INVARIANTS
	if (iommu_check_free)
		iommu_gas_check_free(domain);
#endif
	KASSERT(error == 0,
	    ("unexpected error %d from iommu_gas_find_entry", error));
	KASSERT(entry->end < domain->end, ("allocated GPA %jx, max GPA %jx",
	    (uintmax_t)entry->end, (uintmax_t)domain->end));
	entry->flags |= eflags;
	IOMMU_DOMAIN_UNLOCK(domain);

	error = domain->ops->map(domain, entry->start,
	    entry->end - entry->start, ma, eflags,
	    ((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0));
	if (error == ENOMEM) {
		iommu_domain_unload_entry(entry, true,
		    (flags & IOMMU_MF_CANWAIT) != 0);
		return (error);
	}
	KASSERT(error == 0,
	    ("unexpected error %d from domain_map_buf", error));

	*res = entry;
	return (0);
}

int
iommu_gas_map_region(struct iommu_domain *domain, struct iommu_map_entry *entry,
    u_int eflags, u_int flags, vm_page_t *ma)
{
	iommu_gaddr_t start;
	int error;

	KASSERT(entry->domain == domain,
	    ("mismatched domain %p entry %p entry->domain %p", domain,
	    entry, entry->domain));
	KASSERT(entry->flags == 0, ("used RMRR entry %p %p %x", domain,
	    entry, entry->flags));
	KASSERT((flags & ~(IOMMU_MF_CANWAIT | IOMMU_MF_RMRR)) == 0,
	    ("invalid flags 0x%x", flags));

	start = entry->start;
	IOMMU_DOMAIN_LOCK(domain);
	error = iommu_gas_alloc_region(domain, entry, flags);
	if (error != 0) {
		IOMMU_DOMAIN_UNLOCK(domain);
		return (error);
	}
	entry->flags |= eflags;
	IOMMU_DOMAIN_UNLOCK(domain);
	if (entry->end == entry->start)
		return (0);

	error = domain->ops->map(domain, entry->start,
	    entry->end - entry->start, ma + OFF_TO_IDX(start - entry->start),
	    eflags, ((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0));
	if (error == ENOMEM) {
		iommu_domain_unload_entry(entry, false,
		    (flags & IOMMU_MF_CANWAIT) != 0);
		return (error);
	}
	KASSERT(error == 0,
	    ("unexpected error %d from domain_map_buf", error));

	return (0);
}

static int
iommu_gas_reserve_region_locked(struct iommu_domain *domain,
    iommu_gaddr_t start, iommu_gaddr_t end, struct iommu_map_entry *entry)
{
	int error;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);

	entry->start = start;
	entry->end = end;
	error = iommu_gas_alloc_region(domain, entry, IOMMU_MF_CANWAIT);
	if (error == 0)
		entry->flags |= IOMMU_MAP_ENTRY_UNMAPPED;
	return (error);
}

int
iommu_gas_reserve_region(struct iommu_domain *domain, iommu_gaddr_t start,
    iommu_gaddr_t end, struct iommu_map_entry **entry0)
{
	struct iommu_map_entry *entry;
	int error;

	entry = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
	IOMMU_DOMAIN_LOCK(domain);
	error = iommu_gas_reserve_region_locked(domain, start, end, entry);
	IOMMU_DOMAIN_UNLOCK(domain);
	if (error != 0)
		iommu_gas_free_entry(entry);
	else if (entry0 != NULL)
		*entry0 = entry;
	return (error);
}

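/*
 * Editor's illustration (not part of the driver, never compiled): a rough
 * sketch of how a consumer such as the busdma glue might drive the entry
 * points above.  The page array 'ma' and the tag 'common' are assumed to be
 * prepared by the caller; the function name is hypothetical.
 */
#if 0
static int
example_dma_window(struct iommu_domain *domain,
    const struct bus_dma_tag_common *common, vm_page_t *ma, iommu_gaddr_t size)
{
	struct iommu_map_entry *entry;
	int error;

	/* Allocate an IOVA range and install the page translations. */
	error = iommu_gas_map(domain, common, size, 0,
	    IOMMU_MAP_ENTRY_READ | IOMMU_MAP_ENTRY_WRITE,
	    IOMMU_MF_CANWAIT, ma, &entry);
	if (error != 0)
		return (error);

	/* The device may now DMA into [entry->start, entry->end). */

	/* Tear down: queue the unmap and free the entry once invalidated. */
	iommu_domain_unload_entry(entry, true, true);
	return (0);
}
#endif
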
/*
 * As in iommu_gas_reserve_region, reserve [start, end), but allow for
 * existing entries.
 */
int
iommu_gas_reserve_region_extend(struct iommu_domain *domain,
    iommu_gaddr_t start, iommu_gaddr_t end)
{
	struct iommu_map_entry *entry, *next, *prev, key = {};
	iommu_gaddr_t entry_start, entry_end;
	int error;

	error = 0;
	entry = NULL;
	end = ummin(end, domain->end);
	while (start < end) {
		/* Preallocate an entry. */
		if (entry == NULL)
			entry = iommu_gas_alloc_entry(domain,
			    IOMMU_PGF_WAITOK);
		/* Calculate the free region from here to the next entry. */
		key.start = key.end = start;
		IOMMU_DOMAIN_LOCK(domain);
		next = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, &key);
		KASSERT(next != NULL, ("domain %p with end %#jx has no entry "
		    "after %#jx", domain, (uintmax_t)domain->end,
		    (uintmax_t)start));
		entry_end = ummin(end, next->start);
		prev = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, next);
		if (prev != NULL)
			entry_start = ummax(start, prev->end);
		else
			entry_start = start;
		start = next->end;
		/* Reserve the region if non-empty. */
		if (entry_start != entry_end) {
			error = iommu_gas_reserve_region_locked(domain,
			    entry_start, entry_end, entry);
			if (error != 0) {
				IOMMU_DOMAIN_UNLOCK(domain);
				break;
			}
			entry = NULL;
		}
		IOMMU_DOMAIN_UNLOCK(domain);
	}
	/* Release a preallocated entry if it was not used. */
	if (entry != NULL)
		iommu_gas_free_entry(entry);
	return (error);
}

void
iommu_unmap_msi(struct iommu_ctx *ctx)
{
	struct iommu_map_entry *entry;
	struct iommu_domain *domain;

	domain = ctx->domain;
	entry = domain->msi_entry;
	if (entry == NULL)
		return;

	domain->ops->unmap(domain, entry->start, entry->end -
	    entry->start, IOMMU_PGF_WAITOK);

	iommu_gas_free_space(entry);

	iommu_gas_free_entry(entry);

	domain->msi_entry = NULL;
	domain->msi_base = 0;
	domain->msi_phys = 0;
}

int
iommu_map_msi(struct iommu_ctx *ctx, iommu_gaddr_t size, int offset,
    u_int eflags, u_int flags, vm_page_t *ma)
{
	struct iommu_domain *domain;
	struct iommu_map_entry *entry;
	int error;

	error = 0;
	domain = ctx->domain;

	/* Check if there is already an MSI page allocated */
	IOMMU_DOMAIN_LOCK(domain);
	entry = domain->msi_entry;
	IOMMU_DOMAIN_UNLOCK(domain);

	if (entry == NULL) {
		error = iommu_gas_map(domain, &ctx->tag->common, size, offset,
		    eflags, flags, ma, &entry);
		IOMMU_DOMAIN_LOCK(domain);
		if (error == 0) {
			if (domain->msi_entry == NULL) {
				MPASS(domain->msi_base == 0);
				MPASS(domain->msi_phys == 0);

				domain->msi_entry = entry;
				domain->msi_base = entry->start;
				domain->msi_phys = VM_PAGE_TO_PHYS(ma[0]);
			} else {
				/*
				 * We lost the race and already have an
				 * MSI page allocated.  Free the unneeded
				 * entry.
				 */
				iommu_gas_free_entry(entry);
			}
		} else if (domain->msi_entry != NULL) {
			/*
			 * The allocation failed, but another succeeded.
			 * Return success as there is a valid MSI page.
			 */
			error = 0;
		}
		IOMMU_DOMAIN_UNLOCK(domain);
	}

	return (error);
}

void
iommu_translate_msi(struct iommu_domain *domain, uint64_t *addr)
{

	*addr = (*addr - domain->msi_phys) + domain->msi_base;

	KASSERT(*addr >= domain->msi_entry->start,
	    ("%s: Address is below the MSI entry start address (%jx < %jx)",
	    __func__, (uintmax_t)*addr, (uintmax_t)domain->msi_entry->start));

	KASSERT(*addr + sizeof(*addr) <= domain->msi_entry->end,
	    ("%s: Address is above the MSI entry end address (%jx < %jx)",
	    __func__, (uintmax_t)*addr, (uintmax_t)domain->msi_entry->end));
}

SYSCTL_NODE(_hw, OID_AUTO, iommu, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "");

#ifdef INVARIANTS
SYSCTL_INT(_hw_iommu, OID_AUTO, check_free, CTLFLAG_RWTUN,
    &iommu_check_free, 0,
    "Check the GPA RBtree for free_down and free_after validity");
#endif