/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#define	RB_AUGMENT_CHECK(entry) iommu_gas_augment_entry(entry)

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/rman.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/uma.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/iommu/iommu.h>
#include <dev/iommu/iommu_gas.h>
#include <dev/iommu/iommu_msi.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <machine/iommu.h>
#include <dev/iommu/busdma_iommu.h>

/*
 * Guest Address Space management.
 */

static uma_zone_t iommu_map_entry_zone;

#ifdef INVARIANTS
static int iommu_check_free;
#endif

static void
intel_gas_init(void)
{

        iommu_map_entry_zone = uma_zcreate("IOMMU_MAP_ENTRY",
            sizeof(struct iommu_map_entry), NULL, NULL,
            NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NODUMP);
}
SYSINIT(intel_gas, SI_SUB_DRIVERS, SI_ORDER_FIRST, intel_gas_init, NULL);

struct iommu_map_entry *
iommu_gas_alloc_entry(struct iommu_domain *domain, u_int flags)
{
        struct iommu_map_entry *res;

        KASSERT((flags & ~(IOMMU_PGF_WAITOK)) == 0,
            ("unsupported flags %x", flags));

        res = uma_zalloc(iommu_map_entry_zone, ((flags & IOMMU_PGF_WAITOK) !=
            0 ? M_WAITOK : M_NOWAIT) | M_ZERO);
        if (res != NULL && domain != NULL) {
                res->domain = domain;
                atomic_add_int(&domain->entries_cnt, 1);
        }
        return (res);
}
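
/*
 * Usage sketch (illustrative, not part of the driver): callers that can
 * sleep pass IOMMU_PGF_WAITOK, which maps to M_WAITOK above and cannot
 * return NULL; all other callers must check for allocation failure:
 *
 *	entry = iommu_gas_alloc_entry(domain, 0);
 *	if (entry == NULL)
 *		return (ENOMEM);
 */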

void
iommu_gas_free_entry(struct iommu_map_entry *entry)
{
        struct iommu_domain *domain;

        domain = entry->domain;
        if (domain != NULL)
                atomic_subtract_int(&domain->entries_cnt, 1);
        uma_zfree(iommu_map_entry_zone, entry);
}

static int
iommu_gas_cmp_entries(struct iommu_map_entry *a, struct iommu_map_entry *b)
{

        /* The last entry has zero size, so <= */
        KASSERT(a->start <= a->end, ("inverted entry %p (%jx, %jx)",
            a, (uintmax_t)a->start, (uintmax_t)a->end));
        KASSERT(b->start <= b->end, ("inverted entry %p (%jx, %jx)",
            b, (uintmax_t)b->start, (uintmax_t)b->end));
        KASSERT(a->end <= b->start || b->end <= a->start ||
            a->end == a->start || b->end == b->start,
            ("overlapping entries %p (%jx, %jx) %p (%jx, %jx)",
            a, (uintmax_t)a->start, (uintmax_t)a->end,
            b, (uintmax_t)b->start, (uintmax_t)b->end));

        if (a->end < b->end)
                return (-1);
        else if (b->end < a->end)
                return (1);
        return (0);
}

/*
 * Update augmentation data based on data from children.
 * Return true if and only if the update changes the augmentation data.
 */
static bool
iommu_gas_augment_entry(struct iommu_map_entry *entry)
{
        struct iommu_map_entry *child;
        iommu_gaddr_t bound, delta, free_down;

        free_down = 0;
        bound = entry->start;
        if ((child = RB_LEFT(entry, rb_entry)) != NULL) {
                free_down = MAX(child->free_down, bound - child->last);
                bound = child->first;
        }
        delta = bound - entry->first;
        entry->first = bound;
        bound = entry->end;
        if ((child = RB_RIGHT(entry, rb_entry)) != NULL) {
                free_down = MAX(free_down, child->free_down);
                free_down = MAX(free_down, child->first - bound);
                bound = child->last;
        }
        delta += entry->last - bound;
        if (delta == 0)
                delta = entry->free_down - free_down;
        entry->last = bound;
        entry->free_down = free_down;

        /*
         * Return true either if the value of last-first changed,
         * or if free_down changed.
         */
        return (delta != 0);
}
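
/*
 * Worked example for the augmentation above (hypothetical addresses):
 * let a node E cover [0x4000, 0x6000), its left child L have first/last
 * of 0x0000/0x2000, and its right child R have first/last of
 * 0x9000/0xb000, both children with free_down of 0.  Then
 *
 *	E->first = 0x0000, E->last = 0xb000,
 *	E->free_down = MAX(L->free_down, 0x4000 - 0x2000,
 *	    R->free_down, 0x9000 - 0x6000) = 0x3000,
 *
 * i.e. the largest free gap within E's subtree is the 0x3000-byte hole
 * between E->end and R->first.
 */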

RB_GENERATE(iommu_gas_entries_tree, iommu_map_entry, rb_entry,
    iommu_gas_cmp_entries);

#ifdef INVARIANTS
static void
iommu_gas_check_free(struct iommu_domain *domain)
{
        struct iommu_map_entry *entry, *l, *r;
        iommu_gaddr_t v;

        RB_FOREACH(entry, iommu_gas_entries_tree, &domain->rb_root) {
                KASSERT(domain == entry->domain,
                    ("mismatched free domain %p entry %p entry->domain %p",
                    domain, entry, entry->domain));
                l = RB_LEFT(entry, rb_entry);
                r = RB_RIGHT(entry, rb_entry);
                v = 0;
                if (l != NULL) {
                        v = MAX(v, l->free_down);
                        v = MAX(v, entry->start - l->last);
                }
                if (r != NULL) {
                        v = MAX(v, r->free_down);
                        v = MAX(v, r->first - entry->end);
                }
                MPASS(entry->free_down == v);
        }
}
#endif

static void
iommu_gas_rb_remove(struct iommu_domain *domain, struct iommu_map_entry *entry)
{
        struct iommu_map_entry *nbr;

        /* Removing the entry may open a new free gap before domain->start_gap. */
        if (entry->end <= domain->start_gap->end) {
                if (RB_RIGHT(entry, rb_entry) != NULL)
                        nbr = iommu_gas_entries_tree_RB_NEXT(entry);
                else if (RB_LEFT(entry, rb_entry) != NULL)
                        nbr = RB_LEFT(entry, rb_entry);
                else
                        nbr = RB_PARENT(entry, rb_entry);
                domain->start_gap = nbr;
        }
        RB_REMOVE(iommu_gas_entries_tree, &domain->rb_root, entry);
}

struct iommu_domain *
iommu_get_ctx_domain(struct iommu_ctx *ctx)
{

        return (ctx->domain);
}

void
iommu_gas_init_domain(struct iommu_domain *domain)
{
        struct iommu_map_entry *begin, *end;

        begin = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
        end = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);

        IOMMU_DOMAIN_LOCK(domain);
        KASSERT(domain->entries_cnt == 2, ("dirty domain %p", domain));
        KASSERT(RB_EMPTY(&domain->rb_root),
            ("non-empty entries %p", domain));

        /*
         * The end entry must be inserted first because it has a zero-length
         * gap between start and end.  Initially, all augmentation data for a
         * new entry is zero.  For the end entry, iommu_gas_augment_entry
         * computes no change in the value of (last - first) and no change in
         * the value of free_down, so it returns false to suggest that
         * nothing changed in the entry.  Thus, if the end entry were
         * inserted second, that false return would prevent the augmentation
         * data from being propagated to the begin entry at the tree root.
         * So the end entry is inserted first.
         */
        end->start = domain->end;
        end->end = domain->end;
        end->flags = IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED;
        RB_INSERT(iommu_gas_entries_tree, &domain->rb_root, end);

        begin->start = 0;
        begin->end = IOMMU_PAGE_SIZE;
        begin->flags = IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED;
        RB_INSERT_PREV(iommu_gas_entries_tree, &domain->rb_root, end, begin);

        domain->start_gap = begin;
        domain->first_place = begin;
        domain->last_place = end;
        domain->flags |= IOMMU_DOMAIN_GAS_INITED;
        IOMMU_DOMAIN_UNLOCK(domain);
}
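
/*
 * Illustrative layout after iommu_gas_init_domain() (a sketch, not
 * normative): the tree holds exactly two placeholder entries,
 *
 *	begin = [0, IOMMU_PAGE_SIZE)		keeps GPA zero reserved
 *	end   = [domain->end, domain->end)	zero-length upper sentinel
 *
 * and all later allocations are carved out of the single free gap
 * between begin->end and end->start, so a search always finds a map
 * entry on both sides of any free range.
 */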

void
iommu_gas_fini_domain(struct iommu_domain *domain)
{
        struct iommu_map_entry *entry;

        IOMMU_DOMAIN_ASSERT_LOCKED(domain);
        KASSERT(domain->entries_cnt == 2,
            ("domain still in use %p", domain));

        entry = RB_MIN(iommu_gas_entries_tree, &domain->rb_root);
        KASSERT(entry->start == 0, ("start entry start %p", domain));
        KASSERT(entry->end == IOMMU_PAGE_SIZE, ("start entry end %p", domain));
        KASSERT(entry->flags ==
            (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED),
            ("start entry flags %p", domain));
        iommu_gas_rb_remove(domain, entry);
        iommu_gas_free_entry(entry);

        entry = RB_MAX(iommu_gas_entries_tree, &domain->rb_root);
        KASSERT(entry->start == domain->end, ("end entry start %p", domain));
        KASSERT(entry->end == domain->end, ("end entry end %p", domain));
        KASSERT(entry->flags ==
            (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED),
            ("end entry flags %p", domain));
        iommu_gas_rb_remove(domain, entry);
        iommu_gas_free_entry(entry);
}

struct iommu_gas_match_args {
        iommu_gaddr_t size;
        int offset;
        const struct bus_dma_tag_common *common;
        u_int gas_flags;
        struct iommu_map_entry *entry;
};

/*
 * The interval [beg, end) is a free interval between two iommu_map_entries.
 * Addresses can be allocated only in the range [lbound, ubound].  Try to
 * allocate space in the free interval, subject to the conditions expressed by
 * a, and return 'true' if and only if the allocation attempt succeeds.
 */
static bool
iommu_gas_match_one(struct iommu_gas_match_args *a, iommu_gaddr_t beg,
    iommu_gaddr_t end, iommu_gaddr_t lbound, iommu_gaddr_t ubound)
{
        struct iommu_map_entry *entry;
        iommu_gaddr_t first, size, start;
        int offset;

        /*
         * prev->end is always aligned on the page size, which causes
         * page alignment for entry->start too.
         *
         * Create IOMMU_PAGE_SIZE gaps before and after the new entry
         * to ensure that out-of-bounds accesses fault.
         */
        beg = MAX(beg + IOMMU_PAGE_SIZE, lbound);
        start = roundup2(beg, a->common->alignment);
        if (start < beg)
                return (false);
        end = MIN(end - IOMMU_PAGE_SIZE - 1, ubound);
        offset = a->offset;
        size = a->size;
        if (start + offset + size - 1 > end)
                return (false);

        /* Check for and try to skip past boundary crossing. */
        if (!vm_addr_bound_ok(start + offset, size, a->common->boundary)) {
                /*
                 * The start + offset to start + offset + size region
                 * crosses the boundary.  Check whether there is enough
                 * space after the next boundary following beg.
                 */
                first = start;
                beg = roundup2(start + offset + 1, a->common->boundary);
                start = roundup2(beg, a->common->alignment);

                if (start + offset + size - 1 > end ||
                    !vm_addr_bound_ok(start + offset, size,
                    a->common->boundary)) {
                        /*
                         * Not enough space to align at the requested
                         * boundary, or boundary is smaller than the size,
                         * but allowed to split.  We already checked that
                         * start + size does not overlap ubound.
                         *
                         * XXXKIB. It is possible that beg is exactly at the
                         * start of the next entry, then we do not have a
                         * gap.  Ignore for now.
                         */
                        if ((a->gas_flags & IOMMU_MF_CANSPLIT) == 0)
                                return (false);
                        size = beg - first - offset;
                        start = first;
                }
        }
        entry = a->entry;
        entry->start = start;
        entry->end = start + roundup2(size + offset, IOMMU_PAGE_SIZE);
        entry->flags = IOMMU_MAP_ENTRY_MAP;
        return (true);
}
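
/*
 * Worked example for the matching above (hypothetical numbers,
 * IOMMU_PAGE_SIZE assumed to be 0x1000): given the free interval
 * [0x2000, 0x10000), lbound = 0, ubound = ~0, a->size = 0x2000,
 * a->offset = 0, alignment 0x4000 and no boundary restriction:
 *
 *	beg   = MAX(0x2000 + 0x1000, 0) = 0x3000	(leading guard page)
 *	start = roundup2(0x3000, 0x4000) = 0x4000
 *	end   = MIN(0x10000 - 0x1000 - 1, ~0) = 0xefff	(trailing guard page)
 *
 * Since 0x4000 + 0x2000 - 1 = 0x5fff <= 0xefff, the match succeeds and
 * the new entry becomes [0x4000, 0x6000).
 */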

/* Find the next entry that might abut a big-enough range. */
static struct iommu_map_entry *
iommu_gas_next(struct iommu_map_entry *curr, iommu_gaddr_t min_free)
{
        struct iommu_map_entry *next;

        if ((next = RB_RIGHT(curr, rb_entry)) != NULL &&
            next->free_down >= min_free) {
                /* Find next entry in right subtree. */
                do
                        curr = next;
                while ((next = RB_LEFT(curr, rb_entry)) != NULL &&
                    next->free_down >= min_free);
        } else {
                /* Find next entry in a left-parent ancestor. */
                while ((next = RB_PARENT(curr, rb_entry)) != NULL &&
                    curr == RB_RIGHT(next, rb_entry))
                        curr = next;
                curr = next;
        }
        return (curr);
}

/*
 * Address-ordered first-fit search of 'domain' for free space satisfying the
 * conditions of 'a'.  The space allocated is at least one page big, and is
 * bounded by guard pages to the left and right.  The allocated space for
 * 'domain' is described by an rb-tree of map entries at domain->rb_root, and
 * domain->start_gap points to a map entry less than or adjacent to the first
 * free-space of size at least 3 pages.
 */
static int
iommu_gas_find_space(struct iommu_domain *domain,
    struct iommu_gas_match_args *a)
{
        struct iommu_map_entry *curr, *first;
        iommu_gaddr_t addr, min_free;

        IOMMU_DOMAIN_ASSERT_LOCKED(domain);
        KASSERT(a->entry->flags == 0,
            ("dirty entry %p %p", domain, a->entry));

        /*
         * start_gap may point to an entry adjacent to gaps too small for any
         * new allocation.  In that case, advance start_gap to the first free
         * space big enough for a minimum allocation plus two guard pages.
         */
        min_free = 3 * IOMMU_PAGE_SIZE;
        first = domain->start_gap;
        while (first != NULL && first->free_down < min_free)
                first = RB_PARENT(first, rb_entry);
        for (curr = first; curr != NULL;
            curr = iommu_gas_next(curr, min_free)) {
                if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
                    first->last + min_free <= curr->start)
                        break;
                if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
                    curr->end + min_free <= first->first)
                        break;
        }
        domain->start_gap = curr;

        /*
         * If the subtree doesn't have free space for the requested allocation
         * plus two guard pages, skip it.
         */
        min_free = 2 * IOMMU_PAGE_SIZE +
            roundup2(a->size + a->offset, IOMMU_PAGE_SIZE);

        /* Climb to find a node in the subtree of big-enough ranges. */
        first = curr;
        while (first != NULL && first->free_down < min_free)
                first = RB_PARENT(first, rb_entry);

        /*
         * Walk the big-enough ranges tree until one satisfies alignment
         * requirements, or violates the lowaddr address requirement.
         */
        addr = a->common->lowaddr;
        for (curr = first; curr != NULL;
            curr = iommu_gas_next(curr, min_free)) {
                if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
                    iommu_gas_match_one(a, first->last, curr->start,
                    0, addr)) {
                        RB_INSERT_PREV(iommu_gas_entries_tree,
                            &domain->rb_root, curr, a->entry);
                        return (0);
                }
                if (curr->end >= addr) {
                        /* All remaining ranges > addr */
                        break;
                }
                if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
                    iommu_gas_match_one(a, curr->end, first->first,
                    0, addr)) {
                        RB_INSERT_NEXT(iommu_gas_entries_tree,
                            &domain->rb_root, curr, a->entry);
                        return (0);
                }
        }

        /*
         * To resume the search at the start of the upper region, first climb
         * to the nearest ancestor that spans highaddr.  Then find the last
         * entry before highaddr that could abut a big-enough range.
         */
        addr = a->common->highaddr;
        while (curr != NULL && curr->last < addr)
                curr = RB_PARENT(curr, rb_entry);
        first = NULL;
        while (curr != NULL && curr->free_down >= min_free) {
                if (addr < curr->end)
                        curr = RB_LEFT(curr, rb_entry);
                else {
                        first = curr;
                        curr = RB_RIGHT(curr, rb_entry);
                }
        }

        /*
         * Walk the remaining big-enough ranges until one satisfies alignment
         * requirements.
         */
        for (curr = first; curr != NULL;
            curr = iommu_gas_next(curr, min_free)) {
                if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
                    iommu_gas_match_one(a, first->last, curr->start,
                    addr + 1, domain->end - 1)) {
                        RB_INSERT_PREV(iommu_gas_entries_tree,
                            &domain->rb_root, curr, a->entry);
                        return (0);
                }
                if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
                    iommu_gas_match_one(a, curr->end, first->first,
                    addr + 1, domain->end - 1)) {
                        RB_INSERT_NEXT(iommu_gas_entries_tree,
                            &domain->rb_root, curr, a->entry);
                        return (0);
                }
        }

        return (ENOMEM);
}
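
/*
 * Note on the search order above (informal): a busdma tag may exclude
 * the window (lowaddr, highaddr) from DMA.  iommu_gas_find_space()
 * therefore first walks the free gaps with ubound = lowaddr, and only
 * if that pass fails does it resume the same first-fit walk with
 * lbound = highaddr + 1 and ubound = domain->end - 1, so allocations
 * prefer the region below lowaddr.
 */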

static int
iommu_gas_alloc_region(struct iommu_domain *domain, struct iommu_map_entry *entry,
    u_int flags)
{
        struct iommu_map_entry *next, *prev;

        IOMMU_DOMAIN_ASSERT_LOCKED(domain);

        if ((entry->start & IOMMU_PAGE_MASK) != 0 ||
            (entry->end & IOMMU_PAGE_MASK) != 0)
                return (EINVAL);
        if (entry->start >= entry->end)
                return (EINVAL);
        if (entry->end >= domain->end)
                return (EINVAL);

        next = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, entry);
        KASSERT(next != NULL, ("next must be non-null %p %jx", domain,
            (uintmax_t)entry->start));
        prev = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, next);
        /* prev could be NULL */

        /*
         * Adapt to broken BIOSes which specify overlapping RMRR
         * entries.
         *
         * XXXKIB: this does not handle a case when prev or next
         * entries are completely covered by the current one, which
         * extends both ways.
         */
        if (prev != NULL && prev->end > entry->start &&
            (prev->flags & IOMMU_MAP_ENTRY_PLACE) == 0) {
                if ((flags & IOMMU_MF_RMRR) == 0 ||
                    (prev->flags & IOMMU_MAP_ENTRY_RMRR) == 0)
                        return (EBUSY);
                entry->start = prev->end;
        }
        if (next->start < entry->end &&
            (next->flags & IOMMU_MAP_ENTRY_PLACE) == 0) {
                if ((flags & IOMMU_MF_RMRR) == 0 ||
                    (next->flags & IOMMU_MAP_ENTRY_RMRR) == 0)
                        return (EBUSY);
                entry->end = next->start;
        }
        if (entry->end == entry->start)
                return (0);

        if (prev != NULL && prev->end > entry->start) {
                /* This assumes that prev is the placeholder entry. */
                iommu_gas_rb_remove(domain, prev);
                prev = NULL;
        }
        RB_INSERT_PREV(iommu_gas_entries_tree,
            &domain->rb_root, next, entry);
        if (next->start < entry->end) {
                iommu_gas_rb_remove(domain, next);
                next = NULL;
        }

        if ((flags & IOMMU_MF_RMRR) != 0)
                entry->flags = IOMMU_MAP_ENTRY_RMRR;

#ifdef INVARIANTS
        struct iommu_map_entry *ip, *in;
        ip = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, entry);
        in = RB_NEXT(iommu_gas_entries_tree, &domain->rb_root, entry);
        KASSERT(prev == NULL || ip == prev,
            ("RMRR %p (%jx %jx) prev %p (%jx %jx) ins prev %p (%jx %jx)",
            entry, entry->start, entry->end, prev,
            prev == NULL ? 0 : prev->start, prev == NULL ? 0 : prev->end,
            ip, ip == NULL ? 0 : ip->start, ip == NULL ? 0 : ip->end));
        KASSERT(next == NULL || in == next,
            ("RMRR %p (%jx %jx) next %p (%jx %jx) ins next %p (%jx %jx)",
            entry, entry->start, entry->end, next,
            next == NULL ? 0 : next->start, next == NULL ? 0 : next->end,
            in, in == NULL ? 0 : in->start, in == NULL ? 0 : in->end));
#endif

        return (0);
}
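
/*
 * Illustrative example for the RMRR adaptation above (hypothetical
 * addresses): if a BIOS reports overlapping RMRR regions and the tree
 * already holds an RMRR entry [0xa0000, 0xa3000), an IOMMU_MF_RMRR
 * request for [0xa2000, 0xa5000) is not rejected with EBUSY; instead
 * its start is clipped up to prev->end, and only the remainder
 * [0xa3000, 0xa5000) is inserted.
 */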

void
iommu_gas_free_space(struct iommu_map_entry *entry)
{
        struct iommu_domain *domain;

        domain = entry->domain;
        KASSERT((entry->flags & (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_RMRR |
            IOMMU_MAP_ENTRY_MAP)) == IOMMU_MAP_ENTRY_MAP,
            ("permanent entry %p %p", domain, entry));

        IOMMU_DOMAIN_LOCK(domain);
        iommu_gas_rb_remove(domain, entry);
        entry->flags &= ~IOMMU_MAP_ENTRY_MAP;
#ifdef INVARIANTS
        if (iommu_check_free)
                iommu_gas_check_free(domain);
#endif
        IOMMU_DOMAIN_UNLOCK(domain);
}

void
iommu_gas_free_region(struct iommu_map_entry *entry)
{
        struct iommu_domain *domain;

        domain = entry->domain;
        KASSERT((entry->flags & (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_RMRR |
            IOMMU_MAP_ENTRY_MAP)) == IOMMU_MAP_ENTRY_RMRR,
            ("non-RMRR entry %p %p", domain, entry));

        IOMMU_DOMAIN_LOCK(domain);
        if (entry != domain->first_place &&
            entry != domain->last_place)
                iommu_gas_rb_remove(domain, entry);
        entry->flags &= ~IOMMU_MAP_ENTRY_RMRR;
        IOMMU_DOMAIN_UNLOCK(domain);
}

static struct iommu_map_entry *
iommu_gas_remove_clip_left(struct iommu_domain *domain, iommu_gaddr_t start,
    iommu_gaddr_t end, struct iommu_map_entry **r)
{
        struct iommu_map_entry *entry, *res, fentry;

        IOMMU_DOMAIN_ASSERT_LOCKED(domain);
        MPASS(start <= end);
        MPASS(end <= domain->end);

        /*
         * Find an entry which contains the supplied guest address
         * start, or the first entry after the start.  Since we
         * asserted that start is below domain end, entry should
         * exist.  Then clip it if needed.
         */
        fentry.start = start + 1;
        fentry.end = start + 1;
        entry = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, &fentry);

        if (entry->start >= start ||
            (entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
                return (entry);

        res = *r;
        *r = NULL;
        *res = *entry;
        res->start = entry->end = start;
        RB_UPDATE_AUGMENT(entry, rb_entry);
        RB_INSERT_NEXT(iommu_gas_entries_tree,
            &domain->rb_root, entry, res);
        return (res);
}

static bool
iommu_gas_remove_clip_right(struct iommu_domain *domain,
    iommu_gaddr_t end, struct iommu_map_entry *entry,
    struct iommu_map_entry *r)
{
        if (entry->start >= end || (entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
                return (false);

        *r = *entry;
        r->end = entry->start = end;
        RB_UPDATE_AUGMENT(entry, rb_entry);
        RB_INSERT_PREV(iommu_gas_entries_tree,
            &domain->rb_root, entry, r);
        return (true);
}

static void
iommu_gas_remove_unmap(struct iommu_domain *domain,
    struct iommu_map_entry *entry, struct iommu_map_entries_tailq *gcp)
{
        IOMMU_DOMAIN_ASSERT_LOCKED(domain);

        if ((entry->flags & (IOMMU_MAP_ENTRY_UNMAPPED |
            IOMMU_MAP_ENTRY_REMOVING)) != 0)
                return;
        MPASS((entry->flags & IOMMU_MAP_ENTRY_PLACE) == 0);
        entry->flags |= IOMMU_MAP_ENTRY_REMOVING;
        TAILQ_INSERT_TAIL(gcp, entry, dmamap_link);
}
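
/*
 * Sketch of the clip helpers above (hypothetical addresses): given an
 * entry [0x2000, 0x6000),
 *
 *	iommu_gas_remove_clip_left() with start = 0x3000 splits it into
 *	[0x2000, 0x3000) (the original entry, kept) and [0x3000, 0x6000)
 *	(the preallocated *r, returned), so the part below the removal
 *	range survives;
 *
 *	iommu_gas_remove_clip_right() with end = 0x5000 then splits
 *	[0x3000, 0x6000) into r = [0x3000, 0x5000) and the remaining
 *	entry [0x5000, 0x6000), so the part above the range survives.
 */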

/*
 * Remove the specified range from the GAS of the domain.  Note that
 * the removal is not guaranteed to occur upon function return; it
 * might be finalized some time later, when the hardware reports that
 * the (queued) IOTLB invalidation has been performed.
 */
void
iommu_gas_remove(struct iommu_domain *domain, iommu_gaddr_t start,
    iommu_gaddr_t size)
{
        struct iommu_map_entry *entry, *nentry, *r1, *r2;
        struct iommu_map_entries_tailq gc;
        iommu_gaddr_t end;

        end = start + size;
        r1 = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
        r2 = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
        TAILQ_INIT(&gc);

        IOMMU_DOMAIN_LOCK(domain);

        nentry = iommu_gas_remove_clip_left(domain, start, end, &r1);
        RB_FOREACH_FROM(entry, iommu_gas_entries_tree, nentry) {
                if (entry->start >= end)
                        break;
                KASSERT(start <= entry->start,
                    ("iommu_gas_remove entry (%#jx, %#jx) start %#jx",
                    entry->start, entry->end, start));
                if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
                        continue;
                iommu_gas_remove_unmap(domain, entry, &gc);
        }
        if (iommu_gas_remove_clip_right(domain, end, entry, r2)) {
                iommu_gas_remove_unmap(domain, r2, &gc);
                r2 = NULL;
        }

#ifdef INVARIANTS
        RB_FOREACH(entry, iommu_gas_entries_tree, &domain->rb_root) {
                if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
                        continue;
                KASSERT(entry->end <= start || entry->start >= end,
                    ("iommu_gas_remove leftover entry (%#jx, %#jx) range "
                    "(%#jx, %#jx)",
                    entry->start, entry->end, start, end));
        }
#endif

        IOMMU_DOMAIN_UNLOCK(domain);
        if (r1 != NULL)
                iommu_gas_free_entry(r1);
        if (r2 != NULL)
                iommu_gas_free_entry(r2);
        iommu_domain_unload(domain, &gc, true);
}

int
iommu_gas_map(struct iommu_domain *domain,
    const struct bus_dma_tag_common *common, iommu_gaddr_t size, int offset,
    u_int eflags, u_int flags, vm_page_t *ma, struct iommu_map_entry **res)
{
        struct iommu_gas_match_args a;
        struct iommu_map_entry *entry;
        int error;

        KASSERT((flags & ~(IOMMU_MF_CANWAIT | IOMMU_MF_CANSPLIT)) == 0,
            ("invalid flags 0x%x", flags));

        a.size = size;
        a.offset = offset;
        a.common = common;
        a.gas_flags = flags;
        entry = iommu_gas_alloc_entry(domain,
            (flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0);
        if (entry == NULL)
                return (ENOMEM);
        a.entry = entry;
        IOMMU_DOMAIN_LOCK(domain);
        error = iommu_gas_find_space(domain, &a);
        if (error == ENOMEM) {
                IOMMU_DOMAIN_UNLOCK(domain);
                iommu_gas_free_entry(entry);
                return (error);
        }
#ifdef INVARIANTS
        if (iommu_check_free)
                iommu_gas_check_free(domain);
#endif
        KASSERT(error == 0,
            ("unexpected error %d from iommu_gas_find_space", error));
        KASSERT(entry->end < domain->end, ("allocated GPA %jx, max GPA %jx",
            (uintmax_t)entry->end, (uintmax_t)domain->end));
        entry->flags |= eflags;
        IOMMU_DOMAIN_UNLOCK(domain);

        error = domain->ops->map(domain, entry->start,
            entry->end - entry->start, ma, eflags,
            ((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0));
        if (error == ENOMEM) {
                iommu_domain_unload_entry(entry, true,
                    (flags & IOMMU_MF_CANWAIT) != 0);
                return (error);
        }
        KASSERT(error == 0,
            ("unexpected error %d from domain_map_buf", error));

        *res = entry;
        return (0);
}
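
/*
 * Usage sketch (hypothetical, not part of the driver): a busdma-style
 * consumer could obtain and program a mapping with
 *
 *	struct iommu_map_entry *e;
 *	error = iommu_gas_map(domain, &tag->common, size, offset,
 *	    IOMMU_MAP_ENTRY_READ | IOMMU_MAP_ENTRY_WRITE,
 *	    IOMMU_MF_CANWAIT, ma, &e);
 *
 * where 'tag' and 'ma' stand for the caller's DMA tag and page array.
 * On success the entry is linked into the GAS tree and is later torn
 * down through iommu_domain_unload().
 */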

int
iommu_gas_map_region(struct iommu_domain *domain, struct iommu_map_entry *entry,
    u_int eflags, u_int flags, vm_page_t *ma)
{
        iommu_gaddr_t start;
        int error;

        KASSERT(entry->domain == domain,
            ("mismatched domain %p entry %p entry->domain %p", domain,
            entry, entry->domain));
        KASSERT(entry->flags == 0, ("used RMRR entry %p %p %x", domain,
            entry, entry->flags));
        KASSERT((flags & ~(IOMMU_MF_CANWAIT | IOMMU_MF_RMRR)) == 0,
            ("invalid flags 0x%x", flags));

        start = entry->start;
        IOMMU_DOMAIN_LOCK(domain);
        error = iommu_gas_alloc_region(domain, entry, flags);
        if (error != 0) {
                IOMMU_DOMAIN_UNLOCK(domain);
                return (error);
        }
        entry->flags |= eflags;
        IOMMU_DOMAIN_UNLOCK(domain);
        if (entry->end == entry->start)
                return (0);

        error = domain->ops->map(domain, entry->start,
            entry->end - entry->start, ma + OFF_TO_IDX(start - entry->start),
            eflags, ((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0));
        if (error == ENOMEM) {
                iommu_domain_unload_entry(entry, false,
                    (flags & IOMMU_MF_CANWAIT) != 0);
                return (error);
        }
        KASSERT(error == 0,
            ("unexpected error %d from domain_map_buf", error));

        return (0);
}

static int
iommu_gas_reserve_region_locked(struct iommu_domain *domain,
    iommu_gaddr_t start, iommu_gaddr_t end, struct iommu_map_entry *entry)
{
        int error;

        IOMMU_DOMAIN_ASSERT_LOCKED(domain);

        entry->start = start;
        entry->end = end;
        error = iommu_gas_alloc_region(domain, entry, IOMMU_MF_CANWAIT);
        if (error == 0)
                entry->flags |= IOMMU_MAP_ENTRY_UNMAPPED;
        return (error);
}

int
iommu_gas_reserve_region(struct iommu_domain *domain, iommu_gaddr_t start,
    iommu_gaddr_t end, struct iommu_map_entry **entry0)
{
        struct iommu_map_entry *entry;
        int error;

        entry = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
        IOMMU_DOMAIN_LOCK(domain);
        error = iommu_gas_reserve_region_locked(domain, start, end, entry);
        IOMMU_DOMAIN_UNLOCK(domain);
        if (error != 0)
                iommu_gas_free_entry(entry);
        else if (entry0 != NULL)
                *entry0 = entry;
        return (error);
}

/*
 * As in iommu_gas_reserve_region, reserve [start, end), but allow for existing
 * entries.
 */
int
iommu_gas_reserve_region_extend(struct iommu_domain *domain,
    iommu_gaddr_t start, iommu_gaddr_t end)
{
        struct iommu_map_entry *entry, *next, *prev, key = {};
        iommu_gaddr_t entry_start, entry_end;
        int error;

        error = 0;
        entry = NULL;
        end = ummin(end, domain->end);
        while (start < end) {
                /* Preallocate an entry. */
                if (entry == NULL)
                        entry = iommu_gas_alloc_entry(domain,
                            IOMMU_PGF_WAITOK);
                /* Calculate the free region from here to the next entry. */
                key.start = key.end = start;
                IOMMU_DOMAIN_LOCK(domain);
                next = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, &key);
                KASSERT(next != NULL, ("domain %p with end %#jx has no entry "
                    "after %#jx", domain, (uintmax_t)domain->end,
                    (uintmax_t)start));
                entry_end = ummin(end, next->start);
                prev = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, next);
                if (prev != NULL)
                        entry_start = ummax(start, prev->end);
                else
                        entry_start = start;
                start = next->end;
                /* Reserve the region if non-empty. */
                if (entry_start != entry_end) {
                        error = iommu_gas_reserve_region_locked(domain,
                            entry_start, entry_end, entry);
                        if (error != 0) {
                                IOMMU_DOMAIN_UNLOCK(domain);
                                break;
                        }
                        entry = NULL;
                }
                IOMMU_DOMAIN_UNLOCK(domain);
        }
        /* Release a preallocated entry if it was not used. */
        if (entry != NULL)
                iommu_gas_free_entry(entry);
        return (error);
}
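
/*
 * Intended net effect of the loop above (illustrative, hypothetical
 * addresses): asking to reserve [0x20000, 0x28000) in a domain that
 * already contains an entry [0x23000, 0x24000) reserves the free
 * pieces around it, [0x20000, 0x23000) and [0x24000, 0x28000), where
 * plain iommu_gas_reserve_region() would fail with EBUSY on the
 * overlap.
 */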

void
iommu_unmap_msi(struct iommu_ctx *ctx)
{
        struct iommu_map_entry *entry;
        struct iommu_domain *domain;

        domain = ctx->domain;
        entry = domain->msi_entry;
        if (entry == NULL)
                return;

        domain->ops->unmap(domain, entry->start, entry->end -
            entry->start, IOMMU_PGF_WAITOK);

        iommu_gas_free_space(entry);

        iommu_gas_free_entry(entry);

        domain->msi_entry = NULL;
        domain->msi_base = 0;
        domain->msi_phys = 0;
}

int
iommu_map_msi(struct iommu_ctx *ctx, iommu_gaddr_t size, int offset,
    u_int eflags, u_int flags, vm_page_t *ma)
{
        struct iommu_domain *domain;
        struct iommu_map_entry *entry;
        int error;

        error = 0;
        domain = ctx->domain;

        /* Check if there is already an MSI page allocated. */
        IOMMU_DOMAIN_LOCK(domain);
        entry = domain->msi_entry;
        IOMMU_DOMAIN_UNLOCK(domain);

        if (entry == NULL) {
                error = iommu_gas_map(domain, &ctx->tag->common, size, offset,
                    eflags, flags, ma, &entry);
                IOMMU_DOMAIN_LOCK(domain);
                if (error == 0) {
                        if (domain->msi_entry == NULL) {
                                MPASS(domain->msi_base == 0);
                                MPASS(domain->msi_phys == 0);

                                domain->msi_entry = entry;
                                domain->msi_base = entry->start;
                                domain->msi_phys = VM_PAGE_TO_PHYS(ma[0]);
                        } else {
                                /*
                                 * We lost the race and already have an
                                 * MSI page allocated.  Free the unneeded
                                 * entry.
                                 */
                                iommu_gas_free_entry(entry);
                        }
                } else if (domain->msi_entry != NULL) {
                        /*
                         * The allocation failed, but another one succeeded.
                         * Return success as there is a valid MSI page.
                         */
                        error = 0;
                }
                IOMMU_DOMAIN_UNLOCK(domain);
        }

        return (error);
}

void
iommu_translate_msi(struct iommu_domain *domain, uint64_t *addr)
{

        *addr = (*addr - domain->msi_phys) + domain->msi_base;

        KASSERT(*addr >= domain->msi_entry->start,
            ("%s: Address is below the MSI entry start address (%jx < %jx)",
            __func__, (uintmax_t)*addr, (uintmax_t)domain->msi_entry->start));

        KASSERT(*addr + sizeof(*addr) <= domain->msi_entry->end,
            ("%s: Address is above the MSI entry end address (%jx > %jx)",
            __func__, (uintmax_t)*addr, (uintmax_t)domain->msi_entry->end));
}

SYSCTL_NODE(_hw, OID_AUTO, iommu, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "");

#ifdef INVARIANTS
SYSCTL_INT(_hw_iommu, OID_AUTO, check_free, CTLFLAG_RWTUN,
    &iommu_check_free, 0,
    "Check the GPA RBtree for free_down validity");
#endif
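
/*
 * Usage note (illustrative): on INVARIANTS kernels the consistency
 * check can be enabled at runtime with "sysctl hw.iommu.check_free=1"
 * or set from loader.conf, since the knob is declared CTLFLAG_RWTUN.
 */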