1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2013 The FreeBSD Foundation 5 * 6 * This software was developed by Konstantin Belousov <kib@FreeBSD.org> 7 * under sponsorship from the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 
/*
 * Hook RB-tree augmentation: keep per-node free-gap summaries (first, last,
 * free_down) up to date as entries are inserted/removed/rotated.
 */
#define	RB_AUGMENT_CHECK(entry) iommu_gas_augment_entry(entry)

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/rman.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/uma.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/iommu/iommu.h>
#include <dev/iommu/iommu_gas.h>
#include <dev/iommu/iommu_msi.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <machine/iommu.h>
#include <dev/iommu/busdma_iommu.h>

/*
 * Guest Address Space management.
 */

/* UMA zone backing all struct iommu_map_entry allocations. */
static uma_zone_t iommu_map_entry_zone;

#ifdef INVARIANTS
/* Tunable: when non-zero, validate free_down augmentation after changes. */
static int iommu_check_free;
#endif

static void
intel_gas_init(void)
{

	/*
	 * NODUMP: map entries reference DMA state and are not useful in
	 * kernel dumps.
	 */
	iommu_map_entry_zone = uma_zcreate("IOMMU_MAP_ENTRY",
	    sizeof(struct iommu_map_entry), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NODUMP);
}
SYSINIT(intel_gas, SI_SUB_DRIVERS, SI_ORDER_FIRST, intel_gas_init, NULL);

/*
 * Allocate a zeroed map entry, optionally sleeping (IOMMU_PGF_WAITOK).
 * If domain is non-NULL, the entry is accounted against it via
 * domain->entries_cnt.  Returns NULL only in the non-waiting case.
 */
struct iommu_map_entry *
iommu_gas_alloc_entry(struct iommu_domain *domain, u_int flags)
{
	struct iommu_map_entry *res;

	KASSERT((flags & ~(IOMMU_PGF_WAITOK)) == 0,
	    ("unsupported flags %x", flags));

	res = uma_zalloc(iommu_map_entry_zone, ((flags & IOMMU_PGF_WAITOK) !=
	    0 ? M_WAITOK : M_NOWAIT) | M_ZERO);
	if (res != NULL && domain != NULL) {
		res->domain = domain;
		atomic_add_int(&domain->entries_cnt, 1);
	}
	return (res);
}

/*
 * Release a map entry and drop the owning domain's entry count, if the
 * entry was associated with a domain at allocation time.
 */
void
iommu_gas_free_entry(struct iommu_map_entry *entry)
{
	struct iommu_domain *domain;

	domain = entry->domain;
	if (domain != NULL)
		atomic_subtract_int(&domain->entries_cnt, 1);
	uma_zfree(iommu_map_entry_zone, entry);
}

/*
 * Tree comparator: entries are ordered by their end address.  Overlap is
 * asserted against, except for zero-length entries and FAKE lookup keys.
 */
static int
iommu_gas_cmp_entries(struct iommu_map_entry *a, struct iommu_map_entry *b)
{

	/* The last entry has zero size, so <= */
	KASSERT(a->start <= a->end, ("inverted entry %p (%jx, %jx)",
	    a, (uintmax_t)a->start, (uintmax_t)a->end));
	KASSERT(b->start <= b->end, ("inverted entry %p (%jx, %jx)",
	    b, (uintmax_t)b->start, (uintmax_t)b->end));
	KASSERT(((a->flags | b->flags) & IOMMU_MAP_ENTRY_FAKE) != 0 ||
	    a->end <= b->start || b->end <= a->start ||
	    a->end == a->start || b->end == b->start,
	    ("overlapping entries %p (%jx, %jx) f %#x %p (%jx, %jx) f %#x"
	    " domain %p %p",
	    a, (uintmax_t)a->start, (uintmax_t)a->end, a->flags,
	    b, (uintmax_t)b->start, (uintmax_t)b->end, b->flags,
	    a->domain, b->domain));

	if (a->end < b->end)
		return (-1);
	else if (b->end < a->end)
		return (1);
	return (0);
}
/*
 * Update augmentation data based on data from children.
 * Return true if and only if the update changes the augmentation data.
 */
static bool
iommu_gas_augment_entry(struct iommu_map_entry *entry)
{
	struct iommu_map_entry *child;
	iommu_gaddr_t bound, delta, free_down;

	/*
	 * free_down summarizes the largest free gap within this subtree;
	 * first/last are the minimum start and maximum end in the subtree.
	 */
	free_down = 0;
	bound = entry->start;
	if ((child = RB_LEFT(entry, rb_entry)) != NULL) {
		free_down = MAX(child->free_down, bound - child->last);
		bound = child->first;
	}
	delta = bound - entry->first;
	entry->first = bound;
	bound = entry->end;
	if ((child = RB_RIGHT(entry, rb_entry)) != NULL) {
		free_down = MAX(free_down, child->free_down);
		free_down = MAX(free_down, child->first - bound);
		bound = child->last;
	}
	delta += entry->last - bound;
	if (delta == 0)
		delta = entry->free_down - free_down;
	entry->last = bound;
	entry->free_down = free_down;

	/*
	 * Return true either if the value of last-first changed,
	 * or if free_down changed.
	 */
	return (delta != 0);
}

RB_GENERATE(iommu_gas_entries_tree, iommu_map_entry, rb_entry,
    iommu_gas_cmp_entries);

#ifdef INVARIANTS
/*
 * Debug check: recompute free_down for every node from its children and
 * verify it matches the stored augmentation value.
 */
static void
iommu_gas_check_free(struct iommu_domain *domain)
{
	struct iommu_map_entry *entry, *l, *r;
	iommu_gaddr_t v;

	RB_FOREACH(entry, iommu_gas_entries_tree, &domain->rb_root) {
		KASSERT(domain == entry->domain,
		    ("mismatched free domain %p entry %p entry->domain %p",
		    domain, entry, entry->domain));
		l = RB_LEFT(entry, rb_entry);
		r = RB_RIGHT(entry, rb_entry);
		v = 0;
		if (l != NULL) {
			v = MAX(v, l->free_down);
			v = MAX(v, entry->start - l->last);
		}
		if (r != NULL) {
			v = MAX(v, r->free_down);
			v = MAX(v, r->first - entry->end);
		}
		MPASS(entry->free_down == v);
	}
}
#endif

/*
 * Remove an entry from the domain's tree, repositioning start_gap first if
 * the removal may open a free gap at a lower address than the current hint.
 */
static void
iommu_gas_rb_remove(struct iommu_domain *domain, struct iommu_map_entry *entry)
{
	struct iommu_map_entry *nbr;

	/* Removing entry may open a new free gap before domain->start_gap. */
	if (entry->end <= domain->start_gap->end) {
		if (RB_RIGHT(entry, rb_entry) != NULL)
			nbr = iommu_gas_entries_tree_RB_NEXT(entry);
		else if (RB_LEFT(entry, rb_entry) != NULL)
			nbr = RB_LEFT(entry, rb_entry);
		else
			nbr = RB_PARENT(entry, rb_entry);
		domain->start_gap = nbr;
	}
	RB_REMOVE(iommu_gas_entries_tree, &domain->rb_root, entry);
}

/* Accessor: the domain backing a context. */
struct iommu_domain *
iommu_get_ctx_domain(struct iommu_ctx *ctx)
{

	return (ctx->domain);
}

/*
 * Seed a fresh domain with its two sentinel placeholder entries: a one-page
 * "begin" entry at address zero and a zero-length "end" entry at domain->end.
 */
void
iommu_gas_init_domain(struct iommu_domain *domain)
{
	struct iommu_map_entry *begin, *end;

	begin = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
	end = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);

	IOMMU_DOMAIN_LOCK(domain);
	KASSERT(domain->entries_cnt == 2, ("dirty domain %p", domain));
	KASSERT(RB_EMPTY(&domain->rb_root),
	    ("non-empty entries %p", domain));

	/*
	 * The end entry must be inserted first because it has a zero-length gap
	 * between start and end.  Initially, all augmentation data for a new
	 * entry is zero.  Function iommu_gas_augment_entry will compute no
	 * change in the value of (start-end) and no change in the value of
	 * free_down, so it will return false to suggest that nothing changed in
	 * the entry.  Thus, inserting the end entry second prevents
	 * augmentation information to be propagated to the begin entry at the
	 * tree root.  So it is inserted first.
	 */
	end->start = domain->end;
	end->end = domain->end;
	end->flags = IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED;
	RB_INSERT(iommu_gas_entries_tree, &domain->rb_root, end);

	begin->start = 0;
	begin->end = IOMMU_PAGE_SIZE;
	begin->flags = IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED;
	RB_INSERT_PREV(iommu_gas_entries_tree, &domain->rb_root, end, begin);

	domain->start_gap = begin;
	domain->first_place = begin;
	domain->last_place = end;
	domain->flags |= IOMMU_DOMAIN_GAS_INITED;
	IOMMU_DOMAIN_UNLOCK(domain);
}

/*
 * Tear down the GAS of a domain; only the two sentinel entries may remain,
 * which are verified and freed here.
 */
void
iommu_gas_fini_domain(struct iommu_domain *domain)
{
	struct iommu_map_entry *entry;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);
	KASSERT(domain->entries_cnt == 2,
	    ("domain still in use %p", domain));

	entry = RB_MIN(iommu_gas_entries_tree, &domain->rb_root);
	KASSERT(entry->start == 0, ("start entry start %p", domain));
	KASSERT(entry->end == IOMMU_PAGE_SIZE, ("start entry end %p", domain));
	KASSERT(entry->flags ==
	    (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED),
	    ("start entry flags %p", domain));
	iommu_gas_rb_remove(domain, entry);
	iommu_gas_free_entry(entry);

	entry = RB_MAX(iommu_gas_entries_tree, &domain->rb_root);
	KASSERT(entry->start == domain->end, ("end entry start %p", domain));
	KASSERT(entry->end == domain->end, ("end entry end %p", domain));
	KASSERT(entry->flags ==
	    (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED),
	    ("end entry flags %p", domain));
	iommu_gas_rb_remove(domain, entry);
	iommu_gas_free_entry(entry);
}

/* Parameters threaded through the free-space search helpers. */
struct iommu_gas_match_args {
	iommu_gaddr_t size;		/* requested allocation size */
	int offset;			/* offset of data within first page */
	const struct bus_dma_tag_common *common; /* alignment/boundary/addr */
	u_int gas_flags;		/* IOMMU_MF_* flags */
	struct iommu_map_entry *entry;	/* entry to fill on success */
};
/*
 * The interval [beg, end) is a free interval between two iommu_map_entries.
 * Addresses can be allocated only in the range [lbound, ubound].  Try to
 * allocate space in the free interval, subject to the conditions expressed by
 * a, and return 'true' if and only if the allocation attempt succeeds.
 */
static bool
iommu_gas_match_one(struct iommu_gas_match_args *a, iommu_gaddr_t beg,
    iommu_gaddr_t end, iommu_gaddr_t lbound, iommu_gaddr_t ubound)
{
	struct iommu_map_entry *entry;
	iommu_gaddr_t first, size, start;
	int offset;

	/*
	 * The prev->end is always aligned on the page size, which
	 * causes page alignment for the entry->start too.
	 *
	 * Create IOMMU_PAGE_SIZE gaps before, after new entry
	 * to ensure that out-of-bounds accesses fault.
	 */
	beg = MAX(beg + IOMMU_PAGE_SIZE, lbound);
	start = roundup2(beg, a->common->alignment);
	if (start < beg)	/* roundup2 overflowed */
		return (false);
	if (end < IOMMU_PAGE_SIZE + 1)
		return (false);
	end = MIN(end - IOMMU_PAGE_SIZE - 1, ubound);
	offset = a->offset;
	size = a->size;
	if (start + offset + size - 1 > end)
		return (false);

	/* Check for and try to skip past boundary crossing. */
	if (!vm_addr_bound_ok(start + offset, size, a->common->boundary)) {
		/*
		 * The start + offset to start + offset + size region crosses
		 * the boundary.  Check if there is enough space after the next
		 * boundary after the beg.
		 */
		first = start;
		beg = roundup2(start + offset + 1, a->common->boundary);
		start = roundup2(beg, a->common->alignment);

		if (start + offset + size - 1 > end ||
		    !vm_addr_bound_ok(start + offset, size,
		    a->common->boundary)) {
			/*
			 * Not enough space to align at the requested boundary,
			 * or boundary is smaller than the size, but allowed to
			 * split.  We already checked that start + size does not
			 * overlap ubound.
			 *
			 * XXXKIB. It is possible that beg is exactly at the
			 * start of the next entry, then we do not have gap.
			 * Ignore for now.
			 */
			if ((a->gas_flags & IOMMU_MF_CANSPLIT) == 0)
				return (false);
			size = beg - first - offset;
			start = first;
		}
	}
	entry = a->entry;
	entry->start = start;
	entry->end = start + roundup2(size + offset, IOMMU_PAGE_SIZE);
	entry->flags = IOMMU_MAP_ENTRY_MAP;
	return (true);
}

/* Find the next entry that might abut a big-enough range. */
static struct iommu_map_entry *
iommu_gas_next(struct iommu_map_entry *curr, iommu_gaddr_t min_free)
{
	struct iommu_map_entry *next;

	if ((next = RB_RIGHT(curr, rb_entry)) != NULL &&
	    next->free_down >= min_free) {
		/* Find next entry in right subtree. */
		do
			curr = next;
		while ((next = RB_LEFT(curr, rb_entry)) != NULL &&
		    next->free_down >= min_free);
	} else {
		/* Find next entry in a left-parent ancestor. */
		while ((next = RB_PARENT(curr, rb_entry)) != NULL &&
		    curr == RB_RIGHT(next, rb_entry))
			curr = next;
		curr = next;
	}
	return (curr);
}

/*
 * Address-ordered first-fit search of 'domain' for free space satisfying the
 * conditions of 'a'.  The space allocated is at least one page big, and is
 * bounded by guard pages to the left and right.  The allocated space for
 * 'domain' is described by an rb-tree of map entries at domain->rb_root, and
 * domain->start_gap points to a map entry less than or adjacent to the first
 * free-space of size at least 3 pages.
 */
static int
iommu_gas_find_space(struct iommu_domain *domain,
    struct iommu_gas_match_args *a)
{
	struct iommu_map_entry *curr, *first;
	iommu_gaddr_t addr, min_free;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);
	KASSERT(a->entry->flags == 0,
	    ("dirty entry %p %p", domain, a->entry));

	/*
	 * start_gap may point to an entry adjacent to gaps too small for any
	 * new allocation.  In that case, advance start_gap to the first free
	 * space big enough for a minimum allocation plus two guard pages.
	 */
	min_free = 3 * IOMMU_PAGE_SIZE;
	first = domain->start_gap;
	while (first != NULL && first->free_down < min_free)
		first = RB_PARENT(first, rb_entry);
	for (curr = first; curr != NULL;
	    curr = iommu_gas_next(curr, min_free)) {
		if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
		    first->last + min_free <= curr->start)
			break;
		if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
		    curr->end + min_free <= first->first)
			break;
	}
	domain->start_gap = curr;

	/*
	 * If the subtree doesn't have free space for the requested allocation
	 * plus two guard pages, skip it.
	 */
	min_free = 2 * IOMMU_PAGE_SIZE +
	    roundup2(a->size + a->offset, IOMMU_PAGE_SIZE);

	/* Climb to find a node in the subtree of big-enough ranges. */
	first = curr;
	while (first != NULL && first->free_down < min_free)
		first = RB_PARENT(first, rb_entry);

	/*
	 * Walk the big-enough ranges tree until one satisfies alignment
	 * requirements, or violates lowaddr address requirement.
	 */
	addr = a->common->lowaddr;
	for (curr = first; curr != NULL;
	    curr = iommu_gas_next(curr, min_free)) {
		if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, first->last, curr->start,
		    0, addr)) {
			RB_INSERT_PREV(iommu_gas_entries_tree,
			    &domain->rb_root, curr, a->entry);
			return (0);
		}
		if (curr->end >= addr) {
			/* All remaining ranges > addr */
			break;
		}
		if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, curr->end, first->first,
		    0, addr)) {
			RB_INSERT_NEXT(iommu_gas_entries_tree,
			    &domain->rb_root, curr, a->entry);
			return (0);
		}
	}

	/*
	 * To resume the search at the start of the upper region, first climb to
	 * the nearest ancestor that spans highaddr.  Then find the last entry
	 * before highaddr that could abut a big-enough range.
	 */
	addr = a->common->highaddr;
	while (curr != NULL && curr->last < addr)
		curr = RB_PARENT(curr, rb_entry);
	first = NULL;
	while (curr != NULL && curr->free_down >= min_free) {
		if (addr < curr->end)
			curr = RB_LEFT(curr, rb_entry);
		else {
			first = curr;
			curr = RB_RIGHT(curr, rb_entry);
		}
	}

	/*
	 * Walk the remaining big-enough ranges until one satisfies alignment
	 * requirements.
	 */
	for (curr = first; curr != NULL;
	    curr = iommu_gas_next(curr, min_free)) {
		if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, first->last, curr->start,
		    addr + 1, domain->end - 1)) {
			RB_INSERT_PREV(iommu_gas_entries_tree,
			    &domain->rb_root, curr, a->entry);
			return (0);
		}
		if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, curr->end, first->first,
		    addr + 1, domain->end - 1)) {
			RB_INSERT_NEXT(iommu_gas_entries_tree,
			    &domain->rb_root, curr, a->entry);
			return (0);
		}
	}

	return (ENOMEM);
}

/*
 * Insert an entry describing a fixed, caller-chosen region [start, end) into
 * the tree.  Overlap with existing non-placeholder entries is an error,
 * except that RMRR requests may clip against existing RMRR entries.
 */
static int
iommu_gas_alloc_region(struct iommu_domain *domain, struct iommu_map_entry *entry,
    u_int flags)
{
	struct iommu_map_entry *next, *prev;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);

	if ((entry->start & IOMMU_PAGE_MASK) != 0 ||
	    (entry->end & IOMMU_PAGE_MASK) != 0)
		return (EINVAL);
	if (entry->start >= entry->end)
		return (EINVAL);
	if (entry->end >= domain->end)
		return (EINVAL);

	/* FAKE marks the entry as a pure lookup key for the comparator. */
	entry->flags |= IOMMU_MAP_ENTRY_FAKE;
	next = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, entry);
	KASSERT(next != NULL, ("next must be non-null %p %jx", domain,
	    (uintmax_t)entry->start));
	prev = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, next);
	/* prev could be NULL */
	entry->flags &= ~IOMMU_MAP_ENTRY_FAKE;

	/*
	 * Adapt to broken BIOSes which specify overlapping RMRR
	 * entries.
	 *
	 * XXXKIB: this does not handle a case when prev or next
	 * entries are completely covered by the current one, which
	 * extends both ways.
	 */
	if (prev != NULL && prev->end > entry->start &&
	    (prev->flags & IOMMU_MAP_ENTRY_PLACE) == 0) {
		if ((flags & IOMMU_MF_RMRR) == 0 ||
		    (prev->flags & IOMMU_MAP_ENTRY_RMRR) == 0)
			return (EBUSY);
		entry->start = prev->end;
	}
	if (next->start < entry->end &&
	    (next->flags & IOMMU_MAP_ENTRY_PLACE) == 0) {
		if ((flags & IOMMU_MF_RMRR) == 0 ||
		    (next->flags & IOMMU_MAP_ENTRY_RMRR) == 0)
			return (EBUSY);
		entry->end = next->start;
	}
	if (entry->end == entry->start)
		return (0);

	if (prev != NULL && prev->end > entry->start) {
		/* This assumes that prev is the placeholder entry. */
		iommu_gas_rb_remove(domain, prev);
		prev = NULL;
	}
	RB_INSERT_PREV(iommu_gas_entries_tree,
	    &domain->rb_root, next, entry);
	if (next->start < entry->end) {
		iommu_gas_rb_remove(domain, next);
		next = NULL;
	}

	if ((flags & IOMMU_MF_RMRR) != 0)
		entry->flags = IOMMU_MAP_ENTRY_RMRR;

#ifdef INVARIANTS
	struct iommu_map_entry *ip, *in;
	ip = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, entry);
	in = RB_NEXT(iommu_gas_entries_tree, &domain->rb_root, entry);
	KASSERT(prev == NULL || ip == prev,
	    ("RMRR %p (%jx %jx) prev %p (%jx %jx) ins prev %p (%jx %jx)",
	    entry, entry->start, entry->end, prev,
	    prev == NULL ? 0 : prev->start, prev == NULL ? 0 : prev->end,
	    ip, ip == NULL ? 0 : ip->start, ip == NULL ? 0 : ip->end));
	KASSERT(next == NULL || in == next,
	    ("RMRR %p (%jx %jx) next %p (%jx %jx) ins next %p (%jx %jx)",
	    entry, entry->start, entry->end, next,
	    next == NULL ? 0 : next->start, next == NULL ? 0 : next->end,
	    in, in == NULL ? 0 : in->start, in == NULL ? 0 : in->end));
#endif

	return (0);
}

/*
 * Return the address range held by a MAP entry to the free space pool.
 * The entry itself is not freed; only its tree linkage and MAP flag go away.
 */
void
iommu_gas_free_space(struct iommu_map_entry *entry)
{
	struct iommu_domain *domain;

	domain = entry->domain;
	KASSERT((entry->flags & (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_RMRR |
	    IOMMU_MAP_ENTRY_MAP)) == IOMMU_MAP_ENTRY_MAP,
	    ("permanent entry %p %p", domain, entry));

	IOMMU_DOMAIN_LOCK(domain);
	iommu_gas_rb_remove(domain, entry);
	entry->flags &= ~IOMMU_MAP_ENTRY_MAP;
#ifdef INVARIANTS
	if (iommu_check_free)
		iommu_gas_check_free(domain);
#endif
	IOMMU_DOMAIN_UNLOCK(domain);
}

/*
 * Release an RMRR region.  Sentinel placeholder entries stay in the tree;
 * all others are unlinked.
 */
void
iommu_gas_free_region(struct iommu_map_entry *entry)
{
	struct iommu_domain *domain;

	domain = entry->domain;
	KASSERT((entry->flags & (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_RMRR |
	    IOMMU_MAP_ENTRY_MAP)) == IOMMU_MAP_ENTRY_RMRR,
	    ("non-RMRR entry %p %p", domain, entry));

	IOMMU_DOMAIN_LOCK(domain);
	if (entry != domain->first_place &&
	    entry != domain->last_place)
		iommu_gas_rb_remove(domain, entry);
	entry->flags &= ~IOMMU_MAP_ENTRY_RMRR;
	IOMMU_DOMAIN_UNLOCK(domain);
}

/*
 * For a removal of [start, end): if an existing non-RMRR entry straddles
 * 'start', split it at 'start' using the preallocated entry *r (consumed,
 * *r set to NULL).  Returns the first entry whose range lies at or after
 * 'start'.
 */
static struct iommu_map_entry *
iommu_gas_remove_clip_left(struct iommu_domain *domain, iommu_gaddr_t start,
    iommu_gaddr_t end, struct iommu_map_entry **r)
{
	struct iommu_map_entry *entry, *res, fentry;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);
	MPASS(start <= end);
	MPASS(end <= domain->end);

	/*
	 * Find an entry which contains the supplied guest's address
	 * start, or the first entry after the start.  Since we
	 * asserted that start is below domain end, entry should
	 * exist.  Then clip it if needed.
	 */
	bzero(&fentry, sizeof(fentry));
	fentry.start = start + 1;
	fentry.end = start + 1;
	fentry.flags = IOMMU_MAP_ENTRY_FAKE;
	entry = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, &fentry);

	if (entry->start >= start ||
	    (entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
		return (entry);

	res = *r;
	*r = NULL;
	*res = *entry;
	res->start = entry->end = start;
	RB_UPDATE_AUGMENT(entry, rb_entry);
	RB_INSERT_NEXT(iommu_gas_entries_tree,
	    &domain->rb_root, entry, res);
	return (res);
}

/*
 * Counterpart of clip_left for the upper bound: if 'entry' straddles 'end',
 * split it at 'end' using preallocated entry r.  Returns true iff r was
 * consumed (i.e. a split happened).
 */
static bool
iommu_gas_remove_clip_right(struct iommu_domain *domain,
    iommu_gaddr_t end, struct iommu_map_entry *entry,
    struct iommu_map_entry *r)
{
	if (entry->start >= end || (entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
		return (false);

	*r = *entry;
	r->end = entry->start = end;
	RB_UPDATE_AUGMENT(entry, rb_entry);
	RB_INSERT_PREV(iommu_gas_entries_tree,
	    &domain->rb_root, entry, r);
	return (true);
}

/*
 * Queue a mapped entry for deferred unmapping, marking it REMOVING so it
 * is only queued once.  Entries already unmapped or being removed are
 * skipped.
 */
static void
iommu_gas_remove_unmap(struct iommu_domain *domain,
    struct iommu_map_entry *entry, struct iommu_map_entries_tailq *gcp)
{
	IOMMU_DOMAIN_ASSERT_LOCKED(domain);

	if ((entry->flags & (IOMMU_MAP_ENTRY_UNMAPPED |
	    IOMMU_MAP_ENTRY_REMOVING)) != 0)
		return;
	MPASS((entry->flags & IOMMU_MAP_ENTRY_PLACE) == 0);
	entry->flags |= IOMMU_MAP_ENTRY_REMOVING;
	TAILQ_INSERT_TAIL(gcp, entry, dmamap_link);
}
/*
 * Remove specified range from the GAS of the domain.  Note that the
 * removal is not guaranteed to occur upon the function return, it
 * might be finalized some time after, when hardware reports that
 * (queued) IOTLB invalidation was performed.
 */
void
iommu_gas_remove(struct iommu_domain *domain, iommu_gaddr_t start,
    iommu_gaddr_t size)
{
	struct iommu_map_entry *entry, *nentry, *r1, *r2;
	struct iommu_map_entries_tailq gc;
	iommu_gaddr_t end;

	end = start + size;
	/* Preallocate split entries before taking the domain lock. */
	r1 = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
	r2 = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
	TAILQ_INIT(&gc);

	IOMMU_DOMAIN_LOCK(domain);

	nentry = iommu_gas_remove_clip_left(domain, start, end, &r1);
	RB_FOREACH_FROM(entry, iommu_gas_entries_tree, nentry) {
		if (entry->start >= end)
			break;
		KASSERT(start <= entry->start,
		    ("iommu_gas_remove entry (%#jx, %#jx) start %#jx",
		    entry->start, entry->end, start));
		if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
			continue;
		iommu_gas_remove_unmap(domain, entry, &gc);
	}
	if (iommu_gas_remove_clip_right(domain, end, entry, r2)) {
		iommu_gas_remove_unmap(domain, r2, &gc);
		r2 = NULL;	/* consumed by the split */
	}

#ifdef INVARIANTS
	RB_FOREACH(entry, iommu_gas_entries_tree, &domain->rb_root) {
		if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
			continue;
		KASSERT(entry->end <= start || entry->start >= end,
		    ("iommu_gas_remove leftover entry (%#jx, %#jx) range "
		    "(%#jx, %#jx)",
		    entry->start, entry->end, start, end));
	}
#endif

	IOMMU_DOMAIN_UNLOCK(domain);
	if (r1 != NULL)
		iommu_gas_free_entry(r1);
	if (r2 != NULL)
		iommu_gas_free_entry(r2);
	iommu_domain_unload(domain, &gc, true);
}

/*
 * Allocate free GAS space satisfying the tag's constraints and install the
 * page mapping for it via the domain's map op.  On success, *res is the new
 * entry with eflags set.  Returns ENOMEM if no space or mapping resources.
 */
int
iommu_gas_map(struct iommu_domain *domain,
    const struct bus_dma_tag_common *common, iommu_gaddr_t size, int offset,
    u_int eflags, u_int flags, vm_page_t *ma, struct iommu_map_entry **res)
{
	struct iommu_gas_match_args a;
	struct iommu_map_entry *entry;
	int error;

	KASSERT((flags & ~(IOMMU_MF_CANWAIT | IOMMU_MF_CANSPLIT)) == 0,
	    ("invalid flags 0x%x", flags));

	a.size = size;
	a.offset = offset;
	a.common = common;
	a.gas_flags = flags;
	entry = iommu_gas_alloc_entry(domain,
	    (flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0);
	if (entry == NULL)
		return (ENOMEM);
	a.entry = entry;
	IOMMU_DOMAIN_LOCK(domain);
	error = iommu_gas_find_space(domain, &a);
	if (error == ENOMEM) {
		IOMMU_DOMAIN_UNLOCK(domain);
		iommu_gas_free_entry(entry);
		return (error);
	}
#ifdef INVARIANTS
	if (iommu_check_free)
		iommu_gas_check_free(domain);
#endif
	KASSERT(error == 0,
	    ("unexpected error %d from iommu_gas_find_entry", error));
	KASSERT(entry->end < domain->end, ("allocated GPA %jx, max GPA %jx",
	    (uintmax_t)entry->end, (uintmax_t)domain->end));
	entry->flags |= eflags;
	IOMMU_DOMAIN_UNLOCK(domain);

	/* Entry is in the tree; program the hardware page tables. */
	error = domain->ops->map(domain, entry->start,
	    entry->end - entry->start, ma, eflags,
	    ((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0));
	if (error == ENOMEM) {
		iommu_domain_unload_entry(entry, true,
		    (flags & IOMMU_MF_CANWAIT) != 0);
		return (error);
	}
	KASSERT(error == 0,
	    ("unexpected error %d from domain_map_buf", error));

	*res = entry;
	return (0);
}

/*
 * Map a caller-specified fixed region described by 'entry' (e.g. an RMRR).
 * The entry's range may be clipped by iommu_gas_alloc_region against
 * existing RMRR entries; the pages mapped are offset accordingly.
 */
int
iommu_gas_map_region(struct iommu_domain *domain, struct iommu_map_entry *entry,
    u_int eflags, u_int flags, vm_page_t *ma)
{
	iommu_gaddr_t start;
	int error;

	KASSERT(entry->domain == domain,
	    ("mismatched domain %p entry %p entry->domain %p", domain,
	    entry, entry->domain));
	KASSERT(entry->flags == 0, ("used RMRR entry %p %p %x", domain,
	    entry, entry->flags));
	KASSERT((flags & ~(IOMMU_MF_CANWAIT | IOMMU_MF_RMRR)) == 0,
	    ("invalid flags 0x%x", flags));

	start = entry->start;
	IOMMU_DOMAIN_LOCK(domain);
	error = iommu_gas_alloc_region(domain, entry, flags);
	if (error != 0) {
		IOMMU_DOMAIN_UNLOCK(domain);
		return (error);
	}
	entry->flags |= eflags;
	IOMMU_DOMAIN_UNLOCK(domain);
	if (entry->end == entry->start)	/* fully clipped away */
		return (0);

	error = domain->ops->map(domain, entry->start,
	    entry->end - entry->start, ma + OFF_TO_IDX(start - entry->start),
	    eflags, ((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0));
	if (error == ENOMEM) {
		iommu_domain_unload_entry(entry, false,
		    (flags & IOMMU_MF_CANWAIT) != 0);
		return (error);
	}
	KASSERT(error == 0,
	    ("unexpected error %d from domain_map_buf", error));

	return (0);
}

/*
 * Reserve [start, end) in the GAS without creating a hardware mapping;
 * the region is marked UNMAPPED so nothing is programmed or invalidated.
 */
static int
iommu_gas_reserve_region_locked(struct iommu_domain *domain,
    iommu_gaddr_t start, iommu_gaddr_t end, struct iommu_map_entry *entry)
{
	int error;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);

	entry->start = start;
	entry->end = end;
	error = iommu_gas_alloc_region(domain, entry, IOMMU_MF_CANWAIT);
	if (error == 0)
		entry->flags |= IOMMU_MAP_ENTRY_UNMAPPED;
	return (error);
}

/*
 * Public wrapper for the above: allocates the entry, takes the lock, and
 * optionally hands the resulting entry back via *entry0.
 */
int
iommu_gas_reserve_region(struct iommu_domain *domain, iommu_gaddr_t start,
    iommu_gaddr_t end, struct iommu_map_entry **entry0)
{
	struct iommu_map_entry *entry;
	int error;

	entry = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
	IOMMU_DOMAIN_LOCK(domain);
	error = iommu_gas_reserve_region_locked(domain, start, end, entry);
	IOMMU_DOMAIN_UNLOCK(domain);
	if (error != 0)
		iommu_gas_free_entry(entry);
	else if (entry0 != NULL)
		*entry0 = entry;
	return (error);
}

/*
 * As in iommu_gas_reserve_region, reserve [start, end), but allow for existing
 * entries.
 */
int
iommu_gas_reserve_region_extend(struct iommu_domain *domain,
    iommu_gaddr_t start, iommu_gaddr_t end)
{
	struct iommu_map_entry *entry, *next, *prev, key = {};
	iommu_gaddr_t entry_start, entry_end;
	int error;

	error = 0;
	entry = NULL;
	end = ummin(end, domain->end);
	/* Reserve each free sub-range of [start, end) in turn. */
	while (start < end) {
		/* Preallocate an entry. */
		if (entry == NULL)
			entry = iommu_gas_alloc_entry(domain,
			    IOMMU_PGF_WAITOK);
		/* Calculate the free region from here to the next entry. */
		key.start = key.end = start;
		IOMMU_DOMAIN_LOCK(domain);
		next = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, &key);
		KASSERT(next != NULL, ("domain %p with end %#jx has no entry "
		    "after %#jx", domain, (uintmax_t)domain->end,
		    (uintmax_t)start));
		entry_end = ummin(end, next->start);
		prev = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, next);
		if (prev != NULL)
			entry_start = ummax(start, prev->end);
		else
			entry_start = start;
		start = next->end;
		/* Reserve the region if non-empty. */
		if (entry_start != entry_end) {
			error = iommu_gas_reserve_region_locked(domain,
			    entry_start, entry_end, entry);
			if (error != 0) {
				IOMMU_DOMAIN_UNLOCK(domain);
				break;
			}
			entry = NULL;	/* consumed by the reservation */
		}
		IOMMU_DOMAIN_UNLOCK(domain);
	}
	/* Release a preallocated entry if it was not used. */
	if (entry != NULL)
		iommu_gas_free_entry(entry);
	return (error);
}

/*
 * Tear down the per-domain MSI page mapping, if one exists, and reset the
 * domain's MSI bookkeeping.
 */
void
iommu_unmap_msi(struct iommu_ctx *ctx)
{
	struct iommu_map_entry *entry;
	struct iommu_domain *domain;

	domain = ctx->domain;
	entry = domain->msi_entry;
	if (entry == NULL)
		return;

	domain->ops->unmap(domain, entry->start, entry->end -
	    entry->start, IOMMU_PGF_WAITOK);

	iommu_gas_free_space(entry);

	iommu_gas_free_entry(entry);

	domain->msi_entry = NULL;
	domain->msi_base = 0;
	domain->msi_phys = 0;
}

/*
 * Map the MSI page for a context's domain, unless one is already mapped.
 * Racing allocators are resolved under the domain lock: the loser frees
 * its entry, and a failed allocation still succeeds if another thread won.
 */
int
iommu_map_msi(struct iommu_ctx *ctx, iommu_gaddr_t size, int offset,
    u_int eflags, u_int flags, vm_page_t *ma)
{
	struct iommu_domain *domain;
	struct iommu_map_entry *entry;
	int error;

	error = 0;
	domain = ctx->domain;

	/* Check if there is already an MSI page allocated */
	IOMMU_DOMAIN_LOCK(domain);
	entry = domain->msi_entry;
	IOMMU_DOMAIN_UNLOCK(domain);

	if (entry == NULL) {
		error = iommu_gas_map(domain, &ctx->tag->common, size, offset,
		    eflags, flags, ma, &entry);
		IOMMU_DOMAIN_LOCK(domain);
		if (error == 0) {
			if (domain->msi_entry == NULL) {
				MPASS(domain->msi_base == 0);
				MPASS(domain->msi_phys == 0);

				domain->msi_entry = entry;
				domain->msi_base = entry->start;
				domain->msi_phys = VM_PAGE_TO_PHYS(ma[0]);
			} else {
				/*
				 * We lost the race and already have an
				 * MSI page allocated. Free the unneeded entry.
				 */
				iommu_gas_free_entry(entry);
			}
		} else if (domain->msi_entry != NULL) {
			/*
			 * The allocation failed, but another succeeded.
			 * Return success as there is a valid MSI page.
			 */
			error = 0;
		}
		IOMMU_DOMAIN_UNLOCK(domain);
	}

	return (error);
}

/*
 * Rewrite a physical MSI doorbell address into the domain's mapped MSI
 * guest address.  Asserts the result falls within the MSI entry's range.
 */
void
iommu_translate_msi(struct iommu_domain *domain, uint64_t *addr)
{

	*addr = (*addr - domain->msi_phys) + domain->msi_base;

	KASSERT(*addr >= domain->msi_entry->start,
	    ("%s: Address is below the MSI entry start address (%jx < %jx)",
	    __func__, (uintmax_t)*addr, (uintmax_t)domain->msi_entry->start));

	KASSERT(*addr + sizeof(*addr) <= domain->msi_entry->end,
	    ("%s: Address is above the MSI entry end address (%jx < %jx)",
	    __func__, (uintmax_t)*addr, (uintmax_t)domain->msi_entry->end));
}

SYSCTL_NODE(_hw, OID_AUTO, iommu, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "");

#ifdef INVARIANTS
SYSCTL_INT(_hw_iommu, OID_AUTO, check_free, CTLFLAG_RWTUN,
    &iommu_check_free, 0,
    "Check the GPA RBtree for free_down and free_after validity");
#endif

#include "opt_ddb.h"
#ifdef DDB

#include <ddb/ddb.h>

/* Dump the GAS tree of a domain to the debugger console. */
static void
iommu_debug_dump_gas(struct iommu_domain *domain)
{
	struct iommu_map_entry *entry;

	db_printf("iommu_domain %p tree %p iommu %p fl %#x\n", domain,
	    &domain->rb_root, domain->iommu, domain->flags);
	db_printf("iommu_domain %p tree %p\n", domain, &domain->rb_root);
	RB_FOREACH(entry, iommu_gas_entries_tree, &domain->rb_root) {
		db_printf(
	    "  e %p [%#jx %#jx] fl %#x first %#jx last %#jx free_down %#jx",
		    entry, (uintmax_t)entry->start, (uintmax_t)entry->end,
		    entry->flags,
		    (uintmax_t)entry->first, (uintmax_t)entry->last,
		    (uintmax_t)entry->free_down);
		if (entry == domain->start_gap)
			db_printf(" start_gap");
		if (entry == domain->first_place)
			db_printf(" first_place");
		if (entry == domain->last_place)
			db_printf(" last_place");
		db_printf("\n");
	}
}

/* DDB "show iommu_domain <addr>" command. */
DB_SHOW_COMMAND(iommu_domain, iommu_domain_show)
{
	struct iommu_domain *domain;

	if (!have_addr) {
		db_printf("show iommu_domain addr\n");
		return;
	}

	domain = (void *)addr;
	iommu_debug_dump_gas(domain);
}

#endif