/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#define	RB_AUGMENT_CHECK(entry) iommu_gas_augment_entry(entry)

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/rman.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/uma.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/iommu/iommu.h>
#include <dev/iommu/iommu_gas.h>
#include <dev/iommu/iommu_msi.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <machine/iommu.h>
#include <dev/iommu/busdma_iommu.h>

/*
 * Guest Address Space management.
 */

static uma_zone_t iommu_map_entry_zone;

#ifdef INVARIANTS
static int iommu_check_free;
#endif

static void
intel_gas_init(void)
{

	iommu_map_entry_zone = uma_zcreate("IOMMU_MAP_ENTRY",
	    sizeof(struct iommu_map_entry), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NODUMP);
}
SYSINIT(intel_gas, SI_SUB_DRIVERS, SI_ORDER_FIRST, intel_gas_init, NULL);

struct iommu_map_entry *
iommu_gas_alloc_entry(struct iommu_domain *domain, u_int flags)
{
	struct iommu_map_entry *res;

	KASSERT((flags & ~(IOMMU_PGF_WAITOK)) == 0,
	    ("unsupported flags %x", flags));

	res = uma_zalloc(iommu_map_entry_zone, ((flags & IOMMU_PGF_WAITOK) !=
	    0 ? M_WAITOK : M_NOWAIT) | M_ZERO);
	if (res != NULL && domain != NULL) {
		res->domain = domain;
		atomic_add_int(&domain->entries_cnt, 1);
	}
	return (res);
}
void
iommu_gas_free_entry(struct iommu_map_entry *entry)
{
	struct iommu_domain *domain;

	domain = entry->domain;
	if (domain != NULL)
		atomic_subtract_int(&domain->entries_cnt, 1);
	uma_zfree(iommu_map_entry_zone, entry);
}

static int
iommu_gas_cmp_entries(struct iommu_map_entry *a, struct iommu_map_entry *b)
{

	/* First and last entries have zero size, so <= */
	KASSERT(a->start <= a->end, ("inverted entry %p (%jx, %jx)",
	    a, (uintmax_t)a->start, (uintmax_t)a->end));
	KASSERT(b->start <= b->end, ("inverted entry %p (%jx, %jx)",
	    b, (uintmax_t)b->start, (uintmax_t)b->end));
	KASSERT(((a->flags | b->flags) & IOMMU_MAP_ENTRY_FAKE) != 0 ||
	    a->end <= b->start || b->end <= a->start ||
	    a->end == a->start || b->end == b->start,
	    ("overlapping entries %p (%jx, %jx) f %#x %p (%jx, %jx) f %#x"
	    " domain %p %p",
	    a, (uintmax_t)a->start, (uintmax_t)a->end, a->flags,
	    b, (uintmax_t)b->start, (uintmax_t)b->end, b->flags,
	    a->domain, b->domain));

	if (a->end < b->end)
		return (-1);
	else if (b->end < a->end)
		return (1);
	return (0);
}

/*
 * Update augmentation data based on data from children.
 * Return true if and only if the update changes the augmentation data.
 */
static bool
iommu_gas_augment_entry(struct iommu_map_entry *entry)
{
	struct iommu_map_entry *child;
	iommu_gaddr_t bound, delta, free_down;

	free_down = 0;
	bound = entry->start;
	if ((child = RB_LEFT(entry, rb_entry)) != NULL) {
		free_down = MAX(child->free_down, bound - child->last);
		bound = child->first;
	}
	delta = bound - entry->first;
	entry->first = bound;
	bound = entry->end;
	if ((child = RB_RIGHT(entry, rb_entry)) != NULL) {
		free_down = MAX(free_down, child->free_down);
		free_down = MAX(free_down, child->first - bound);
		bound = child->last;
	}
	delta += entry->last - bound;
	if (delta == 0)
		delta = entry->free_down - free_down;
	entry->last = bound;
	entry->free_down = free_down;

	/*
	 * Return true either if the value of last-first changed,
	 * or if free_down changed.
	 */
	return (delta != 0);
}

RB_GENERATE(iommu_gas_entries_tree, iommu_map_entry, rb_entry,
    iommu_gas_cmp_entries);
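/*
 * Illustrative note, not taken from the original sources: free_down of a
 * node is the size of the largest gap between two consecutive entries
 * anywhere in the subtree rooted at that node; gaps adjacent to entries
 * outside the subtree are not counted.  For example, with hypothetical
 * entries A [0x1000, 0x2000), B [0x5000, 0x6000) and C [0x9000, 0xa000),
 * where B is the subtree root and A and C are its children,
 *
 *	B->first = 0x1000, B->last = 0xa000,
 *	B->free_down = MAX(0x5000 - 0x2000, 0x9000 - 0x6000) = 0x3000.
 *
 * iommu_gas_find_space() relies on this to skip whole subtrees whose
 * largest gap is smaller than the requested size plus guard pages.
 */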
#ifdef INVARIANTS
static void
iommu_gas_check_free(struct iommu_domain *domain)
{
	struct iommu_map_entry *entry, *l, *r;
	iommu_gaddr_t v;

	RB_FOREACH(entry, iommu_gas_entries_tree, &domain->rb_root) {
		KASSERT(domain == entry->domain,
		    ("mismatched free domain %p entry %p entry->domain %p",
		    domain, entry, entry->domain));
		l = RB_LEFT(entry, rb_entry);
		r = RB_RIGHT(entry, rb_entry);
		v = 0;
		if (l != NULL) {
			v = MAX(v, l->free_down);
			v = MAX(v, entry->start - l->last);
		}
		if (r != NULL) {
			v = MAX(v, r->free_down);
			v = MAX(v, r->first - entry->end);
		}
		MPASS(entry->free_down == v);
	}
}
#endif

static void
iommu_gas_rb_remove(struct iommu_domain *domain, struct iommu_map_entry *entry)
{
	struct iommu_map_entry *nbr;

	/* Removing entry may open a new free gap before domain->start_gap. */
	if (entry->end <= domain->start_gap->end) {
		if (RB_RIGHT(entry, rb_entry) != NULL)
			nbr = iommu_gas_entries_tree_RB_NEXT(entry);
		else if (RB_LEFT(entry, rb_entry) != NULL)
			nbr = RB_LEFT(entry, rb_entry);
		else
			nbr = RB_PARENT(entry, rb_entry);
		domain->start_gap = nbr;
	}
	RB_REMOVE(iommu_gas_entries_tree, &domain->rb_root, entry);
}

struct iommu_domain *
iommu_get_ctx_domain(struct iommu_ctx *ctx)
{

	return (ctx->domain);
}

void
iommu_gas_init_domain(struct iommu_domain *domain)
{
	struct iommu_map_entry *begin, *end;

	begin = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
	end = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);

	IOMMU_DOMAIN_LOCK(domain);
	KASSERT(domain->entries_cnt == 2, ("dirty domain %p", domain));
	KASSERT(RB_EMPTY(&domain->rb_root),
	    ("non-empty entries %p", domain));

	end->start = domain->end;
	end->end = domain->end;
	end->flags = IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED;
	RB_INSERT(iommu_gas_entries_tree, &domain->rb_root, end);

	begin->start = 0;
	begin->end = 0;
	begin->flags = IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED;
	RB_INSERT_PREV(iommu_gas_entries_tree, &domain->rb_root, end, begin);
	iommu_gas_augment_entry(end);
	iommu_gas_augment_entry(begin);

	domain->start_gap = begin;
	domain->first_place = begin;
	domain->last_place = end;
	domain->flags |= IOMMU_DOMAIN_GAS_INITED;
	IOMMU_DOMAIN_UNLOCK(domain);
}
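/*
 * Illustrative note, not taken from the original sources: after
 * iommu_gas_init_domain() the tree contains exactly two zero-size
 * placeholder entries,
 *
 *	begin [0, 0]				PLACE | UNMAPPED
 *	end   [domain->end, domain->end]	PLACE | UNMAPPED
 *
 * so every later allocation is bracketed by existing entries and the
 * free-space search never needs to special-case an empty tree or the
 * edges of the guest address space.
 */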
void
iommu_gas_fini_domain(struct iommu_domain *domain)
{
	struct iommu_map_entry *entry;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);
	KASSERT(domain->entries_cnt == 2,
	    ("domain still in use %p", domain));

	entry = RB_MIN(iommu_gas_entries_tree, &domain->rb_root);
	KASSERT(entry->start == 0, ("start entry start %p", domain));
	KASSERT(entry->end == IOMMU_PAGE_SIZE, ("start entry end %p", domain));
	KASSERT(entry->flags ==
	    (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED),
	    ("start entry flags %p", domain));
	iommu_gas_rb_remove(domain, entry);
	iommu_gas_free_entry(entry);

	entry = RB_MAX(iommu_gas_entries_tree, &domain->rb_root);
	KASSERT(entry->start == domain->end, ("end entry start %p", domain));
	KASSERT(entry->end == domain->end, ("end entry end %p", domain));
	KASSERT(entry->flags ==
	    (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED),
	    ("end entry flags %p", domain));
	iommu_gas_rb_remove(domain, entry);
	iommu_gas_free_entry(entry);
}

struct iommu_gas_match_args {
	iommu_gaddr_t size;
	int offset;
	const struct bus_dma_tag_common *common;
	u_int gas_flags;
	struct iommu_map_entry *entry;
};

/*
 * The interval [beg, end) is a free interval between two iommu_map_entries.
 * Addresses can be allocated only in the range [lbound, ubound].  Try to
 * allocate space in the free interval, subject to the conditions expressed by
 * a, and return 'true' if and only if the allocation attempt succeeds.
 */
static bool
iommu_gas_match_one(struct iommu_gas_match_args *a, iommu_gaddr_t beg,
    iommu_gaddr_t end, iommu_gaddr_t lbound, iommu_gaddr_t ubound)
{
	struct iommu_map_entry *entry;
	iommu_gaddr_t first, size, start;
	int offset;

	/*
	 * The prev->end is always aligned on the page size, which
	 * causes page alignment for the entry->start too.
	 *
	 * Create IOMMU_PAGE_SIZE gaps before, after new entry
	 * to ensure that out-of-bounds accesses fault.
	 */
	beg = MAX(beg + IOMMU_PAGE_SIZE, lbound);
	start = roundup2(beg, a->common->alignment);
	if (start < beg)
		return (false);
	if (end < IOMMU_PAGE_SIZE + 1)
		return (false);
	end = MIN(end - IOMMU_PAGE_SIZE - 1, ubound);
	offset = a->offset;
	size = a->size;
	if (start + offset + size - 1 > end)
		return (false);

	/* Check for and try to skip past boundary crossing. */
	if (!vm_addr_bound_ok(start + offset, size, a->common->boundary)) {
		/*
		 * The start + offset to start + offset + size region crosses
		 * the boundary.  Check if there is enough space after the next
		 * boundary after the beg.
		 */
		first = start;
		beg = roundup2(start + offset + 1, a->common->boundary);
		start = roundup2(beg, a->common->alignment);

		if (start + offset + size - 1 > end ||
		    !vm_addr_bound_ok(start + offset, size,
		    a->common->boundary)) {
			/*
			 * Not enough space to align at the requested boundary,
			 * or boundary is smaller than the size, but allowed to
			 * split.  We already checked that start + size does not
			 * overlap ubound.
			 *
			 * XXXKIB. It is possible that beg is exactly at the
			 * start of the next entry, then we do not have gap.
			 * Ignore for now.
			 */
			if ((a->gas_flags & IOMMU_MF_CANSPLIT) == 0)
				return (false);
			size = beg - first - offset;
			start = first;
		}
	}
	entry = a->entry;
	entry->start = start;
	entry->end = start + roundup2(size + offset, IOMMU_PAGE_SIZE);
	entry->flags = IOMMU_MAP_ENTRY_MAP;
	return (true);
}
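/*
 * Illustrative note, not taken from the original sources: with 4K IOMMU
 * pages, a hypothetical request of size 0x1800 at offset 0x200 placed into
 * a free interval behaves roughly as follows.  The candidate start is
 * pushed up by one guard page and rounded to the tag alignment, the usable
 * end of the interval is pulled down by one guard page, and on success the
 * resulting entry covers
 *
 *	[start, start + roundup2(0x1800 + 0x200, 0x1000)) =
 *	[start, start + 0x2000)
 *
 * leaving unmapped guard pages on both sides, so a device that runs one
 * page past its buffer faults instead of corrupting a neighbouring mapping.
 */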
/* Find the next entry that might abut a big-enough range. */
static struct iommu_map_entry *
iommu_gas_next(struct iommu_map_entry *curr, iommu_gaddr_t min_free)
{
	struct iommu_map_entry *next;

	if ((next = RB_RIGHT(curr, rb_entry)) != NULL &&
	    next->free_down >= min_free) {
		/* Find next entry in right subtree. */
		do
			curr = next;
		while ((next = RB_LEFT(curr, rb_entry)) != NULL &&
		    next->free_down >= min_free);
	} else {
		/* Find next entry in a left-parent ancestor. */
		while ((next = RB_PARENT(curr, rb_entry)) != NULL &&
		    curr == RB_RIGHT(next, rb_entry))
			curr = next;
		curr = next;
	}
	return (curr);
}

/*
 * Address-ordered first-fit search of 'domain' for free space satisfying the
 * conditions of 'a'.  The space allocated is at least one page big, and is
 * bounded by guard pages to the left and right.  The allocated space for
 * 'domain' is described by an rb-tree of map entries at domain->rb_root, and
 * domain->start_gap points to a map entry less than or adjacent to the first
 * free-space of size at least 3 pages.
 */
static int
iommu_gas_find_space(struct iommu_domain *domain,
    struct iommu_gas_match_args *a)
{
	struct iommu_map_entry *curr, *first;
	iommu_gaddr_t addr, min_free;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);
	KASSERT(a->entry->flags == 0,
	    ("dirty entry %p %p", domain, a->entry));

	/*
	 * start_gap may point to an entry adjacent to gaps too small for any
	 * new allocation.  In that case, advance start_gap to the first free
	 * space big enough for a minimum allocation plus two guard pages.
	 */
	min_free = 3 * IOMMU_PAGE_SIZE;
	first = domain->start_gap;
	while (first != NULL && first->free_down < min_free)
		first = RB_PARENT(first, rb_entry);
	for (curr = first; curr != NULL;
	    curr = iommu_gas_next(curr, min_free)) {
		if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
		    first->last + min_free <= curr->start)
			break;
		if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
		    curr->end + min_free <= first->first)
			break;
	}
	domain->start_gap = curr;

	/*
	 * If the subtree doesn't have free space for the requested allocation
	 * plus two guard pages, skip it.
	 */
	min_free = 2 * IOMMU_PAGE_SIZE +
	    roundup2(a->size + a->offset, IOMMU_PAGE_SIZE);

	/* Climb to find a node in the subtree of big-enough ranges. */
	first = curr;
	while (first != NULL && first->free_down < min_free)
		first = RB_PARENT(first, rb_entry);

	/*
	 * Walk the big-enough ranges tree until one satisfies alignment
	 * requirements, or violates lowaddr address requirement.
	 */
	addr = a->common->lowaddr;
	for (curr = first; curr != NULL;
	    curr = iommu_gas_next(curr, min_free)) {
		if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, first->last, curr->start,
		    0, addr)) {
			RB_INSERT_PREV(iommu_gas_entries_tree,
			    &domain->rb_root, curr, a->entry);
			return (0);
		}
		if (curr->end >= addr) {
			/* All remaining ranges > addr */
			break;
		}
		if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, curr->end, first->first,
		    0, addr)) {
			RB_INSERT_NEXT(iommu_gas_entries_tree,
			    &domain->rb_root, curr, a->entry);
			return (0);
		}
	}

	/*
	 * To resume the search at the start of the upper region, first climb to
	 * the nearest ancestor that spans highaddr.  Then find the last entry
	 * before highaddr that could abut a big-enough range.
	 */
	addr = a->common->highaddr;
	while (curr != NULL && curr->last < addr)
		curr = RB_PARENT(curr, rb_entry);
	first = NULL;
	while (curr != NULL && curr->free_down >= min_free) {
		if (addr < curr->end)
			curr = RB_LEFT(curr, rb_entry);
		else {
			first = curr;
			curr = RB_RIGHT(curr, rb_entry);
		}
	}

	/*
	 * Walk the remaining big-enough ranges until one satisfies alignment
	 * requirements.
	 */
	for (curr = first; curr != NULL;
	    curr = iommu_gas_next(curr, min_free)) {
		if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, first->last, curr->start,
		    addr + 1, domain->end - 1)) {
			RB_INSERT_PREV(iommu_gas_entries_tree,
			    &domain->rb_root, curr, a->entry);
			return (0);
		}
		if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, curr->end, first->first,
		    addr + 1, domain->end - 1)) {
			RB_INSERT_NEXT(iommu_gas_entries_tree,
			    &domain->rb_root, curr, a->entry);
			return (0);
		}
	}

	return (ENOMEM);
}
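/*
 * Illustrative note, not taken from the original sources: the two passes
 * above honour the tag's lowaddr/highaddr exclusion range.  The first pass
 * only accepts space in [0, lowaddr] and the second pass only accepts
 * space in [highaddr + 1, domain->end - 1].  For a hypothetical tag
 * created with lowaddr = BUS_SPACE_MAXADDR_32BIT and highaddr =
 * BUS_SPACE_MAXADDR, the second window is empty, so the allocation either
 * fits below 4G or fails with ENOMEM.
 */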
static int
iommu_gas_alloc_region(struct iommu_domain *domain, struct iommu_map_entry *entry,
    u_int flags)
{
	struct iommu_map_entry *next, *prev;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);

	if ((entry->start & IOMMU_PAGE_MASK) != 0 ||
	    (entry->end & IOMMU_PAGE_MASK) != 0)
		return (EINVAL);
	if (entry->start >= entry->end)
		return (EINVAL);
	if (entry->end >= domain->end)
		return (EINVAL);

	entry->flags |= IOMMU_MAP_ENTRY_FAKE;
	next = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, entry);
	KASSERT(next != NULL, ("next must be non-null %p %jx", domain,
	    (uintmax_t)entry->start));
	prev = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, next);
	/* prev could be NULL */
	entry->flags &= ~IOMMU_MAP_ENTRY_FAKE;

	/*
	 * Adapt to broken BIOSes which specify overlapping RMRR
	 * entries.
	 *
	 * XXXKIB: this does not handle a case when prev or next
	 * entries are completely covered by the current one, which
	 * extends both ways.
	 */
	if (prev != NULL && prev->end > entry->start &&
	    (prev->flags & IOMMU_MAP_ENTRY_PLACE) == 0) {
		if ((flags & IOMMU_MF_RMRR) == 0 ||
		    (prev->flags & IOMMU_MAP_ENTRY_RMRR) == 0)
			return (EBUSY);
		entry->start = prev->end;
	}
	if (next->start < entry->end &&
	    (next->flags & IOMMU_MAP_ENTRY_PLACE) == 0) {
		if ((flags & IOMMU_MF_RMRR) == 0 ||
		    (next->flags & IOMMU_MAP_ENTRY_RMRR) == 0)
			return (EBUSY);
		entry->end = next->start;
	}
	if (entry->end == entry->start)
		return (0);

	if (prev != NULL && prev->end > entry->start) {
		/* This assumes that prev is the placeholder entry. */
		iommu_gas_rb_remove(domain, prev);
		prev = NULL;
	}
	RB_INSERT_PREV(iommu_gas_entries_tree,
	    &domain->rb_root, next, entry);
	if (next->start < entry->end) {
		iommu_gas_rb_remove(domain, next);
		next = NULL;
	}

	if ((flags & IOMMU_MF_RMRR) != 0)
		entry->flags = IOMMU_MAP_ENTRY_RMRR;

#ifdef INVARIANTS
	struct iommu_map_entry *ip, *in;
	ip = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, entry);
	in = RB_NEXT(iommu_gas_entries_tree, &domain->rb_root, entry);
	KASSERT(prev == NULL || ip == prev,
	    ("RMRR %p (%jx %jx) prev %p (%jx %jx) ins prev %p (%jx %jx)",
	    entry, entry->start, entry->end, prev,
	    prev == NULL ? 0 : prev->start, prev == NULL ? 0 : prev->end,
	    ip, ip == NULL ? 0 : ip->start, ip == NULL ? 0 : ip->end));
	KASSERT(next == NULL || in == next,
	    ("RMRR %p (%jx %jx) next %p (%jx %jx) ins next %p (%jx %jx)",
	    entry, entry->start, entry->end, next,
	    next == NULL ? 0 : next->start, next == NULL ? 0 : next->end,
	    in, in == NULL ? 0 : in->start, in == NULL ? 0 : in->end));
#endif

	return (0);
}
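/*
 * Illustrative note, not taken from the original sources: as a worked
 * example of the RMRR overlap handling above, consider a hypothetical
 * request to reserve [0x1000, 0x3000) with IOMMU_MF_RMRR while an RMRR
 * entry [0x2000, 0x4000) already exists.  The overlap with the existing
 * entry is tolerated and the new entry is clipped to [0x1000, 0x2000)
 * before insertion; without IOMMU_MF_RMRR the same overlap would fail
 * with EBUSY.
 */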
void
iommu_gas_free_space(struct iommu_map_entry *entry)
{
	struct iommu_domain *domain;

	domain = entry->domain;
	KASSERT((entry->flags & (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_RMRR |
	    IOMMU_MAP_ENTRY_MAP)) == IOMMU_MAP_ENTRY_MAP,
	    ("permanent entry %p %p", domain, entry));

	IOMMU_DOMAIN_LOCK(domain);
	iommu_gas_rb_remove(domain, entry);
	entry->flags &= ~IOMMU_MAP_ENTRY_MAP;
#ifdef INVARIANTS
	if (iommu_check_free)
		iommu_gas_check_free(domain);
#endif
	IOMMU_DOMAIN_UNLOCK(domain);
}

void
iommu_gas_free_region(struct iommu_map_entry *entry)
{
	struct iommu_domain *domain;

	domain = entry->domain;
	KASSERT((entry->flags & (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_RMRR |
	    IOMMU_MAP_ENTRY_MAP)) == IOMMU_MAP_ENTRY_RMRR,
	    ("non-RMRR entry %p %p", domain, entry));

	IOMMU_DOMAIN_LOCK(domain);
	if (entry != domain->first_place &&
	    entry != domain->last_place)
		iommu_gas_rb_remove(domain, entry);
	entry->flags &= ~IOMMU_MAP_ENTRY_RMRR;
	IOMMU_DOMAIN_UNLOCK(domain);
}

static struct iommu_map_entry *
iommu_gas_remove_clip_left(struct iommu_domain *domain, iommu_gaddr_t start,
    iommu_gaddr_t end, struct iommu_map_entry **r)
{
	struct iommu_map_entry *entry, *res, fentry;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);
	MPASS(start <= end);
	MPASS(end <= domain->end);

	/*
	 * Find an entry which contains the supplied guest's address
	 * start, or the first entry after the start.  Since we
	 * asserted that start is below domain end, entry should
	 * exist.  Then clip it if needed.
	 */
	bzero(&fentry, sizeof(fentry));
	fentry.start = start + 1;
	fentry.end = start + 1;
	fentry.flags = IOMMU_MAP_ENTRY_FAKE;
	entry = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, &fentry);

	if (entry->start >= start ||
	    (entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
		return (entry);

	res = *r;
	*r = NULL;
	*res = *entry;
	res->start = entry->end = start;
	RB_UPDATE_AUGMENT(entry, rb_entry);
	RB_INSERT_NEXT(iommu_gas_entries_tree,
	    &domain->rb_root, entry, res);
	return (res);
}

static bool
iommu_gas_remove_clip_right(struct iommu_domain *domain,
    iommu_gaddr_t end, struct iommu_map_entry *entry,
    struct iommu_map_entry *r)
{
	if (entry->start >= end || (entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
		return (false);

	*r = *entry;
	r->end = entry->start = end;
	RB_UPDATE_AUGMENT(entry, rb_entry);
	RB_INSERT_PREV(iommu_gas_entries_tree,
	    &domain->rb_root, entry, r);
	return (true);
}

static void
iommu_gas_remove_unmap(struct iommu_domain *domain,
    struct iommu_map_entry *entry, struct iommu_map_entries_tailq *gcp)
{
	IOMMU_DOMAIN_ASSERT_LOCKED(domain);

	if ((entry->flags & (IOMMU_MAP_ENTRY_UNMAPPED |
	    IOMMU_MAP_ENTRY_RMRR |
	    IOMMU_MAP_ENTRY_REMOVING)) != 0)
		return;
	MPASS((entry->flags & IOMMU_MAP_ENTRY_PLACE) == 0);
	entry->flags |= IOMMU_MAP_ENTRY_REMOVING;
	TAILQ_INSERT_TAIL(gcp, entry, dmamap_link);
}
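/*
 * Illustrative note, not taken from the original sources: r1 and r2 are
 * spare entries preallocated by iommu_gas_remove_init() so that clipping
 * never has to allocate while the domain lock is held.  As a hypothetical
 * example, removing [0x5000, 0x8000) from a domain with a mapped entry
 * [0x4000, 0x7000) makes iommu_gas_remove_clip_left() split that entry at
 * 0x5000: [0x4000, 0x5000) stays in the tree, while the new [0x5000,
 * 0x7000) piece is queued by iommu_gas_remove_unmap() for unmapping and
 * deferred freeing.
 */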
static void
iommu_gas_remove_locked(struct iommu_domain *domain,
    iommu_gaddr_t start, iommu_gaddr_t size,
    struct iommu_map_entries_tailq *gc,
    struct iommu_map_entry **r1, struct iommu_map_entry **r2)
{
	struct iommu_map_entry *entry, *nentry;
	iommu_gaddr_t end;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);

	end = start + size;

	nentry = iommu_gas_remove_clip_left(domain, start, end, r1);
	RB_FOREACH_FROM(entry, iommu_gas_entries_tree, nentry) {
		if (entry->start >= end)
			break;
		KASSERT(start <= entry->start,
		    ("iommu_gas_remove entry (%#jx, %#jx) start %#jx",
		    entry->start, entry->end, start));
		iommu_gas_remove_unmap(domain, entry, gc);
	}
	if (iommu_gas_remove_clip_right(domain, end, entry, *r2)) {
		iommu_gas_remove_unmap(domain, *r2, gc);
		*r2 = NULL;
	}

#ifdef INVARIANTS
	RB_FOREACH(entry, iommu_gas_entries_tree, &domain->rb_root) {
		if ((entry->flags & (IOMMU_MAP_ENTRY_RMRR |
		    IOMMU_MAP_ENTRY_PLACE)) != 0)
			continue;
		KASSERT(entry->end <= start || entry->start >= end,
		    ("iommu_gas_remove leftover entry (%#jx, %#jx) range "
		    "(%#jx, %#jx)",
		    entry->start, entry->end, start, end));
	}
#endif
}

static void
iommu_gas_remove_init(struct iommu_domain *domain,
    struct iommu_map_entries_tailq *gc, struct iommu_map_entry **r1,
    struct iommu_map_entry **r2)
{
	TAILQ_INIT(gc);
	*r1 = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
	*r2 = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
}

static void
iommu_gas_remove_cleanup(struct iommu_domain *domain,
    struct iommu_map_entries_tailq *gc, struct iommu_map_entry **r1,
    struct iommu_map_entry **r2)
{
	if (*r1 != NULL) {
		iommu_gas_free_entry(*r1);
		*r1 = NULL;
	}
	if (*r2 != NULL) {
		iommu_gas_free_entry(*r2);
		*r2 = NULL;
	}
	iommu_domain_unload(domain, gc, true);
}

/*
 * Remove specified range from the GAS of the domain.  Note that the
 * removal is not guaranteed to occur upon the function return, it
 * might be finalized some time after, when hardware reports that
 * (queued) IOTLB invalidation was performed.
 */
void
iommu_gas_remove(struct iommu_domain *domain, iommu_gaddr_t start,
    iommu_gaddr_t size)
{
	struct iommu_map_entry *r1, *r2;
	struct iommu_map_entries_tailq gc;

	iommu_gas_remove_init(domain, &gc, &r1, &r2);
	IOMMU_DOMAIN_LOCK(domain);
	iommu_gas_remove_locked(domain, start, size, &gc, &r1, &r2);
	IOMMU_DOMAIN_UNLOCK(domain);
	iommu_gas_remove_cleanup(domain, &gc, &r1, &r2);
}
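/*
 * Illustrative note, not taken from the original sources: a typical
 * consumer such as the busdma backend, or iommu_map_msi() below, is
 * expected to call this roughly as
 *
 *	error = iommu_gas_map(domain, &ctx->tag->common, size, offset,
 *	    IOMMU_MAP_ENTRY_READ | IOMMU_MAP_ENTRY_WRITE,
 *	    IOMMU_MF_CANWAIT, ma, &entry);
 *
 * which both reserves guest address space and programs the page tables
 * through domain->ops->map().  The returned entry is later retired through
 * the unload path, which ends up in iommu_gas_free_space().
 */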
int
iommu_gas_map(struct iommu_domain *domain,
    const struct bus_dma_tag_common *common, iommu_gaddr_t size, int offset,
    u_int eflags, u_int flags, vm_page_t *ma, struct iommu_map_entry **res)
{
	struct iommu_gas_match_args a;
	struct iommu_map_entry *entry;
	int error;

	KASSERT((flags & ~(IOMMU_MF_CANWAIT | IOMMU_MF_CANSPLIT)) == 0,
	    ("invalid flags 0x%x", flags));

	a.size = size;
	a.offset = offset;
	a.common = common;
	a.gas_flags = flags;
	entry = iommu_gas_alloc_entry(domain,
	    (flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0);
	if (entry == NULL)
		return (ENOMEM);
	a.entry = entry;
	IOMMU_DOMAIN_LOCK(domain);
	error = iommu_gas_find_space(domain, &a);
	if (error == ENOMEM) {
		IOMMU_DOMAIN_UNLOCK(domain);
		iommu_gas_free_entry(entry);
		return (error);
	}
#ifdef INVARIANTS
	if (iommu_check_free)
		iommu_gas_check_free(domain);
#endif
	KASSERT(error == 0,
	    ("unexpected error %d from iommu_gas_find_entry", error));
	KASSERT(entry->end < domain->end, ("allocated GPA %jx, max GPA %jx",
	    (uintmax_t)entry->end, (uintmax_t)domain->end));
	entry->flags |= eflags;
	IOMMU_DOMAIN_UNLOCK(domain);

	error = domain->ops->map(domain, entry->start,
	    entry->end - entry->start, ma, eflags,
	    ((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0));
	if (error == ENOMEM) {
		iommu_domain_unload_entry(entry, true,
		    (flags & IOMMU_MF_CANWAIT) != 0);
		return (error);
	}
	KASSERT(error == 0,
	    ("unexpected error %d from domain_map_buf", error));

	*res = entry;
	return (0);
}

int
iommu_gas_map_region(struct iommu_domain *domain, struct iommu_map_entry *entry,
    u_int eflags, u_int flags, vm_page_t *ma)
{
	iommu_gaddr_t start;
	int error;

	KASSERT(entry->domain == domain,
	    ("mismatched domain %p entry %p entry->domain %p", domain,
	    entry, entry->domain));
	KASSERT(entry->flags == 0, ("used RMRR entry %p %p %x", domain,
	    entry, entry->flags));
	KASSERT((flags & ~(IOMMU_MF_CANWAIT | IOMMU_MF_RMRR)) == 0,
	    ("invalid flags 0x%x", flags));

	start = entry->start;
	IOMMU_DOMAIN_LOCK(domain);
	error = iommu_gas_alloc_region(domain, entry, flags);
	if (error != 0) {
		IOMMU_DOMAIN_UNLOCK(domain);
		return (error);
	}
	entry->flags |= eflags;
	IOMMU_DOMAIN_UNLOCK(domain);
	if (entry->end == entry->start)
		return (0);

	error = domain->ops->map(domain, entry->start,
	    entry->end - entry->start, ma + OFF_TO_IDX(start - entry->start),
	    eflags, ((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0));
	if (error == ENOMEM) {
		iommu_domain_unload_entry(entry, false,
		    (flags & IOMMU_MF_CANWAIT) != 0);
		return (error);
	}
	KASSERT(error == 0,
	    ("unexpected error %d from domain_map_buf", error));

	return (0);
}

static int
iommu_gas_reserve_region_locked(struct iommu_domain *domain,
    iommu_gaddr_t start, iommu_gaddr_t end, struct iommu_map_entry *entry)
{
	int error;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);

	entry->start = start;
	entry->end = end;
	error = iommu_gas_alloc_region(domain, entry, IOMMU_MF_CANWAIT);
	if (error == 0)
		entry->flags |= IOMMU_MAP_ENTRY_UNMAPPED;
	return (error);
}

int
iommu_gas_reserve_region(struct iommu_domain *domain, iommu_gaddr_t start,
    iommu_gaddr_t end, struct iommu_map_entry **entry0)
{
	struct iommu_map_entry *entry;
	int error;

	entry = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
	IOMMU_DOMAIN_LOCK(domain);
	error = iommu_gas_reserve_region_locked(domain, start, end, entry);
	IOMMU_DOMAIN_UNLOCK(domain);
	if (error != 0)
		iommu_gas_free_entry(entry);
	else if (entry0 != NULL)
		*entry0 = entry;
	return (error);
}

/*
 * As in iommu_gas_reserve_region, reserve [start, end), but allow for existing
 * entries.
 */
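/*
 * Illustrative note, not taken from the original sources: reserving a
 * hypothetical range [0x100000, 0x500000) that already contains an entry
 * [0x200000, 0x300000) is meant to leave the existing entry in place and
 * reserve only the free space around it, whereas iommu_gas_reserve_region()
 * would fail such an overlap with EBUSY.  Reserved entries are marked
 * IOMMU_MAP_ENTRY_UNMAPPED, so no page-table update is performed for them.
 */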
int
iommu_gas_reserve_region_extend(struct iommu_domain *domain,
    iommu_gaddr_t start, iommu_gaddr_t end)
{
	struct iommu_map_entry *entry, *next, *prev, key = {};
	iommu_gaddr_t entry_start, entry_end;
	int error;

	error = 0;
	entry = NULL;
	end = ummin(end, domain->end);
	while (start < end) {
		/* Preallocate an entry. */
		if (entry == NULL)
			entry = iommu_gas_alloc_entry(domain,
			    IOMMU_PGF_WAITOK);
		/* Calculate the free region from here to the next entry. */
		key.start = key.end = start;
		IOMMU_DOMAIN_LOCK(domain);
		next = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, &key);
		KASSERT(next != NULL, ("domain %p with end %#jx has no entry "
		    "after %#jx", domain, (uintmax_t)domain->end,
		    (uintmax_t)start));
		entry_end = ummin(end, next->start);
		prev = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, next);
		if (prev != NULL)
			entry_start = ummax(start, prev->end);
		else
			entry_start = start;
		start = next->end;
		/* Reserve the region if non-empty. */
		if (entry_start != entry_end) {
			error = iommu_gas_reserve_region_locked(domain,
			    entry_start, entry_end, entry);
			if (error != 0) {
				IOMMU_DOMAIN_UNLOCK(domain);
				break;
			}
			entry = NULL;
		}
		IOMMU_DOMAIN_UNLOCK(domain);
	}
	/* Release a preallocated entry if it was not used. */
	if (entry != NULL)
		iommu_gas_free_entry(entry);
	return (error);
}

void
iommu_unmap_msi(struct iommu_ctx *ctx)
{
	struct iommu_map_entry *entry;
	struct iommu_domain *domain;

	domain = ctx->domain;
	entry = domain->msi_entry;
	if (entry == NULL)
		return;

	domain->ops->unmap(domain, entry->start, entry->end -
	    entry->start, IOMMU_PGF_WAITOK);

	iommu_gas_free_space(entry);

	iommu_gas_free_entry(entry);

	domain->msi_entry = NULL;
	domain->msi_base = 0;
	domain->msi_phys = 0;
}
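/*
 * Illustrative note, not taken from the original sources: a domain maps at
 * most one MSI page.  msi_phys records the physical address of that page
 * and msi_base the guest address it was mapped at, so interrupt controller
 * code can later use iommu_translate_msi() to rewrite an MSI doorbell
 * address into the device-visible one.  For example, with hypothetical
 * values msi_phys = 0x300b0000 and msi_base = 0x9f000000, a doorbell at
 * physical 0x300b0040 translates to 0x9f000040.
 */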
int
iommu_map_msi(struct iommu_ctx *ctx, iommu_gaddr_t size, int offset,
    u_int eflags, u_int flags, vm_page_t *ma)
{
	struct iommu_domain *domain;
	struct iommu_map_entry *entry;
	int error;

	error = 0;
	domain = ctx->domain;

	/* Check if there is already an MSI page allocated */
	IOMMU_DOMAIN_LOCK(domain);
	entry = domain->msi_entry;
	IOMMU_DOMAIN_UNLOCK(domain);

	if (entry == NULL) {
		error = iommu_gas_map(domain, &ctx->tag->common, size, offset,
		    eflags, flags, ma, &entry);
		IOMMU_DOMAIN_LOCK(domain);
		if (error == 0) {
			if (domain->msi_entry == NULL) {
				MPASS(domain->msi_base == 0);
				MPASS(domain->msi_phys == 0);

				domain->msi_entry = entry;
				domain->msi_base = entry->start;
				domain->msi_phys = VM_PAGE_TO_PHYS(ma[0]);
			} else {
				/*
				 * We lost the race and already have an
				 * MSI page allocated.  Free the unneeded entry.
				 */
				iommu_gas_free_entry(entry);
			}
		} else if (domain->msi_entry != NULL) {
			/*
			 * The allocation failed, but another succeeded.
			 * Return success as there is a valid MSI page.
			 */
			error = 0;
		}
		IOMMU_DOMAIN_UNLOCK(domain);
	}

	return (error);
}

void
iommu_translate_msi(struct iommu_domain *domain, uint64_t *addr)
{

	*addr = (*addr - domain->msi_phys) + domain->msi_base;

	KASSERT(*addr >= domain->msi_entry->start,
	    ("%s: Address is below the MSI entry start address (%jx < %jx)",
	    __func__, (uintmax_t)*addr, (uintmax_t)domain->msi_entry->start));

	KASSERT(*addr + sizeof(*addr) <= domain->msi_entry->end,
	    ("%s: Address is above the MSI entry end address (%jx < %jx)",
	    __func__, (uintmax_t)*addr, (uintmax_t)domain->msi_entry->end));
}

SYSCTL_NODE(_hw, OID_AUTO, iommu, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "");

#ifdef INVARIANTS
SYSCTL_INT(_hw_iommu, OID_AUTO, check_free, CTLFLAG_RWTUN,
    &iommu_check_free, 0,
    "Check the GPA RBtree for free_down and free_after validity");
#endif

#include "opt_ddb.h"
#ifdef DDB

#include <ddb/ddb.h>

static void
iommu_debug_dump_gas(struct iommu_domain *domain)
{
	struct iommu_map_entry *entry;

	db_printf("iommu_domain %p tree %p iommu %p fl %#x\n", domain,
	    &domain->rb_root, domain->iommu, domain->flags);
	db_printf("iommu_domain %p tree %p\n", domain, &domain->rb_root);
	RB_FOREACH(entry, iommu_gas_entries_tree, &domain->rb_root) {
		db_printf(
	    " e %p [%#jx %#jx] fl %#x first %#jx last %#jx free_down %#jx",
		    entry, (uintmax_t)entry->start, (uintmax_t)entry->end,
		    entry->flags,
		    (uintmax_t)entry->first, (uintmax_t)entry->last,
		    (uintmax_t)entry->free_down);
		if (entry == domain->start_gap)
			db_printf(" start_gap");
		if (entry == domain->first_place)
			db_printf(" first_place");
		if (entry == domain->last_place)
			db_printf(" last_place");
		db_printf("\n");
	}
}

DB_SHOW_COMMAND(iommu_domain, iommu_domain_show)
{
	struct iommu_domain *domain;

	if (!have_addr) {
		db_printf("show iommu_domain addr\n");
		return;
	}

	domain = (void *)addr;
	iommu_debug_dump_gas(domain);
}

#endif