/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2013 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#define	RB_AUGMENT_CHECK(entry) iommu_gas_augment_entry(entry)

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/rman.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/uma.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/iommu/iommu.h>
#include <dev/iommu/iommu_gas.h>
#include <dev/iommu/iommu_msi.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <machine/iommu.h>
#include <dev/iommu/busdma_iommu.h>

/*
 * Guest Address Space management.
 */

static uma_zone_t iommu_map_entry_zone;

#ifdef INVARIANTS
static int iommu_check_free;
#endif

static void
intel_gas_init(void)
{

	iommu_map_entry_zone = uma_zcreate("IOMMU_MAP_ENTRY",
	    sizeof(struct iommu_map_entry), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NODUMP);
}
SYSINIT(intel_gas, SI_SUB_DRIVERS, SI_ORDER_FIRST, intel_gas_init, NULL);

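/*
 * Allocate a map entry from the dedicated UMA zone.  When a domain is
 * supplied, its entries_cnt is bumped so that domain teardown can assert
 * that no entries leaked; iommu_gas_free_entry() drops the count again.
 */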
struct iommu_map_entry *
iommu_gas_alloc_entry(struct iommu_domain *domain, u_int flags)
{
	struct iommu_map_entry *res;

	KASSERT((flags & ~(IOMMU_PGF_WAITOK)) == 0,
	    ("unsupported flags %x", flags));

	res = uma_zalloc(iommu_map_entry_zone, ((flags & IOMMU_PGF_WAITOK) !=
	    0 ? M_WAITOK : M_NOWAIT) | M_ZERO);
	if (res != NULL && domain != NULL) {
		res->domain = domain;
		atomic_add_int(&domain->entries_cnt, 1);
	}
	return (res);
}

void
iommu_gas_free_entry(struct iommu_map_entry *entry)
{
	struct iommu_domain *domain;

	domain = entry->domain;
	if (domain != NULL)
		atomic_subtract_int(&domain->entries_cnt, 1);
	uma_zfree(iommu_map_entry_zone, entry);
}

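/*
 * Compare entries by their end addresses; this is the ordering predicate
 * for the per-domain RB tree of map entries.
 */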
static int
iommu_gas_cmp_entries(struct iommu_map_entry *a, struct iommu_map_entry *b)
{

	/* The last entry has zero size, so <=. */
	KASSERT(a->start <= a->end, ("inverted entry %p (%jx, %jx)",
	    a, (uintmax_t)a->start, (uintmax_t)a->end));
	KASSERT(b->start <= b->end, ("inverted entry %p (%jx, %jx)",
	    b, (uintmax_t)b->start, (uintmax_t)b->end));
	KASSERT(a->end <= b->start || b->end <= a->start ||
	    a->end == a->start || b->end == b->start,
	    ("overlapping entries %p (%jx, %jx) %p (%jx, %jx)",
	    a, (uintmax_t)a->start, (uintmax_t)a->end,
	    b, (uintmax_t)b->start, (uintmax_t)b->end));

	if (a->end < b->end)
		return (-1);
	else if (b->end < a->end)
		return (1);
	return (0);
}

/*
 * Update augmentation data based on data from children.
 * Return true if and only if the update changes the augmentation data.
 */
static bool
iommu_gas_augment_entry(struct iommu_map_entry *entry)
{
	struct iommu_map_entry *child;
	iommu_gaddr_t bound, delta, free_down;

	free_down = 0;
	bound = entry->start;
	if ((child = RB_LEFT(entry, rb_entry)) != NULL) {
		free_down = MAX(child->free_down, bound - child->last);
		bound = child->first;
	}
	delta = bound - entry->first;
	entry->first = bound;
	bound = entry->end;
	if ((child = RB_RIGHT(entry, rb_entry)) != NULL) {
		free_down = MAX(free_down, child->free_down);
		free_down = MAX(free_down, child->first - bound);
		bound = child->last;
	}
	delta += entry->last - bound;
	if (delta == 0)
		delta = entry->free_down - free_down;
	entry->last = bound;
	entry->free_down = free_down;

	/*
	 * Return true either if the value of last-first changed,
	 * or if free_down changed.
	 */
	return (delta != 0);
}

RB_GENERATE(iommu_gas_entries_tree, iommu_map_entry, rb_entry,
    iommu_gas_cmp_entries);

#ifdef INVARIANTS
static void
iommu_gas_check_free(struct iommu_domain *domain)
{
	struct iommu_map_entry *entry, *l, *r;
	iommu_gaddr_t v;

	RB_FOREACH(entry, iommu_gas_entries_tree, &domain->rb_root) {
		KASSERT(domain == entry->domain,
		    ("mismatched free domain %p entry %p entry->domain %p",
		    domain, entry, entry->domain));
		l = RB_LEFT(entry, rb_entry);
		r = RB_RIGHT(entry, rb_entry);
		v = 0;
		if (l != NULL) {
			v = MAX(v, l->free_down);
			v = MAX(v, entry->start - l->last);
		}
		if (r != NULL) {
			v = MAX(v, r->free_down);
			v = MAX(v, r->first - entry->end);
		}
		MPASS(entry->free_down == v);
	}
}
#endif

static bool
iommu_gas_rb_insert(struct iommu_domain *domain, struct iommu_map_entry *entry)
{
	struct iommu_map_entry *found;

	found = RB_INSERT(iommu_gas_entries_tree, &domain->rb_root, entry);
	return (found == NULL);
}

static void
iommu_gas_rb_remove(struct iommu_domain *domain, struct iommu_map_entry *entry)
{

	RB_REMOVE(iommu_gas_entries_tree, &domain->rb_root, entry);
}

struct iommu_domain *
iommu_get_ctx_domain(struct iommu_ctx *ctx)
{

	return (ctx->domain);
}

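/*
 * Set up the GAS of a newly created domain: install the fixed "begin" and
 * "end" placeholder entries that delimit the allocatable address range.
 */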
void
iommu_gas_init_domain(struct iommu_domain *domain)
{
	struct iommu_map_entry *begin, *end;

	begin = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
	end = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);

	IOMMU_DOMAIN_LOCK(domain);
	KASSERT(domain->entries_cnt == 2, ("dirty domain %p", domain));
	KASSERT(RB_EMPTY(&domain->rb_root),
	    ("non-empty entries %p", domain));

	/*
	 * The end entry must be inserted first because it has a zero-length
	 * gap between start and end.  Initially, all augmentation data for a
	 * new entry is zero.  Function iommu_gas_augment_entry will compute
	 * no change in the value of (start-end) and no change in the value
	 * of free_down, so it will return false to suggest that nothing
	 * changed in the entry.  Thus, if the end entry were inserted
	 * second, its augmentation update would stop early and the
	 * information would not be propagated to the begin entry at the
	 * tree root.  So the end entry is inserted first.
	 */
	end->start = domain->end;
	end->end = domain->end;
	end->flags = IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED;
	iommu_gas_rb_insert(domain, end);

	begin->start = 0;
	begin->end = IOMMU_PAGE_SIZE;
	begin->flags = IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED;
	iommu_gas_rb_insert(domain, begin);

	domain->first_place = begin;
	domain->last_place = end;
	domain->flags |= IOMMU_DOMAIN_GAS_INITED;
	IOMMU_DOMAIN_UNLOCK(domain);
}

void
iommu_gas_fini_domain(struct iommu_domain *domain)
{
	struct iommu_map_entry *entry, *entry1;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);
	KASSERT(domain->entries_cnt == 2,
	    ("domain still in use %p", domain));

	entry = RB_MIN(iommu_gas_entries_tree, &domain->rb_root);
	KASSERT(entry->start == 0, ("start entry start %p", domain));
	KASSERT(entry->end == IOMMU_PAGE_SIZE, ("start entry end %p", domain));
	KASSERT(entry->flags ==
	    (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED),
	    ("start entry flags %p", domain));
	RB_REMOVE(iommu_gas_entries_tree, &domain->rb_root, entry);
	iommu_gas_free_entry(entry);

	entry = RB_MAX(iommu_gas_entries_tree, &domain->rb_root);
	KASSERT(entry->start == domain->end, ("end entry start %p", domain));
	KASSERT(entry->end == domain->end, ("end entry end %p", domain));
	KASSERT(entry->flags ==
	    (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED),
	    ("end entry flags %p", domain));
	RB_REMOVE(iommu_gas_entries_tree, &domain->rb_root, entry);
	iommu_gas_free_entry(entry);

	RB_FOREACH_SAFE(entry, iommu_gas_entries_tree, &domain->rb_root,
	    entry1) {
		KASSERT((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0,
		    ("non-RMRR entry left %p", domain));
		RB_REMOVE(iommu_gas_entries_tree, &domain->rb_root,
		    entry);
		iommu_gas_free_entry(entry);
	}
}

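/*
 * State for a single free-space search: the requested size and offset, the
 * tag carrying the alignment, boundary, and address constraints, and the
 * preallocated entry to be filled on success.
 */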
struct iommu_gas_match_args {
	struct iommu_domain *domain;
	iommu_gaddr_t size;
	int offset;
	const struct bus_dma_tag_common *common;
	u_int gas_flags;
	struct iommu_map_entry *entry;
};

/*
 * The interval [beg, end) is a free interval between two iommu_map_entries.
 * Addresses can be allocated only in the range [lbound, ubound).  Try to
 * allocate space in the free interval, subject to the conditions expressed
 * by a, and return 'true' if and only if the allocation attempt succeeds.
 */
static bool
iommu_gas_match_one(struct iommu_gas_match_args *a, iommu_gaddr_t beg,
    iommu_gaddr_t end, iommu_gaddr_t lbound, iommu_gaddr_t ubound)
{
	struct iommu_map_entry *entry;
	iommu_gaddr_t first, size, start;
	bool found __diagused;
	int offset;

	/*
	 * The prev->end is always aligned to the page size, which makes
	 * the entry->start page-aligned too.
	 *
	 * Create IOMMU_PAGE_SIZE gaps before and after the new entry
	 * to ensure that out-of-bounds accesses fault.
	 */
	beg = MAX(beg + IOMMU_PAGE_SIZE, lbound);
	start = roundup2(beg, a->common->alignment);
	if (start < beg)
		return (false);
	end = MIN(end - IOMMU_PAGE_SIZE, ubound);
	offset = a->offset;
	size = a->size;
	if (start + offset + size > end)
		return (false);

	/* Check for and try to skip past boundary crossing. */
	if (!vm_addr_bound_ok(start + offset, size, a->common->boundary)) {
		/*
		 * The start + offset to start + offset + size region crosses
		 * the boundary.  Check if there is enough space after the
		 * next boundary after the beg.
		 */
		first = start;
		beg = roundup2(start + offset + 1, a->common->boundary);
		start = roundup2(beg, a->common->alignment);

		if (start + offset + size > end ||
		    !vm_addr_bound_ok(start + offset, size,
		    a->common->boundary)) {
			/*
			 * Not enough space to align at the requested
			 * boundary, or boundary is smaller than the size,
			 * but splitting is allowed.  We already checked that
			 * start + size does not overlap ubound.
			 *
			 * XXXKIB. It is possible that beg is exactly at the
			 * start of the next entry, then we do not have gap.
			 * Ignore for now.
			 */
			if ((a->gas_flags & IOMMU_MF_CANSPLIT) == 0)
				return (false);
			size = beg - first - offset;
			start = first;
		}
	}
	entry = a->entry;
	entry->start = start;
	entry->end = start + roundup2(size + offset, IOMMU_PAGE_SIZE);
	entry->flags = IOMMU_MAP_ENTRY_MAP;
	found = iommu_gas_rb_insert(a->domain, entry);
	KASSERT(found, ("found dup %p start %jx size %jx",
	    a->domain, (uintmax_t)start, (uintmax_t)size));
	return (true);
}

/* Find the next entry that might abut a big-enough range. */
static struct iommu_map_entry *
iommu_gas_next(struct iommu_map_entry *curr, iommu_gaddr_t min_free)
{
	struct iommu_map_entry *next;

	if ((next = RB_RIGHT(curr, rb_entry)) != NULL &&
	    next->free_down >= min_free) {
		/* Find next entry in right subtree. */
		do
			curr = next;
		while ((next = RB_LEFT(curr, rb_entry)) != NULL &&
		    next->free_down >= min_free);
	} else {
		/* Find next entry in a left-parent ancestor. */
		while ((next = RB_PARENT(curr, rb_entry)) != NULL &&
		    curr == RB_RIGHT(next, rb_entry))
			curr = next;
		curr = next;
	}
	return (curr);
}

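/*
 * Find a free range for the request described by a, searching first below
 * lowaddr and then above highaddr.  On success the preallocated a->entry is
 * inserted into the tree and 0 is returned; otherwise ENOMEM is returned.
 */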
static int
iommu_gas_find_space(struct iommu_gas_match_args *a)
{
	struct iommu_domain *domain;
	struct iommu_map_entry *curr, *first;
	iommu_gaddr_t addr, min_free;

	IOMMU_DOMAIN_ASSERT_LOCKED(a->domain);
	KASSERT(a->entry->flags == 0,
	    ("dirty entry %p %p", a->domain, a->entry));

	/*
	 * If the subtree doesn't have free space for the requested allocation
	 * plus two guard pages, skip it.
	 */
	min_free = 2 * IOMMU_PAGE_SIZE +
	    roundup2(a->size + a->offset, IOMMU_PAGE_SIZE);

	/*
	 * Find the first entry in the lower region that could abut a
	 * big-enough range.
	 */
	curr = RB_ROOT(&a->domain->rb_root);
	first = NULL;
	while (curr != NULL && curr->free_down >= min_free) {
		first = curr;
		curr = RB_LEFT(curr, rb_entry);
	}

	/*
	 * Walk the big-enough ranges until one satisfies the alignment
	 * requirements, or violates the lowaddr requirement.
	 */
	addr = a->common->lowaddr + 1;
	for (curr = first; curr != NULL;
	    curr = iommu_gas_next(curr, min_free)) {
		if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, first->last, curr->start,
		    0, addr))
			return (0);
		if (curr->end >= addr) {
			/* All remaining ranges >= addr */
			break;
		}
		if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, curr->end, first->first,
		    0, addr))
			return (0);
	}

	/*
	 * To resume the search at the start of the upper region, first climb
	 * to the nearest ancestor that spans highaddr.  Then find the last
	 * entry before highaddr that could abut a big-enough range.
	 */
	addr = a->common->highaddr;
	while (curr != NULL && curr->last < addr)
		curr = RB_PARENT(curr, rb_entry);
	first = NULL;
	while (curr != NULL && curr->free_down >= min_free) {
		if (addr < curr->end)
			curr = RB_LEFT(curr, rb_entry);
		else {
			first = curr;
			curr = RB_RIGHT(curr, rb_entry);
		}
	}

	/*
	 * Walk the remaining big-enough ranges until one satisfies the
	 * alignment requirements.
	 */
	domain = a->domain;
	for (curr = first; curr != NULL;
	    curr = iommu_gas_next(curr, min_free)) {
		if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, first->last, curr->start,
		    addr + 1, domain->end))
			return (0);
		if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, curr->end, first->first,
		    addr + 1, domain->end))
			return (0);
	}

	return (ENOMEM);
}

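/*
 * Insert an entry at the fixed address range requested by the caller, as
 * needed for RMRR and other reserved regions.  Conflicting neighbours cause
 * EBUSY unless both the request and the neighbour are RMRR, in which case
 * the new entry is clipped to fit.
 */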
static int
iommu_gas_alloc_region(struct iommu_domain *domain, struct iommu_map_entry *entry,
    u_int flags)
{
	struct iommu_map_entry *next, *prev;
	bool found __diagused;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);

	if ((entry->start & IOMMU_PAGE_MASK) != 0 ||
	    (entry->end & IOMMU_PAGE_MASK) != 0)
		return (EINVAL);
	if (entry->start >= entry->end)
		return (EINVAL);
	if (entry->end >= domain->end)
		return (EINVAL);

	next = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, entry);
	KASSERT(next != NULL, ("next must be non-null %p %jx", domain,
	    (uintmax_t)entry->start));
	prev = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, next);
	/* prev could be NULL */

	/*
	 * Adapt to broken BIOSes which specify overlapping RMRR
	 * entries.
	 *
	 * XXXKIB: this does not handle a case when prev or next
	 * entries are completely covered by the current one, which
	 * extends both ways.
	 */
	if (prev != NULL && prev->end > entry->start &&
	    (prev->flags & IOMMU_MAP_ENTRY_PLACE) == 0) {
		if ((flags & IOMMU_MF_RMRR) == 0 ||
		    (prev->flags & IOMMU_MAP_ENTRY_RMRR) == 0)
			return (EBUSY);
		entry->start = prev->end;
	}
	if (next->start < entry->end &&
	    (next->flags & IOMMU_MAP_ENTRY_PLACE) == 0) {
		if ((flags & IOMMU_MF_RMRR) == 0 ||
		    (next->flags & IOMMU_MAP_ENTRY_RMRR) == 0)
			return (EBUSY);
		entry->end = next->start;
	}
	if (entry->end == entry->start)
		return (0);

	if (prev != NULL && prev->end > entry->start) {
		/* This assumes that prev is the placeholder entry. */
		iommu_gas_rb_remove(domain, prev);
		prev = NULL;
	}
	if (next->start < entry->end) {
		iommu_gas_rb_remove(domain, next);
		next = NULL;
	}

	found = iommu_gas_rb_insert(domain, entry);
	KASSERT(found, ("found RMRR dup %p start %jx end %jx",
	    domain, (uintmax_t)entry->start, (uintmax_t)entry->end));
	if ((flags & IOMMU_MF_RMRR) != 0)
		entry->flags = IOMMU_MAP_ENTRY_RMRR;

#ifdef INVARIANTS
	struct iommu_map_entry *ip, *in;
	ip = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, entry);
	in = RB_NEXT(iommu_gas_entries_tree, &domain->rb_root, entry);
	KASSERT(prev == NULL || ip == prev,
	    ("RMRR %p (%jx %jx) prev %p (%jx %jx) ins prev %p (%jx %jx)",
	    entry, entry->start, entry->end, prev,
	    prev == NULL ? 0 : prev->start, prev == NULL ? 0 : prev->end,
	    ip, ip == NULL ? 0 : ip->start, ip == NULL ? 0 : ip->end));
	KASSERT(next == NULL || in == next,
	    ("RMRR %p (%jx %jx) next %p (%jx %jx) ins next %p (%jx %jx)",
	    entry, entry->start, entry->end, next,
	    next == NULL ? 0 : next->start, next == NULL ? 0 : next->end,
	    in, in == NULL ? 0 : in->start, in == NULL ? 0 : in->end));
#endif

	return (0);
}

void
iommu_gas_free_space(struct iommu_map_entry *entry)
{
	struct iommu_domain *domain;

	domain = entry->domain;
	KASSERT((entry->flags & (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_RMRR |
	    IOMMU_MAP_ENTRY_MAP)) == IOMMU_MAP_ENTRY_MAP,
	    ("permanent entry %p %p", domain, entry));

	IOMMU_DOMAIN_LOCK(domain);
	iommu_gas_rb_remove(domain, entry);
	entry->flags &= ~IOMMU_MAP_ENTRY_MAP;
#ifdef INVARIANTS
	if (iommu_check_free)
		iommu_gas_check_free(domain);
#endif
	IOMMU_DOMAIN_UNLOCK(domain);
}

void
iommu_gas_free_region(struct iommu_map_entry *entry)
{
	struct iommu_domain *domain;

	domain = entry->domain;
	KASSERT((entry->flags & (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_RMRR |
	    IOMMU_MAP_ENTRY_MAP)) == IOMMU_MAP_ENTRY_RMRR,
	    ("non-RMRR entry %p %p", domain, entry));

	IOMMU_DOMAIN_LOCK(domain);
	if (entry != domain->first_place &&
	    entry != domain->last_place)
		iommu_gas_rb_remove(domain, entry);
	entry->flags &= ~IOMMU_MAP_ENTRY_RMRR;
	IOMMU_DOMAIN_UNLOCK(domain);
}

static struct iommu_map_entry *
iommu_gas_remove_clip_left(struct iommu_domain *domain, iommu_gaddr_t start,
    iommu_gaddr_t end, struct iommu_map_entry **r)
{
	struct iommu_map_entry *entry, *res, fentry;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);
	MPASS(start <= end);
	MPASS(end <= domain->end);

	/*
	 * Find an entry which contains the supplied guest address start,
	 * or the first entry after the start.  Since we asserted that start
	 * is below the domain end, the entry should exist.  Then clip it if
	 * needed.
	 */
	fentry.start = start + 1;
	fentry.end = start + 1;
	entry = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, &fentry);

	if (entry->start >= start ||
	    (entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
		return (entry);

	res = *r;
	*r = NULL;
	*res = *entry;
	res->start = entry->end = start;
	RB_UPDATE_AUGMENT(entry, rb_entry);
	iommu_gas_rb_insert(domain, res);
	return (res);
}

static bool
iommu_gas_remove_clip_right(struct iommu_domain *domain,
    iommu_gaddr_t end, struct iommu_map_entry *entry,
    struct iommu_map_entry *r)
{
	if (entry->start >= end || (entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
		return (false);

	*r = *entry;
	r->end = entry->start = end;
	RB_UPDATE_AUGMENT(entry, rb_entry);
	iommu_gas_rb_insert(domain, r);
	return (true);
}

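/*
 * Queue an entry for unmapping as part of a range removal.  Entries that
 * are already unmapped or already queued for removal are skipped.
 */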
static void
iommu_gas_remove_unmap(struct iommu_domain *domain,
    struct iommu_map_entry *entry, struct iommu_map_entries_tailq *gcp)
{
	IOMMU_DOMAIN_ASSERT_LOCKED(domain);

	if ((entry->flags & (IOMMU_MAP_ENTRY_UNMAPPED |
	    IOMMU_MAP_ENTRY_REMOVING)) != 0)
		return;
	MPASS((entry->flags & IOMMU_MAP_ENTRY_PLACE) == 0);
	entry->flags |= IOMMU_MAP_ENTRY_REMOVING;
	TAILQ_INSERT_TAIL(gcp, entry, dmamap_link);
}

/*
 * Remove the specified range from the GAS of the domain.  Note that the
 * removal is not guaranteed to occur upon the function return; it might
 * be finalized some time later, when the hardware reports that the
 * (queued) IOTLB invalidation was performed.
 */
void
iommu_gas_remove(struct iommu_domain *domain, iommu_gaddr_t start,
    iommu_gaddr_t size)
{
	struct iommu_map_entry *entry, *nentry, *r1, *r2;
	struct iommu_map_entries_tailq gc;
	iommu_gaddr_t end;

	end = start + size;
	r1 = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
	r2 = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
	TAILQ_INIT(&gc);

	IOMMU_DOMAIN_LOCK(domain);

	nentry = iommu_gas_remove_clip_left(domain, start, end, &r1);
	RB_FOREACH_FROM(entry, iommu_gas_entries_tree, nentry) {
		if (entry->start >= end)
			break;
		KASSERT(start <= entry->start,
		    ("iommu_gas_remove entry (%#jx, %#jx) start %#jx",
		    entry->start, entry->end, start));
		if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
			continue;
		iommu_gas_remove_unmap(domain, entry, &gc);
	}
	if (iommu_gas_remove_clip_right(domain, end, entry, r2)) {
		iommu_gas_remove_unmap(domain, r2, &gc);
		r2 = NULL;
	}

#ifdef INVARIANTS
	RB_FOREACH(entry, iommu_gas_entries_tree, &domain->rb_root) {
		if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
			continue;
		KASSERT(entry->end <= start || entry->start >= end,
		    ("iommu_gas_remove leftover entry (%#jx, %#jx) range "
		    "(%#jx, %#jx)",
		    entry->start, entry->end, start, end));
	}
#endif

	IOMMU_DOMAIN_UNLOCK(domain);
	if (r1 != NULL)
		iommu_gas_free_entry(r1);
	if (r2 != NULL)
		iommu_gas_free_entry(r2);
	iommu_domain_unload(domain, &gc, true);
}

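/*
 * Allocate a new range in the domain address space and map the pages
 * described by ma into it.  The allocation may sleep only if
 * IOMMU_MF_CANWAIT is passed in flags.
 */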
int
iommu_gas_map(struct iommu_domain *domain,
    const struct bus_dma_tag_common *common, iommu_gaddr_t size, int offset,
    u_int eflags, u_int flags, vm_page_t *ma, struct iommu_map_entry **res)
{
	struct iommu_gas_match_args a;
	struct iommu_map_entry *entry;
	int error;

	KASSERT((flags & ~(IOMMU_MF_CANWAIT | IOMMU_MF_CANSPLIT)) == 0,
	    ("invalid flags 0x%x", flags));

	a.domain = domain;
	a.size = size;
	a.offset = offset;
	a.common = common;
	a.gas_flags = flags;
	entry = iommu_gas_alloc_entry(domain,
	    (flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0);
	if (entry == NULL)
		return (ENOMEM);
	a.entry = entry;
	IOMMU_DOMAIN_LOCK(domain);
	error = iommu_gas_find_space(&a);
	if (error == ENOMEM) {
		IOMMU_DOMAIN_UNLOCK(domain);
		iommu_gas_free_entry(entry);
		return (error);
	}
#ifdef INVARIANTS
	if (iommu_check_free)
		iommu_gas_check_free(domain);
#endif
	KASSERT(error == 0,
	    ("unexpected error %d from iommu_gas_find_space", error));
	KASSERT(entry->end < domain->end, ("allocated GPA %jx, max GPA %jx",
	    (uintmax_t)entry->end, (uintmax_t)domain->end));
	entry->flags |= eflags;
	IOMMU_DOMAIN_UNLOCK(domain);

	error = domain->ops->map(domain, entry->start,
	    entry->end - entry->start, ma, eflags,
	    ((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0));
	if (error == ENOMEM) {
		iommu_domain_unload_entry(entry, true,
		    (flags & IOMMU_MF_CANWAIT) != 0);
		return (error);
	}
	KASSERT(error == 0,
	    ("unexpected error %d from domain_map_buf", error));

	*res = entry;
	return (0);
}

int
iommu_gas_map_region(struct iommu_domain *domain, struct iommu_map_entry *entry,
    u_int eflags, u_int flags, vm_page_t *ma)
{
	iommu_gaddr_t start;
	int error;

	KASSERT(entry->domain == domain,
	    ("mismatched domain %p entry %p entry->domain %p", domain,
	    entry, entry->domain));
	KASSERT(entry->flags == 0, ("used RMRR entry %p %p %x", domain,
	    entry, entry->flags));
	KASSERT((flags & ~(IOMMU_MF_CANWAIT | IOMMU_MF_RMRR)) == 0,
	    ("invalid flags 0x%x", flags));

	start = entry->start;
	IOMMU_DOMAIN_LOCK(domain);
	error = iommu_gas_alloc_region(domain, entry, flags);
	if (error != 0) {
		IOMMU_DOMAIN_UNLOCK(domain);
		return (error);
	}
	entry->flags |= eflags;
	IOMMU_DOMAIN_UNLOCK(domain);
	if (entry->end == entry->start)
		return (0);

	error = domain->ops->map(domain, entry->start,
	    entry->end - entry->start, ma + OFF_TO_IDX(start - entry->start),
	    eflags, ((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0));
	if (error == ENOMEM) {
		iommu_domain_unload_entry(entry, false,
		    (flags & IOMMU_MF_CANWAIT) != 0);
		return (error);
	}
	KASSERT(error == 0,
	    ("unexpected error %d from domain_map_buf", error));

	return (0);
}

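/*
 * Reserve the range [start, end) so that the GAS allocator will not hand
 * it out; the region is recorded as an unmapped entry in the tree.
 */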
static int
iommu_gas_reserve_region_locked(struct iommu_domain *domain,
    iommu_gaddr_t start, iommu_gaddr_t end, struct iommu_map_entry *entry)
{
	int error;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);

	entry->start = start;
	entry->end = end;
	error = iommu_gas_alloc_region(domain, entry, IOMMU_MF_CANWAIT);
	if (error == 0)
		entry->flags |= IOMMU_MAP_ENTRY_UNMAPPED;
	return (error);
}

int
iommu_gas_reserve_region(struct iommu_domain *domain, iommu_gaddr_t start,
    iommu_gaddr_t end, struct iommu_map_entry **entry0)
{
	struct iommu_map_entry *entry;
	int error;

	entry = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
	IOMMU_DOMAIN_LOCK(domain);
	error = iommu_gas_reserve_region_locked(domain, start, end, entry);
	IOMMU_DOMAIN_UNLOCK(domain);
	if (error != 0)
		iommu_gas_free_entry(entry);
	else if (entry0 != NULL)
		*entry0 = entry;
	return (error);
}

/*
 * As in iommu_gas_reserve_region, reserve [start, end), but allow for
 * existing entries.
 */
int
iommu_gas_reserve_region_extend(struct iommu_domain *domain,
    iommu_gaddr_t start, iommu_gaddr_t end)
{
	struct iommu_map_entry *entry, *next, *prev, key = {};
	iommu_gaddr_t entry_start, entry_end;
	int error;

	error = 0;
	entry = NULL;
	end = ummin(end, domain->end);
	while (start < end) {
		/* Preallocate an entry. */
		if (entry == NULL)
			entry = iommu_gas_alloc_entry(domain,
			    IOMMU_PGF_WAITOK);
		/* Calculate the free region from here to the next entry. */
		key.start = key.end = start;
		IOMMU_DOMAIN_LOCK(domain);
		next = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root,
		    &key);
		KASSERT(next != NULL, ("domain %p with end %#jx has no entry "
		    "after %#jx", domain, (uintmax_t)domain->end,
		    (uintmax_t)start));
		entry_end = ummin(end, next->start);
		prev = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, next);
		if (prev != NULL)
			entry_start = ummax(start, prev->end);
		else
			entry_start = start;
		start = next->end;
		/* Reserve the region if non-empty. */
		if (entry_start != entry_end) {
			error = iommu_gas_reserve_region_locked(domain,
			    entry_start, entry_end, entry);
			if (error != 0) {
				IOMMU_DOMAIN_UNLOCK(domain);
				break;
			}
			entry = NULL;
		}
		IOMMU_DOMAIN_UNLOCK(domain);
	}
	/* Release a preallocated entry if it was not used. */
	if (entry != NULL)
		iommu_gas_free_entry(entry);
	return (error);
}

void
iommu_unmap_msi(struct iommu_ctx *ctx)
{
	struct iommu_map_entry *entry;
	struct iommu_domain *domain;

	domain = ctx->domain;
	entry = domain->msi_entry;
	if (entry == NULL)
		return;

	domain->ops->unmap(domain, entry->start, entry->end -
	    entry->start, IOMMU_PGF_WAITOK);

	iommu_gas_free_space(entry);

	iommu_gas_free_entry(entry);

	domain->msi_entry = NULL;
	domain->msi_base = 0;
	domain->msi_phys = 0;
}

int
iommu_map_msi(struct iommu_ctx *ctx, iommu_gaddr_t size, int offset,
    u_int eflags, u_int flags, vm_page_t *ma)
{
	struct iommu_domain *domain;
	struct iommu_map_entry *entry;
	int error;

	error = 0;
	domain = ctx->domain;

	/* Check if there is already an MSI page allocated. */
	IOMMU_DOMAIN_LOCK(domain);
	entry = domain->msi_entry;
	IOMMU_DOMAIN_UNLOCK(domain);

	if (entry == NULL) {
		error = iommu_gas_map(domain, &ctx->tag->common, size, offset,
		    eflags, flags, ma, &entry);
		IOMMU_DOMAIN_LOCK(domain);
		if (error == 0) {
			if (domain->msi_entry == NULL) {
				MPASS(domain->msi_base == 0);
				MPASS(domain->msi_phys == 0);

				domain->msi_entry = entry;
				domain->msi_base = entry->start;
				domain->msi_phys = VM_PAGE_TO_PHYS(ma[0]);
			} else {
				/*
				 * We lost the race and already have an
				 * MSI page allocated.  Free the unneeded
				 * entry.
				 */
				iommu_gas_free_entry(entry);
			}
		} else if (domain->msi_entry != NULL) {
			/*
			 * The allocation failed, but another succeeded.
			 * Return success as there is a valid MSI page.
			 */
			error = 0;
		}
		IOMMU_DOMAIN_UNLOCK(domain);
	}

	return (error);
}

void
iommu_translate_msi(struct iommu_domain *domain, uint64_t *addr)
{

	*addr = (*addr - domain->msi_phys) + domain->msi_base;

	KASSERT(*addr >= domain->msi_entry->start,
	    ("%s: Address is below the MSI entry start address (%jx < %jx)",
	    __func__, (uintmax_t)*addr, (uintmax_t)domain->msi_entry->start));

	KASSERT(*addr + sizeof(*addr) <= domain->msi_entry->end,
	    ("%s: Address is above the MSI entry end address (%jx < %jx)",
	    __func__, (uintmax_t)*addr, (uintmax_t)domain->msi_entry->end));
}

SYSCTL_NODE(_hw, OID_AUTO, iommu, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "");

#ifdef INVARIANTS
SYSCTL_INT(_hw_iommu, OID_AUTO, check_free, CTLFLAG_RWTUN,
    &iommu_check_free, 0,
    "Check the GPA RBtree for free_down and free_after validity");
#endif