/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#define	RB_AUGMENT_CHECK(entry) iommu_gas_augment_entry(entry)

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/rman.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/uma.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/iommu/iommu.h>
#include <dev/iommu/iommu_gas.h>
#include <dev/iommu/iommu_msi.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <machine/iommu.h>
#include <dev/iommu/busdma_iommu.h>

/*
 * Guest Address Space management.
 */

static uma_zone_t iommu_map_entry_zone;

#ifdef INVARIANTS
static int iommu_check_free;
#endif

static void
intel_gas_init(void)
{

	iommu_map_entry_zone = uma_zcreate("IOMMU_MAP_ENTRY",
	    sizeof(struct iommu_map_entry), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NODUMP);
}
SYSINIT(intel_gas, SI_SUB_DRIVERS, SI_ORDER_FIRST, intel_gas_init, NULL);

struct iommu_map_entry *
iommu_gas_alloc_entry(struct iommu_domain *domain, u_int flags)
{
	struct iommu_map_entry *res;

	KASSERT((flags & ~(IOMMU_PGF_WAITOK)) == 0,
	    ("unsupported flags %x", flags));

	res = uma_zalloc(iommu_map_entry_zone, ((flags & IOMMU_PGF_WAITOK) !=
	    0 ? M_WAITOK : M_NOWAIT) | M_ZERO);
	if (res != NULL) {
		SLIST_INIT(&res->pgtbl_free);
		if (domain != NULL) {
			res->domain = domain;
			atomic_add_int(&domain->entries_cnt, 1);
		}
	}
	return (res);
}
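
/*
 * Illustrative pairing (hypothetical caller): map entries are normally
 * released with iommu_gas_free_entry() below rather than with a direct
 * uma_zfree(), so that the owning domain's entries_cnt stays balanced:
 *
 *	struct iommu_map_entry *e;
 *
 *	e = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
 *	...
 *	iommu_gas_free_entry(e);
 */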

void
iommu_gas_free_entry(struct iommu_map_entry *entry)
{
	struct iommu_domain *domain;
	int n __unused;

	n = vm_page_free_pages_toq(&entry->pgtbl_free, false);
#if defined(__i386__) || defined(__amd64__)
	atomic_subtract_int(&iommu_tbl_pagecnt, n);
#endif
	domain = entry->domain;
	if (domain != NULL)
		atomic_subtract_int(&domain->entries_cnt, 1);
	uma_zfree(iommu_map_entry_zone, entry);
}

static int
iommu_gas_cmp_entries(struct iommu_map_entry *a, struct iommu_map_entry *b)
{

	/* Zero-size entries, such as the end placeholder, make <= necessary. */
	KASSERT(a->start <= a->end, ("inverted entry %p (%jx, %jx)",
	    a, (uintmax_t)a->start, (uintmax_t)a->end));
	KASSERT(b->start <= b->end, ("inverted entry %p (%jx, %jx)",
	    b, (uintmax_t)b->start, (uintmax_t)b->end));
	KASSERT(((a->flags | b->flags) & IOMMU_MAP_ENTRY_FAKE) != 0 ||
	    a->end <= b->start || b->end <= a->start ||
	    a->end == a->start || b->end == b->start,
	    ("overlapping entries %p (%jx, %jx) f %#x %p (%jx, %jx) f %#x"
	    " domain %p %p",
	    a, (uintmax_t)a->start, (uintmax_t)a->end, a->flags,
	    b, (uintmax_t)b->start, (uintmax_t)b->end, b->flags,
	    a->domain, b->domain));

	if (a->end < b->end)
		return (-1);
	else if (b->end < a->end)
		return (1);
	return (0);
}

/*
 * Update augmentation data based on data from children.
 * Return true if and only if the update changes the augmentation data.
 */
static bool
iommu_gas_augment_entry(struct iommu_map_entry *entry)
{
	struct iommu_map_entry *child;
	iommu_gaddr_t bound, delta, free_down;

	free_down = 0;
	bound = entry->start;
	if ((child = RB_LEFT(entry, rb_entry)) != NULL) {
		free_down = MAX(child->free_down, bound - child->last);
		bound = child->first;
	}
	delta = bound - entry->first;
	entry->first = bound;
	bound = entry->end;
	if ((child = RB_RIGHT(entry, rb_entry)) != NULL) {
		free_down = MAX(free_down, child->free_down);
		free_down = MAX(free_down, child->first - bound);
		bound = child->last;
	}
	delta += entry->last - bound;
	if (delta == 0)
		delta = entry->free_down - free_down;
	entry->last = bound;
	entry->free_down = free_down;

	/*
	 * Return true either if the value of last - first changed,
	 * or if free_down changed.
	 */
	return (delta != 0);
}
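
/*
 * Worked example of the augmentation above (illustrative addresses,
 * assuming IOMMU_PAGE_SIZE == 0x1000): for a node [0x6000, 0x7000)
 * with leaf children [0x1000, 0x2000) and [0x9000, 0xa000), the
 * update computes
 *
 *	first     = 0x1000	(smallest start in the subtree)
 *	last      = 0xa000	(largest end in the subtree)
 *	free_down = MAX(0x6000 - 0x2000, 0x9000 - 0x7000) = 0x4000
 *
 * i.e. free_down caches the largest gap between neighboring entries
 * anywhere below the node, which lets the allocator skip whole
 * subtrees that cannot satisfy a request.
 */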

RB_GENERATE(iommu_gas_entries_tree, iommu_map_entry, rb_entry,
    iommu_gas_cmp_entries);

#ifdef INVARIANTS
static void
iommu_gas_check_free(struct iommu_domain *domain)
{
	struct iommu_map_entry *entry, *l, *r;
	iommu_gaddr_t v;

	RB_FOREACH(entry, iommu_gas_entries_tree, &domain->rb_root) {
		KASSERT(domain == entry->domain,
		    ("mismatched free domain %p entry %p entry->domain %p",
		    domain, entry, entry->domain));
		l = RB_LEFT(entry, rb_entry);
		r = RB_RIGHT(entry, rb_entry);
		v = 0;
		if (l != NULL) {
			v = MAX(v, l->free_down);
			v = MAX(v, entry->start - l->last);
		}
		if (r != NULL) {
			v = MAX(v, r->free_down);
			v = MAX(v, r->first - entry->end);
		}
		MPASS(entry->free_down == v);
	}
}
#endif

static void
iommu_gas_rb_remove(struct iommu_domain *domain, struct iommu_map_entry *entry)
{
	struct iommu_map_entry *nbr;

	/* Removing entry may open a new free gap before domain->start_gap. */
	if (entry->end <= domain->start_gap->end) {
		if (RB_RIGHT(entry, rb_entry) != NULL)
			nbr = iommu_gas_entries_tree_RB_NEXT(entry);
		else if (RB_LEFT(entry, rb_entry) != NULL)
			nbr = RB_LEFT(entry, rb_entry);
		else
			nbr = RB_PARENT(entry, rb_entry);
		domain->start_gap = nbr;
	}
	RB_REMOVE(iommu_gas_entries_tree, &domain->rb_root, entry);
}

struct iommu_domain *
iommu_get_ctx_domain(struct iommu_ctx *ctx)
{

	return (ctx->domain);
}

void
iommu_gas_init_domain(struct iommu_domain *domain)
{
	struct iommu_map_entry *begin, *end;

	begin = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
	end = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);

	IOMMU_DOMAIN_LOCK(domain);
	KASSERT(domain->entries_cnt == 2, ("dirty domain %p", domain));
	KASSERT(RB_EMPTY(&domain->rb_root),
	    ("non-empty entries %p", domain));

	end->start = domain->end;
	end->end = domain->end;
	end->flags = IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED;
	RB_INSERT(iommu_gas_entries_tree, &domain->rb_root, end);

	begin->start = 0;
	begin->end = IOMMU_PAGE_SIZE;
	begin->flags = IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED;
	RB_INSERT_PREV(iommu_gas_entries_tree, &domain->rb_root, end, begin);
	iommu_gas_augment_entry(end);
	iommu_gas_augment_entry(begin);

	domain->start_gap = begin;
	domain->first_place = begin;
	domain->last_place = end;
	domain->flags |= IOMMU_DOMAIN_GAS_INITED;
	IOMMU_DOMAIN_UNLOCK(domain);
}
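
/*
 * Resulting layout (illustrative): for a domain with end == 0x100000000,
 * iommu_gas_init_domain() leaves exactly two placeholder entries in the
 * tree,
 *
 *	begin: [0, 0x1000)			PLACE | UNMAPPED
 *	end:   [0x100000000, 0x100000000)	PLACE | UNMAPPED
 *
 * so all later allocations are carved from the gap between them, and
 * page zero is never handed out to a device.
 */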
flags %p", domain)); 298 iommu_gas_rb_remove(domain, entry); 299 iommu_gas_free_entry(entry); 300 } 301 302 struct iommu_gas_match_args { 303 iommu_gaddr_t size; 304 int offset; 305 const struct bus_dma_tag_common *common; 306 u_int gas_flags; 307 struct iommu_map_entry *entry; 308 }; 309 310 /* 311 * The interval [beg, end) is a free interval between two iommu_map_entries. 312 * Addresses can be allocated only in the range [lbound, ubound]. Try to 313 * allocate space in the free interval, subject to the conditions expressed by 314 * a, and return 'true' if and only if the allocation attempt succeeds. 315 */ 316 static bool 317 iommu_gas_match_one(struct iommu_gas_match_args *a, iommu_gaddr_t beg, 318 iommu_gaddr_t end, iommu_gaddr_t lbound, iommu_gaddr_t ubound) 319 { 320 struct iommu_map_entry *entry; 321 iommu_gaddr_t first, size, start; 322 int offset; 323 324 /* 325 * The prev->end is always aligned on the page size, which 326 * causes page alignment for the entry->start too. 327 * 328 * Create IOMMU_PAGE_SIZE gaps before, after new entry 329 * to ensure that out-of-bounds accesses fault. 330 */ 331 beg = MAX(beg + IOMMU_PAGE_SIZE, lbound); 332 start = roundup2(beg, a->common->alignment); 333 if (start < beg) 334 return (false); 335 if (end < IOMMU_PAGE_SIZE + 1) 336 return (false); 337 end = MIN(end - IOMMU_PAGE_SIZE - 1, ubound); 338 offset = a->offset; 339 size = a->size; 340 if (start + offset + size - 1 > end) 341 return (false); 342 343 /* Check for and try to skip past boundary crossing. */ 344 if (!vm_addr_bound_ok(start + offset, size, a->common->boundary)) { 345 /* 346 * The start + offset to start + offset + size region crosses 347 * the boundary. Check if there is enough space after the next 348 * boundary after the beg. 349 */ 350 first = start; 351 beg = roundup2(start + offset + 1, a->common->boundary); 352 start = roundup2(beg, a->common->alignment); 353 354 if (start + offset + size - 1 > end || 355 !vm_addr_bound_ok(start + offset, size, 356 a->common->boundary)) { 357 /* 358 * Not enough space to align at the requested boundary, 359 * or boundary is smaller than the size, but allowed to 360 * split. We already checked that start + size does not 361 * overlap ubound. 362 * 363 * XXXKIB. It is possible that beg is exactly at the 364 * start of the next entry, then we do not have gap. 365 * Ignore for now. 366 */ 367 if ((a->gas_flags & IOMMU_MF_CANSPLIT) == 0) 368 return (false); 369 size = beg - first - offset; 370 start = first; 371 } 372 } 373 entry = a->entry; 374 entry->start = start; 375 entry->end = start + roundup2(size + offset, IOMMU_PAGE_SIZE); 376 entry->flags = IOMMU_MAP_ENTRY_MAP; 377 return (true); 378 } 379 380 /* Find the next entry that might abut a big-enough range. */ 381 static struct iommu_map_entry * 382 iommu_gas_next(struct iommu_map_entry *curr, iommu_gaddr_t min_free) 383 { 384 struct iommu_map_entry *next; 385 386 if ((next = RB_RIGHT(curr, rb_entry)) != NULL && 387 next->free_down >= min_free) { 388 /* Find next entry in right subtree. */ 389 do 390 curr = next; 391 while ((next = RB_LEFT(curr, rb_entry)) != NULL && 392 next->free_down >= min_free); 393 } else { 394 /* Find next entry in a left-parent ancestor. */ 395 while ((next = RB_PARENT(curr, rb_entry)) != NULL && 396 curr == RB_RIGHT(next, rb_entry)) 397 curr = next; 398 curr = next; 399 } 400 return (curr); 401 } 402 403 /* 404 * Address-ordered first-fit search of 'domain' for free space satisfying the 405 * conditions of 'a'. 

/* Find the next entry that might abut a big-enough range. */
static struct iommu_map_entry *
iommu_gas_next(struct iommu_map_entry *curr, iommu_gaddr_t min_free)
{
	struct iommu_map_entry *next;

	if ((next = RB_RIGHT(curr, rb_entry)) != NULL &&
	    next->free_down >= min_free) {
		/* Find next entry in right subtree. */
		do
			curr = next;
		while ((next = RB_LEFT(curr, rb_entry)) != NULL &&
		    next->free_down >= min_free);
	} else {
		/* Find next entry in a left-parent ancestor. */
		while ((next = RB_PARENT(curr, rb_entry)) != NULL &&
		    curr == RB_RIGHT(next, rb_entry))
			curr = next;
		curr = next;
	}
	return (curr);
}

/*
 * Address-ordered first-fit search of 'domain' for free space satisfying the
 * conditions of 'a'.  The space allocated is at least one page big, and is
 * bounded by guard pages to the left and right.  The allocated space for
 * 'domain' is described by an rb-tree of map entries at domain->rb_root, and
 * domain->start_gap points to a map entry less than or adjacent to the first
 * free-space of size at least 3 pages.
 */
static int
iommu_gas_find_space(struct iommu_domain *domain,
    struct iommu_gas_match_args *a)
{
	struct iommu_map_entry *curr, *first;
	iommu_gaddr_t addr, min_free;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);
	KASSERT(a->entry->flags == 0,
	    ("dirty entry %p %p", domain, a->entry));

	/*
	 * start_gap may point to an entry adjacent to gaps too small for any
	 * new allocation.  In that case, advance start_gap to the first free
	 * space big enough for a minimum allocation plus two guard pages.
	 */
	min_free = 3 * IOMMU_PAGE_SIZE;
	first = domain->start_gap;
	while (first != NULL && first->free_down < min_free)
		first = RB_PARENT(first, rb_entry);
	for (curr = first; curr != NULL;
	    curr = iommu_gas_next(curr, min_free)) {
		if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
		    first->last + min_free <= curr->start)
			break;
		if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
		    curr->end + min_free <= first->first)
			break;
	}
	domain->start_gap = curr;

	/*
	 * If the subtree doesn't have free space for the requested allocation
	 * plus two guard pages, skip it.
	 */
	min_free = 2 * IOMMU_PAGE_SIZE +
	    roundup2(a->size + a->offset, IOMMU_PAGE_SIZE);

	/* Climb to find a node in the subtree of big-enough ranges. */
	first = curr;
	while (first != NULL && first->free_down < min_free)
		first = RB_PARENT(first, rb_entry);

	/*
	 * Walk the big-enough ranges until one satisfies the alignment
	 * requirements, or until the lowaddr requirement would be violated.
	 */
	addr = a->common->lowaddr;
	for (curr = first; curr != NULL;
	    curr = iommu_gas_next(curr, min_free)) {
		if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, first->last, curr->start,
		    0, addr)) {
			RB_INSERT_PREV(iommu_gas_entries_tree,
			    &domain->rb_root, curr, a->entry);
			return (0);
		}
		if (curr->end >= addr) {
			/* All remaining ranges > addr */
			break;
		}
		if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, curr->end, first->first,
		    0, addr)) {
			RB_INSERT_NEXT(iommu_gas_entries_tree,
			    &domain->rb_root, curr, a->entry);
			return (0);
		}
	}

	/*
	 * To resume the search at the start of the upper region, first climb
	 * to the nearest ancestor that spans highaddr.  Then find the last
	 * entry before highaddr that could abut a big-enough range.
	 */
	addr = a->common->highaddr;
	while (curr != NULL && curr->last < addr)
		curr = RB_PARENT(curr, rb_entry);
	first = NULL;
	while (curr != NULL && curr->free_down >= min_free) {
		if (addr < curr->end)
			curr = RB_LEFT(curr, rb_entry);
		else {
			first = curr;
			curr = RB_RIGHT(curr, rb_entry);
		}
	}

	/*
	 * Walk the remaining big-enough ranges until one satisfies the
	 * alignment requirements.
	 */
	for (curr = first; curr != NULL;
	    curr = iommu_gas_next(curr, min_free)) {
		if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, first->last, curr->start,
		    addr + 1, domain->end - 1)) {
			RB_INSERT_PREV(iommu_gas_entries_tree,
			    &domain->rb_root, curr, a->entry);
			return (0);
		}
		if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
		    iommu_gas_match_one(a, curr->end, first->first,
		    addr + 1, domain->end - 1)) {
			RB_INSERT_NEXT(iommu_gas_entries_tree,
			    &domain->rb_root, curr, a->entry);
			return (0);
		}
	}

	return (ENOMEM);
}
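
/*
 * Note on the two passes above: the tag's lowaddr/highaddr pair describes
 * an exclusion window.  For a hypothetical device that can only address
 * 32 bits (lowaddr == 0xffffffff), the first pass tries to place the
 * entry in [0, lowaddr]; only when that fails does the second pass retry
 * in [highaddr + 1, domain->end - 1].
 */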

static int
iommu_gas_alloc_region(struct iommu_domain *domain, struct iommu_map_entry *entry,
    u_int flags)
{
	struct iommu_map_entry *next, *prev;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);

	if ((entry->start & IOMMU_PAGE_MASK) != 0 ||
	    (entry->end & IOMMU_PAGE_MASK) != 0)
		return (EINVAL);
	if (entry->start >= entry->end)
		return (EINVAL);
	if (entry->end >= domain->end)
		return (EINVAL);

	entry->flags |= IOMMU_MAP_ENTRY_FAKE;
	next = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, entry);
	KASSERT(next != NULL, ("next must be non-null %p %jx", domain,
	    (uintmax_t)entry->start));
	prev = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, next);
	/* prev could be NULL */
	entry->flags &= ~IOMMU_MAP_ENTRY_FAKE;

	/*
	 * Adapt to broken BIOSes which specify overlapping RMRR
	 * entries.
	 *
	 * XXXKIB: this does not handle a case when prev or next
	 * entries are completely covered by the current one, which
	 * extends both ways.
	 */
	if (prev != NULL && prev->end > entry->start &&
	    (prev->flags & IOMMU_MAP_ENTRY_PLACE) == 0) {
		if ((flags & IOMMU_MF_RMRR) == 0 ||
		    (prev->flags & IOMMU_MAP_ENTRY_RMRR) == 0)
			return (EBUSY);
		entry->start = prev->end;
	}
	if (next->start < entry->end &&
	    (next->flags & IOMMU_MAP_ENTRY_PLACE) == 0) {
		if ((flags & IOMMU_MF_RMRR) == 0 ||
		    (next->flags & IOMMU_MAP_ENTRY_RMRR) == 0)
			return (EBUSY);
		entry->end = next->start;
	}
	if (entry->end == entry->start)
		return (0);

	if (prev != NULL && prev->end > entry->start) {
		/* This assumes that prev is the placeholder entry. */
		iommu_gas_rb_remove(domain, prev);
		prev = NULL;
	}
	RB_INSERT_PREV(iommu_gas_entries_tree,
	    &domain->rb_root, next, entry);
	if (next->start < entry->end) {
		iommu_gas_rb_remove(domain, next);
		next = NULL;
	}

	if ((flags & IOMMU_MF_RMRR) != 0)
		entry->flags = IOMMU_MAP_ENTRY_RMRR;

#ifdef INVARIANTS
	struct iommu_map_entry *ip, *in;
	ip = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, entry);
	in = RB_NEXT(iommu_gas_entries_tree, &domain->rb_root, entry);
	KASSERT(prev == NULL || ip == prev,
	    ("RMRR %p (%jx %jx) prev %p (%jx %jx) ins prev %p (%jx %jx)",
	    entry, entry->start, entry->end, prev,
	    prev == NULL ? 0 : prev->start, prev == NULL ? 0 : prev->end,
	    ip, ip == NULL ? 0 : ip->start, ip == NULL ? 0 : ip->end));
	KASSERT(next == NULL || in == next,
	    ("RMRR %p (%jx %jx) next %p (%jx %jx) ins next %p (%jx %jx)",
	    entry, entry->start, entry->end, next,
	    next == NULL ? 0 : next->start, next == NULL ? 0 : next->end,
	    in, in == NULL ? 0 : in->start, in == NULL ? 0 : in->end));
#endif

	return (0);
}
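
/*
 * Example of the RMRR adaptation above (illustrative addresses): if the
 * tree already contains an RMRR entry [0xd2000, 0xd6000) and a broken
 * BIOS also reports [0xd0000, 0xd4000), a request carrying IOMMU_MF_RMRR
 * is clipped to [0xd0000, 0xd2000) and inserted, instead of failing the
 * whole region with EBUSY.
 */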

void
iommu_gas_free_space(struct iommu_map_entry *entry)
{
	struct iommu_domain *domain;

	domain = entry->domain;
	KASSERT((entry->flags & (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_RMRR |
	    IOMMU_MAP_ENTRY_MAP)) == IOMMU_MAP_ENTRY_MAP,
	    ("permanent entry %p %p", domain, entry));

	IOMMU_DOMAIN_LOCK(domain);
	iommu_gas_rb_remove(domain, entry);
	entry->flags &= ~IOMMU_MAP_ENTRY_MAP;
#ifdef INVARIANTS
	if (iommu_check_free)
		iommu_gas_check_free(domain);
#endif
	IOMMU_DOMAIN_UNLOCK(domain);
}

void
iommu_gas_free_region(struct iommu_map_entry *entry)
{
	struct iommu_domain *domain;

	domain = entry->domain;
	KASSERT((entry->flags & (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_RMRR |
	    IOMMU_MAP_ENTRY_MAP)) == IOMMU_MAP_ENTRY_RMRR,
	    ("non-RMRR entry %p %p", domain, entry));

	IOMMU_DOMAIN_LOCK(domain);
	if (entry != domain->first_place &&
	    entry != domain->last_place)
		iommu_gas_rb_remove(domain, entry);
	entry->flags &= ~IOMMU_MAP_ENTRY_RMRR;
	IOMMU_DOMAIN_UNLOCK(domain);
}

static struct iommu_map_entry *
iommu_gas_remove_clip_left(struct iommu_domain *domain, iommu_gaddr_t start,
    iommu_gaddr_t end, struct iommu_map_entry **r)
{
	struct iommu_map_entry *entry, *res, fentry;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);
	MPASS(start <= end);
	MPASS(end <= domain->end);

	/*
	 * Find the entry that contains the supplied guest address
	 * start, or the first entry after it.  Since we asserted that
	 * start is below the domain end, the entry must exist.  Then
	 * clip it if needed.
	 */
	bzero(&fentry, sizeof(fentry));
	fentry.start = start + 1;
	fentry.end = start + 1;
	fentry.flags = IOMMU_MAP_ENTRY_FAKE;
	entry = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, &fentry);

	if (entry->start >= start ||
	    (entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
		return (entry);

	res = *r;
	*r = NULL;
	*res = *entry;
	res->start = entry->end = start;
	RB_UPDATE_AUGMENT(entry, rb_entry);
	RB_INSERT_NEXT(iommu_gas_entries_tree,
	    &domain->rb_root, entry, res);
	return (res);
}

static bool
iommu_gas_remove_clip_right(struct iommu_domain *domain,
    iommu_gaddr_t end, struct iommu_map_entry *entry,
    struct iommu_map_entry *r)
{
	if (entry->start >= end || (entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
		return (false);

	*r = *entry;
	r->end = entry->start = end;
	RB_UPDATE_AUGMENT(entry, rb_entry);
	RB_INSERT_PREV(iommu_gas_entries_tree,
	    &domain->rb_root, entry, r);
	return (true);
}
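
/*
 * Clipping example (illustrative addresses): for an existing entry
 * [0x2000, 0x6000), iommu_gas_remove_clip_left() with start == 0x3000
 * shrinks the entry to [0x2000, 0x3000) and inserts the preallocated *r
 * as [0x3000, 0x6000), returning the new entry.  Symmetrically,
 * iommu_gas_remove_clip_right() with end == 0x5000 applied to
 * [0x3000, 0x6000) leaves [0x5000, 0x6000) in the tree and inserts r as
 * [0x3000, 0x5000) before it.
 */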

static void
iommu_gas_remove_unmap(struct iommu_domain *domain,
    struct iommu_map_entry *entry, struct iommu_map_entries_tailq *gcp)
{
	IOMMU_DOMAIN_ASSERT_LOCKED(domain);

	if ((entry->flags & (IOMMU_MAP_ENTRY_UNMAPPED |
	    IOMMU_MAP_ENTRY_RMRR |
	    IOMMU_MAP_ENTRY_REMOVING)) != 0)
		return;
	MPASS((entry->flags & IOMMU_MAP_ENTRY_PLACE) == 0);
	entry->flags |= IOMMU_MAP_ENTRY_REMOVING;
	TAILQ_INSERT_TAIL(gcp, entry, dmamap_link);
}

static void
iommu_gas_remove_locked(struct iommu_domain *domain,
    iommu_gaddr_t start, iommu_gaddr_t size,
    struct iommu_map_entries_tailq *gc,
    struct iommu_map_entry **r1, struct iommu_map_entry **r2)
{
	struct iommu_map_entry *entry, *nentry;
	iommu_gaddr_t end;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);

	end = start + size;

	nentry = iommu_gas_remove_clip_left(domain, start, end, r1);
	RB_FOREACH_FROM(entry, iommu_gas_entries_tree, nentry) {
		if (entry->start >= end)
			break;
		KASSERT(start <= entry->start,
		    ("iommu_gas_remove entry (%#jx, %#jx) start %#jx",
		    entry->start, entry->end, start));
		iommu_gas_remove_unmap(domain, entry, gc);
	}
	if (iommu_gas_remove_clip_right(domain, end, entry, *r2)) {
		iommu_gas_remove_unmap(domain, *r2, gc);
		*r2 = NULL;
	}

#ifdef INVARIANTS
	RB_FOREACH(entry, iommu_gas_entries_tree, &domain->rb_root) {
		if ((entry->flags & (IOMMU_MAP_ENTRY_RMRR |
		    IOMMU_MAP_ENTRY_PLACE)) != 0)
			continue;
		KASSERT(entry->end <= start || entry->start >= end,
		    ("iommu_gas_remove leftover entry (%#jx, %#jx) range "
		    "(%#jx, %#jx)",
		    entry->start, entry->end, start, end));
	}
#endif
}

static void
iommu_gas_remove_init(struct iommu_domain *domain,
    struct iommu_map_entries_tailq *gc, struct iommu_map_entry **r1,
    struct iommu_map_entry **r2)
{
	TAILQ_INIT(gc);
	*r1 = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
	*r2 = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
}

static void
iommu_gas_remove_cleanup(struct iommu_domain *domain,
    struct iommu_map_entries_tailq *gc, struct iommu_map_entry **r1,
    struct iommu_map_entry **r2)
{
	if (*r1 != NULL) {
		iommu_gas_free_entry(*r1);
		*r1 = NULL;
	}
	if (*r2 != NULL) {
		iommu_gas_free_entry(*r2);
		*r2 = NULL;
	}
	iommu_domain_unload(domain, gc, true);
}

/*
 * Remove the specified range from the GAS of the domain.  Note that the
 * removal is not guaranteed to have occurred by the time the function
 * returns; it might be finalized some time later, when the hardware
 * reports that the (queued) IOTLB invalidation was performed.
 */
void
iommu_gas_remove(struct iommu_domain *domain, iommu_gaddr_t start,
    iommu_gaddr_t size)
{
	struct iommu_map_entry *r1, *r2;
	struct iommu_map_entries_tailq gc;

	iommu_gas_remove_init(domain, &gc, &r1, &r2);
	IOMMU_DOMAIN_LOCK(domain);
	iommu_gas_remove_locked(domain, start, size, &gc, &r1, &r2);
	IOMMU_DOMAIN_UNLOCK(domain);
	iommu_gas_remove_cleanup(domain, &gc, &r1, &r2);
}
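
/*
 * Usage sketch (hypothetical caller, e.g. tearing down a passthrough
 * window):
 *
 *	iommu_gas_remove(domain, base, size);
 *
 * The caller supplies only the range; splitting of the boundary entries,
 * queueing of the covered entries and the deferred unload are handled
 * internally.  Mappings in the range may remain visible to the hardware
 * until the queued IOTLB invalidation completes.
 */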

int
iommu_gas_map(struct iommu_domain *domain,
    const struct bus_dma_tag_common *common, iommu_gaddr_t size, int offset,
    u_int eflags, u_int flags, vm_page_t *ma, struct iommu_map_entry **res)
{
	struct iommu_gas_match_args a;
	struct iommu_map_entry *entry;
	int error;

	KASSERT((flags & ~(IOMMU_MF_CANWAIT | IOMMU_MF_CANSPLIT)) == 0,
	    ("invalid flags 0x%x", flags));

	a.size = size;
	a.offset = offset;
	a.common = common;
	a.gas_flags = flags;
	entry = iommu_gas_alloc_entry(domain,
	    (flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0);
	if (entry == NULL)
		return (ENOMEM);
	a.entry = entry;
	IOMMU_DOMAIN_LOCK(domain);
	error = iommu_gas_find_space(domain, &a);
	if (error == ENOMEM) {
		IOMMU_DOMAIN_UNLOCK(domain);
		iommu_gas_free_entry(entry);
		return (error);
	}
#ifdef INVARIANTS
	if (iommu_check_free)
		iommu_gas_check_free(domain);
#endif
	KASSERT(error == 0,
	    ("unexpected error %d from iommu_gas_find_space", error));
	KASSERT(entry->end < domain->end, ("allocated GPA %jx, max GPA %jx",
	    (uintmax_t)entry->end, (uintmax_t)domain->end));
	entry->flags |= eflags;
	IOMMU_DOMAIN_UNLOCK(domain);

	error = domain->ops->map(domain, entry, ma, eflags,
	    ((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0));
	if (error == ENOMEM) {
		iommu_domain_unload_entry(entry, true,
		    (flags & IOMMU_MF_CANWAIT) != 0);
		return (error);
	}
	KASSERT(error == 0,
	    ("unexpected error %d from domain_map_buf", error));

	*res = entry;
	return (0);
}

int
iommu_gas_map_region(struct iommu_domain *domain, struct iommu_map_entry *entry,
    u_int eflags, u_int flags, vm_page_t *ma)
{
	iommu_gaddr_t start;
	int error;

	KASSERT(entry->domain == domain,
	    ("mismatched domain %p entry %p entry->domain %p", domain,
	    entry, entry->domain));
	KASSERT(entry->flags == 0, ("used RMRR entry %p %p %x", domain,
	    entry, entry->flags));
	KASSERT((flags & ~(IOMMU_MF_CANWAIT | IOMMU_MF_RMRR)) == 0,
	    ("invalid flags 0x%x", flags));

	start = entry->start;
	IOMMU_DOMAIN_LOCK(domain);
	error = iommu_gas_alloc_region(domain, entry, flags);
	if (error != 0) {
		IOMMU_DOMAIN_UNLOCK(domain);
		return (error);
	}
	entry->flags |= eflags;
	IOMMU_DOMAIN_UNLOCK(domain);
	if (entry->end == entry->start)
		return (0);

	error = domain->ops->map(domain, entry,
	    ma + OFF_TO_IDX(start - entry->start), eflags,
	    ((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0));
	if (error == ENOMEM) {
		iommu_domain_unload_entry(entry, false,
		    (flags & IOMMU_MF_CANWAIT) != 0);
		return (error);
	}
	KASSERT(error == 0,
	    ("unexpected error %d from domain_map_buf", error));

	return (0);
}

static int
iommu_gas_reserve_region_locked(struct iommu_domain *domain,
    iommu_gaddr_t start, iommu_gaddr_t end, struct iommu_map_entry *entry)
{
	int error;

	IOMMU_DOMAIN_ASSERT_LOCKED(domain);

	entry->start = start;
	entry->end = end;
	error = iommu_gas_alloc_region(domain, entry, IOMMU_MF_CANWAIT);
	if (error == 0)
		entry->flags |= IOMMU_MAP_ENTRY_UNMAPPED;
	return (error);
}

int
iommu_gas_reserve_region(struct iommu_domain *domain, iommu_gaddr_t start,
    iommu_gaddr_t end, struct iommu_map_entry **entry0)
{
	struct iommu_map_entry *entry;
	int error;

	entry = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
	IOMMU_DOMAIN_LOCK(domain);
	error = iommu_gas_reserve_region_locked(domain, start, end, entry);
	IOMMU_DOMAIN_UNLOCK(domain);
	if (error != 0)
		iommu_gas_free_entry(entry);
	else if (entry0 != NULL)
		*entry0 = entry;
	return (error);
}
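
/*
 * Usage sketch (illustrative): a driver that must keep a hardware window,
 * say the x86 interrupt address range at 0xfee00000, out of DMA use can
 * reserve it before any iommu_gas_map() calls:
 *
 *	error = iommu_gas_reserve_region(domain, 0xfee00000,
 *	    0xfee00000 + IOMMU_PAGE_SIZE, NULL);
 *
 * The entry is marked IOMMU_MAP_ENTRY_UNMAPPED: the range is excluded
 * from the allocator, but no page-table entries are created for it.
 */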

/*
 * As in iommu_gas_reserve_region, reserve [start, end), but allow for
 * existing entries.
 */
int
iommu_gas_reserve_region_extend(struct iommu_domain *domain,
    iommu_gaddr_t start, iommu_gaddr_t end)
{
	struct iommu_map_entry *entry, *next, *prev, key = {};
	iommu_gaddr_t entry_start, entry_end;
	int error;

	error = 0;
	entry = NULL;
	end = ummin(end, domain->end);
	while (start < end) {
		/* Preallocate an entry. */
		if (entry == NULL)
			entry = iommu_gas_alloc_entry(domain,
			    IOMMU_PGF_WAITOK);
		/* Calculate the free region from here to the next entry. */
		key.start = key.end = start;
		IOMMU_DOMAIN_LOCK(domain);
		next = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, &key);
		KASSERT(next != NULL, ("domain %p with end %#jx has no entry "
		    "after %#jx", domain, (uintmax_t)domain->end,
		    (uintmax_t)start));
		entry_end = ummin(end, next->start);
		prev = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, next);
		if (prev != NULL)
			entry_start = ummax(start, prev->end);
		else
			entry_start = start;
		start = next->end;
		/* Reserve the region if non-empty. */
		if (entry_start != entry_end) {
			error = iommu_gas_reserve_region_locked(domain,
			    entry_start, entry_end, entry);
			if (error != 0) {
				IOMMU_DOMAIN_UNLOCK(domain);
				break;
			}
			entry = NULL;
		}
		IOMMU_DOMAIN_UNLOCK(domain);
	}
	/* Release a preallocated entry if it was not used. */
	if (entry != NULL)
		iommu_gas_free_entry(entry);
	return (error);
}

void
iommu_unmap_msi(struct iommu_ctx *ctx)
{
	struct iommu_map_entry *entry;
	struct iommu_domain *domain;

	domain = ctx->domain;
	entry = domain->msi_entry;
	if (entry == NULL)
		return;

	domain->ops->unmap(domain, entry, IOMMU_PGF_WAITOK);

	iommu_gas_free_space(entry);

	iommu_gas_free_entry(entry);

	domain->msi_entry = NULL;
	domain->msi_base = 0;
	domain->msi_phys = 0;
}

int
iommu_map_msi(struct iommu_ctx *ctx, iommu_gaddr_t size, int offset,
    u_int eflags, u_int flags, vm_page_t *ma)
{
	struct iommu_domain *domain;
	struct iommu_map_entry *entry;
	int error;

	error = 0;
	domain = ctx->domain;

	/* Check if there is already an MSI page allocated */
	IOMMU_DOMAIN_LOCK(domain);
	entry = domain->msi_entry;
	IOMMU_DOMAIN_UNLOCK(domain);

	if (entry == NULL) {
		error = iommu_gas_map(domain, &ctx->tag->common, size, offset,
		    eflags, flags, ma, &entry);
		IOMMU_DOMAIN_LOCK(domain);
		if (error == 0) {
			if (domain->msi_entry == NULL) {
				MPASS(domain->msi_base == 0);
				MPASS(domain->msi_phys == 0);

				domain->msi_entry = entry;
				domain->msi_base = entry->start;
				domain->msi_phys = VM_PAGE_TO_PHYS(ma[0]);
			} else {
				/*
				 * We lost the race and already have an
				 * MSI page allocated.  Free the unneeded
				 * entry.
				 */
				iommu_gas_free_entry(entry);
			}
		} else if (domain->msi_entry != NULL) {
			/*
			 * The allocation failed, but another succeeded.
			 * Return success as there is a valid MSI page.
			 */
			error = 0;
		}
		IOMMU_DOMAIN_UNLOCK(domain);
	}

	return (error);
}
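
/*
 * Example of the translation performed below (illustrative values): with
 * msi_phys == 0xfee00000 and the GAS allocation for the MSI page at
 * msi_base == 0x4000, a device-programmed MSI address 0xfee00010 is
 * rewritten to (0xfee00010 - 0xfee00000) + 0x4000 == 0x4010, which the
 * IOMMU remaps onto the physical MSI page.
 */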

void
iommu_translate_msi(struct iommu_domain *domain, uint64_t *addr)
{

	*addr = (*addr - domain->msi_phys) + domain->msi_base;

	KASSERT(*addr >= domain->msi_entry->start,
	    ("%s: Address is below the MSI entry start address (%jx < %jx)",
	    __func__, (uintmax_t)*addr, (uintmax_t)domain->msi_entry->start));

	KASSERT(*addr + sizeof(*addr) <= domain->msi_entry->end,
	    ("%s: Address is above the MSI entry end address (%jx < %jx)",
	    __func__, (uintmax_t)*addr, (uintmax_t)domain->msi_entry->end));
}

SYSCTL_NODE(_hw, OID_AUTO, iommu, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "");

#ifdef INVARIANTS
SYSCTL_INT(_hw_iommu, OID_AUTO, check_free, CTLFLAG_RWTUN,
    &iommu_check_free, 0,
    "Check the GPA RBtree for free_down and free_after validity");
#endif

#include "opt_ddb.h"
#ifdef DDB

#include <ddb/ddb.h>

static void
iommu_debug_dump_gas(struct iommu_domain *domain)
{
	struct iommu_map_entry *entry;

	db_printf("iommu_domain %p tree %p iommu %p fl %#x\n", domain,
	    &domain->rb_root, domain->iommu, domain->flags);
	RB_FOREACH(entry, iommu_gas_entries_tree, &domain->rb_root) {
		db_printf(
	    " e %p [%#jx %#jx] fl %#x first %#jx last %#jx free_down %#jx",
		    entry, (uintmax_t)entry->start, (uintmax_t)entry->end,
		    entry->flags,
		    (uintmax_t)entry->first, (uintmax_t)entry->last,
		    (uintmax_t)entry->free_down);
		if (entry == domain->start_gap)
			db_printf(" start_gap");
		if (entry == domain->first_place)
			db_printf(" first_place");
		if (entry == domain->last_place)
			db_printf(" last_place");
		db_printf("\n");
	}
}

DB_SHOW_COMMAND(iommu_domain, iommu_domain_show)
{
	struct iommu_domain *domain;

	if (!have_addr) {
		db_printf("show iommu_domain addr\n");
		return;
	}

	domain = (void *)addr;
	iommu_debug_dump_gas(domain);
}

#endif