/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013, 2014, 2024 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "opt_acpi.h"
#if defined(__amd64__)
#define DEV_APIC
#else
#include "opt_apic.h"
#endif
#include "opt_ddb.h"

#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sf_buf.h>
#include <sys/sysctl.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/rman.h>
#include <sys/rwlock.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/cpu.h>
#include <x86/include/busdma_impl.h>
#include <dev/iommu/busdma_iommu.h>
#include <dev/iommu/iommu.h>
#include <x86/iommu/x86_iommu.h>
#include <x86/iommu/iommu_intrmap.h>
#ifdef DEV_APIC
#include "pcib_if.h"
#include <machine/intr_machdep.h>
#include <x86/apicreg.h>
#include <x86/apicvar.h>
#endif
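
/*
 * Page-table page helpers.  Judging by their use in the functions below,
 * the IOMMU_PGF_* flags are interpreted roughly as follows:
 *   IOMMU_PGF_WAITOK   the allocation or mapping may sleep
 *   IOMMU_PGF_ZERO     return a zero-filled page
 *   IOMMU_PGF_ALLOC    allocate the page if it is not yet present
 *   IOMMU_PGF_NOALLOC  only look up an existing page, never allocate
 *   IOMMU_PGF_OBJL     the caller already holds the object lock
 */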
vm_page_t
iommu_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags)
{
        vm_page_t m;
        int zeroed, aflags;

        zeroed = (flags & IOMMU_PGF_ZERO) != 0 ? VM_ALLOC_ZERO : 0;
        aflags = zeroed | VM_ALLOC_NOBUSY | VM_ALLOC_SYSTEM | VM_ALLOC_NODUMP |
            ((flags & IOMMU_PGF_WAITOK) != 0 ? VM_ALLOC_WAITFAIL :
            VM_ALLOC_NOWAIT);
        for (;;) {
                if ((flags & IOMMU_PGF_OBJL) == 0)
                        VM_OBJECT_WLOCK(obj);
                m = vm_page_lookup(obj, idx);
                if ((flags & IOMMU_PGF_NOALLOC) != 0 || m != NULL) {
                        if ((flags & IOMMU_PGF_OBJL) == 0)
                                VM_OBJECT_WUNLOCK(obj);
                        break;
                }
                m = vm_page_alloc_contig(obj, idx, aflags, 1, 0,
                    iommu_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
                if ((flags & IOMMU_PGF_OBJL) == 0)
                        VM_OBJECT_WUNLOCK(obj);
                if (m != NULL) {
                        if (zeroed && (m->flags & PG_ZERO) == 0)
                                pmap_zero_page(m);
                        atomic_add_int(&iommu_tbl_pagecnt, 1);
                        break;
                }
                if ((flags & IOMMU_PGF_WAITOK) == 0)
                        break;
        }
        return (m);
}

void
iommu_pgfree(vm_object_t obj, vm_pindex_t idx, int flags,
    struct iommu_map_entry *entry)
{
        vm_page_t m;

        if ((flags & IOMMU_PGF_OBJL) == 0)
                VM_OBJECT_WLOCK(obj);
        m = vm_page_grab(obj, idx, VM_ALLOC_NOCREAT);
        if (m != NULL) {
                if (entry == NULL) {
                        vm_page_free(m);
                        atomic_subtract_int(&iommu_tbl_pagecnt, 1);
                } else {
                        vm_page_remove_xbusy(m);  /* keep page busy */
                        SLIST_INSERT_HEAD(&entry->pgtbl_free, m, plinks.s.ss);
                }
        }
        if ((flags & IOMMU_PGF_OBJL) == 0)
                VM_OBJECT_WUNLOCK(obj);
}

void *
iommu_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags,
    struct sf_buf **sf)
{
        vm_page_t m;
        bool allocated;

        if ((flags & IOMMU_PGF_OBJL) == 0)
                VM_OBJECT_WLOCK(obj);
        m = vm_page_lookup(obj, idx);
        if (m == NULL && (flags & IOMMU_PGF_ALLOC) != 0) {
                m = iommu_pgalloc(obj, idx, flags | IOMMU_PGF_OBJL);
                allocated = true;
        } else
                allocated = false;
        if (m == NULL) {
                if ((flags & IOMMU_PGF_OBJL) == 0)
                        VM_OBJECT_WUNLOCK(obj);
                return (NULL);
        }
        /* Sleepable allocations cannot fail. */
        if ((flags & IOMMU_PGF_WAITOK) != 0)
                VM_OBJECT_WUNLOCK(obj);
        sched_pin();
        *sf = sf_buf_alloc(m, SFB_CPUPRIVATE | ((flags & IOMMU_PGF_WAITOK)
            == 0 ? SFB_NOWAIT : 0));
        if (*sf == NULL) {
                sched_unpin();
                if (allocated) {
                        VM_OBJECT_ASSERT_WLOCKED(obj);
                        iommu_pgfree(obj, m->pindex, flags | IOMMU_PGF_OBJL,
                            NULL);
                }
                if ((flags & IOMMU_PGF_OBJL) == 0)
                        VM_OBJECT_WUNLOCK(obj);
                return (NULL);
        }
        if ((flags & (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL)) ==
            (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL))
                VM_OBJECT_WLOCK(obj);
        else if ((flags & (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL)) == 0)
                VM_OBJECT_WUNLOCK(obj);
        return ((void *)sf_buf_kva(*sf));
}

void
iommu_unmap_pgtbl(struct sf_buf *sf)
{

        sf_buf_free(sf);
        sched_unpin();
}
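
/*
 * Illustrative only: a typical way a page-table walker might pair
 * iommu_map_pgtbl() with iommu_unmap_pgtbl().  The object, pindex, and
 * the iommu_pte_t type stand in for a driver's own page-table layout and
 * are not part of this file's API.
 *
 *      struct sf_buf *sf;
 *      iommu_pte_t *ptep;
 *
 *      ptep = iommu_map_pgtbl(obj, pidx, IOMMU_PGF_NOALLOC, &sf);
 *      if (ptep != NULL) {
 *              ... read or update ptep[pglvl_pgtbl_pte_off(pglvl, base, lvl)] ...
 *              iommu_unmap_pgtbl(sf);
 *      }
 */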
iommu_haddr_t iommu_high;
int iommu_tbl_pagecnt;

SYSCTL_NODE(_hw_iommu, OID_AUTO, dmar, CTLFLAG_RD | CTLFLAG_MPSAFE,
    NULL, "");
SYSCTL_INT(_hw_iommu, OID_AUTO, tbl_pagecnt, CTLFLAG_RD,
    &iommu_tbl_pagecnt, 0,
    "Count of pages used for IOMMU pagetables");

int iommu_qi_batch_coalesce = 100;
SYSCTL_INT(_hw_iommu, OID_AUTO, batch_coalesce, CTLFLAG_RWTUN,
    &iommu_qi_batch_coalesce, 0,
    "Number of QI batches between interrupts");

static struct iommu_unit *
x86_no_iommu_find(device_t dev, bool verbose)
{
        return (NULL);
}

static int
x86_no_iommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count)
{
        return (EOPNOTSUPP);
}

static int
x86_no_iommu_map_msi_intr(device_t src, u_int cpu, u_int vector,
    u_int cookie, uint64_t *addr, uint32_t *data)
{
        return (EOPNOTSUPP);
}

static int
x86_no_iommu_unmap_msi_intr(device_t src, u_int cookie)
{
        return (0);
}

static int
x86_no_iommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector,
    bool edge, bool activehi, int irq, u_int *cookie, uint32_t *hi,
    uint32_t *lo)
{
        return (EOPNOTSUPP);
}

static int
x86_no_iommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie)
{
        return (0);
}

static struct x86_iommu x86_no_iommu = {
        .find = x86_no_iommu_find,
        .alloc_msi_intr = x86_no_iommu_alloc_msi_intr,
        .map_msi_intr = x86_no_iommu_map_msi_intr,
        .unmap_msi_intr = x86_no_iommu_unmap_msi_intr,
        .map_ioapic_intr = x86_no_iommu_map_ioapic_intr,
        .unmap_ioapic_intr = x86_no_iommu_unmap_ioapic_intr,
};

static struct x86_iommu *x86_iommu = &x86_no_iommu;

void
set_x86_iommu(struct x86_iommu *x)
{
        MPASS(x86_iommu == &x86_no_iommu);
        x86_iommu = x;
}

struct x86_iommu *
get_x86_iommu(void)
{
        return (x86_iommu);
}
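
/*
 * Note: a hardware back-end (for instance the Intel DMAR or AMD IOMMU
 * driver) is expected to register its method table with set_x86_iommu()
 * exactly once, during its own initialization; the MPASS above enforces
 * the single registration.  Until that happens, x86_iommu points at the
 * x86_no_iommu stubs, which cover only the find() and interrupt-remapping
 * methods used by the wrappers below.
 */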
void
iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
    bool cansleep)
{
        x86_iommu->domain_unload_entry(entry, free, cansleep);
}

void
iommu_domain_unload(struct iommu_domain *iodom,
    struct iommu_map_entries_tailq *entries, bool cansleep)
{
        x86_iommu->domain_unload(iodom, entries, cansleep);
}

struct iommu_ctx *
iommu_get_ctx(struct iommu_unit *iommu, device_t dev, uint16_t rid,
    bool id_mapped, bool rmrr_init)
{
        return (x86_iommu->get_ctx(iommu, dev, rid, id_mapped, rmrr_init));
}

void
iommu_free_ctx_locked(struct iommu_unit *iommu, struct iommu_ctx *context)
{
        x86_iommu->free_ctx_locked(iommu, context);
}

void
iommu_free_ctx(struct iommu_ctx *context)
{
        x86_iommu->free_ctx(context);
}

struct iommu_unit *
iommu_find(device_t dev, bool verbose)
{
        return (x86_iommu->find(dev, verbose));
}

int
iommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count)
{
        return (x86_iommu->alloc_msi_intr(src, cookies, count));
}

int
iommu_map_msi_intr(device_t src, u_int cpu, u_int vector, u_int cookie,
    uint64_t *addr, uint32_t *data)
{
        return (x86_iommu->map_msi_intr(src, cpu, vector, cookie,
            addr, data));
}

int
iommu_unmap_msi_intr(device_t src, u_int cookie)
{
        return (x86_iommu->unmap_msi_intr(src, cookie));
}

int
iommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector, bool edge,
    bool activehi, int irq, u_int *cookie, uint32_t *hi, uint32_t *lo)
{
        return (x86_iommu->map_ioapic_intr(ioapic_id, cpu, vector, edge,
            activehi, irq, cookie, hi, lo));
}

int
iommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie)
{
        return (x86_iommu->unmap_ioapic_intr(ioapic_id, cookie));
}

void
iommu_unit_pre_instantiate_ctx(struct iommu_unit *unit)
{
        x86_iommu->unit_pre_instantiate_ctx(unit);
}

#define IOMMU2X86C(iommu)       (x86_iommu->get_x86_common(iommu))

static bool
iommu_qi_seq_processed(struct iommu_unit *unit,
    const struct iommu_qi_genseq *pseq)
{
        struct x86_unit_common *x86c;
        u_int gen;

        x86c = IOMMU2X86C(unit);
        gen = x86c->inv_waitd_gen;
        return (pseq->gen < gen || (pseq->gen == gen && pseq->seq <=
            atomic_load_64(&x86c->inv_waitd_seq_hw)));
}

void
iommu_qi_emit_wait_seq(struct iommu_unit *unit, struct iommu_qi_genseq *pseq,
    bool emit_wait)
{
        struct x86_unit_common *x86c;
        struct iommu_qi_genseq gsec;
        uint32_t seq;

        KASSERT(pseq != NULL, ("wait descriptor with no place for seq"));
        IOMMU_ASSERT_LOCKED(unit);
        x86c = IOMMU2X86C(unit);

        if (x86c->inv_waitd_seq == 0xffffffff) {
                gsec.gen = x86c->inv_waitd_gen;
                gsec.seq = x86c->inv_waitd_seq;
                x86_iommu->qi_ensure(unit, 1);
                x86_iommu->qi_emit_wait_descr(unit, gsec.seq, false,
                    true, false);
                x86_iommu->qi_advance_tail(unit);
                while (!iommu_qi_seq_processed(unit, &gsec))
                        cpu_spinwait();
                x86c->inv_waitd_gen++;
                x86c->inv_waitd_seq = 1;
        }
        seq = x86c->inv_waitd_seq++;
        pseq->gen = x86c->inv_waitd_gen;
        pseq->seq = seq;
        if (emit_wait) {
                x86_iommu->qi_ensure(unit, 1);
                x86_iommu->qi_emit_wait_descr(unit, seq, true, true, false);
        }
}

/*
 * To avoid missed wakeups, callers must increment the unit's waiters count
 * before advancing the tail past the wait descriptor.
 */
void
iommu_qi_wait_for_seq(struct iommu_unit *unit, const struct iommu_qi_genseq *
    gseq, bool nowait)
{
        struct x86_unit_common *x86c;

        IOMMU_ASSERT_LOCKED(unit);
        x86c = IOMMU2X86C(unit);

        KASSERT(x86c->inv_seq_waiters > 0, ("%s: no waiters", __func__));
        while (!iommu_qi_seq_processed(unit, gseq)) {
                if (cold || nowait) {
                        cpu_spinwait();
                } else {
                        msleep(&x86c->inv_seq_waiters, &unit->lock, 0,
                            "dmarse", hz);
                }
        }
        x86c->inv_seq_waiters--;
}
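
/*
 * Sketch of the expected calling sequence for the two helpers above;
 * compare iommu_qi_invalidate_sync() and iommu_qi_common_fini() below.
 * The waiters count must be bumped before the tail is advanced, or the
 * wakeup from the QI task may be missed.
 *
 *      IOMMU_LOCK(unit);
 *      x86_iommu->qi_ensure(unit, 1);
 *      iommu_qi_emit_wait_seq(unit, &gseq, true);
 *      IOMMU2X86C(unit)->inv_seq_waiters++;
 *      x86_iommu->qi_advance_tail(unit);
 *      iommu_qi_wait_for_seq(unit, &gseq, false);
 *      IOMMU_UNLOCK(unit);
 */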
/*
 * The caller must not be using the entry's dmamap_link field.
 */
void
iommu_qi_invalidate_locked(struct iommu_domain *domain,
    struct iommu_map_entry *entry, bool emit_wait)
{
        struct iommu_unit *unit;
        struct x86_unit_common *x86c;

        unit = domain->iommu;
        x86c = IOMMU2X86C(unit);
        IOMMU_ASSERT_LOCKED(unit);

        x86_iommu->qi_invalidate_emit(domain, entry->start, entry->end -
            entry->start, &entry->gseq, emit_wait);

        /*
         * To avoid a data race in dmar_qi_task(), the entry's gseq must be
         * initialized before the entry is added to the TLB flush list, and the
         * entry must be added to that list before the tail is advanced.  More
         * precisely, the tail must not be advanced past the wait descriptor
         * that will generate the interrupt that schedules dmar_qi_task() for
         * execution before the entry is added to the list.  While an earlier
         * call to dmar_qi_ensure() might have advanced the tail, it will not
         * advance it past the wait descriptor.
         *
         * See the definition of struct dmar_unit for more information on
         * synchronization.
         */
        entry->tlb_flush_next = NULL;
        atomic_store_rel_ptr((uintptr_t *)&x86c->tlb_flush_tail->
            tlb_flush_next, (uintptr_t)entry);
        x86c->tlb_flush_tail = entry;

        x86_iommu->qi_advance_tail(unit);
}

void
iommu_qi_invalidate_sync(struct iommu_domain *domain, iommu_gaddr_t base,
    iommu_gaddr_t size, bool cansleep)
{
        struct iommu_unit *unit;
        struct iommu_qi_genseq gseq;

        unit = domain->iommu;
        IOMMU_LOCK(unit);
        x86_iommu->qi_invalidate_emit(domain, base, size, &gseq, true);

        /*
         * To avoid a missed wakeup in iommu_qi_task(), the unit's
         * waiters count must be incremented before the tail is
         * advanced.
         */
        IOMMU2X86C(unit)->inv_seq_waiters++;

        x86_iommu->qi_advance_tail(unit);
        iommu_qi_wait_for_seq(unit, &gseq, !cansleep);
        IOMMU_UNLOCK(unit);
}
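
/*
 * Entries queued by iommu_qi_invalidate_locked() above complete
 * asynchronously: once the hardware reports an entry's wait sequence as
 * processed, the unit's QI interrupt task is expected to call
 * iommu_qi_drain_tlb_flush() below, which unlinks the entry and returns
 * its address range to the domain's GAS allocator.
 */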
void
iommu_qi_drain_tlb_flush(struct iommu_unit *unit)
{
        struct x86_unit_common *x86c;
        struct iommu_map_entry *entry, *head;

        x86c = IOMMU2X86C(unit);
        for (head = x86c->tlb_flush_head;; head = entry) {
                entry = (struct iommu_map_entry *)
                    atomic_load_acq_ptr((uintptr_t *)&head->tlb_flush_next);
                if (entry == NULL ||
                    !iommu_qi_seq_processed(unit, &entry->gseq))
                        break;
                x86c->tlb_flush_head = entry;
                iommu_gas_free_entry(head);
                if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
                        iommu_gas_free_region(entry);
                else
                        iommu_gas_free_space(entry);
        }
}

void
iommu_qi_common_init(struct iommu_unit *unit, task_fn_t qi_task)
{
        struct x86_unit_common *x86c;
        u_int qi_sz;

        x86c = IOMMU2X86C(unit);

        x86c->tlb_flush_head = x86c->tlb_flush_tail =
            iommu_gas_alloc_entry(NULL, 0);
        TASK_INIT(&x86c->qi_task, 0, qi_task, unit);
        x86c->qi_taskqueue = taskqueue_create_fast("iommuqf", M_WAITOK,
            taskqueue_thread_enqueue, &x86c->qi_taskqueue);
        taskqueue_start_threads(&x86c->qi_taskqueue, 1, PI_AV,
            "iommu%d qi taskq", unit->unit);

        x86c->inv_waitd_gen = 0;
        x86c->inv_waitd_seq = 1;

        qi_sz = 3;
        TUNABLE_INT_FETCH("hw.iommu.qi_size", &qi_sz);
        if (qi_sz > x86c->qi_buf_maxsz)
                qi_sz = x86c->qi_buf_maxsz;
        x86c->inv_queue_size = (1ULL << qi_sz) * PAGE_SIZE;
        /* Reserve one descriptor to prevent wraparound. */
        x86c->inv_queue_avail = x86c->inv_queue_size -
            x86c->qi_cmd_sz;

        /*
         * The invalidation queue reads by DMARs/AMDIOMMUs are always
         * coherent.
         */
        x86c->inv_queue = kmem_alloc_contig(x86c->inv_queue_size,
            M_WAITOK | M_ZERO, 0, iommu_high, PAGE_SIZE, 0,
            VM_MEMATTR_DEFAULT);
        x86c->inv_waitd_seq_hw_phys = pmap_kextract(
            (vm_offset_t)&x86c->inv_waitd_seq_hw);
}

void
iommu_qi_common_fini(struct iommu_unit *unit, void (*disable_qi)(
    struct iommu_unit *))
{
        struct x86_unit_common *x86c;
        struct iommu_qi_genseq gseq;

        x86c = IOMMU2X86C(unit);

        taskqueue_drain(x86c->qi_taskqueue, &x86c->qi_task);
        taskqueue_free(x86c->qi_taskqueue);
        x86c->qi_taskqueue = NULL;

        IOMMU_LOCK(unit);
        /* Quiesce the queue. */
        x86_iommu->qi_ensure(unit, 1);
        iommu_qi_emit_wait_seq(unit, &gseq, true);
        /* See iommu_qi_invalidate_locked(). */
        x86c->inv_seq_waiters++;
        x86_iommu->qi_advance_tail(unit);
        iommu_qi_wait_for_seq(unit, &gseq, false);
        /* Only after the quiesce may the queue be disabled. */
        disable_qi(unit);
        KASSERT(x86c->inv_seq_waiters == 0,
            ("iommu%d: waiters on disabled queue", unit->unit));
        IOMMU_UNLOCK(unit);

        kmem_free(x86c->inv_queue, x86c->inv_queue_size);
        x86c->inv_queue = NULL;
        x86c->inv_queue_size = 0;
}

int
iommu_alloc_irq(struct iommu_unit *unit, int idx)
{
        device_t dev, pcib;
        struct iommu_msi_data *dmd;
        uint64_t msi_addr;
        uint32_t msi_data;
        int error;

        MPASS(idx >= 0 && idx < IOMMU_MAX_MSI);

        dev = unit->dev;
        dmd = &IOMMU2X86C(unit)->intrs[idx];
        pcib = device_get_parent(device_get_parent(dev)); /* Really not pcib */
        error = PCIB_ALLOC_MSIX(pcib, dev, &dmd->irq);
        if (error != 0) {
                device_printf(dev, "cannot allocate %s interrupt, %d\n",
                    dmd->name, error);
                goto err1;
        }
        error = bus_set_resource(dev, SYS_RES_IRQ, dmd->irq_rid,
            dmd->irq, 1);
        if (error != 0) {
                device_printf(dev, "cannot set %s interrupt resource, %d\n",
                    dmd->name, error);
                goto err2;
        }
        dmd->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
            &dmd->irq_rid, RF_ACTIVE);
        if (dmd->irq_res == NULL) {
                device_printf(dev,
                    "cannot allocate resource for %s interrupt\n", dmd->name);
                error = ENXIO;
                goto err3;
        }
        error = bus_setup_intr(dev, dmd->irq_res, INTR_TYPE_MISC,
            dmd->handler, NULL, unit, &dmd->intr_handle);
        if (error != 0) {
                device_printf(dev, "cannot setup %s interrupt, %d\n",
                    dmd->name, error);
                goto err4;
        }
        bus_describe_intr(dev, dmd->irq_res, dmd->intr_handle, "%s", dmd->name);
        error = PCIB_MAP_MSI(pcib, dev, dmd->irq, &msi_addr, &msi_data);
        if (error != 0) {
                device_printf(dev, "cannot map %s interrupt, %d\n",
                    dmd->name, error);
                goto err5;
        }

        dmd->msi_data = msi_data;
        dmd->msi_addr = msi_addr;

        return (0);

err5:
        bus_teardown_intr(dev, dmd->irq_res, dmd->intr_handle);
err4:
        bus_release_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq_res);
err3:
        bus_delete_resource(dev, SYS_RES_IRQ, dmd->irq_rid);
err2:
        PCIB_RELEASE_MSIX(pcib, dev, dmd->irq);
        dmd->irq = -1;
err1:
        return (error);
}
void
iommu_release_intr(struct iommu_unit *unit, int idx)
{
        device_t dev;
        struct iommu_msi_data *dmd;

        MPASS(idx >= 0 && idx < IOMMU_MAX_MSI);

        dmd = &IOMMU2X86C(unit)->intrs[idx];
        if (dmd->handler == NULL || dmd->irq == -1)
                return;
        dev = unit->dev;

        bus_teardown_intr(dev, dmd->irq_res, dmd->intr_handle);
        bus_release_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq_res);
        bus_delete_resource(dev, SYS_RES_IRQ, dmd->irq_rid);
        PCIB_RELEASE_MSIX(device_get_parent(device_get_parent(dev)),
            dev, dmd->irq);
        dmd->irq = -1;
}

void
iommu_device_tag_init(struct iommu_ctx *ctx, device_t dev)
{
        bus_addr_t maxaddr;

        maxaddr = MIN(ctx->domain->end, BUS_SPACE_MAXADDR);
        ctx->tag->common.impl = &bus_dma_iommu_impl;
        ctx->tag->common.boundary = 0;
        ctx->tag->common.lowaddr = maxaddr;
        ctx->tag->common.highaddr = maxaddr;
        ctx->tag->common.maxsize = maxaddr;
        ctx->tag->common.nsegments = BUS_SPACE_UNRESTRICTED;
        ctx->tag->common.maxsegsz = maxaddr;
        ctx->tag->ctx = ctx;
        ctx->tag->owner = dev;
}

void
iommu_domain_free_entry(struct iommu_map_entry *entry, bool free)
{
        if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
                iommu_gas_free_region(entry);
        else
                iommu_gas_free_space(entry);
        if (free)
                iommu_gas_free_entry(entry);
        else
                entry->flags = 0;
}

/*
 * Index of the pte for the guest address base in the page table at
 * the level lvl.
 */
int
pglvl_pgtbl_pte_off(int pglvl, iommu_gaddr_t base, int lvl)
{

        base >>= IOMMU_PAGE_SHIFT + (pglvl - lvl - 1) *
            IOMMU_NPTEPGSHIFT;
        return (base & IOMMU_PTEMASK);
}

/*
 * Returns the page index of the page table page in the page table
 * object, which maps the given address base at the page table level
 * lvl.
 */
vm_pindex_t
pglvl_pgtbl_get_pindex(int pglvl, iommu_gaddr_t base, int lvl)
{
        vm_pindex_t idx, pidx;
        int i;

        KASSERT(lvl >= 0 && lvl < pglvl,
            ("wrong lvl %d %d", pglvl, lvl));

        for (pidx = idx = 0, i = 0; i < lvl; i++, pidx = idx) {
                idx = pglvl_pgtbl_pte_off(pglvl, base, i) +
                    pidx * IOMMU_NPTEPG + 1;
        }
        return (idx);
}
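
/*
 * A worked example of the indexing above, for a 4-level table
 * (pglvl == 4): the level-0 root always lives at pindex 0; the level-1
 * pages occupy pindices 1 through IOMMU_NPTEPG; a level-2 page for a
 * given base lands at
 *      pte_off(lvl 1) + (pte_off(lvl 0) + 1) * IOMMU_NPTEPG + 1,
 * and so on, so each level's pages form a contiguous band of pindices
 * after the bands of all shallower levels.
 */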
/*
 * Calculate the total number of page table pages needed to map the
 * whole bus address space on the context with the selected agaw.
 */
vm_pindex_t
pglvl_max_pages(int pglvl)
{
        vm_pindex_t res;
        int i;

        for (res = 0, i = pglvl; i > 0; i--) {
                res *= IOMMU_NPTEPG;
                res++;
        }
        return (res);
}

iommu_gaddr_t
pglvl_page_size(int total_pglvl, int lvl)
{
        int rlvl;
        static const iommu_gaddr_t pg_sz[] = {
                (iommu_gaddr_t)IOMMU_PAGE_SIZE,
                (iommu_gaddr_t)IOMMU_PAGE_SIZE << IOMMU_NPTEPGSHIFT,
                (iommu_gaddr_t)IOMMU_PAGE_SIZE << (2 * IOMMU_NPTEPGSHIFT),
                (iommu_gaddr_t)IOMMU_PAGE_SIZE << (3 * IOMMU_NPTEPGSHIFT),
                (iommu_gaddr_t)IOMMU_PAGE_SIZE << (4 * IOMMU_NPTEPGSHIFT),
                (iommu_gaddr_t)IOMMU_PAGE_SIZE << (5 * IOMMU_NPTEPGSHIFT),
                (iommu_gaddr_t)IOMMU_PAGE_SIZE << (6 * IOMMU_NPTEPGSHIFT),
        };

        KASSERT(lvl >= 0 && lvl < total_pglvl,
            ("total %d lvl %d", total_pglvl, lvl));
        rlvl = total_pglvl - lvl - 1;
        KASSERT(rlvl < nitems(pg_sz), ("sizeof pg_sz lvl %d", lvl));
        return (pg_sz[rlvl]);
}

#ifdef DDB
#include <ddb/ddb.h>
#include <ddb/db_lex.h>

void
iommu_db_print_domain_entry(const struct iommu_map_entry *entry)
{
        struct iommu_map_entry *l, *r;

        db_printf(
            " start %jx end %jx first %jx last %jx free_down %jx flags %x ",
            entry->start, entry->end, entry->first, entry->last,
            entry->free_down, entry->flags);
        db_printf("left ");
        l = RB_LEFT(entry, rb_entry);
        if (l == NULL)
                db_printf("NULL ");
        else
                db_printf("%jx ", l->start);
        db_printf("right ");
        r = RB_RIGHT(entry, rb_entry);
        if (r == NULL)
                db_printf("NULL");
        else
                db_printf("%jx", r->start);
        db_printf("\n");
}

void
iommu_db_print_ctx(struct iommu_ctx *ctx)
{
        db_printf(
            " @%p pci%d:%d:%d refs %d flags %#x loads %lu unloads %lu\n",
            ctx, pci_get_bus(ctx->tag->owner),
            pci_get_slot(ctx->tag->owner),
            pci_get_function(ctx->tag->owner), ctx->refs,
            ctx->flags, ctx->loads, ctx->unloads);
}

void
iommu_db_domain_print_contexts(struct iommu_domain *iodom)
{
        struct iommu_ctx *ctx;

        if (LIST_EMPTY(&iodom->contexts))
                return;

        db_printf(" Contexts:\n");
        LIST_FOREACH(ctx, &iodom->contexts, link)
                iommu_db_print_ctx(ctx);
}

void
iommu_db_domain_print_mappings(struct iommu_domain *iodom)
{
        struct iommu_map_entry *entry;

        db_printf(" mapped:\n");
        RB_FOREACH(entry, iommu_gas_entries_tree, &iodom->rb_root) {
                iommu_db_print_domain_entry(entry);
                if (db_pager_quit)
                        break;
        }
        if (db_pager_quit)
                return;
        db_printf(" unloading:\n");
        TAILQ_FOREACH(entry, &iodom->unload_entries, dmamap_link) {
                iommu_db_print_domain_entry(entry);
                if (db_pager_quit)
                        break;
        }
}

#endif