/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013, 2014, 2024 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "opt_acpi.h"
#if defined(__amd64__)
#define DEV_APIC
#else
#include "opt_apic.h"
#endif
#include "opt_ddb.h"

#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sf_buf.h>
#include <sys/sysctl.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/rman.h>
#include <sys/rwlock.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/cpu.h>
#include <x86/include/busdma_impl.h>
#include <dev/iommu/busdma_iommu.h>
#include <dev/iommu/iommu.h>
#include <x86/iommu/x86_iommu.h>
#include <x86/iommu/iommu_intrmap.h>
#ifdef DEV_APIC
#include "pcib_if.h"
#include <machine/intr_machdep.h>
#include <x86/apicreg.h>
#include <x86/apicvar.h>
#endif

vm_page_t
iommu_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags)
{
	vm_page_t m;
	int zeroed, aflags;

	zeroed = (flags & IOMMU_PGF_ZERO) != 0 ? VM_ALLOC_ZERO : 0;
	aflags = zeroed | VM_ALLOC_NOBUSY | VM_ALLOC_SYSTEM | VM_ALLOC_NODUMP |
	    ((flags & IOMMU_PGF_WAITOK) != 0 ? VM_ALLOC_WAITFAIL :
	    VM_ALLOC_NOWAIT);
	for (;;) {
		if ((flags & IOMMU_PGF_OBJL) == 0)
			VM_OBJECT_WLOCK(obj);
		m = vm_page_lookup(obj, idx);
		if ((flags & IOMMU_PGF_NOALLOC) != 0 || m != NULL) {
			if ((flags & IOMMU_PGF_OBJL) == 0)
				VM_OBJECT_WUNLOCK(obj);
			break;
		}
		m = vm_page_alloc_contig(obj, idx, aflags, 1, 0,
		    iommu_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
		if ((flags & IOMMU_PGF_OBJL) == 0)
			VM_OBJECT_WUNLOCK(obj);
		if (m != NULL) {
			if (zeroed && (m->flags & PG_ZERO) == 0)
				pmap_zero_page(m);
			atomic_add_int(&iommu_tbl_pagecnt, 1);
			break;
		}
		if ((flags & IOMMU_PGF_WAITOK) == 0)
			break;
	}
	return (m);
}

void
iommu_pgfree(vm_object_t obj, vm_pindex_t idx, int flags,
    struct iommu_map_entry *entry)
{
	vm_page_t m;

	if ((flags & IOMMU_PGF_OBJL) == 0)
		VM_OBJECT_WLOCK(obj);
	m = vm_page_grab(obj, idx, VM_ALLOC_NOCREAT);
	if (m != NULL) {
		if (entry == NULL) {
			vm_page_free(m);
			atomic_subtract_int(&iommu_tbl_pagecnt, 1);
		} else {
			vm_page_remove_xbusy(m);	/* keep page busy */
			SLIST_INSERT_HEAD(&entry->pgtbl_free, m, plinks.s.ss);
		}
	}
	if ((flags & IOMMU_PGF_OBJL) == 0)
		VM_OBJECT_WUNLOCK(obj);
}

void *
iommu_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags,
    struct sf_buf **sf)
{
	vm_page_t m;
	bool allocated;

	if ((flags & IOMMU_PGF_OBJL) == 0)
		VM_OBJECT_WLOCK(obj);
	m = vm_page_lookup(obj, idx);
	if (m == NULL && (flags & IOMMU_PGF_ALLOC) != 0) {
		m = iommu_pgalloc(obj, idx, flags | IOMMU_PGF_OBJL);
		allocated = true;
	} else
		allocated = false;
	if (m == NULL) {
		if ((flags & IOMMU_PGF_OBJL) == 0)
			VM_OBJECT_WUNLOCK(obj);
		return (NULL);
	}
	/* Sleepable allocations cannot fail. */
	if ((flags & IOMMU_PGF_WAITOK) != 0)
		VM_OBJECT_WUNLOCK(obj);
	sched_pin();
	*sf = sf_buf_alloc(m, SFB_CPUPRIVATE | ((flags & IOMMU_PGF_WAITOK)
	    == 0 ? SFB_NOWAIT : 0));
	if (*sf == NULL) {
		sched_unpin();
		if (allocated) {
			VM_OBJECT_ASSERT_WLOCKED(obj);
			iommu_pgfree(obj, m->pindex, flags | IOMMU_PGF_OBJL,
			    NULL);
		}
		if ((flags & IOMMU_PGF_OBJL) == 0)
			VM_OBJECT_WUNLOCK(obj);
		return (NULL);
	}
	if ((flags & (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL)) ==
	    (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL))
		VM_OBJECT_WLOCK(obj);
	else if ((flags & (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL)) == 0)
		VM_OBJECT_WUNLOCK(obj);
	return ((void *)sf_buf_kva(*sf));
}

void
iommu_unmap_pgtbl(struct sf_buf *sf)
{

	sf_buf_free(sf);
	sched_unpin();
}
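
/*
 * Example (illustrative sketch only, not part of the driver): a typical
 * page-table update step using the helpers above.  The page-table object
 * "pgtbl_obj" and the function name "example_pte_update" are hypothetical
 * stand-ins for driver-specific equivalents; the flag usage follows the
 * semantics implemented above.
 */
#if 0
static void
example_pte_update(vm_object_t pgtbl_obj, vm_pindex_t pidx, int ptoff,
    uint64_t pteval)
{
	struct sf_buf *sf;
	uint64_t *ptep;

	/*
	 * Map (and, if needed, allocate) the page-table page.  With
	 * IOMMU_PGF_WAITOK the allocation may sleep and does not fail.
	 */
	ptep = iommu_map_pgtbl(pgtbl_obj, pidx,
	    IOMMU_PGF_ALLOC | IOMMU_PGF_WAITOK, &sf);

	/* Modify the PTE through the temporary per-CPU mapping. */
	ptep[ptoff] = pteval;

	/* Drop the sf_buf mapping and unpin the thread. */
	iommu_unmap_pgtbl(sf);
}
#endif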

iommu_haddr_t iommu_high;
int iommu_tbl_pagecnt;

SYSCTL_NODE(_hw_iommu, OID_AUTO, dmar, CTLFLAG_RD | CTLFLAG_MPSAFE,
    NULL, "");
SYSCTL_INT(_hw_iommu, OID_AUTO, tbl_pagecnt, CTLFLAG_RD,
    &iommu_tbl_pagecnt, 0,
    "Count of pages used for IOMMU pagetables");

int iommu_qi_batch_coalesce = 100;
SYSCTL_INT(_hw_iommu, OID_AUTO, batch_coalesce, CTLFLAG_RWTUN,
    &iommu_qi_batch_coalesce, 0,
    "Number of QI batches between interrupts");

static struct iommu_unit *
x86_no_iommu_find(device_t dev, bool verbose)
{
	return (NULL);
}

static int
x86_no_iommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count)
{
	return (EOPNOTSUPP);
}

static int
x86_no_iommu_map_msi_intr(device_t src, u_int cpu, u_int vector,
    u_int cookie, uint64_t *addr, uint32_t *data)
{
	return (EOPNOTSUPP);
}

static int
x86_no_iommu_unmap_msi_intr(device_t src, u_int cookie)
{
	return (0);
}

static int
x86_no_iommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector,
    bool edge, bool activehi, int irq, u_int *cookie, uint32_t *hi,
    uint32_t *lo)
{
	return (EOPNOTSUPP);
}

static int
x86_no_iommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie)
{
	return (0);
}

static struct x86_iommu x86_no_iommu = {
	.find = x86_no_iommu_find,
	.alloc_msi_intr = x86_no_iommu_alloc_msi_intr,
	.map_msi_intr = x86_no_iommu_map_msi_intr,
	.unmap_msi_intr = x86_no_iommu_unmap_msi_intr,
	.map_ioapic_intr = x86_no_iommu_map_ioapic_intr,
	.unmap_ioapic_intr = x86_no_iommu_unmap_ioapic_intr,
};

static struct x86_iommu *x86_iommu = &x86_no_iommu;

void
set_x86_iommu(struct x86_iommu *x)
{
	MPASS(x86_iommu == &x86_no_iommu);
	x86_iommu = x;
}

struct x86_iommu *
get_x86_iommu(void)
{
	return (x86_iommu);
}
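
/*
 * Example (sketch, not part of this file): a hardware back-end such as
 * the DMAR or AMD-Vi driver publishes its method table once at attach
 * time via set_x86_iommu(); until then every call below falls through to
 * the x86_no_iommu stubs above.  The structure and function names shown
 * here are illustrative placeholders, not real driver symbols.
 */
#if 0
static struct x86_iommu example_x86_iommu = {
	.find = example_iommu_find,			/* hypothetical */
	.alloc_msi_intr = example_alloc_msi_intr,	/* hypothetical */
	/* ... remaining methods filled in by the back-end ... */
};

static void
example_register(void)
{
	/* Legal only once, while the stub table is still installed. */
	set_x86_iommu(&example_x86_iommu);
}
#endif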

void
iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
    bool cansleep)
{
	x86_iommu->domain_unload_entry(entry, free, cansleep);
}

void
iommu_domain_unload(struct iommu_domain *iodom,
    struct iommu_map_entries_tailq *entries, bool cansleep)
{
	x86_iommu->domain_unload(iodom, entries, cansleep);
}

struct iommu_ctx *
iommu_get_ctx(struct iommu_unit *iommu, device_t dev, uint16_t rid,
    bool id_mapped, bool rmrr_init)
{
	return (x86_iommu->get_ctx(iommu, dev, rid, id_mapped, rmrr_init));
}

void
iommu_free_ctx_locked(struct iommu_unit *iommu, struct iommu_ctx *context)
{
	x86_iommu->free_ctx_locked(iommu, context);
}

struct iommu_unit *
iommu_find(device_t dev, bool verbose)
{
	return (x86_iommu->find(dev, verbose));
}

int
iommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count)
{
	return (x86_iommu->alloc_msi_intr(src, cookies, count));
}

int
iommu_map_msi_intr(device_t src, u_int cpu, u_int vector, u_int cookie,
    uint64_t *addr, uint32_t *data)
{
	return (x86_iommu->map_msi_intr(src, cpu, vector, cookie,
	    addr, data));
}

int
iommu_unmap_msi_intr(device_t src, u_int cookie)
{
	return (x86_iommu->unmap_msi_intr(src, cookie));
}

int
iommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector, bool edge,
    bool activehi, int irq, u_int *cookie, uint32_t *hi, uint32_t *lo)
{
	return (x86_iommu->map_ioapic_intr(ioapic_id, cpu, vector, edge,
	    activehi, irq, cookie, hi, lo));
}

int
iommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie)
{
	return (x86_iommu->unmap_ioapic_intr(ioapic_id, cookie));
}

void
iommu_unit_pre_instantiate_ctx(struct iommu_unit *unit)
{
	x86_iommu->unit_pre_instantiate_ctx(unit);
}

#define	IOMMU2X86C(iommu)	(x86_iommu->get_x86_common(iommu))

static bool
iommu_qi_seq_processed(struct iommu_unit *unit,
    const struct iommu_qi_genseq *pseq)
{
	struct x86_unit_common *x86c;
	u_int gen;

	x86c = IOMMU2X86C(unit);
	gen = x86c->inv_waitd_gen;
	return (pseq->gen < gen || (pseq->gen == gen && pseq->seq <=
	    atomic_load_64(&x86c->inv_waitd_seq_hw)));
}

void
iommu_qi_emit_wait_seq(struct iommu_unit *unit, struct iommu_qi_genseq *pseq,
    bool emit_wait)
{
	struct x86_unit_common *x86c;
	struct iommu_qi_genseq gsec;
	uint32_t seq;

	KASSERT(pseq != NULL, ("wait descriptor with no place for seq"));
	IOMMU_ASSERT_LOCKED(unit);
	x86c = IOMMU2X86C(unit);

	if (x86c->inv_waitd_seq == 0xffffffff) {
		gsec.gen = x86c->inv_waitd_gen;
		gsec.seq = x86c->inv_waitd_seq;
		x86_iommu->qi_ensure(unit, 1);
		x86_iommu->qi_emit_wait_descr(unit, gsec.seq, false,
		    true, false);
		x86_iommu->qi_advance_tail(unit);
		while (!iommu_qi_seq_processed(unit, &gsec))
			cpu_spinwait();
		x86c->inv_waitd_gen++;
		x86c->inv_waitd_seq = 1;
	}
	seq = x86c->inv_waitd_seq++;
	pseq->gen = x86c->inv_waitd_gen;
	pseq->seq = seq;
	if (emit_wait) {
		x86_iommu->qi_ensure(unit, 1);
		x86_iommu->qi_emit_wait_descr(unit, seq, true, true, false);
	}
}

/*
 * To avoid missed wakeups, callers must increment the unit's waiters count
 * before advancing the tail past the wait descriptor.
 */
void
iommu_qi_wait_for_seq(struct iommu_unit *unit, const struct iommu_qi_genseq *
    gseq, bool nowait)
{
	struct x86_unit_common *x86c;

	IOMMU_ASSERT_LOCKED(unit);
	x86c = IOMMU2X86C(unit);

	KASSERT(x86c->inv_seq_waiters > 0, ("%s: no waiters", __func__));
	while (!iommu_qi_seq_processed(unit, gseq)) {
		if (cold || nowait) {
			cpu_spinwait();
		} else {
			msleep(&x86c->inv_seq_waiters, &unit->lock, 0,
			    "dmarse", hz);
		}
	}
	x86c->inv_seq_waiters--;
}
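
/*
 * Example (sketch): the synchronous wait pattern implied by the comment
 * above, as it is used later in this file (see iommu_qi_invalidate_sync()
 * and iommu_qi_common_fini()).  The ordering is the important part: the
 * waiters count must be bumped before the tail is advanced past the wait
 * descriptor, otherwise the completion interrupt may deliver its wakeup
 * before anyone is registered to receive it.  "example_qi_sync_point" is
 * a hypothetical name.
 */
#if 0
static void
example_qi_sync_point(struct iommu_unit *unit)
{
	struct iommu_qi_genseq gseq;

	IOMMU_ASSERT_LOCKED(unit);
	x86_iommu->qi_ensure(unit, 1);
	iommu_qi_emit_wait_seq(unit, &gseq, true);
	IOMMU2X86C(unit)->inv_seq_waiters++;	/* before advancing tail */
	x86_iommu->qi_advance_tail(unit);
	iommu_qi_wait_for_seq(unit, &gseq, false);
}
#endif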

/*
 * The caller must not be using the entry's dmamap_link field.
 */
void
iommu_qi_invalidate_locked(struct iommu_domain *domain,
    struct iommu_map_entry *entry, bool emit_wait)
{
	struct iommu_unit *unit;
	struct x86_unit_common *x86c;

	unit = domain->iommu;
	x86c = IOMMU2X86C(unit);
	IOMMU_ASSERT_LOCKED(unit);

	x86_iommu->qi_invalidate_emit(domain, entry->start, entry->end -
	    entry->start, &entry->gseq, emit_wait);

	/*
	 * To avoid a data race in dmar_qi_task(), the entry's gseq must be
	 * initialized before the entry is added to the TLB flush list, and the
	 * entry must be added to that list before the tail is advanced. More
	 * precisely, the tail must not be advanced past the wait descriptor
	 * that will generate the interrupt that schedules dmar_qi_task() for
	 * execution before the entry is added to the list. While an earlier
	 * call to dmar_qi_ensure() might have advanced the tail, it will not
	 * advance it past the wait descriptor.
	 *
	 * See the definition of struct dmar_unit for more information on
	 * synchronization.
	 */
	entry->tlb_flush_next = NULL;
	atomic_store_rel_ptr((uintptr_t *)&x86c->tlb_flush_tail->
	    tlb_flush_next, (uintptr_t)entry);
	x86c->tlb_flush_tail = entry;

	x86_iommu->qi_advance_tail(unit);
}

void
iommu_qi_invalidate_sync(struct iommu_domain *domain, iommu_gaddr_t base,
    iommu_gaddr_t size, bool cansleep)
{
	struct iommu_unit *unit;
	struct iommu_qi_genseq gseq;

	unit = domain->iommu;
	IOMMU_LOCK(unit);
	x86_iommu->qi_invalidate_emit(domain, base, size, &gseq, true);

	/*
	 * To avoid a missed wakeup in iommu_qi_task(), the unit's
	 * waiters count must be incremented before the tail is
	 * advanced.
	 */
	IOMMU2X86C(unit)->inv_seq_waiters++;

	x86_iommu->qi_advance_tail(unit);
	iommu_qi_wait_for_seq(unit, &gseq, !cansleep);
	IOMMU_UNLOCK(unit);
}

void
iommu_qi_drain_tlb_flush(struct iommu_unit *unit)
{
	struct x86_unit_common *x86c;
	struct iommu_map_entry *entry, *head;

	x86c = IOMMU2X86C(unit);
	for (head = x86c->tlb_flush_head;; head = entry) {
		entry = (struct iommu_map_entry *)
		    atomic_load_acq_ptr((uintptr_t *)&head->tlb_flush_next);
		if (entry == NULL ||
		    !iommu_qi_seq_processed(unit, &entry->gseq))
			break;
		x86c->tlb_flush_head = entry;
		iommu_gas_free_entry(head);
		if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
			iommu_gas_free_region(entry);
		else
			iommu_gas_free_space(entry);
	}
}
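
/*
 * Example (simplified sketch): the general shape of a back-end's QI
 * completion task, the consumer of the tlb_flush list drained above.
 * The hardware-specific acknowledgment of the completion interrupt is
 * elided; "example_qi_task" is a hypothetical name and the code follows
 * the "missed wakeup" comments in this file rather than any particular
 * driver verbatim.
 */
#if 0
static void
example_qi_task(void *arg, int pending __unused)
{
	struct iommu_unit *unit;
	struct x86_unit_common *x86c;

	unit = arg;
	x86c = IOMMU2X86C(unit);

	/* Free map entries whose invalidation has completed. */
	iommu_qi_drain_tlb_flush(unit);

	/* Wake up threads sleeping in iommu_qi_wait_for_seq(). */
	if (x86c->inv_seq_waiters > 0) {
		IOMMU_LOCK(unit);
		wakeup(&x86c->inv_seq_waiters);
		IOMMU_UNLOCK(unit);
	}
}
#endif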

void
iommu_qi_common_init(struct iommu_unit *unit, task_fn_t qi_task)
{
	struct x86_unit_common *x86c;
	u_int qi_sz;

	x86c = IOMMU2X86C(unit);

	x86c->tlb_flush_head = x86c->tlb_flush_tail =
	    iommu_gas_alloc_entry(NULL, 0);
	TASK_INIT(&x86c->qi_task, 0, qi_task, unit);
	x86c->qi_taskqueue = taskqueue_create_fast("iommuqf", M_WAITOK,
	    taskqueue_thread_enqueue, &x86c->qi_taskqueue);
	taskqueue_start_threads(&x86c->qi_taskqueue, 1, PI_AV,
	    "iommu%d qi taskq", unit->unit);

	x86c->inv_waitd_gen = 0;
	x86c->inv_waitd_seq = 1;

	qi_sz = 3;
	TUNABLE_INT_FETCH("hw.iommu.qi_size", &qi_sz);
	if (qi_sz > x86c->qi_buf_maxsz)
		qi_sz = x86c->qi_buf_maxsz;
	x86c->inv_queue_size = (1ULL << qi_sz) * PAGE_SIZE;
	/* Reserve one descriptor to prevent wraparound. */
	x86c->inv_queue_avail = x86c->inv_queue_size -
	    x86c->qi_cmd_sz;

	/*
	 * The invalidation queue reads by DMARs/AMDIOMMUs are always
	 * coherent.
	 */
	x86c->inv_queue = kmem_alloc_contig(x86c->inv_queue_size,
	    M_WAITOK | M_ZERO, 0, iommu_high, PAGE_SIZE, 0,
	    VM_MEMATTR_DEFAULT);
	x86c->inv_waitd_seq_hw_phys = pmap_kextract(
	    (vm_offset_t)&x86c->inv_waitd_seq_hw);
}

void
iommu_qi_common_fini(struct iommu_unit *unit, void (*disable_qi)(
    struct iommu_unit *))
{
	struct x86_unit_common *x86c;
	struct iommu_qi_genseq gseq;

	x86c = IOMMU2X86C(unit);

	taskqueue_drain(x86c->qi_taskqueue, &x86c->qi_task);
	taskqueue_free(x86c->qi_taskqueue);
	x86c->qi_taskqueue = NULL;

	IOMMU_LOCK(unit);
	/* quiesce */
	x86_iommu->qi_ensure(unit, 1);
	iommu_qi_emit_wait_seq(unit, &gseq, true);
	/* See iommu_qi_invalidate_locked(). */
	x86c->inv_seq_waiters++;
	x86_iommu->qi_advance_tail(unit);
	iommu_qi_wait_for_seq(unit, &gseq, false);
	/* only after the quiesce, disable queue */
	disable_qi(unit);
	KASSERT(x86c->inv_seq_waiters == 0,
	    ("iommu%d: waiters on disabled queue", unit->unit));
	IOMMU_UNLOCK(unit);

	kmem_free(x86c->inv_queue, x86c->inv_queue_size);
	x86c->inv_queue = NULL;
	x86c->inv_queue_size = 0;
}

int
iommu_alloc_irq(struct iommu_unit *unit, int idx)
{
	device_t dev, pcib;
	struct iommu_msi_data *dmd;
	uint64_t msi_addr;
	uint32_t msi_data;
	int error;

	MPASS(idx >= 0 && idx < IOMMU_MAX_MSI);

	dev = unit->dev;
	dmd = &IOMMU2X86C(unit)->intrs[idx];
	pcib = device_get_parent(device_get_parent(dev)); /* Really not pcib */
	error = PCIB_ALLOC_MSIX(pcib, dev, &dmd->irq);
	if (error != 0) {
		device_printf(dev, "cannot allocate %s interrupt, %d\n",
		    dmd->name, error);
		goto err1;
	}
	error = bus_set_resource(dev, SYS_RES_IRQ, dmd->irq_rid,
	    dmd->irq, 1);
	if (error != 0) {
		device_printf(dev, "cannot set %s interrupt resource, %d\n",
		    dmd->name, error);
		goto err2;
	}
	dmd->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
	    &dmd->irq_rid, RF_ACTIVE);
	if (dmd->irq_res == NULL) {
		device_printf(dev,
		    "cannot allocate resource for %s interrupt\n", dmd->name);
		error = ENXIO;
		goto err3;
	}
	error = bus_setup_intr(dev, dmd->irq_res, INTR_TYPE_MISC,
	    dmd->handler, NULL, unit, &dmd->intr_handle);
	if (error != 0) {
		device_printf(dev, "cannot setup %s interrupt, %d\n",
		    dmd->name, error);
		goto err4;
	}
	bus_describe_intr(dev, dmd->irq_res, dmd->intr_handle, "%s", dmd->name);
	error = PCIB_MAP_MSI(pcib, dev, dmd->irq, &msi_addr, &msi_data);
	if (error != 0) {
		device_printf(dev, "cannot map %s interrupt, %d\n",
		    dmd->name, error);
		goto err5;
	}

	dmd->msi_data = msi_data;
	dmd->msi_addr = msi_addr;

	return (0);

err5:
	bus_teardown_intr(dev, dmd->irq_res, dmd->intr_handle);
err4:
	bus_release_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq_res);
err3:
	bus_delete_resource(dev, SYS_RES_IRQ, dmd->irq_rid);
err2:
	PCIB_RELEASE_MSIX(pcib, dev, dmd->irq);
	dmd->irq = -1;
err1:
	return (error);
}
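
/*
 * Example (sketch): how a back-end is expected to use iommu_alloc_irq().
 * The slot index EXAMPLE_IRQ_IDX, the resource id EXAMPLE_IRQ_RID and the
 * filter routine example_intr() are hypothetical; only the iommu_msi_data
 * fields actually consumed by iommu_alloc_irq() above are filled in.
 */
#if 0
static int
example_setup_event_intr(struct iommu_unit *unit)
{
	struct iommu_msi_data *dmd;
	int error;

	dmd = &IOMMU2X86C(unit)->intrs[EXAMPLE_IRQ_IDX];
	dmd->name = "event";
	dmd->irq_rid = EXAMPLE_IRQ_RID;		/* hypothetical rid */
	dmd->handler = example_intr;		/* interrupt filter */

	error = iommu_alloc_irq(unit, EXAMPLE_IRQ_IDX);
	if (error != 0)
		return (error);

	/*
	 * The caller then programs dmd->msi_addr and dmd->msi_data into
	 * the unit's MSI address/data registers (hardware-specific).
	 */
	return (0);
}
#endif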

void
iommu_release_intr(struct iommu_unit *unit, int idx)
{
	device_t dev;
	struct iommu_msi_data *dmd;

	MPASS(idx >= 0 && idx < IOMMU_MAX_MSI);

	dmd = &IOMMU2X86C(unit)->intrs[idx];
	if (dmd->handler == NULL || dmd->irq == -1)
		return;
	dev = unit->dev;

	bus_teardown_intr(dev, dmd->irq_res, dmd->intr_handle);
	bus_release_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq_res);
	bus_delete_resource(dev, SYS_RES_IRQ, dmd->irq_rid);
	PCIB_RELEASE_MSIX(device_get_parent(device_get_parent(dev)),
	    dev, dmd->irq);
	dmd->irq = -1;
}

void
iommu_device_tag_init(struct iommu_ctx *ctx, device_t dev)
{
	bus_addr_t maxaddr;

	maxaddr = MIN(ctx->domain->end, BUS_SPACE_MAXADDR);
	ctx->tag->common.impl = &bus_dma_iommu_impl;
	ctx->tag->common.boundary = 0;
	ctx->tag->common.lowaddr = maxaddr;
	ctx->tag->common.highaddr = maxaddr;
	ctx->tag->common.maxsize = maxaddr;
	ctx->tag->common.nsegments = BUS_SPACE_UNRESTRICTED;
	ctx->tag->common.maxsegsz = maxaddr;
	ctx->tag->ctx = ctx;
	ctx->tag->owner = dev;
}

void
iommu_domain_free_entry(struct iommu_map_entry *entry, bool free)
{
	if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
		iommu_gas_free_region(entry);
	else
		iommu_gas_free_space(entry);
	if (free)
		iommu_gas_free_entry(entry);
	else
		entry->flags = 0;
}

/*
 * Index of the pte for the guest address base in the page table at
 * the level lvl.
 */
int
pglvl_pgtbl_pte_off(int pglvl, iommu_gaddr_t base, int lvl)
{

	base >>= IOMMU_PAGE_SHIFT + (pglvl - lvl - 1) *
	    IOMMU_NPTEPGSHIFT;
	return (base & IOMMU_PTEMASK);
}

/*
 * Returns the page index of the page table page in the page table
 * object, which maps the given address base at the page table level
 * lvl.
 */
vm_pindex_t
pglvl_pgtbl_get_pindex(int pglvl, iommu_gaddr_t base, int lvl)
{
	vm_pindex_t idx, pidx;
	int i;

	KASSERT(lvl >= 0 && lvl < pglvl,
	    ("wrong lvl %d %d", pglvl, lvl));

	for (pidx = idx = 0, i = 0; i < lvl; i++, pidx = idx) {
		idx = pglvl_pgtbl_pte_off(pglvl, base, i) +
		    pidx * IOMMU_NPTEPG + 1;
	}
	return (idx);
}

/*
 * Calculate the total amount of page table pages needed to map the
 * whole bus address space on the context with the selected agaw.
 */
vm_pindex_t
pglvl_max_pages(int pglvl)
{
	vm_pindex_t res;
	int i;

	for (res = 0, i = pglvl; i > 0; i--) {
		res *= IOMMU_NPTEPG;
		res++;
	}
	return (res);
}

iommu_gaddr_t
pglvl_page_size(int total_pglvl, int lvl)
{
	int rlvl;
	static const iommu_gaddr_t pg_sz[] = {
		(iommu_gaddr_t)IOMMU_PAGE_SIZE,
		(iommu_gaddr_t)IOMMU_PAGE_SIZE << IOMMU_NPTEPGSHIFT,
		(iommu_gaddr_t)IOMMU_PAGE_SIZE << (2 * IOMMU_NPTEPGSHIFT),
		(iommu_gaddr_t)IOMMU_PAGE_SIZE << (3 * IOMMU_NPTEPGSHIFT),
		(iommu_gaddr_t)IOMMU_PAGE_SIZE << (4 * IOMMU_NPTEPGSHIFT),
		(iommu_gaddr_t)IOMMU_PAGE_SIZE << (5 * IOMMU_NPTEPGSHIFT),
		(iommu_gaddr_t)IOMMU_PAGE_SIZE << (6 * IOMMU_NPTEPGSHIFT),
	};

	KASSERT(lvl >= 0 && lvl < total_pglvl,
	    ("total %d lvl %d", total_pglvl, lvl));
	rlvl = total_pglvl - lvl - 1;
	KASSERT(rlvl < nitems(pg_sz), ("sizeof pg_sz lvl %d", lvl));
	return (pg_sz[rlvl]);
}
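
/*
 * Worked example (illustrative, assuming the usual 4KB IOMMU page size,
 * i.e. IOMMU_PAGE_SHIFT == 12, and 512 PTEs per page-table page, i.e.
 * IOMMU_NPTEPGSHIFT == 9): with a 4-level table (pglvl == 4) and the
 * guest address base == 0x1234567000,
 *
 *	pglvl_pgtbl_pte_off(4, base, 0) == (base >> 39) & 511 == 0x000
 *	pglvl_pgtbl_pte_off(4, base, 1) == (base >> 30) & 511 == 0x048
 *	pglvl_pgtbl_pte_off(4, base, 2) == (base >> 21) & 511 == 0x1a2
 *	pglvl_pgtbl_pte_off(4, base, 3) == (base >> 12) & 511 == 0x167
 *
 * pglvl_pgtbl_get_pindex() turns the per-level offsets into the linear
 * pindex of the page-table page inside the page-table VM object, with the
 * level-0 (root) page at pindex 0.  pglvl_max_pages(4) accordingly equals
 * 1 + 512 + 512^2 + 512^3, the worst-case number of page-table pages for
 * a fully populated 4-level table.
 */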

void
iommu_device_set_iommu_prop(device_t dev, device_t iommu)
{
	device_t iommu_dev;
	int error;

	bus_topo_lock();
	error = device_get_prop(dev, DEV_PROP_NAME_IOMMU, (void **)&iommu_dev);
	if (error == ENOENT)
		device_set_prop(dev, DEV_PROP_NAME_IOMMU, iommu, NULL, NULL);
	bus_topo_unlock();
}

#ifdef DDB
#include <ddb/ddb.h>
#include <ddb/db_lex.h>

void
iommu_db_print_domain_entry(const struct iommu_map_entry *entry)
{
	struct iommu_map_entry *l, *r;

	db_printf(
	    "    start %jx end %jx first %jx last %jx free_down %jx flags %x ",
	    entry->start, entry->end, entry->first, entry->last,
	    entry->free_down, entry->flags);
	db_printf("left ");
	l = RB_LEFT(entry, rb_entry);
	if (l == NULL)
		db_printf("NULL ");
	else
		db_printf("%jx ", l->start);
	db_printf("right ");
	r = RB_RIGHT(entry, rb_entry);
	if (r == NULL)
		db_printf("NULL");
	else
		db_printf("%jx", r->start);
	db_printf("\n");
}

void
iommu_db_print_ctx(struct iommu_ctx *ctx)
{
	db_printf(
	    "    @%p pci%d:%d:%d refs %d flags %#x loads %lu unloads %lu\n",
	    ctx, pci_get_bus(ctx->tag->owner),
	    pci_get_slot(ctx->tag->owner),
	    pci_get_function(ctx->tag->owner), ctx->refs,
	    ctx->flags, ctx->loads, ctx->unloads);
}

void
iommu_db_domain_print_contexts(struct iommu_domain *iodom)
{
	struct iommu_ctx *ctx;

	if (LIST_EMPTY(&iodom->contexts))
		return;

	db_printf("  Contexts:\n");
	LIST_FOREACH(ctx, &iodom->contexts, link)
		iommu_db_print_ctx(ctx);
}

void
iommu_db_domain_print_mappings(struct iommu_domain *iodom)
{
	struct iommu_map_entry *entry;

	db_printf("    mapped:\n");
	RB_FOREACH(entry, iommu_gas_entries_tree, &iodom->rb_root) {
		iommu_db_print_domain_entry(entry);
		if (db_pager_quit)
			break;
	}
	if (db_pager_quit)
		return;
	db_printf("    unloading:\n");
	TAILQ_FOREACH(entry, &iodom->unload_entries, dmamap_link) {
		iommu_db_print_domain_entry(entry);
		if (db_pager_quit)
			break;
	}
}

#endif