/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013, 2014, 2024 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "opt_acpi.h"
#if defined(__amd64__)
#define	DEV_APIC
#else
#include "opt_apic.h"
#endif

#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sf_buf.h>
#include <sys/sysctl.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/rman.h>
#include <sys/rwlock.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/cpu.h>
#include <x86/include/busdma_impl.h>
#include <dev/iommu/busdma_iommu.h>
#include <dev/iommu/iommu.h>
#include <x86/iommu/x86_iommu.h>
#include <x86/iommu/iommu_intrmap.h>
#ifdef DEV_APIC
#include "pcib_if.h"
#include <machine/intr_machdep.h>
#include <x86/apicreg.h>
#include <x86/apicvar.h>
#endif
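
/*
 * Helpers for managing page-table pages backed by a VM object.  The
 * IOMMU_PGF_* flags behave as follows (summary inferred from their uses
 * in this file):
 *
 *	IOMMU_PGF_WAITOK	the allocation may sleep
 *	IOMMU_PGF_ZERO		return a zeroed page
 *	IOMMU_PGF_ALLOC		allocate the page if it is not resident
 *	IOMMU_PGF_NOALLOC	only look up an already-resident page
 *	IOMMU_PGF_OBJL		the caller already holds the object lock
 */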
vm_page_t
iommu_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags)
{
	vm_page_t m;
	int zeroed, aflags;

	zeroed = (flags & IOMMU_PGF_ZERO) != 0 ? VM_ALLOC_ZERO : 0;
	aflags = zeroed | VM_ALLOC_NOBUSY | VM_ALLOC_SYSTEM | VM_ALLOC_NODUMP |
	    ((flags & IOMMU_PGF_WAITOK) != 0 ? VM_ALLOC_WAITFAIL :
	    VM_ALLOC_NOWAIT);
	for (;;) {
		if ((flags & IOMMU_PGF_OBJL) == 0)
			VM_OBJECT_WLOCK(obj);
		m = vm_page_lookup(obj, idx);
		if ((flags & IOMMU_PGF_NOALLOC) != 0 || m != NULL) {
			if ((flags & IOMMU_PGF_OBJL) == 0)
				VM_OBJECT_WUNLOCK(obj);
			break;
		}
		m = vm_page_alloc_contig(obj, idx, aflags, 1, 0,
		    iommu_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
		if ((flags & IOMMU_PGF_OBJL) == 0)
			VM_OBJECT_WUNLOCK(obj);
		if (m != NULL) {
			if (zeroed && (m->flags & PG_ZERO) == 0)
				pmap_zero_page(m);
			atomic_add_int(&iommu_tbl_pagecnt, 1);
			break;
		}
		if ((flags & IOMMU_PGF_WAITOK) == 0)
			break;
	}
	return (m);
}

void
iommu_pgfree(vm_object_t obj, vm_pindex_t idx, int flags)
{
	vm_page_t m;

	if ((flags & IOMMU_PGF_OBJL) == 0)
		VM_OBJECT_WLOCK(obj);
	m = vm_page_grab(obj, idx, VM_ALLOC_NOCREAT);
	if (m != NULL) {
		vm_page_free(m);
		atomic_subtract_int(&iommu_tbl_pagecnt, 1);
	}
	if ((flags & IOMMU_PGF_OBJL) == 0)
		VM_OBJECT_WUNLOCK(obj);
}

void *
iommu_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags,
    struct sf_buf **sf)
{
	vm_page_t m;
	bool allocated;

	if ((flags & IOMMU_PGF_OBJL) == 0)
		VM_OBJECT_WLOCK(obj);
	m = vm_page_lookup(obj, idx);
	if (m == NULL && (flags & IOMMU_PGF_ALLOC) != 0) {
		m = iommu_pgalloc(obj, idx, flags | IOMMU_PGF_OBJL);
		allocated = true;
	} else
		allocated = false;
	if (m == NULL) {
		if ((flags & IOMMU_PGF_OBJL) == 0)
			VM_OBJECT_WUNLOCK(obj);
		return (NULL);
	}
	/* Sleepable allocations cannot fail. */
	if ((flags & IOMMU_PGF_WAITOK) != 0)
		VM_OBJECT_WUNLOCK(obj);
	sched_pin();
	*sf = sf_buf_alloc(m, SFB_CPUPRIVATE | ((flags & IOMMU_PGF_WAITOK)
	    == 0 ? SFB_NOWAIT : 0));
	if (*sf == NULL) {
		sched_unpin();
		if (allocated) {
			VM_OBJECT_ASSERT_WLOCKED(obj);
			iommu_pgfree(obj, m->pindex, flags | IOMMU_PGF_OBJL);
		}
		if ((flags & IOMMU_PGF_OBJL) == 0)
			VM_OBJECT_WUNLOCK(obj);
		return (NULL);
	}
	if ((flags & (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL)) ==
	    (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL))
		VM_OBJECT_WLOCK(obj);
	else if ((flags & (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL)) == 0)
		VM_OBJECT_WUNLOCK(obj);
	return ((void *)sf_buf_kva(*sf));
}

void
iommu_unmap_pgtbl(struct sf_buf *sf)
{

	sf_buf_free(sf);
	sched_unpin();
}
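
/*
 * Illustrative use of the helpers above (a sketch only, not called from
 * anywhere; the real callers live in the identity/page-table code of the
 * DMAR and AMD-Vi drivers):
 *
 *	struct sf_buf *sf;
 *	uint64_t *ptep;
 *
 *	ptep = iommu_map_pgtbl(obj, idx, IOMMU_PGF_ALLOC | IOMMU_PGF_WAITOK,
 *	    &sf);
 *	if (ptep != NULL) {
 *		... read or write the page-table page through ptep ...
 *		iommu_unmap_pgtbl(sf);
 *	}
 *
 * The thread stays pinned between the map and unmap calls because the
 * sf_buf is allocated with SFB_CPUPRIVATE.
 */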

iommu_haddr_t iommu_high;
int iommu_tbl_pagecnt;

SYSCTL_NODE(_hw_iommu, OID_AUTO, dmar, CTLFLAG_RD | CTLFLAG_MPSAFE,
    NULL, "");
SYSCTL_INT(_hw_iommu, OID_AUTO, tbl_pagecnt, CTLFLAG_RD,
    &iommu_tbl_pagecnt, 0,
    "Count of pages used for IOMMU pagetables");

int iommu_qi_batch_coalesce = 100;
SYSCTL_INT(_hw_iommu, OID_AUTO, batch_coalesce, CTLFLAG_RWTUN,
    &iommu_qi_batch_coalesce, 0,
    "Number of QI batches between interrupts");

static struct iommu_unit *
x86_no_iommu_find(device_t dev, bool verbose)
{
	return (NULL);
}

static int
x86_no_iommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count)
{
	return (EOPNOTSUPP);
}

static int
x86_no_iommu_map_msi_intr(device_t src, u_int cpu, u_int vector,
    u_int cookie, uint64_t *addr, uint32_t *data)
{
	return (EOPNOTSUPP);
}

static int
x86_no_iommu_unmap_msi_intr(device_t src, u_int cookie)
{
	return (0);
}

static int
x86_no_iommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector,
    bool edge, bool activehi, int irq, u_int *cookie, uint32_t *hi,
    uint32_t *lo)
{
	return (EOPNOTSUPP);
}

static int
x86_no_iommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie)
{
	return (0);
}

static struct x86_iommu x86_no_iommu = {
	.find = x86_no_iommu_find,
	.alloc_msi_intr = x86_no_iommu_alloc_msi_intr,
	.map_msi_intr = x86_no_iommu_map_msi_intr,
	.unmap_msi_intr = x86_no_iommu_unmap_msi_intr,
	.map_ioapic_intr = x86_no_iommu_map_ioapic_intr,
	.unmap_ioapic_intr = x86_no_iommu_unmap_ioapic_intr,
};

static struct x86_iommu *x86_iommu = &x86_no_iommu;

void
set_x86_iommu(struct x86_iommu *x)
{
	MPASS(x86_iommu == &x86_no_iommu);
	x86_iommu = x;
}

struct x86_iommu *
get_x86_iommu(void)
{
	return (x86_iommu);
}
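
/*
 * A concrete IOMMU driver (Intel DMAR or AMD-Vi) is expected to install
 * its operations vector with set_x86_iommu() exactly once, early during
 * attach, e.g. something like set_x86_iommu(&dmar_x86_iommu) (the symbol
 * name here is illustrative).  Until then the stub vector above reports
 * that no IOMMU is present and refuses interrupt remapping requests.
 * The functions below are thin wrappers that dispatch through the
 * currently installed vector.
 */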
void
iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
    bool cansleep)
{
	x86_iommu->domain_unload_entry(entry, free, cansleep);
}

void
iommu_domain_unload(struct iommu_domain *iodom,
    struct iommu_map_entries_tailq *entries, bool cansleep)
{
	x86_iommu->domain_unload(iodom, entries, cansleep);
}

struct iommu_ctx *
iommu_get_ctx(struct iommu_unit *iommu, device_t dev, uint16_t rid,
    bool id_mapped, bool rmrr_init)
{
	return (x86_iommu->get_ctx(iommu, dev, rid, id_mapped, rmrr_init));
}

void
iommu_free_ctx_locked(struct iommu_unit *iommu, struct iommu_ctx *context)
{
	x86_iommu->free_ctx_locked(iommu, context);
}

void
iommu_free_ctx(struct iommu_ctx *context)
{
	x86_iommu->free_ctx(context);
}

struct iommu_unit *
iommu_find(device_t dev, bool verbose)
{
	return (x86_iommu->find(dev, verbose));
}

int
iommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count)
{
	return (x86_iommu->alloc_msi_intr(src, cookies, count));
}

int
iommu_map_msi_intr(device_t src, u_int cpu, u_int vector, u_int cookie,
    uint64_t *addr, uint32_t *data)
{
	return (x86_iommu->map_msi_intr(src, cpu, vector, cookie,
	    addr, data));
}

int
iommu_unmap_msi_intr(device_t src, u_int cookie)
{
	return (x86_iommu->unmap_msi_intr(src, cookie));
}

int
iommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector, bool edge,
    bool activehi, int irq, u_int *cookie, uint32_t *hi, uint32_t *lo)
{
	return (x86_iommu->map_ioapic_intr(ioapic_id, cpu, vector, edge,
	    activehi, irq, cookie, hi, lo));
}

int
iommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie)
{
	return (x86_iommu->unmap_ioapic_intr(ioapic_id, cookie));
}

void
iommu_unit_pre_instantiate_ctx(struct iommu_unit *unit)
{
	x86_iommu->unit_pre_instantiate_ctx(unit);
}

#define	IOMMU2X86C(iommu)	(x86_iommu->get_x86_common(iommu))

static bool
iommu_qi_seq_processed(struct iommu_unit *unit,
    const struct iommu_qi_genseq *pseq)
{
	struct x86_unit_common *x86c;
	u_int gen;

	x86c = IOMMU2X86C(unit);
	gen = x86c->inv_waitd_gen;
	return (pseq->gen < gen ||
	    (pseq->gen == gen && pseq->seq <= x86c->inv_waitd_seq_hw));
}

void
iommu_qi_emit_wait_seq(struct iommu_unit *unit, struct iommu_qi_genseq *pseq,
    bool emit_wait)
{
	struct x86_unit_common *x86c;
	struct iommu_qi_genseq gsec;
	uint32_t seq;

	KASSERT(pseq != NULL, ("wait descriptor with no place for seq"));
	IOMMU_ASSERT_LOCKED(unit);
	x86c = IOMMU2X86C(unit);

	if (x86c->inv_waitd_seq == 0xffffffff) {
		gsec.gen = x86c->inv_waitd_gen;
		gsec.seq = x86c->inv_waitd_seq;
		x86_iommu->qi_ensure(unit, 1);
		x86_iommu->qi_emit_wait_descr(unit, gsec.seq, false,
		    true, false);
		x86_iommu->qi_advance_tail(unit);
		while (!iommu_qi_seq_processed(unit, &gsec))
			cpu_spinwait();
		x86c->inv_waitd_gen++;
		x86c->inv_waitd_seq = 1;
	}
	seq = x86c->inv_waitd_seq++;
	pseq->gen = x86c->inv_waitd_gen;
	pseq->seq = seq;
	if (emit_wait) {
		x86_iommu->qi_ensure(unit, 1);
		x86_iommu->qi_emit_wait_descr(unit, seq, true, true, false);
	}
}

/*
 * To avoid missed wakeups, callers must increment the unit's waiters count
 * before advancing the tail past the wait descriptor.
 */
void
iommu_qi_wait_for_seq(struct iommu_unit *unit, const struct iommu_qi_genseq *
    gseq, bool nowait)
{
	struct x86_unit_common *x86c;

	IOMMU_ASSERT_LOCKED(unit);
	x86c = IOMMU2X86C(unit);

	KASSERT(x86c->inv_seq_waiters > 0, ("%s: no waiters", __func__));
	while (!iommu_qi_seq_processed(unit, gseq)) {
		if (cold || nowait) {
			cpu_spinwait();
		} else {
			msleep(&x86c->inv_seq_waiters, &unit->lock, 0,
			    "dmarse", hz);
		}
	}
	x86c->inv_seq_waiters--;
}
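
/*
 * Typical synchronous-wait pattern built from the two helpers above (a
 * sketch only; compare iommu_qi_invalidate_sync() and
 * iommu_qi_common_fini() below):
 *
 *	IOMMU_LOCK(unit);
 *	x86_iommu->qi_ensure(unit, 1);
 *	iommu_qi_emit_wait_seq(unit, &gseq, true);
 *	IOMMU2X86C(unit)->inv_seq_waiters++;
 *	x86_iommu->qi_advance_tail(unit);
 *	iommu_qi_wait_for_seq(unit, &gseq, false);
 *	IOMMU_UNLOCK(unit);
 *
 * The waiters count is incremented before the tail is advanced; otherwise
 * the wakeup issued by the QI task could be missed.
 */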

/*
 * The caller must not be using the entry's dmamap_link field.
 */
void
iommu_qi_invalidate_locked(struct iommu_domain *domain,
    struct iommu_map_entry *entry, bool emit_wait)
{
	struct iommu_unit *unit;
	struct x86_unit_common *x86c;

	unit = domain->iommu;
	x86c = IOMMU2X86C(unit);
	IOMMU_ASSERT_LOCKED(unit);

	x86_iommu->qi_invalidate_emit(domain, entry->start, entry->end -
	    entry->start, &entry->gseq, emit_wait);

	/*
	 * To avoid a data race in dmar_qi_task(), the entry's gseq must be
	 * initialized before the entry is added to the TLB flush list, and the
	 * entry must be added to that list before the tail is advanced.  More
	 * precisely, the tail must not be advanced past the wait descriptor
	 * that will generate the interrupt that schedules dmar_qi_task() for
	 * execution before the entry is added to the list.  While an earlier
	 * call to dmar_qi_ensure() might have advanced the tail, it will not
	 * advance it past the wait descriptor.
	 *
	 * See the definition of struct dmar_unit for more information on
	 * synchronization.
	 */
	entry->tlb_flush_next = NULL;
	atomic_store_rel_ptr((uintptr_t *)&x86c->tlb_flush_tail->
	    tlb_flush_next, (uintptr_t)entry);
	x86c->tlb_flush_tail = entry;

	x86_iommu->qi_advance_tail(unit);
}

void
iommu_qi_invalidate_sync(struct iommu_domain *domain, iommu_gaddr_t base,
    iommu_gaddr_t size, bool cansleep)
{
	struct iommu_unit *unit;
	struct iommu_qi_genseq gseq;

	unit = domain->iommu;
	IOMMU_LOCK(unit);
	x86_iommu->qi_invalidate_emit(domain, base, size, &gseq, true);

	/*
	 * To avoid a missed wakeup in iommu_qi_task(), the unit's
	 * waiters count must be incremented before the tail is
	 * advanced.
	 */
	IOMMU2X86C(unit)->inv_seq_waiters++;

	x86_iommu->qi_advance_tail(unit);
	iommu_qi_wait_for_seq(unit, &gseq, !cansleep);
	IOMMU_UNLOCK(unit);
}

void
iommu_qi_drain_tlb_flush(struct iommu_unit *unit)
{
	struct x86_unit_common *x86c;
	struct iommu_map_entry *entry, *head;

	x86c = IOMMU2X86C(unit);
	for (head = x86c->tlb_flush_head;; head = entry) {
		entry = (struct iommu_map_entry *)
		    atomic_load_acq_ptr((uintptr_t *)&head->tlb_flush_next);
		if (entry == NULL ||
		    !iommu_qi_seq_processed(unit, &entry->gseq))
			break;
		x86c->tlb_flush_head = entry;
		iommu_gas_free_entry(head);
		if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
			iommu_gas_free_region(entry);
		else
			iommu_gas_free_space(entry);
	}
}
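
/*
 * Common initialization of the invalidation-queue machinery: the TLB
 * flush list head, the QI taskqueue, the wait-descriptor generation and
 * sequence counters, and the invalidation queue buffer itself.  The
 * buffer spans 2^hw.iommu.qi_size pages (2^3 = 8 pages by default),
 * clamped to the unit's qi_buf_maxsz.
 */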
void
iommu_qi_common_init(struct iommu_unit *unit, task_fn_t qi_task)
{
	struct x86_unit_common *x86c;
	u_int qi_sz;

	x86c = IOMMU2X86C(unit);

	x86c->tlb_flush_head = x86c->tlb_flush_tail =
	    iommu_gas_alloc_entry(NULL, 0);
	TASK_INIT(&x86c->qi_task, 0, qi_task, unit);
	x86c->qi_taskqueue = taskqueue_create_fast("iommuqf", M_WAITOK,
	    taskqueue_thread_enqueue, &x86c->qi_taskqueue);
	taskqueue_start_threads(&x86c->qi_taskqueue, 1, PI_AV,
	    "iommu%d qi taskq", unit->unit);

	x86c->inv_waitd_gen = 0;
	x86c->inv_waitd_seq = 1;

	qi_sz = 3;
	TUNABLE_INT_FETCH("hw.iommu.qi_size", &qi_sz);
	if (qi_sz > x86c->qi_buf_maxsz)
		qi_sz = x86c->qi_buf_maxsz;
	x86c->inv_queue_size = (1ULL << qi_sz) * PAGE_SIZE;
	/* Reserve one descriptor to prevent wraparound. */
	x86c->inv_queue_avail = x86c->inv_queue_size -
	    x86c->qi_cmd_sz;

	/*
	 * The invalidation queue reads by DMARs/AMDIOMMUs are always
	 * coherent.
	 */
	x86c->inv_queue = kmem_alloc_contig(x86c->inv_queue_size,
	    M_WAITOK | M_ZERO, 0, iommu_high, PAGE_SIZE, 0,
	    VM_MEMATTR_DEFAULT);
	x86c->inv_waitd_seq_hw_phys = pmap_kextract(
	    (vm_offset_t)&x86c->inv_waitd_seq_hw);
}

void
iommu_qi_common_fini(struct iommu_unit *unit, void (*disable_qi)(
    struct iommu_unit *))
{
	struct x86_unit_common *x86c;
	struct iommu_qi_genseq gseq;

	x86c = IOMMU2X86C(unit);

	taskqueue_drain(x86c->qi_taskqueue, &x86c->qi_task);
	taskqueue_free(x86c->qi_taskqueue);
	x86c->qi_taskqueue = NULL;

	IOMMU_LOCK(unit);
	/* Quiesce the queue. */
	x86_iommu->qi_ensure(unit, 1);
	iommu_qi_emit_wait_seq(unit, &gseq, true);
	/* See iommu_qi_invalidate_locked(). */
	x86c->inv_seq_waiters++;
	x86_iommu->qi_advance_tail(unit);
	iommu_qi_wait_for_seq(unit, &gseq, false);
	/* Only disable the queue after it has been quiesced. */
	disable_qi(unit);
	KASSERT(x86c->inv_seq_waiters == 0,
	    ("iommu%d: waiters on disabled queue", unit->unit));
	IOMMU_UNLOCK(unit);

	kmem_free(x86c->inv_queue, x86c->inv_queue_size);
	x86c->inv_queue = NULL;
	x86c->inv_queue_size = 0;
}
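
/*
 * Allocate and set up MSI-X interrupt slot idx for the IOMMU unit.  The
 * sequence is: allocate an MSI-X vector from the parent bridge, record it
 * as an IRQ resource on the unit's device, activate the resource, hook
 * the handler, and finally obtain the MSI address/data pair, which is
 * stored for the driver to program into the unit's interrupt registers.
 * Each step is unwound on failure through the err1..err5 labels.
 */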
int
iommu_alloc_irq(struct iommu_unit *unit, int idx)
{
	device_t dev, pcib;
	struct iommu_msi_data *dmd;
	uint64_t msi_addr;
	uint32_t msi_data;
	int error;

	MPASS(idx >= 0 && idx < IOMMU_MAX_MSI);

	dev = unit->dev;
	dmd = &IOMMU2X86C(unit)->intrs[idx];
	pcib = device_get_parent(device_get_parent(dev)); /* Really not pcib */
	error = PCIB_ALLOC_MSIX(pcib, dev, &dmd->irq);
	if (error != 0) {
		device_printf(dev, "cannot allocate %s interrupt, %d\n",
		    dmd->name, error);
		goto err1;
	}
	error = bus_set_resource(dev, SYS_RES_IRQ, dmd->irq_rid,
	    dmd->irq, 1);
	if (error != 0) {
		device_printf(dev, "cannot set %s interrupt resource, %d\n",
		    dmd->name, error);
		goto err2;
	}
	dmd->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
	    &dmd->irq_rid, RF_ACTIVE);
	if (dmd->irq_res == NULL) {
		device_printf(dev,
		    "cannot allocate resource for %s interrupt\n", dmd->name);
		error = ENXIO;
		goto err3;
	}
	error = bus_setup_intr(dev, dmd->irq_res, INTR_TYPE_MISC,
	    dmd->handler, NULL, unit, &dmd->intr_handle);
	if (error != 0) {
		device_printf(dev, "cannot setup %s interrupt, %d\n",
		    dmd->name, error);
		goto err4;
	}
	bus_describe_intr(dev, dmd->irq_res, dmd->intr_handle, "%s", dmd->name);
	error = PCIB_MAP_MSI(pcib, dev, dmd->irq, &msi_addr, &msi_data);
	if (error != 0) {
		device_printf(dev, "cannot map %s interrupt, %d\n",
		    dmd->name, error);
		goto err5;
	}

	dmd->msi_data = msi_data;
	dmd->msi_addr = msi_addr;

	return (0);

err5:
	bus_teardown_intr(dev, dmd->irq_res, dmd->intr_handle);
err4:
	bus_release_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq_res);
err3:
	bus_delete_resource(dev, SYS_RES_IRQ, dmd->irq_rid);
err2:
	PCIB_RELEASE_MSIX(pcib, dev, dmd->irq);
	dmd->irq = -1;
err1:
	return (error);
}

void
iommu_release_intr(struct iommu_unit *unit, int idx)
{
	device_t dev;
	struct iommu_msi_data *dmd;

	MPASS(idx >= 0 && idx < IOMMU_MAX_MSI);

	dmd = &IOMMU2X86C(unit)->intrs[idx];
	if (dmd->handler == NULL || dmd->irq == -1)
		return;
	dev = unit->dev;

	bus_teardown_intr(dev, dmd->irq_res, dmd->intr_handle);
	bus_release_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq_res);
	bus_delete_resource(dev, SYS_RES_IRQ, dmd->irq_rid);
	PCIB_RELEASE_MSIX(device_get_parent(device_get_parent(dev)),
	    dev, dmd->irq);
	dmd->irq = -1;
}

void
iommu_device_tag_init(struct iommu_ctx *ctx, device_t dev)
{
	bus_addr_t maxaddr;

	maxaddr = MIN(ctx->domain->end, BUS_SPACE_MAXADDR);
	ctx->tag->common.impl = &bus_dma_iommu_impl;
	ctx->tag->common.boundary = 0;
	ctx->tag->common.lowaddr = maxaddr;
	ctx->tag->common.highaddr = maxaddr;
	ctx->tag->common.maxsize = maxaddr;
	ctx->tag->common.nsegments = BUS_SPACE_UNRESTRICTED;
	ctx->tag->common.maxsegsz = maxaddr;
	ctx->tag->ctx = ctx;
	ctx->tag->owner = dev;
}

void
iommu_domain_free_entry(struct iommu_map_entry *entry, bool free)
{
	if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
		iommu_gas_free_region(entry);
	else
		iommu_gas_free_space(entry);
	if (free)
		iommu_gas_free_entry(entry);
	else
		entry->flags = 0;
}

/*
 * Index of the pte for the guest address base in the page table at
 * the level lvl.
 */
int
pglvl_pgtbl_pte_off(int pglvl, iommu_gaddr_t base, int lvl)
{

	base >>= IOMMU_PAGE_SHIFT + (pglvl - lvl - 1) *
	    IOMMU_NPTEPGSHIFT;
	return (base & IOMMU_PTEMASK);
}

/*
 * Returns the page index of the page table page in the page table
 * object, which maps the given address base at the page table level
 * lvl.
 */
vm_pindex_t
pglvl_pgtbl_get_pindex(int pglvl, iommu_gaddr_t base, int lvl)
{
	vm_pindex_t idx, pidx;
	int i;

	KASSERT(lvl >= 0 && lvl < pglvl,
	    ("wrong lvl %d %d", pglvl, lvl));

	for (pidx = idx = 0, i = 0; i < lvl; i++, pidx = idx) {
		idx = pglvl_pgtbl_pte_off(pglvl, base, i) +
		    pidx * IOMMU_NPTEPG + 1;
	}
	return (idx);
}

/*
 * Calculate the total number of page table pages needed to map the
 * whole bus address space on the context with the selected agaw.
 */
vm_pindex_t
pglvl_max_pages(int pglvl)
{
	vm_pindex_t res;
	int i;

	for (res = 0, i = pglvl; i > 0; i--) {
		res *= IOMMU_NPTEPG;
		res++;
	}
	return (res);
}

iommu_gaddr_t
pglvl_page_size(int total_pglvl, int lvl)
{
	int rlvl;
	static const iommu_gaddr_t pg_sz[] = {
		(iommu_gaddr_t)IOMMU_PAGE_SIZE,
		(iommu_gaddr_t)IOMMU_PAGE_SIZE << IOMMU_NPTEPGSHIFT,
		(iommu_gaddr_t)IOMMU_PAGE_SIZE << (2 * IOMMU_NPTEPGSHIFT),
		(iommu_gaddr_t)IOMMU_PAGE_SIZE << (3 * IOMMU_NPTEPGSHIFT),
		(iommu_gaddr_t)IOMMU_PAGE_SIZE << (4 * IOMMU_NPTEPGSHIFT),
		(iommu_gaddr_t)IOMMU_PAGE_SIZE << (5 * IOMMU_NPTEPGSHIFT),
		(iommu_gaddr_t)IOMMU_PAGE_SIZE << (6 * IOMMU_NPTEPGSHIFT),
	};

	KASSERT(lvl >= 0 && lvl < total_pglvl,
	    ("total %d lvl %d", total_pglvl, lvl));
	rlvl = total_pglvl - lvl - 1;
	KASSERT(rlvl < nitems(pg_sz), ("sizeof pg_sz lvl %d", lvl));
	return (pg_sz[rlvl]);
}
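
/*
 * Worked example for the page-table level helpers above, assuming 4KB
 * IOMMU pages and 512 PTEs per page-table page (IOMMU_NPTEPGSHIFT == 9):
 *
 *	pglvl_page_size(4, 3) == 4KB		(leaf level)
 *	pglvl_page_size(4, 2) == 2MB
 *	pglvl_page_size(4, 1) == 1GB
 *	pglvl_page_size(4, 0) == 512GB
 *
 *	pglvl_max_pages(4) == 1 + 512 + 512^2 + 512^3 == 134480385
 *
 * i.e. one root page plus the full fan-out at each lower level.
 */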