/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013, 2014, 2024 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "opt_acpi.h"
#if defined(__amd64__)
#define	DEV_APIC
#else
#include "opt_apic.h"
#endif

#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sf_buf.h>
#include <sys/sysctl.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/rman.h>
#include <sys/rwlock.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/cpu.h>
#include <x86/include/busdma_impl.h>
#include <dev/iommu/busdma_iommu.h>
#include <dev/iommu/iommu.h>
#include <x86/iommu/x86_iommu.h>
#include <x86/iommu/iommu_intrmap.h>
#ifdef DEV_APIC
#include "pcib_if.h"
#include <machine/intr_machdep.h>
#include <x86/apicreg.h>
#include <x86/apicvar.h>
#endif
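
/*
 * Allocate a page table page for the paging-structure object obj at
 * index idx.  The IOMMU_PGF_* flags select zeroing (IOMMU_PGF_ZERO),
 * sleepable allocation (IOMMU_PGF_WAITOK), lookup-only behavior
 * (IOMMU_PGF_NOALLOC), and whether the caller already holds the object
 * lock (IOMMU_PGF_OBJL).  The page is allocated below iommu_high and is
 * excluded from kernel dumps.
 */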
vm_page_t
iommu_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags)
{
	vm_page_t m;
	int zeroed, aflags;

	zeroed = (flags & IOMMU_PGF_ZERO) != 0 ? VM_ALLOC_ZERO : 0;
	aflags = zeroed | VM_ALLOC_NOBUSY | VM_ALLOC_SYSTEM | VM_ALLOC_NODUMP |
	    ((flags & IOMMU_PGF_WAITOK) != 0 ? VM_ALLOC_WAITFAIL :
	    VM_ALLOC_NOWAIT);
	for (;;) {
		if ((flags & IOMMU_PGF_OBJL) == 0)
			VM_OBJECT_WLOCK(obj);
		m = vm_page_lookup(obj, idx);
		if ((flags & IOMMU_PGF_NOALLOC) != 0 || m != NULL) {
			if ((flags & IOMMU_PGF_OBJL) == 0)
				VM_OBJECT_WUNLOCK(obj);
			break;
		}
		m = vm_page_alloc_contig(obj, idx, aflags, 1, 0,
		    iommu_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
		if ((flags & IOMMU_PGF_OBJL) == 0)
			VM_OBJECT_WUNLOCK(obj);
		if (m != NULL) {
			if (zeroed && (m->flags & PG_ZERO) == 0)
				pmap_zero_page(m);
			atomic_add_int(&iommu_tbl_pagecnt, 1);
			break;
		}
		if ((flags & IOMMU_PGF_WAITOK) == 0)
			break;
	}
	return (m);
}

void
iommu_pgfree(vm_object_t obj, vm_pindex_t idx, int flags,
    struct iommu_map_entry *entry)
{
	vm_page_t m;

	if ((flags & IOMMU_PGF_OBJL) == 0)
		VM_OBJECT_WLOCK(obj);
	m = vm_page_grab(obj, idx, VM_ALLOC_NOCREAT);
	if (m != NULL) {
		if (entry == NULL) {
			vm_page_free(m);
			atomic_subtract_int(&iommu_tbl_pagecnt, 1);
		} else {
			vm_page_remove_xbusy(m);	/* keep page busy */
			SLIST_INSERT_HEAD(&entry->pgtbl_free, m, plinks.s.ss);
		}
	}
	if ((flags & IOMMU_PGF_OBJL) == 0)
		VM_OBJECT_WUNLOCK(obj);
}
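
/*
 * Map the page table page at index idx into KVA through a CPU-private
 * sf_buf, optionally allocating the page first when IOMMU_PGF_ALLOC is
 * set.  The calling thread stays pinned to its CPU until the mapping is
 * released with iommu_unmap_pgtbl().
 */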
void *
iommu_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags,
    struct sf_buf **sf)
{
	vm_page_t m;
	bool allocated;

	if ((flags & IOMMU_PGF_OBJL) == 0)
		VM_OBJECT_WLOCK(obj);
	m = vm_page_lookup(obj, idx);
	if (m == NULL && (flags & IOMMU_PGF_ALLOC) != 0) {
		m = iommu_pgalloc(obj, idx, flags | IOMMU_PGF_OBJL);
		allocated = true;
	} else
		allocated = false;
	if (m == NULL) {
		if ((flags & IOMMU_PGF_OBJL) == 0)
			VM_OBJECT_WUNLOCK(obj);
		return (NULL);
	}
	/* Sleepable allocations cannot fail. */
	if ((flags & IOMMU_PGF_WAITOK) != 0)
		VM_OBJECT_WUNLOCK(obj);
	sched_pin();
	*sf = sf_buf_alloc(m, SFB_CPUPRIVATE | ((flags & IOMMU_PGF_WAITOK)
	    == 0 ? SFB_NOWAIT : 0));
	if (*sf == NULL) {
		sched_unpin();
		if (allocated) {
			VM_OBJECT_ASSERT_WLOCKED(obj);
			iommu_pgfree(obj, m->pindex, flags | IOMMU_PGF_OBJL,
			    NULL);
		}
		if ((flags & IOMMU_PGF_OBJL) == 0)
			VM_OBJECT_WUNLOCK(obj);
		return (NULL);
	}
	if ((flags & (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL)) ==
	    (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL))
		VM_OBJECT_WLOCK(obj);
	else if ((flags & (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL)) == 0)
		VM_OBJECT_WUNLOCK(obj);
	return ((void *)sf_buf_kva(*sf));
}

void
iommu_unmap_pgtbl(struct sf_buf *sf)
{

	sf_buf_free(sf);
	sched_unpin();
}

iommu_haddr_t iommu_high;
int iommu_tbl_pagecnt;

SYSCTL_NODE(_hw_iommu, OID_AUTO, dmar, CTLFLAG_RD | CTLFLAG_MPSAFE,
    NULL, "");
SYSCTL_INT(_hw_iommu, OID_AUTO, tbl_pagecnt, CTLFLAG_RD,
    &iommu_tbl_pagecnt, 0,
    "Count of pages used for IOMMU pagetables");

int iommu_qi_batch_coalesce = 100;
SYSCTL_INT(_hw_iommu, OID_AUTO, batch_coalesce, CTLFLAG_RWTUN,
    &iommu_qi_batch_coalesce, 0,
    "Number of qi batches between interrupts");

static struct iommu_unit *
x86_no_iommu_find(device_t dev, bool verbose)
{
	return (NULL);
}

static int
x86_no_iommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count)
{
	return (EOPNOTSUPP);
}

static int
x86_no_iommu_map_msi_intr(device_t src, u_int cpu, u_int vector,
    u_int cookie, uint64_t *addr, uint32_t *data)
{
	return (EOPNOTSUPP);
}

static int
x86_no_iommu_unmap_msi_intr(device_t src, u_int cookie)
{
	return (0);
}

static int
x86_no_iommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector,
    bool edge, bool activehi, int irq, u_int *cookie, uint32_t *hi,
    uint32_t *lo)
{
	return (EOPNOTSUPP);
}

static int
x86_no_iommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie)
{
	return (0);
}

static struct x86_iommu x86_no_iommu = {
	.find = x86_no_iommu_find,
	.alloc_msi_intr = x86_no_iommu_alloc_msi_intr,
	.map_msi_intr = x86_no_iommu_map_msi_intr,
	.unmap_msi_intr = x86_no_iommu_unmap_msi_intr,
	.map_ioapic_intr = x86_no_iommu_map_ioapic_intr,
	.unmap_ioapic_intr = x86_no_iommu_unmap_ioapic_intr,
};

static struct x86_iommu *x86_iommu = &x86_no_iommu;

void
set_x86_iommu(struct x86_iommu *x)
{
	MPASS(x86_iommu == &x86_no_iommu);
	x86_iommu = x;
}

struct x86_iommu *
get_x86_iommu(void)
{
	return (x86_iommu);
}
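
/*
 * The entry points below dispatch to the x86_iommu implementation
 * registered through set_x86_iommu() by the attached driver; with no
 * driver attached they fall through to the x86_no_iommu stubs above.
 */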
void
iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
    bool cansleep)
{
	x86_iommu->domain_unload_entry(entry, free, cansleep);
}

void
iommu_domain_unload(struct iommu_domain *iodom,
    struct iommu_map_entries_tailq *entries, bool cansleep)
{
	x86_iommu->domain_unload(iodom, entries, cansleep);
}

struct iommu_ctx *
iommu_get_ctx(struct iommu_unit *iommu, device_t dev, uint16_t rid,
    bool id_mapped, bool rmrr_init)
{
	return (x86_iommu->get_ctx(iommu, dev, rid, id_mapped, rmrr_init));
}

void
iommu_free_ctx_locked(struct iommu_unit *iommu, struct iommu_ctx *context)
{
	x86_iommu->free_ctx_locked(iommu, context);
}

void
iommu_free_ctx(struct iommu_ctx *context)
{
	x86_iommu->free_ctx(context);
}

struct iommu_unit *
iommu_find(device_t dev, bool verbose)
{
	return (x86_iommu->find(dev, verbose));
}

int
iommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count)
{
	return (x86_iommu->alloc_msi_intr(src, cookies, count));
}

int
iommu_map_msi_intr(device_t src, u_int cpu, u_int vector, u_int cookie,
    uint64_t *addr, uint32_t *data)
{
	return (x86_iommu->map_msi_intr(src, cpu, vector, cookie,
	    addr, data));
}

int
iommu_unmap_msi_intr(device_t src, u_int cookie)
{
	return (x86_iommu->unmap_msi_intr(src, cookie));
}

int
iommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector, bool edge,
    bool activehi, int irq, u_int *cookie, uint32_t *hi, uint32_t *lo)
{
	return (x86_iommu->map_ioapic_intr(ioapic_id, cpu, vector, edge,
	    activehi, irq, cookie, hi, lo));
}

int
iommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie)
{
	return (x86_iommu->unmap_ioapic_intr(ioapic_id, cookie));
}

void
iommu_unit_pre_instantiate_ctx(struct iommu_unit *unit)
{
	x86_iommu->unit_pre_instantiate_ctx(unit);
}

#define	IOMMU2X86C(iommu)	(x86_iommu->get_x86_common(iommu))

static bool
iommu_qi_seq_processed(struct iommu_unit *unit,
    const struct iommu_qi_genseq *pseq)
{
	struct x86_unit_common *x86c;
	u_int gen;

	x86c = IOMMU2X86C(unit);
	gen = x86c->inv_waitd_gen;
	return (pseq->gen < gen || (pseq->gen == gen && pseq->seq <=
	    atomic_load_64(&x86c->inv_waitd_seq_hw)));
}
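
/*
 * Emit a queued-invalidation wait descriptor and record its
 * generation/sequence pair in *pseq.  When the 32-bit sequence counter
 * is about to wrap, a synchronous wait is issued first and the
 * generation counter is bumped, so that iommu_qi_seq_processed() can
 * compare sequence numbers without ambiguity.
 */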
void
iommu_qi_emit_wait_seq(struct iommu_unit *unit, struct iommu_qi_genseq *pseq,
    bool emit_wait)
{
	struct x86_unit_common *x86c;
	struct iommu_qi_genseq gsec;
	uint32_t seq;

	KASSERT(pseq != NULL, ("wait descriptor with no place for seq"));
	IOMMU_ASSERT_LOCKED(unit);
	x86c = IOMMU2X86C(unit);

	if (x86c->inv_waitd_seq == 0xffffffff) {
		gsec.gen = x86c->inv_waitd_gen;
		gsec.seq = x86c->inv_waitd_seq;
		x86_iommu->qi_ensure(unit, 1);
		x86_iommu->qi_emit_wait_descr(unit, gsec.seq, false,
		    true, false);
		x86_iommu->qi_advance_tail(unit);
		while (!iommu_qi_seq_processed(unit, &gsec))
			cpu_spinwait();
		x86c->inv_waitd_gen++;
		x86c->inv_waitd_seq = 1;
	}
	seq = x86c->inv_waitd_seq++;
	pseq->gen = x86c->inv_waitd_gen;
	pseq->seq = seq;
	if (emit_wait) {
		x86_iommu->qi_ensure(unit, 1);
		x86_iommu->qi_emit_wait_descr(unit, seq, true, true, false);
	}
}

/*
 * To avoid missed wakeups, callers must increment the unit's waiters count
 * before advancing the tail past the wait descriptor.
 */
void
iommu_qi_wait_for_seq(struct iommu_unit *unit, const struct iommu_qi_genseq *
    gseq, bool nowait)
{
	struct x86_unit_common *x86c;

	IOMMU_ASSERT_LOCKED(unit);
	x86c = IOMMU2X86C(unit);

	KASSERT(x86c->inv_seq_waiters > 0, ("%s: no waiters", __func__));
	while (!iommu_qi_seq_processed(unit, gseq)) {
		if (cold || nowait) {
			cpu_spinwait();
		} else {
			msleep(&x86c->inv_seq_waiters, &unit->lock, 0,
			    "dmarse", hz);
		}
	}
	x86c->inv_seq_waiters--;
}

/*
 * The caller must not be using the entry's dmamap_link field.
 */
void
iommu_qi_invalidate_locked(struct iommu_domain *domain,
    struct iommu_map_entry *entry, bool emit_wait)
{
	struct iommu_unit *unit;
	struct x86_unit_common *x86c;

	unit = domain->iommu;
	x86c = IOMMU2X86C(unit);
	IOMMU_ASSERT_LOCKED(unit);

	x86_iommu->qi_invalidate_emit(domain, entry->start, entry->end -
	    entry->start, &entry->gseq, emit_wait);

	/*
	 * To avoid a data race in dmar_qi_task(), the entry's gseq must be
	 * initialized before the entry is added to the TLB flush list, and the
	 * entry must be added to that list before the tail is advanced.  More
	 * precisely, the tail must not be advanced past the wait descriptor
	 * that will generate the interrupt that schedules dmar_qi_task() for
	 * execution before the entry is added to the list.  While an earlier
	 * call to dmar_qi_ensure() might have advanced the tail, it will not
	 * advance it past the wait descriptor.
	 *
	 * See the definition of struct dmar_unit for more information on
	 * synchronization.
	 */
	entry->tlb_flush_next = NULL;
	atomic_store_rel_ptr((uintptr_t *)&x86c->tlb_flush_tail->
	    tlb_flush_next, (uintptr_t)entry);
	x86c->tlb_flush_tail = entry;

	x86_iommu->qi_advance_tail(unit);
}

void
iommu_qi_invalidate_sync(struct iommu_domain *domain, iommu_gaddr_t base,
    iommu_gaddr_t size, bool cansleep)
{
	struct iommu_unit *unit;
	struct iommu_qi_genseq gseq;

	unit = domain->iommu;
	IOMMU_LOCK(unit);
	x86_iommu->qi_invalidate_emit(domain, base, size, &gseq, true);

	/*
	 * To avoid a missed wakeup in iommu_qi_task(), the unit's
	 * waiters count must be incremented before the tail is
	 * advanced.
	 */
	IOMMU2X86C(unit)->inv_seq_waiters++;

	x86_iommu->qi_advance_tail(unit);
	iommu_qi_wait_for_seq(unit, &gseq, !cansleep);
	IOMMU_UNLOCK(unit);
}

void
iommu_qi_drain_tlb_flush(struct iommu_unit *unit)
{
	struct x86_unit_common *x86c;
	struct iommu_map_entry *entry, *head;

	x86c = IOMMU2X86C(unit);
	for (head = x86c->tlb_flush_head;; head = entry) {
		entry = (struct iommu_map_entry *)
		    atomic_load_acq_ptr((uintptr_t *)&head->tlb_flush_next);
		if (entry == NULL ||
		    !iommu_qi_seq_processed(unit, &entry->gseq))
			break;
		x86c->tlb_flush_head = entry;
		iommu_gas_free_entry(head);
		if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
			iommu_gas_free_region(entry);
		else
			iommu_gas_free_space(entry);
	}
}
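
/*
 * Common setup for the invalidation queue: allocate the queue memory
 * and the deferred-free taskqueue, and prime the wait generation and
 * sequence counters.  The queue size in pages is 2^hw.iommu.qi_size,
 * clamped to the unit's maximum buffer size.
 */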
void
iommu_qi_common_init(struct iommu_unit *unit, task_fn_t qi_task)
{
	struct x86_unit_common *x86c;
	u_int qi_sz;

	x86c = IOMMU2X86C(unit);

	x86c->tlb_flush_head = x86c->tlb_flush_tail =
	    iommu_gas_alloc_entry(NULL, 0);
	TASK_INIT(&x86c->qi_task, 0, qi_task, unit);
	x86c->qi_taskqueue = taskqueue_create_fast("iommuqf", M_WAITOK,
	    taskqueue_thread_enqueue, &x86c->qi_taskqueue);
	taskqueue_start_threads(&x86c->qi_taskqueue, 1, PI_AV,
	    "iommu%d qi taskq", unit->unit);

	x86c->inv_waitd_gen = 0;
	x86c->inv_waitd_seq = 1;

	qi_sz = 3;
	TUNABLE_INT_FETCH("hw.iommu.qi_size", &qi_sz);
	if (qi_sz > x86c->qi_buf_maxsz)
		qi_sz = x86c->qi_buf_maxsz;
	x86c->inv_queue_size = (1ULL << qi_sz) * PAGE_SIZE;
	/* Reserve one descriptor to prevent wraparound. */
	x86c->inv_queue_avail = x86c->inv_queue_size -
	    x86c->qi_cmd_sz;

	/*
	 * The invalidation queue reads by DMARs/AMDIOMMUs are always
	 * coherent.
	 */
	x86c->inv_queue = kmem_alloc_contig(x86c->inv_queue_size,
	    M_WAITOK | M_ZERO, 0, iommu_high, PAGE_SIZE, 0,
	    VM_MEMATTR_DEFAULT);
	x86c->inv_waitd_seq_hw_phys = pmap_kextract(
	    (vm_offset_t)&x86c->inv_waitd_seq_hw);
}

void
iommu_qi_common_fini(struct iommu_unit *unit, void (*disable_qi)(
    struct iommu_unit *))
{
	struct x86_unit_common *x86c;
	struct iommu_qi_genseq gseq;

	x86c = IOMMU2X86C(unit);

	taskqueue_drain(x86c->qi_taskqueue, &x86c->qi_task);
	taskqueue_free(x86c->qi_taskqueue);
	x86c->qi_taskqueue = NULL;

	IOMMU_LOCK(unit);
	/* quiesce */
	x86_iommu->qi_ensure(unit, 1);
	iommu_qi_emit_wait_seq(unit, &gseq, true);
	/* See iommu_qi_invalidate_locked(). */
	x86c->inv_seq_waiters++;
	x86_iommu->qi_advance_tail(unit);
	iommu_qi_wait_for_seq(unit, &gseq, false);
	/* only after the quiesce, disable queue */
	disable_qi(unit);
	KASSERT(x86c->inv_seq_waiters == 0,
	    ("iommu%d: waiters on disabled queue", unit->unit));
	IOMMU_UNLOCK(unit);

	kmem_free(x86c->inv_queue, x86c->inv_queue_size);
	x86c->inv_queue = NULL;
	x86c->inv_queue_size = 0;
}
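
/*
 * Allocate and wire up the MSI-X interrupt described by the unit's
 * intrs[idx] slot: reserve the vector from the parent bridge, attach
 * the slot's handler, and store the programmed MSI address/data pair
 * in the descriptor.
 */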
int
iommu_alloc_irq(struct iommu_unit *unit, int idx)
{
	device_t dev, pcib;
	struct iommu_msi_data *dmd;
	uint64_t msi_addr;
	uint32_t msi_data;
	int error;

	MPASS(idx >= 0 && idx < IOMMU_MAX_MSI);

	dev = unit->dev;
	dmd = &IOMMU2X86C(unit)->intrs[idx];
	pcib = device_get_parent(device_get_parent(dev)); /* Really not pcib */
	error = PCIB_ALLOC_MSIX(pcib, dev, &dmd->irq);
	if (error != 0) {
		device_printf(dev, "cannot allocate %s interrupt, %d\n",
		    dmd->name, error);
		goto err1;
	}
	error = bus_set_resource(dev, SYS_RES_IRQ, dmd->irq_rid,
	    dmd->irq, 1);
	if (error != 0) {
		device_printf(dev, "cannot set %s interrupt resource, %d\n",
		    dmd->name, error);
		goto err2;
	}
	dmd->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
	    &dmd->irq_rid, RF_ACTIVE);
	if (dmd->irq_res == NULL) {
		device_printf(dev,
		    "cannot allocate resource for %s interrupt\n", dmd->name);
		error = ENXIO;
		goto err3;
	}
	error = bus_setup_intr(dev, dmd->irq_res, INTR_TYPE_MISC,
	    dmd->handler, NULL, unit, &dmd->intr_handle);
	if (error != 0) {
		device_printf(dev, "cannot setup %s interrupt, %d\n",
		    dmd->name, error);
		goto err4;
	}
	bus_describe_intr(dev, dmd->irq_res, dmd->intr_handle, "%s", dmd->name);
	error = PCIB_MAP_MSI(pcib, dev, dmd->irq, &msi_addr, &msi_data);
	if (error != 0) {
		device_printf(dev, "cannot map %s interrupt, %d\n",
		    dmd->name, error);
		goto err5;
	}

	dmd->msi_data = msi_data;
	dmd->msi_addr = msi_addr;

	return (0);

err5:
	bus_teardown_intr(dev, dmd->irq_res, dmd->intr_handle);
err4:
	bus_release_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq_res);
err3:
	bus_delete_resource(dev, SYS_RES_IRQ, dmd->irq_rid);
err2:
	PCIB_RELEASE_MSIX(pcib, dev, dmd->irq);
	dmd->irq = -1;
err1:
	return (error);
}

void
iommu_release_intr(struct iommu_unit *unit, int idx)
{
	device_t dev;
	struct iommu_msi_data *dmd;

	MPASS(idx >= 0 && idx < IOMMU_MAX_MSI);

	dmd = &IOMMU2X86C(unit)->intrs[idx];
	if (dmd->handler == NULL || dmd->irq == -1)
		return;
	dev = unit->dev;

	bus_teardown_intr(dev, dmd->irq_res, dmd->intr_handle);
	bus_release_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq_res);
	bus_delete_resource(dev, SYS_RES_IRQ, dmd->irq_rid);
	PCIB_RELEASE_MSIX(device_get_parent(device_get_parent(dev)),
	    dev, dmd->irq);
	dmd->irq = -1;
}

void
iommu_device_tag_init(struct iommu_ctx *ctx, device_t dev)
{
	bus_addr_t maxaddr;

	maxaddr = MIN(ctx->domain->end, BUS_SPACE_MAXADDR);
	ctx->tag->common.impl = &bus_dma_iommu_impl;
	ctx->tag->common.boundary = 0;
	ctx->tag->common.lowaddr = maxaddr;
	ctx->tag->common.highaddr = maxaddr;
	ctx->tag->common.maxsize = maxaddr;
	ctx->tag->common.nsegments = BUS_SPACE_UNRESTRICTED;
	ctx->tag->common.maxsegsz = maxaddr;
	ctx->tag->ctx = ctx;
	ctx->tag->owner = dev;
}

void
iommu_domain_free_entry(struct iommu_map_entry *entry, bool free)
{
	if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
		iommu_gas_free_region(entry);
	else
		iommu_gas_free_space(entry);
	if (free)
		iommu_gas_free_entry(entry);
	else
		entry->flags = 0;
}

/*
 * Index of the pte for the guest address base in the page table at
 * the level lvl.
 */
int
pglvl_pgtbl_pte_off(int pglvl, iommu_gaddr_t base, int lvl)
{

	base >>= IOMMU_PAGE_SHIFT + (pglvl - lvl - 1) *
	    IOMMU_NPTEPGSHIFT;
	return (base & IOMMU_PTEMASK);
}

/*
 * Returns the page index of the page table page in the page table
 * object, which maps the given address base at the page table level
 * lvl.
 */
vm_pindex_t
pglvl_pgtbl_get_pindex(int pglvl, iommu_gaddr_t base, int lvl)
{
	vm_pindex_t idx, pidx;
	int i;

	KASSERT(lvl >= 0 && lvl < pglvl,
	    ("wrong lvl %d %d", pglvl, lvl));

	for (pidx = idx = 0, i = 0; i < lvl; i++, pidx = idx) {
		idx = pglvl_pgtbl_pte_off(pglvl, base, i) +
		    pidx * IOMMU_NPTEPG + 1;
	}
	return (idx);
}

/*
 * Calculate the total amount of page table pages needed to map the
 * whole bus address space on the context with the selected agaw.
 */
vm_pindex_t
pglvl_max_pages(int pglvl)
{
	vm_pindex_t res;
	int i;

	for (res = 0, i = pglvl; i > 0; i--) {
		res *= IOMMU_NPTEPG;
		res++;
	}
	return (res);
}

iommu_gaddr_t
pglvl_page_size(int total_pglvl, int lvl)
{
	int rlvl;
	static const iommu_gaddr_t pg_sz[] = {
		(iommu_gaddr_t)IOMMU_PAGE_SIZE,
		(iommu_gaddr_t)IOMMU_PAGE_SIZE << IOMMU_NPTEPGSHIFT,
		(iommu_gaddr_t)IOMMU_PAGE_SIZE << (2 * IOMMU_NPTEPGSHIFT),
		(iommu_gaddr_t)IOMMU_PAGE_SIZE << (3 * IOMMU_NPTEPGSHIFT),
		(iommu_gaddr_t)IOMMU_PAGE_SIZE << (4 * IOMMU_NPTEPGSHIFT),
		(iommu_gaddr_t)IOMMU_PAGE_SIZE << (5 * IOMMU_NPTEPGSHIFT),
		(iommu_gaddr_t)IOMMU_PAGE_SIZE << (6 * IOMMU_NPTEPGSHIFT),
	};

	KASSERT(lvl >= 0 && lvl < total_pglvl,
	    ("total %d lvl %d", total_pglvl, lvl));
	rlvl = total_pglvl - lvl - 1;
	KASSERT(rlvl < nitems(pg_sz), ("sizeof pg_sz lvl %d", lvl));
	return (pg_sz[rlvl]);
}