1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2013 The FreeBSD Foundation 5 * 6 * This software was developed by Konstantin Belousov <kib@FreeBSD.org> 7 * under sponsorship from the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include "opt_acpi.h" 35 36 #include <sys/param.h> 37 #include <sys/bus.h> 38 #include <sys/kernel.h> 39 #include <sys/malloc.h> 40 #include <sys/memdesc.h> 41 #include <sys/module.h> 42 #include <sys/rman.h> 43 #include <sys/taskqueue.h> 44 #include <sys/time.h> 45 #include <sys/tree.h> 46 #include <sys/vmem.h> 47 #include <vm/vm.h> 48 #include <vm/vm_extern.h> 49 #include <vm/vm_kern.h> 50 #include <vm/vm_page.h> 51 #include <vm/vm_map.h> 52 #include <contrib/dev/acpica/include/acpi.h> 53 #include <contrib/dev/acpica/include/accommon.h> 54 #include <dev/acpica/acpivar.h> 55 #include <dev/pci/pcireg.h> 56 #include <machine/bus.h> 57 #include <machine/cpu.h> 58 #include <x86/include/busdma_impl.h> 59 #include <dev/iommu/busdma_iommu.h> 60 #include <x86/iommu/intel_reg.h> 61 #include <x86/iommu/intel_dmar.h> 62 63 static bool 64 dmar_qi_seq_processed(const struct dmar_unit *unit, 65 const struct iommu_qi_genseq *pseq) 66 { 67 u_int gen; 68 69 gen = unit->inv_waitd_gen; 70 return (pseq->gen < gen || 71 (pseq->gen == gen && pseq->seq <= unit->inv_waitd_seq_hw)); 72 } 73 74 static int 75 dmar_enable_qi(struct dmar_unit *unit) 76 { 77 int error; 78 79 DMAR_ASSERT_LOCKED(unit); 80 unit->hw_gcmd |= DMAR_GCMD_QIE; 81 dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd); 82 DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_QIES) 83 != 0)); 84 return (error); 85 } 86 87 static int 88 dmar_disable_qi(struct dmar_unit *unit) 89 { 90 int error; 91 92 DMAR_ASSERT_LOCKED(unit); 93 unit->hw_gcmd &= ~DMAR_GCMD_QIE; 94 dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd); 95 DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_QIES) 96 == 0)); 97 return (error); 98 } 99 100 static void 101 dmar_qi_advance_tail(struct dmar_unit *unit) 102 { 103 104 DMAR_ASSERT_LOCKED(unit); 105 dmar_write4(unit, DMAR_IQT_REG, unit->inv_queue_tail); 106 } 107 108 static void 109 dmar_qi_ensure(struct dmar_unit *unit, int descr_count) 110 { 111 uint32_t head; 112 int bytes; 113 114 DMAR_ASSERT_LOCKED(unit); 115 bytes = descr_count << DMAR_IQ_DESCR_SZ_SHIFT; 116 for (;;) { 117 if (bytes <= unit->inv_queue_avail) 118 break; 119 /* refill */ 120 head = dmar_read4(unit, DMAR_IQH_REG); 121 head &= DMAR_IQH_MASK; 122 unit->inv_queue_avail = head - unit->inv_queue_tail - 123 DMAR_IQ_DESCR_SZ; 124 if (head <= unit->inv_queue_tail) 125 unit->inv_queue_avail += unit->inv_queue_size; 126 if (bytes <= unit->inv_queue_avail) 127 break; 128 129 /* 130 * No space in the queue, do busy wait. Hardware must 131 * make a progress. But first advance the tail to 132 * inform the descriptor streamer about entries we 133 * might have already filled, otherwise they could 134 * clog the whole queue.. 135 * 136 * See dmar_qi_invalidate_locked() for a discussion 137 * about data race prevention. 138 */ 139 dmar_qi_advance_tail(unit); 140 unit->inv_queue_full++; 141 cpu_spinwait(); 142 } 143 unit->inv_queue_avail -= bytes; 144 } 145 146 static void 147 dmar_qi_emit(struct dmar_unit *unit, uint64_t data1, uint64_t data2) 148 { 149 150 DMAR_ASSERT_LOCKED(unit); 151 *(volatile uint64_t *)(unit->inv_queue + unit->inv_queue_tail) = data1; 152 unit->inv_queue_tail += DMAR_IQ_DESCR_SZ / 2; 153 KASSERT(unit->inv_queue_tail <= unit->inv_queue_size, 154 ("tail overflow 0x%x 0x%jx", unit->inv_queue_tail, 155 (uintmax_t)unit->inv_queue_size)); 156 unit->inv_queue_tail &= unit->inv_queue_size - 1; 157 *(volatile uint64_t *)(unit->inv_queue + unit->inv_queue_tail) = data2; 158 unit->inv_queue_tail += DMAR_IQ_DESCR_SZ / 2; 159 KASSERT(unit->inv_queue_tail <= unit->inv_queue_size, 160 ("tail overflow 0x%x 0x%jx", unit->inv_queue_tail, 161 (uintmax_t)unit->inv_queue_size)); 162 unit->inv_queue_tail &= unit->inv_queue_size - 1; 163 } 164 165 static void 166 dmar_qi_emit_wait_descr(struct dmar_unit *unit, uint32_t seq, bool intr, 167 bool memw, bool fence) 168 { 169 170 DMAR_ASSERT_LOCKED(unit); 171 dmar_qi_emit(unit, DMAR_IQ_DESCR_WAIT_ID | 172 (intr ? DMAR_IQ_DESCR_WAIT_IF : 0) | 173 (memw ? DMAR_IQ_DESCR_WAIT_SW : 0) | 174 (fence ? DMAR_IQ_DESCR_WAIT_FN : 0) | 175 (memw ? DMAR_IQ_DESCR_WAIT_SD(seq) : 0), 176 memw ? unit->inv_waitd_seq_hw_phys : 0); 177 } 178 179 static void 180 dmar_qi_emit_wait_seq(struct dmar_unit *unit, struct iommu_qi_genseq *pseq, 181 bool emit_wait) 182 { 183 struct iommu_qi_genseq gsec; 184 uint32_t seq; 185 186 KASSERT(pseq != NULL, ("wait descriptor with no place for seq")); 187 DMAR_ASSERT_LOCKED(unit); 188 if (unit->inv_waitd_seq == 0xffffffff) { 189 gsec.gen = unit->inv_waitd_gen; 190 gsec.seq = unit->inv_waitd_seq; 191 dmar_qi_ensure(unit, 1); 192 dmar_qi_emit_wait_descr(unit, gsec.seq, false, true, false); 193 dmar_qi_advance_tail(unit); 194 while (!dmar_qi_seq_processed(unit, &gsec)) 195 cpu_spinwait(); 196 unit->inv_waitd_gen++; 197 unit->inv_waitd_seq = 1; 198 } 199 seq = unit->inv_waitd_seq++; 200 pseq->gen = unit->inv_waitd_gen; 201 pseq->seq = seq; 202 if (emit_wait) { 203 dmar_qi_ensure(unit, 1); 204 dmar_qi_emit_wait_descr(unit, seq, true, true, false); 205 } 206 } 207 208 /* 209 * To avoid missed wakeups, callers must increment the unit's waiters count 210 * before advancing the tail past the wait descriptor. 211 */ 212 static void 213 dmar_qi_wait_for_seq(struct dmar_unit *unit, const struct iommu_qi_genseq *gseq, 214 bool nowait) 215 { 216 217 DMAR_ASSERT_LOCKED(unit); 218 KASSERT(unit->inv_seq_waiters > 0, ("%s: no waiters", __func__)); 219 while (!dmar_qi_seq_processed(unit, gseq)) { 220 if (cold || nowait) { 221 cpu_spinwait(); 222 } else { 223 msleep(&unit->inv_seq_waiters, &unit->iommu.lock, 0, 224 "dmarse", hz); 225 } 226 } 227 unit->inv_seq_waiters--; 228 } 229 230 static void 231 dmar_qi_invalidate_emit(struct dmar_domain *domain, iommu_gaddr_t base, 232 iommu_gaddr_t size, struct iommu_qi_genseq *pseq, bool emit_wait) 233 { 234 struct dmar_unit *unit; 235 iommu_gaddr_t isize; 236 int am; 237 238 unit = domain->dmar; 239 DMAR_ASSERT_LOCKED(unit); 240 for (; size > 0; base += isize, size -= isize) { 241 am = calc_am(unit, base, size, &isize); 242 dmar_qi_ensure(unit, 1); 243 dmar_qi_emit(unit, DMAR_IQ_DESCR_IOTLB_INV | 244 DMAR_IQ_DESCR_IOTLB_PAGE | DMAR_IQ_DESCR_IOTLB_DW | 245 DMAR_IQ_DESCR_IOTLB_DR | 246 DMAR_IQ_DESCR_IOTLB_DID(domain->domain), 247 base | am); 248 } 249 dmar_qi_emit_wait_seq(unit, pseq, emit_wait); 250 } 251 252 /* 253 * The caller must not be using the entry's dmamap_link field. 254 */ 255 void 256 dmar_qi_invalidate_locked(struct dmar_domain *domain, 257 struct iommu_map_entry *entry, bool emit_wait) 258 { 259 struct dmar_unit *unit; 260 261 unit = domain->dmar; 262 DMAR_ASSERT_LOCKED(unit); 263 dmar_qi_invalidate_emit(domain, entry->start, entry->end - 264 entry->start, &entry->gseq, emit_wait); 265 266 /* 267 * To avoid a data race in dmar_qi_task(), the entry's gseq must be 268 * initialized before the entry is added to the TLB flush list, and the 269 * entry must be added to that list before the tail is advanced. More 270 * precisely, the tail must not be advanced past the wait descriptor 271 * that will generate the interrupt that schedules dmar_qi_task() for 272 * execution before the entry is added to the list. While an earlier 273 * call to dmar_qi_ensure() might have advanced the tail, it will not 274 * advance it past the wait descriptor. 275 * 276 * See the definition of struct dmar_unit for more information on 277 * synchronization. 278 */ 279 entry->tlb_flush_next = NULL; 280 atomic_store_rel_ptr((uintptr_t *)&unit->tlb_flush_tail->tlb_flush_next, 281 (uintptr_t)entry); 282 unit->tlb_flush_tail = entry; 283 284 dmar_qi_advance_tail(unit); 285 } 286 287 void 288 dmar_qi_invalidate_sync(struct dmar_domain *domain, iommu_gaddr_t base, 289 iommu_gaddr_t size, bool cansleep) 290 { 291 struct dmar_unit *unit; 292 struct iommu_qi_genseq gseq; 293 294 unit = domain->dmar; 295 DMAR_LOCK(unit); 296 dmar_qi_invalidate_emit(domain, base, size, &gseq, true); 297 298 /* 299 * To avoid a missed wakeup in dmar_qi_task(), the unit's waiters count 300 * must be incremented before the tail is advanced. 301 */ 302 unit->inv_seq_waiters++; 303 304 dmar_qi_advance_tail(unit); 305 dmar_qi_wait_for_seq(unit, &gseq, !cansleep); 306 DMAR_UNLOCK(unit); 307 } 308 309 void 310 dmar_qi_invalidate_ctx_glob_locked(struct dmar_unit *unit) 311 { 312 struct iommu_qi_genseq gseq; 313 314 DMAR_ASSERT_LOCKED(unit); 315 dmar_qi_ensure(unit, 2); 316 dmar_qi_emit(unit, DMAR_IQ_DESCR_CTX_INV | DMAR_IQ_DESCR_CTX_GLOB, 0); 317 dmar_qi_emit_wait_seq(unit, &gseq, true); 318 /* See dmar_qi_invalidate_sync(). */ 319 unit->inv_seq_waiters++; 320 dmar_qi_advance_tail(unit); 321 dmar_qi_wait_for_seq(unit, &gseq, false); 322 } 323 324 void 325 dmar_qi_invalidate_iotlb_glob_locked(struct dmar_unit *unit) 326 { 327 struct iommu_qi_genseq gseq; 328 329 DMAR_ASSERT_LOCKED(unit); 330 dmar_qi_ensure(unit, 2); 331 dmar_qi_emit(unit, DMAR_IQ_DESCR_IOTLB_INV | DMAR_IQ_DESCR_IOTLB_GLOB | 332 DMAR_IQ_DESCR_IOTLB_DW | DMAR_IQ_DESCR_IOTLB_DR, 0); 333 dmar_qi_emit_wait_seq(unit, &gseq, true); 334 /* See dmar_qi_invalidate_sync(). */ 335 unit->inv_seq_waiters++; 336 dmar_qi_advance_tail(unit); 337 dmar_qi_wait_for_seq(unit, &gseq, false); 338 } 339 340 void 341 dmar_qi_invalidate_iec_glob(struct dmar_unit *unit) 342 { 343 struct iommu_qi_genseq gseq; 344 345 DMAR_ASSERT_LOCKED(unit); 346 dmar_qi_ensure(unit, 2); 347 dmar_qi_emit(unit, DMAR_IQ_DESCR_IEC_INV, 0); 348 dmar_qi_emit_wait_seq(unit, &gseq, true); 349 /* See dmar_qi_invalidate_sync(). */ 350 unit->inv_seq_waiters++; 351 dmar_qi_advance_tail(unit); 352 dmar_qi_wait_for_seq(unit, &gseq, false); 353 } 354 355 void 356 dmar_qi_invalidate_iec(struct dmar_unit *unit, u_int start, u_int cnt) 357 { 358 struct iommu_qi_genseq gseq; 359 u_int c, l; 360 361 DMAR_ASSERT_LOCKED(unit); 362 KASSERT(start < unit->irte_cnt && start < start + cnt && 363 start + cnt <= unit->irte_cnt, 364 ("inv iec overflow %d %d %d", unit->irte_cnt, start, cnt)); 365 for (; cnt > 0; cnt -= c, start += c) { 366 l = ffs(start | cnt) - 1; 367 c = 1 << l; 368 dmar_qi_ensure(unit, 1); 369 dmar_qi_emit(unit, DMAR_IQ_DESCR_IEC_INV | 370 DMAR_IQ_DESCR_IEC_IDX | DMAR_IQ_DESCR_IEC_IIDX(start) | 371 DMAR_IQ_DESCR_IEC_IM(l), 0); 372 } 373 dmar_qi_ensure(unit, 1); 374 dmar_qi_emit_wait_seq(unit, &gseq, true); 375 376 /* 377 * Since dmar_qi_wait_for_seq() will not sleep, this increment's 378 * placement relative to advancing the tail doesn't matter. 379 */ 380 unit->inv_seq_waiters++; 381 382 dmar_qi_advance_tail(unit); 383 384 /* 385 * The caller of the function, in particular, 386 * dmar_ir_program_irte(), may be called from the context 387 * where the sleeping is forbidden (in fact, the 388 * intr_table_lock mutex may be held, locked from 389 * intr_shuffle_irqs()). Wait for the invalidation completion 390 * using the busy wait. 391 * 392 * The impact on the interrupt input setup code is small, the 393 * expected overhead is comparable with the chipset register 394 * read. It is more harmful for the parallel DMA operations, 395 * since we own the dmar unit lock until whole invalidation 396 * queue is processed, which includes requests possibly issued 397 * before our request. 398 */ 399 dmar_qi_wait_for_seq(unit, &gseq, true); 400 } 401 402 int 403 dmar_qi_intr(void *arg) 404 { 405 struct dmar_unit *unit; 406 407 unit = arg; 408 KASSERT(unit->qi_enabled, ("dmar%d: QI is not enabled", 409 unit->iommu.unit)); 410 taskqueue_enqueue(unit->qi_taskqueue, &unit->qi_task); 411 return (FILTER_HANDLED); 412 } 413 414 static void 415 dmar_qi_task(void *arg, int pending __unused) 416 { 417 struct dmar_unit *unit; 418 struct iommu_map_entry *entry, *head; 419 uint32_t ics; 420 421 unit = arg; 422 423 /* 424 * Request an interrupt on the completion of the next invalidation 425 * wait descriptor with the IF field set. 426 */ 427 ics = dmar_read4(unit, DMAR_ICS_REG); 428 if ((ics & DMAR_ICS_IWC) != 0) { 429 ics = DMAR_ICS_IWC; 430 dmar_write4(unit, DMAR_ICS_REG, ics); 431 } 432 433 for (;;) { 434 head = unit->tlb_flush_head; 435 entry = (struct iommu_map_entry *) 436 atomic_load_acq_ptr((uintptr_t *)&head->tlb_flush_next); 437 if (entry == NULL) 438 break; 439 if (!dmar_qi_seq_processed(unit, &entry->gseq)) 440 break; 441 unit->tlb_flush_head = entry; 442 iommu_gas_free_entry(head); 443 if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0) 444 iommu_gas_free_region(entry); 445 else 446 iommu_gas_free_space(entry); 447 } 448 if (unit->inv_seq_waiters > 0) { 449 /* 450 * Acquire the DMAR lock so that wakeup() is called only after 451 * the waiter is sleeping. 452 */ 453 DMAR_LOCK(unit); 454 wakeup(&unit->inv_seq_waiters); 455 DMAR_UNLOCK(unit); 456 } 457 } 458 459 int 460 dmar_init_qi(struct dmar_unit *unit) 461 { 462 uint64_t iqa; 463 uint32_t ics; 464 int qi_sz; 465 466 if (!DMAR_HAS_QI(unit) || (unit->hw_cap & DMAR_CAP_CM) != 0) 467 return (0); 468 unit->qi_enabled = 1; 469 TUNABLE_INT_FETCH("hw.dmar.qi", &unit->qi_enabled); 470 if (!unit->qi_enabled) 471 return (0); 472 473 unit->tlb_flush_head = unit->tlb_flush_tail = 474 iommu_gas_alloc_entry(NULL, 0); 475 TASK_INIT(&unit->qi_task, 0, dmar_qi_task, unit); 476 unit->qi_taskqueue = taskqueue_create_fast("dmarqf", M_WAITOK, 477 taskqueue_thread_enqueue, &unit->qi_taskqueue); 478 taskqueue_start_threads(&unit->qi_taskqueue, 1, PI_AV, 479 "dmar%d qi taskq", unit->iommu.unit); 480 481 unit->inv_waitd_gen = 0; 482 unit->inv_waitd_seq = 1; 483 484 qi_sz = DMAR_IQA_QS_DEF; 485 TUNABLE_INT_FETCH("hw.dmar.qi_size", &qi_sz); 486 if (qi_sz > DMAR_IQA_QS_MAX) 487 qi_sz = DMAR_IQA_QS_MAX; 488 unit->inv_queue_size = (1ULL << qi_sz) * PAGE_SIZE; 489 /* Reserve one descriptor to prevent wraparound. */ 490 unit->inv_queue_avail = unit->inv_queue_size - DMAR_IQ_DESCR_SZ; 491 492 /* The invalidation queue reads by DMARs are always coherent. */ 493 unit->inv_queue = kmem_alloc_contig(unit->inv_queue_size, M_WAITOK | 494 M_ZERO, 0, dmar_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); 495 unit->inv_waitd_seq_hw_phys = pmap_kextract( 496 (vm_offset_t)&unit->inv_waitd_seq_hw); 497 498 DMAR_LOCK(unit); 499 dmar_write8(unit, DMAR_IQT_REG, 0); 500 iqa = pmap_kextract(unit->inv_queue); 501 iqa |= qi_sz; 502 dmar_write8(unit, DMAR_IQA_REG, iqa); 503 dmar_enable_qi(unit); 504 ics = dmar_read4(unit, DMAR_ICS_REG); 505 if ((ics & DMAR_ICS_IWC) != 0) { 506 ics = DMAR_ICS_IWC; 507 dmar_write4(unit, DMAR_ICS_REG, ics); 508 } 509 dmar_enable_qi_intr(unit); 510 DMAR_UNLOCK(unit); 511 512 return (0); 513 } 514 515 void 516 dmar_fini_qi(struct dmar_unit *unit) 517 { 518 struct iommu_qi_genseq gseq; 519 520 if (!unit->qi_enabled) 521 return; 522 taskqueue_drain(unit->qi_taskqueue, &unit->qi_task); 523 taskqueue_free(unit->qi_taskqueue); 524 unit->qi_taskqueue = NULL; 525 526 DMAR_LOCK(unit); 527 /* quisce */ 528 dmar_qi_ensure(unit, 1); 529 dmar_qi_emit_wait_seq(unit, &gseq, true); 530 /* See dmar_qi_invalidate_sync_locked(). */ 531 unit->inv_seq_waiters++; 532 dmar_qi_advance_tail(unit); 533 dmar_qi_wait_for_seq(unit, &gseq, false); 534 /* only after the quisce, disable queue */ 535 dmar_disable_qi_intr(unit); 536 dmar_disable_qi(unit); 537 KASSERT(unit->inv_seq_waiters == 0, 538 ("dmar%d: waiters on disabled queue", unit->iommu.unit)); 539 DMAR_UNLOCK(unit); 540 541 kmem_free(unit->inv_queue, unit->inv_queue_size); 542 unit->inv_queue = 0; 543 unit->inv_queue_size = 0; 544 unit->qi_enabled = 0; 545 } 546 547 void 548 dmar_enable_qi_intr(struct dmar_unit *unit) 549 { 550 uint32_t iectl; 551 552 DMAR_ASSERT_LOCKED(unit); 553 KASSERT(DMAR_HAS_QI(unit), ("dmar%d: QI is not supported", 554 unit->iommu.unit)); 555 iectl = dmar_read4(unit, DMAR_IECTL_REG); 556 iectl &= ~DMAR_IECTL_IM; 557 dmar_write4(unit, DMAR_IECTL_REG, iectl); 558 } 559 560 void 561 dmar_disable_qi_intr(struct dmar_unit *unit) 562 { 563 uint32_t iectl; 564 565 DMAR_ASSERT_LOCKED(unit); 566 KASSERT(DMAR_HAS_QI(unit), ("dmar%d: QI is not supported", 567 unit->iommu.unit)); 568 iectl = dmar_read4(unit, DMAR_IECTL_REG); 569 dmar_write4(unit, DMAR_IECTL_REG, iectl | DMAR_IECTL_IM); 570 } 571