1 /*- 2 * Copyright (c) 2013 The FreeBSD Foundation 3 * All rights reserved. 4 * 5 * This software was developed by Konstantin Belousov <kib@FreeBSD.org> 6 * under sponsorship from the FreeBSD Foundation. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include "opt_acpi.h" 34 35 #include <sys/param.h> 36 #include <sys/bus.h> 37 #include <sys/kernel.h> 38 #include <sys/malloc.h> 39 #include <sys/memdesc.h> 40 #include <sys/module.h> 41 #include <sys/rman.h> 42 #include <sys/taskqueue.h> 43 #include <sys/tree.h> 44 #include <machine/bus.h> 45 #include <contrib/dev/acpica/include/acpi.h> 46 #include <contrib/dev/acpica/include/accommon.h> 47 #include <dev/acpica/acpivar.h> 48 #include <vm/vm.h> 49 #include <vm/vm_extern.h> 50 #include <vm/vm_kern.h> 51 #include <vm/vm_page.h> 52 #include <vm/vm_map.h> 53 #include <machine/cpu.h> 54 #include <x86/include/busdma_impl.h> 55 #include <x86/iommu/intel_reg.h> 56 #include <x86/iommu/busdma_dmar.h> 57 #include <x86/iommu/intel_dmar.h> 58 59 static bool 60 dmar_qi_seq_processed(const struct dmar_unit *unit, 61 const struct dmar_qi_genseq *pseq) 62 { 63 64 return (pseq->gen < unit->inv_waitd_gen || 65 (pseq->gen == unit->inv_waitd_gen && 66 pseq->seq <= unit->inv_waitd_seq_hw)); 67 } 68 69 static int 70 dmar_enable_qi(struct dmar_unit *unit) 71 { 72 73 DMAR_ASSERT_LOCKED(unit); 74 unit->hw_gcmd |= DMAR_GCMD_QIE; 75 dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd); 76 /* XXXKIB should have a timeout */ 77 while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_QIES) == 0) 78 cpu_spinwait(); 79 return (0); 80 } 81 82 static int 83 dmar_disable_qi(struct dmar_unit *unit) 84 { 85 86 DMAR_ASSERT_LOCKED(unit); 87 unit->hw_gcmd &= ~DMAR_GCMD_QIE; 88 dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd); 89 /* XXXKIB should have a timeout */ 90 while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_QIES) != 0) 91 cpu_spinwait(); 92 return (0); 93 } 94 95 static void 96 dmar_qi_advance_tail(struct dmar_unit *unit) 97 { 98 99 DMAR_ASSERT_LOCKED(unit); 100 dmar_write4(unit, DMAR_IQT_REG, unit->inv_queue_tail); 101 } 102 103 static void 104 dmar_qi_ensure(struct dmar_unit *unit, int descr_count) 105 { 106 uint32_t head; 107 int bytes; 108 109 DMAR_ASSERT_LOCKED(unit); 110 bytes = descr_count << DMAR_IQ_DESCR_SZ_SHIFT; 111 for (;;) { 112 if (bytes <= unit->inv_queue_avail) 113 break; 114 /* refill */ 115 head = dmar_read4(unit, DMAR_IQH_REG); 116 head &= DMAR_IQH_MASK; 117 unit->inv_queue_avail = head - unit->inv_queue_tail - 118 DMAR_IQ_DESCR_SZ; 119 if (head <= unit->inv_queue_tail) 120 unit->inv_queue_avail += unit->inv_queue_size; 121 if (bytes <= unit->inv_queue_avail) 122 break; 123 124 /* 125 * No space in the queue, do busy wait. Hardware must 126 * make a progress. But first advance the tail to 127 * inform the descriptor streamer about entries we 128 * might have already filled, otherwise they could 129 * clog the whole queue.. 130 */ 131 dmar_qi_advance_tail(unit); 132 unit->inv_queue_full++; 133 cpu_spinwait(); 134 } 135 unit->inv_queue_avail -= bytes; 136 } 137 138 static void 139 dmar_qi_emit(struct dmar_unit *unit, uint64_t data1, uint64_t data2) 140 { 141 142 DMAR_ASSERT_LOCKED(unit); 143 *(volatile uint64_t *)(unit->inv_queue + unit->inv_queue_tail) = data1; 144 unit->inv_queue_tail += DMAR_IQ_DESCR_SZ / 2; 145 KASSERT(unit->inv_queue_tail <= unit->inv_queue_size, 146 ("tail overflow 0x%x 0x%jx", unit->inv_queue_tail, 147 (uintmax_t)unit->inv_queue_size)); 148 unit->inv_queue_tail &= unit->inv_queue_size - 1; 149 *(volatile uint64_t *)(unit->inv_queue + unit->inv_queue_tail) = data2; 150 unit->inv_queue_tail += DMAR_IQ_DESCR_SZ / 2; 151 KASSERT(unit->inv_queue_tail <= unit->inv_queue_size, 152 ("tail overflow 0x%x 0x%jx", unit->inv_queue_tail, 153 (uintmax_t)unit->inv_queue_size)); 154 unit->inv_queue_tail &= unit->inv_queue_size - 1; 155 } 156 157 static void 158 dmar_qi_emit_wait_descr(struct dmar_unit *unit, uint32_t seq, bool intr, 159 bool memw, bool fence) 160 { 161 162 DMAR_ASSERT_LOCKED(unit); 163 dmar_qi_emit(unit, DMAR_IQ_DESCR_WAIT_ID | 164 (intr ? DMAR_IQ_DESCR_WAIT_IF : 0) | 165 (memw ? DMAR_IQ_DESCR_WAIT_SW : 0) | 166 (fence ? DMAR_IQ_DESCR_WAIT_FN : 0) | 167 (memw ? DMAR_IQ_DESCR_WAIT_SD(seq) : 0), 168 memw ? unit->inv_waitd_seq_hw_phys : 0); 169 } 170 171 static void 172 dmar_qi_emit_wait_seq(struct dmar_unit *unit, struct dmar_qi_genseq *pseq) 173 { 174 struct dmar_qi_genseq gsec; 175 uint32_t seq; 176 177 KASSERT(pseq != NULL, ("wait descriptor with no place for seq")); 178 DMAR_ASSERT_LOCKED(unit); 179 if (unit->inv_waitd_seq == 0xffffffff) { 180 gsec.gen = unit->inv_waitd_gen; 181 gsec.seq = unit->inv_waitd_seq; 182 dmar_qi_ensure(unit, 1); 183 dmar_qi_emit_wait_descr(unit, gsec.seq, false, true, false); 184 dmar_qi_advance_tail(unit); 185 while (!dmar_qi_seq_processed(unit, &gsec)) 186 cpu_spinwait(); 187 unit->inv_waitd_gen++; 188 unit->inv_waitd_seq = 1; 189 } 190 seq = unit->inv_waitd_seq++; 191 pseq->gen = unit->inv_waitd_gen; 192 pseq->seq = seq; 193 dmar_qi_emit_wait_descr(unit, seq, true, true, false); 194 } 195 196 static void 197 dmar_qi_wait_for_seq(struct dmar_unit *unit, const struct dmar_qi_genseq *gseq) 198 { 199 200 DMAR_ASSERT_LOCKED(unit); 201 unit->inv_seq_waiters++; 202 while (!dmar_qi_seq_processed(unit, gseq)) { 203 if (cold) { 204 cpu_spinwait(); 205 } else { 206 msleep(&unit->inv_seq_waiters, &unit->lock, 0, 207 "dmarse", hz); 208 } 209 } 210 unit->inv_seq_waiters--; 211 } 212 213 void 214 dmar_qi_invalidate_locked(struct dmar_ctx *ctx, dmar_gaddr_t base, 215 dmar_gaddr_t size, struct dmar_qi_genseq *pseq) 216 { 217 struct dmar_unit *unit; 218 dmar_gaddr_t isize; 219 int am; 220 221 unit = ctx->dmar; 222 DMAR_ASSERT_LOCKED(unit); 223 for (; size > 0; base += isize, size -= isize) { 224 am = calc_am(unit, base, size, &isize); 225 dmar_qi_ensure(unit, 1); 226 dmar_qi_emit(unit, DMAR_IQ_DESCR_IOTLB_INV | 227 DMAR_IQ_DESCR_IOTLB_PAGE | DMAR_IQ_DESCR_IOTLB_DW | 228 DMAR_IQ_DESCR_IOTLB_DR | 229 DMAR_IQ_DESCR_IOTLB_DID(ctx->domain), 230 base | am); 231 } 232 if (pseq != NULL) { 233 dmar_qi_ensure(unit, 1); 234 dmar_qi_emit_wait_seq(unit, pseq); 235 } 236 dmar_qi_advance_tail(unit); 237 } 238 239 void 240 dmar_qi_invalidate_ctx_glob_locked(struct dmar_unit *unit) 241 { 242 struct dmar_qi_genseq gseq; 243 244 DMAR_ASSERT_LOCKED(unit); 245 dmar_qi_ensure(unit, 2); 246 dmar_qi_emit(unit, DMAR_IQ_DESCR_CTX_INV | DMAR_IQ_DESCR_CTX_GLOB, 0); 247 dmar_qi_emit_wait_seq(unit, &gseq); 248 dmar_qi_advance_tail(unit); 249 dmar_qi_wait_for_seq(unit, &gseq); 250 } 251 252 void 253 dmar_qi_invalidate_iotlb_glob_locked(struct dmar_unit *unit) 254 { 255 struct dmar_qi_genseq gseq; 256 257 DMAR_ASSERT_LOCKED(unit); 258 dmar_qi_ensure(unit, 2); 259 dmar_qi_emit(unit, DMAR_IQ_DESCR_IOTLB_INV | DMAR_IQ_DESCR_IOTLB_GLOB | 260 DMAR_IQ_DESCR_IOTLB_DW | DMAR_IQ_DESCR_IOTLB_DR, 0); 261 dmar_qi_emit_wait_seq(unit, &gseq); 262 dmar_qi_advance_tail(unit); 263 dmar_qi_wait_for_seq(unit, &gseq); 264 } 265 266 int 267 dmar_qi_intr(void *arg) 268 { 269 struct dmar_unit *unit; 270 271 unit = arg; 272 KASSERT(unit->qi_enabled, ("dmar%d: QI is not enabled", unit->unit)); 273 taskqueue_enqueue_fast(unit->qi_taskqueue, &unit->qi_task); 274 return (FILTER_HANDLED); 275 } 276 277 static void 278 dmar_qi_task(void *arg, int pending __unused) 279 { 280 struct dmar_unit *unit; 281 struct dmar_map_entry *entry; 282 uint32_t ics; 283 284 unit = arg; 285 286 DMAR_LOCK(unit); 287 for (;;) { 288 entry = TAILQ_FIRST(&unit->tlb_flush_entries); 289 if (entry == NULL) 290 break; 291 if ((entry->gseq.gen == 0 && entry->gseq.seq == 0) || 292 !dmar_qi_seq_processed(unit, &entry->gseq)) 293 break; 294 TAILQ_REMOVE(&unit->tlb_flush_entries, entry, dmamap_link); 295 DMAR_UNLOCK(unit); 296 dmar_ctx_free_entry(entry, (entry->flags & 297 DMAR_MAP_ENTRY_QI_NF) == 0); 298 DMAR_LOCK(unit); 299 } 300 ics = dmar_read4(unit, DMAR_ICS_REG); 301 if ((ics & DMAR_ICS_IWC) != 0) { 302 ics = DMAR_ICS_IWC; 303 dmar_write4(unit, DMAR_ICS_REG, ics); 304 } 305 if (unit->inv_seq_waiters > 0) 306 wakeup(&unit->inv_seq_waiters); 307 DMAR_UNLOCK(unit); 308 } 309 310 int 311 dmar_init_qi(struct dmar_unit *unit) 312 { 313 uint64_t iqa; 314 uint32_t ics; 315 int qi_sz; 316 317 if (!DMAR_HAS_QI(unit) || (unit->hw_cap & DMAR_CAP_CM) != 0) 318 return (0); 319 unit->qi_enabled = 1; 320 TUNABLE_INT_FETCH("hw.dmar.qi", &unit->qi_enabled); 321 if (!unit->qi_enabled) 322 return (0); 323 324 TAILQ_INIT(&unit->tlb_flush_entries); 325 TASK_INIT(&unit->qi_task, 0, dmar_qi_task, unit); 326 unit->qi_taskqueue = taskqueue_create_fast("dmar", M_WAITOK, 327 taskqueue_thread_enqueue, &unit->qi_taskqueue); 328 taskqueue_start_threads(&unit->qi_taskqueue, 1, PI_AV, 329 "dmar%d qi taskq", unit->unit); 330 331 unit->inv_waitd_gen = 0; 332 unit->inv_waitd_seq = 1; 333 334 qi_sz = DMAR_IQA_QS_DEF; 335 TUNABLE_INT_FETCH("hw.dmar.qi_size", &qi_sz); 336 if (qi_sz > DMAR_IQA_QS_MAX) 337 qi_sz = DMAR_IQA_QS_MAX; 338 unit->inv_queue_size = (1ULL << qi_sz) * PAGE_SIZE; 339 /* Reserve one descriptor to prevent wraparound. */ 340 unit->inv_queue_avail = unit->inv_queue_size - DMAR_IQ_DESCR_SZ; 341 342 /* The invalidation queue reads by DMARs are always coherent. */ 343 unit->inv_queue = kmem_alloc_contig(kernel_arena, unit->inv_queue_size, 344 M_WAITOK | M_ZERO, 0, dmar_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); 345 unit->inv_waitd_seq_hw_phys = pmap_kextract( 346 (vm_offset_t)&unit->inv_waitd_seq_hw); 347 348 DMAR_LOCK(unit); 349 dmar_write8(unit, DMAR_IQT_REG, 0); 350 iqa = pmap_kextract(unit->inv_queue); 351 iqa |= qi_sz; 352 dmar_write8(unit, DMAR_IQA_REG, iqa); 353 dmar_enable_qi(unit); 354 ics = dmar_read4(unit, DMAR_ICS_REG); 355 if ((ics & DMAR_ICS_IWC) != 0) { 356 ics = DMAR_ICS_IWC; 357 dmar_write4(unit, DMAR_ICS_REG, ics); 358 } 359 DMAR_UNLOCK(unit); 360 361 return (0); 362 } 363 364 void 365 dmar_fini_qi(struct dmar_unit *unit) 366 { 367 struct dmar_qi_genseq gseq; 368 369 if (unit->qi_enabled) 370 return; 371 taskqueue_drain(unit->qi_taskqueue, &unit->qi_task); 372 taskqueue_free(unit->qi_taskqueue); 373 unit->qi_taskqueue = NULL; 374 375 DMAR_LOCK(unit); 376 /* quisce */ 377 dmar_qi_ensure(unit, 1); 378 dmar_qi_emit_wait_seq(unit, &gseq); 379 dmar_qi_advance_tail(unit); 380 dmar_qi_wait_for_seq(unit, &gseq); 381 /* only after the quisce, disable queue */ 382 dmar_disable_qi(unit); 383 KASSERT(unit->inv_seq_waiters == 0, 384 ("dmar%d: waiters on disabled queue", unit->unit)); 385 DMAR_UNLOCK(unit); 386 387 kmem_free(kernel_arena, unit->inv_queue, unit->inv_queue_size); 388 unit->inv_queue = 0; 389 unit->inv_queue_size = 0; 390 unit->qi_enabled = 0; 391 } 392 393 void 394 dmar_enable_qi_intr(struct dmar_unit *unit) 395 { 396 uint32_t iectl; 397 398 DMAR_ASSERT_LOCKED(unit); 399 KASSERT(DMAR_HAS_QI(unit), ("dmar%d: QI is not supported", unit->unit)); 400 iectl = dmar_read4(unit, DMAR_IECTL_REG); 401 iectl &= ~DMAR_IECTL_IM; 402 dmar_write4(unit, DMAR_IECTL_REG, iectl); 403 } 404 405 void 406 dmar_disable_qi_intr(struct dmar_unit *unit) 407 { 408 uint32_t iectl; 409 410 DMAR_ASSERT_LOCKED(unit); 411 KASSERT(DMAR_HAS_QI(unit), ("dmar%d: QI is not supported", unit->unit)); 412 iectl = dmar_read4(unit, DMAR_IECTL_REG); 413 dmar_write4(unit, DMAR_IECTL_REG, iectl | DMAR_IECTL_IM); 414 } 415