/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013, 2014, 2024 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "opt_acpi.h"
#if defined(__amd64__)
#define DEV_APIC
#else
#include "opt_apic.h"
#endif
#include "opt_ddb.h"

#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sf_buf.h>
#include <sys/sysctl.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/rman.h>
#include <sys/rwlock.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/cpu.h>
#include <x86/include/busdma_impl.h>
#include <dev/iommu/busdma_iommu.h>
#include <dev/iommu/iommu.h>
#include <x86/iommu/x86_iommu.h>
#include <x86/iommu/iommu_intrmap.h>
#ifdef DEV_APIC
#include "pcib_if.h"
#include <machine/intr_machdep.h>
#include <x86/apicreg.h>
#include <x86/apicvar.h>
#endif

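/*
 * Return the page table page with index idx in the page table object
 * obj, allocating it if it is not already present.  IOMMU_PGF_ZERO
 * requests a zero-filled page, IOMMU_PGF_WAITOK allows the allocation
 * to sleep, IOMMU_PGF_NOALLOC only performs a lookup, and
 * IOMMU_PGF_OBJL indicates that the caller already holds the object
 * lock.  Pages are allocated below iommu_high so that the unit can
 * reference them.
 */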
vm_page_t
iommu_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags)
{
        vm_page_t m;
        int zeroed, aflags;

        zeroed = (flags & IOMMU_PGF_ZERO) != 0 ? VM_ALLOC_ZERO : 0;
        aflags = zeroed | VM_ALLOC_NOBUSY | VM_ALLOC_SYSTEM | VM_ALLOC_NODUMP |
            ((flags & IOMMU_PGF_WAITOK) != 0 ? VM_ALLOC_WAITFAIL :
            VM_ALLOC_NOWAIT);
        for (;;) {
                if ((flags & IOMMU_PGF_OBJL) == 0)
                        VM_OBJECT_WLOCK(obj);
                m = vm_page_lookup(obj, idx);
                if ((flags & IOMMU_PGF_NOALLOC) != 0 || m != NULL) {
                        if ((flags & IOMMU_PGF_OBJL) == 0)
                                VM_OBJECT_WUNLOCK(obj);
                        break;
                }
                m = vm_page_alloc_contig(obj, idx, aflags, 1, 0,
                    iommu_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
                if ((flags & IOMMU_PGF_OBJL) == 0)
                        VM_OBJECT_WUNLOCK(obj);
                if (m != NULL) {
                        if (zeroed && (m->flags & PG_ZERO) == 0)
                                pmap_zero_page(m);
                        atomic_add_int(&iommu_tbl_pagecnt, 1);
                        break;
                }
                if ((flags & IOMMU_PGF_WAITOK) == 0)
                        break;
        }
        return (m);
}

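/*
 * Drop the page table page with index idx from obj.  If entry is NULL
 * the page is freed immediately; otherwise it is removed from the
 * object but kept busy and placed on the entry's pgtbl_free list, so
 * that the caller can free it later, typically after the invalidation
 * that makes it unreachable has been processed.
 */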
void
iommu_pgfree(vm_object_t obj, vm_pindex_t idx, int flags,
    struct iommu_map_entry *entry)
{
        vm_page_t m;

        if ((flags & IOMMU_PGF_OBJL) == 0)
                VM_OBJECT_WLOCK(obj);
        m = vm_page_grab(obj, idx, VM_ALLOC_NOCREAT);
        if (m != NULL) {
                if (entry == NULL) {
                        vm_page_free(m);
                        atomic_subtract_int(&iommu_tbl_pagecnt, 1);
                } else {
                        vm_page_remove_xbusy(m);        /* keep page busy */
                        SLIST_INSERT_HEAD(&entry->pgtbl_free, m, plinks.s.ss);
                }
        }
        if ((flags & IOMMU_PGF_OBJL) == 0)
                VM_OBJECT_WUNLOCK(obj);
}

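/*
 * Map the page table page with index idx in obj into KVA through a
 * CPU-private sf_buf and return its kernel virtual address, allocating
 * the page first if IOMMU_PGF_ALLOC is set.  The calling thread stays
 * pinned to its CPU until iommu_unmap_pgtbl() releases the mapping.
 * A typical call sequence looks roughly like the sketch below; the pte
 * type and offset are only illustrative:
 *
 *      pte = iommu_map_pgtbl(obj, idx,
 *          IOMMU_PGF_ALLOC | IOMMU_PGF_WAITOK, &sf);
 *      if (pte != NULL) {
 *              pte[off] = newval;
 *              iommu_unmap_pgtbl(sf);
 *      }
 */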
void *
iommu_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags,
    struct sf_buf **sf)
{
        vm_page_t m;
        bool allocated;

        if ((flags & IOMMU_PGF_OBJL) == 0)
                VM_OBJECT_WLOCK(obj);
        m = vm_page_lookup(obj, idx);
        if (m == NULL && (flags & IOMMU_PGF_ALLOC) != 0) {
                m = iommu_pgalloc(obj, idx, flags | IOMMU_PGF_OBJL);
                allocated = true;
        } else
                allocated = false;
        if (m == NULL) {
                if ((flags & IOMMU_PGF_OBJL) == 0)
                        VM_OBJECT_WUNLOCK(obj);
                return (NULL);
        }
        /* Sleepable allocations cannot fail. */
        if ((flags & IOMMU_PGF_WAITOK) != 0)
                VM_OBJECT_WUNLOCK(obj);
        sched_pin();
        *sf = sf_buf_alloc(m, SFB_CPUPRIVATE | ((flags & IOMMU_PGF_WAITOK)
            == 0 ? SFB_NOWAIT : 0));
        if (*sf == NULL) {
                sched_unpin();
                if (allocated) {
                        VM_OBJECT_ASSERT_WLOCKED(obj);
                        iommu_pgfree(obj, m->pindex, flags | IOMMU_PGF_OBJL,
                            NULL);
                }
                if ((flags & IOMMU_PGF_OBJL) == 0)
                        VM_OBJECT_WUNLOCK(obj);
                return (NULL);
        }
        if ((flags & (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL)) ==
            (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL))
                VM_OBJECT_WLOCK(obj);
        else if ((flags & (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL)) == 0)
                VM_OBJECT_WUNLOCK(obj);
        return ((void *)sf_buf_kva(*sf));
}

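/*
 * Undo iommu_map_pgtbl(): release the sf_buf mapping and unpin the
 * calling thread.
 */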
void
iommu_unmap_pgtbl(struct sf_buf *sf)
{

        sf_buf_free(sf);
        sched_unpin();
}

iommu_haddr_t iommu_high;
int iommu_tbl_pagecnt;

SYSCTL_NODE(_hw_iommu, OID_AUTO, dmar, CTLFLAG_RD | CTLFLAG_MPSAFE,
    NULL, "");
SYSCTL_INT(_hw_iommu, OID_AUTO, tbl_pagecnt, CTLFLAG_RD,
    &iommu_tbl_pagecnt, 0,
    "Count of pages used for IOMMU pagetables");

int iommu_qi_batch_coalesce = 100;
SYSCTL_INT(_hw_iommu, OID_AUTO, batch_coalesce, CTLFLAG_RWTUN,
    &iommu_qi_batch_coalesce, 0,
    "Number of QI batches between interrupts");

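/*
 * Stub x86_iommu ops, in effect until a driver (DMAR or AMD-Vi)
 * registers itself with set_x86_iommu().  The MSI and ioapic mapping
 * hooks return EOPNOTSUPP, which presumably lets the interrupt code
 * fall back to non-remapped programming.
 */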
static struct iommu_unit *
x86_no_iommu_find(device_t dev, bool verbose)
{
        return (NULL);
}

static int
x86_no_iommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count)
{
        return (EOPNOTSUPP);
}

static int
x86_no_iommu_map_msi_intr(device_t src, u_int cpu, u_int vector,
    u_int cookie, uint64_t *addr, uint32_t *data)
{
        return (EOPNOTSUPP);
}

static int
x86_no_iommu_unmap_msi_intr(device_t src, u_int cookie)
{
        return (0);
}

static int
x86_no_iommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector,
    bool edge, bool activehi, int irq, u_int *cookie, uint32_t *hi,
    uint32_t *lo)
{
        return (EOPNOTSUPP);
}

static int
x86_no_iommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie)
{
        return (0);
}

static struct x86_iommu x86_no_iommu = {
        .find = x86_no_iommu_find,
        .alloc_msi_intr = x86_no_iommu_alloc_msi_intr,
        .map_msi_intr = x86_no_iommu_map_msi_intr,
        .unmap_msi_intr = x86_no_iommu_unmap_msi_intr,
        .map_ioapic_intr = x86_no_iommu_map_ioapic_intr,
        .unmap_ioapic_intr = x86_no_iommu_unmap_ioapic_intr,
};

static struct x86_iommu *x86_iommu = &x86_no_iommu;

void
set_x86_iommu(struct x86_iommu *x)
{
        MPASS(x86_iommu == &x86_no_iommu);
        x86_iommu = x;
}

struct x86_iommu *
get_x86_iommu(void)
{
        return (x86_iommu);
}

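/*
 * Thin wrappers that dispatch the machine-independent iommu entry
 * points to the registered x86 IOMMU driver.
 */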
void
iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
    bool cansleep)
{
        x86_iommu->domain_unload_entry(entry, free, cansleep);
}

void
iommu_domain_unload(struct iommu_domain *iodom,
    struct iommu_map_entries_tailq *entries, bool cansleep)
{
        x86_iommu->domain_unload(iodom, entries, cansleep);
}

struct iommu_ctx *
iommu_get_ctx(struct iommu_unit *iommu, device_t dev, uint16_t rid,
    bool id_mapped, bool rmrr_init)
{
        return (x86_iommu->get_ctx(iommu, dev, rid, id_mapped, rmrr_init));
}

void
iommu_free_ctx_locked(struct iommu_unit *iommu, struct iommu_ctx *context)
{
        x86_iommu->free_ctx_locked(iommu, context);
}

struct iommu_unit *
iommu_find(device_t dev, bool verbose)
{
        return (x86_iommu->find(dev, verbose));
}

int
iommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count)
{
        return (x86_iommu->alloc_msi_intr(src, cookies, count));
}

int
iommu_map_msi_intr(device_t src, u_int cpu, u_int vector, u_int cookie,
    uint64_t *addr, uint32_t *data)
{
        return (x86_iommu->map_msi_intr(src, cpu, vector, cookie,
            addr, data));
}

int
iommu_unmap_msi_intr(device_t src, u_int cookie)
{
        return (x86_iommu->unmap_msi_intr(src, cookie));
}

int
iommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector, bool edge,
    bool activehi, int irq, u_int *cookie, uint32_t *hi, uint32_t *lo)
{
        return (x86_iommu->map_ioapic_intr(ioapic_id, cpu, vector, edge,
            activehi, irq, cookie, hi, lo));
}

int
iommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie)
{
        return (x86_iommu->unmap_ioapic_intr(ioapic_id, cookie));
}

void
iommu_unit_pre_instantiate_ctx(struct iommu_unit *unit)
{
        x86_iommu->unit_pre_instantiate_ctx(unit);
}

#define IOMMU2X86C(iommu)       (x86_iommu->get_x86_common(iommu))

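/*
 * Return true if the invalidation queue hardware has completed the
 * wait descriptor identified by the generation/sequence pair *pseq,
 * i.e. the completion sequence number written back by the unit has
 * reached it.
 */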
static bool
iommu_qi_seq_processed(struct iommu_unit *unit,
    const struct iommu_qi_genseq *pseq)
{
        struct x86_unit_common *x86c;
        u_int gen;

        x86c = IOMMU2X86C(unit);
        gen = x86c->inv_waitd_gen;
        return (pseq->gen < gen || (pseq->gen == gen && pseq->seq <=
            atomic_load_64(&x86c->inv_waitd_seq_hw)));
}

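/*
 * Allocate the next wait-descriptor sequence number and record it in
 * *pseq.  When the 32-bit sequence counter is about to wrap, a
 * synchronous wait descriptor is emitted and drained first and the
 * generation counter is bumped, keeping the comparison in
 * iommu_qi_seq_processed() monotonic.  If emit_wait is true, the wait
 * descriptor for the new sequence number is queued as well.
 */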
void
iommu_qi_emit_wait_seq(struct iommu_unit *unit, struct iommu_qi_genseq *pseq,
    bool emit_wait)
{
        struct x86_unit_common *x86c;
        struct iommu_qi_genseq gsec;
        uint32_t seq;

        KASSERT(pseq != NULL, ("wait descriptor with no place for seq"));
        IOMMU_ASSERT_LOCKED(unit);
        x86c = IOMMU2X86C(unit);

        if (x86c->inv_waitd_seq == 0xffffffff) {
                gsec.gen = x86c->inv_waitd_gen;
                gsec.seq = x86c->inv_waitd_seq;
                x86_iommu->qi_ensure(unit, 1);
                x86_iommu->qi_emit_wait_descr(unit, gsec.seq, false,
                    true, false);
                x86_iommu->qi_advance_tail(unit);
                while (!iommu_qi_seq_processed(unit, &gsec))
                        cpu_spinwait();
                x86c->inv_waitd_gen++;
                x86c->inv_waitd_seq = 1;
        }
        seq = x86c->inv_waitd_seq++;
        pseq->gen = x86c->inv_waitd_gen;
        pseq->seq = seq;
        if (emit_wait) {
                x86_iommu->qi_ensure(unit, 1);
                x86_iommu->qi_emit_wait_descr(unit, seq, true, true, false);
        }
}

/*
 * To avoid missed wakeups, callers must increment the unit's waiters count
 * before advancing the tail past the wait descriptor.
 */
void
iommu_qi_wait_for_seq(struct iommu_unit *unit, const struct iommu_qi_genseq *
    gseq, bool nowait)
{
        struct x86_unit_common *x86c;

        IOMMU_ASSERT_LOCKED(unit);
        x86c = IOMMU2X86C(unit);

        KASSERT(x86c->inv_seq_waiters > 0, ("%s: no waiters", __func__));
        while (!iommu_qi_seq_processed(unit, gseq)) {
                if (cold || nowait) {
                        cpu_spinwait();
                } else {
                        msleep(&x86c->inv_seq_waiters, &unit->lock, 0,
                            "dmarse", hz);
                }
        }
        x86c->inv_seq_waiters--;
}

/*
 * The caller must not be using the entry's dmamap_link field.
 */
void
iommu_qi_invalidate_locked(struct iommu_domain *domain,
    struct iommu_map_entry *entry, bool emit_wait)
{
        struct iommu_unit *unit;
        struct x86_unit_common *x86c;

        unit = domain->iommu;
        x86c = IOMMU2X86C(unit);
        IOMMU_ASSERT_LOCKED(unit);

        x86_iommu->qi_invalidate_emit(domain, entry->start, entry->end -
            entry->start, &entry->gseq, emit_wait);

        /*
         * To avoid a data race in dmar_qi_task(), the entry's gseq must be
         * initialized before the entry is added to the TLB flush list, and the
         * entry must be added to that list before the tail is advanced.  More
         * precisely, the tail must not be advanced past the wait descriptor
         * that will generate the interrupt that schedules dmar_qi_task() for
         * execution before the entry is added to the list.  While an earlier
         * call to dmar_qi_ensure() might have advanced the tail, it will not
         * advance it past the wait descriptor.
         *
         * See the definition of struct dmar_unit for more information on
         * synchronization.
         */
        entry->tlb_flush_next = NULL;
        atomic_store_rel_ptr((uintptr_t *)&x86c->tlb_flush_tail->
            tlb_flush_next, (uintptr_t)entry);
        x86c->tlb_flush_tail = entry;

        x86_iommu->qi_advance_tail(unit);
}

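/*
 * Emit an invalidation for the range [base, base + size) in the
 * domain and wait for it to complete before returning, sleeping if
 * cansleep is true and spinning otherwise.
 */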
void
iommu_qi_invalidate_sync(struct iommu_domain *domain, iommu_gaddr_t base,
    iommu_gaddr_t size, bool cansleep)
{
        struct iommu_unit *unit;
        struct iommu_qi_genseq gseq;

        unit = domain->iommu;
        IOMMU_LOCK(unit);
        x86_iommu->qi_invalidate_emit(domain, base, size, &gseq, true);

        /*
         * To avoid a missed wakeup in iommu_qi_task(), the unit's
         * waiters count must be incremented before the tail is
         * advanced.
         */
        IOMMU2X86C(unit)->inv_seq_waiters++;

        x86_iommu->qi_advance_tail(unit);
        iommu_qi_wait_for_seq(unit, &gseq, !cansleep);
        IOMMU_UNLOCK(unit);
}

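/*
 * Walk the TLB flush list and retire every entry whose invalidation
 * the hardware has already completed: the placeholder in front of the
 * entry is freed, the entry's GAS region or space is released, and
 * the entry itself becomes the new placeholder at the head of the
 * list.  Entries with still-pending invalidations are left alone.
 */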
void
iommu_qi_drain_tlb_flush(struct iommu_unit *unit)
{
        struct x86_unit_common *x86c;
        struct iommu_map_entry *entry, *head;

        x86c = IOMMU2X86C(unit);
        for (head = x86c->tlb_flush_head;; head = entry) {
                entry = (struct iommu_map_entry *)
                    atomic_load_acq_ptr((uintptr_t *)&head->tlb_flush_next);
                if (entry == NULL ||
                    !iommu_qi_seq_processed(unit, &entry->gseq))
                        break;
                x86c->tlb_flush_head = entry;
                iommu_gas_free_entry(head);
                if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
                        iommu_gas_free_region(entry);
                else
                        iommu_gas_free_space(entry);
        }
}

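/*
 * Common setup of the invalidation-queue machinery shared by the DMAR
 * and AMD-Vi drivers: the TLB flush list, the QI taskqueue, the
 * wait-descriptor counters, and the invalidation queue buffer itself.
 * The queue size comes from the hw.iommu.qi_size tunable, expressed
 * as log2 of the page count and capped at the unit's qi_buf_maxsz.
 */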
void
iommu_qi_common_init(struct iommu_unit *unit, task_fn_t qi_task)
{
        struct x86_unit_common *x86c;
        u_int qi_sz;

        x86c = IOMMU2X86C(unit);

        x86c->tlb_flush_head = x86c->tlb_flush_tail =
            iommu_gas_alloc_entry(NULL, 0);
        TASK_INIT(&x86c->qi_task, 0, qi_task, unit);
        x86c->qi_taskqueue = taskqueue_create_fast("iommuqf", M_WAITOK,
            taskqueue_thread_enqueue, &x86c->qi_taskqueue);
        taskqueue_start_threads(&x86c->qi_taskqueue, 1, PI_AV,
            "iommu%d qi taskq", unit->unit);

        x86c->inv_waitd_gen = 0;
        x86c->inv_waitd_seq = 1;

        qi_sz = 3;
        TUNABLE_INT_FETCH("hw.iommu.qi_size", &qi_sz);
        if (qi_sz > x86c->qi_buf_maxsz)
                qi_sz = x86c->qi_buf_maxsz;
        x86c->inv_queue_size = (1ULL << qi_sz) * PAGE_SIZE;
        /* Reserve one descriptor to prevent wraparound. */
        x86c->inv_queue_avail = x86c->inv_queue_size -
            x86c->qi_cmd_sz;

        /*
         * The invalidation queue reads by DMARs/AMDIOMMUs are always
         * coherent.
         */
        x86c->inv_queue = kmem_alloc_contig(x86c->inv_queue_size,
            M_WAITOK | M_ZERO, 0, iommu_high, PAGE_SIZE, 0,
            VM_MEMATTR_DEFAULT);
        x86c->inv_waitd_seq_hw_phys = pmap_kextract(
            (vm_offset_t)&x86c->inv_waitd_seq_hw);
}

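/*
 * Tear down the invalidation queue: drain and free the QI taskqueue,
 * quiesce the hardware by waiting for a final wait descriptor, call
 * the driver-provided disable_qi callback, and release the queue
 * buffer.
 */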
void
iommu_qi_common_fini(struct iommu_unit *unit, void (*disable_qi)(
    struct iommu_unit *))
{
        struct x86_unit_common *x86c;
        struct iommu_qi_genseq gseq;

        x86c = IOMMU2X86C(unit);

        taskqueue_drain(x86c->qi_taskqueue, &x86c->qi_task);
        taskqueue_free(x86c->qi_taskqueue);
        x86c->qi_taskqueue = NULL;

        IOMMU_LOCK(unit);
        /* quiesce */
        x86_iommu->qi_ensure(unit, 1);
        iommu_qi_emit_wait_seq(unit, &gseq, true);
        /* See iommu_qi_invalidate_locked(). */
        x86c->inv_seq_waiters++;
        x86_iommu->qi_advance_tail(unit);
        iommu_qi_wait_for_seq(unit, &gseq, false);
        /* only after the quiesce, disable queue */
        disable_qi(unit);
        KASSERT(x86c->inv_seq_waiters == 0,
            ("iommu%d: waiters on disabled queue", unit->unit));
        IOMMU_UNLOCK(unit);

        kmem_free(x86c->inv_queue, x86c->inv_queue_size);
        x86c->inv_queue = NULL;
        x86c->inv_queue_size = 0;
}

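/*
 * Allocate and wire up the MSI-X interrupt with index idx for the
 * unit: allocate an MSI-X vector through the parent, activate the IRQ
 * resource, install the handler recorded in the unit's iommu_msi_data
 * slot, and record the MSI address/data pair for the driver to
 * program into the hardware.
 */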
int
iommu_alloc_irq(struct iommu_unit *unit, int idx)
{
        device_t dev, pcib;
        struct iommu_msi_data *dmd;
        uint64_t msi_addr;
        uint32_t msi_data;
        int error;

        MPASS(idx >= 0 && idx < IOMMU_MAX_MSI);

        dev = unit->dev;
        dmd = &IOMMU2X86C(unit)->intrs[idx];
        pcib = device_get_parent(device_get_parent(dev)); /* Really not pcib */
        error = PCIB_ALLOC_MSIX(pcib, dev, &dmd->irq);
        if (error != 0) {
                device_printf(dev, "cannot allocate %s interrupt, %d\n",
                    dmd->name, error);
                goto err1;
        }
        error = bus_set_resource(dev, SYS_RES_IRQ, dmd->irq_rid,
            dmd->irq, 1);
        if (error != 0) {
                device_printf(dev, "cannot set %s interrupt resource, %d\n",
                    dmd->name, error);
                goto err2;
        }
        dmd->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
            &dmd->irq_rid, RF_ACTIVE);
        if (dmd->irq_res == NULL) {
                device_printf(dev,
                    "cannot allocate resource for %s interrupt\n", dmd->name);
                error = ENXIO;
                goto err3;
        }
        error = bus_setup_intr(dev, dmd->irq_res, INTR_TYPE_MISC,
            dmd->handler, NULL, unit, &dmd->intr_handle);
        if (error != 0) {
                device_printf(dev, "cannot setup %s interrupt, %d\n",
                    dmd->name, error);
                goto err4;
        }
        bus_describe_intr(dev, dmd->irq_res, dmd->intr_handle, "%s", dmd->name);
        error = PCIB_MAP_MSI(pcib, dev, dmd->irq, &msi_addr, &msi_data);
        if (error != 0) {
                device_printf(dev, "cannot map %s interrupt, %d\n",
                    dmd->name, error);
                goto err5;
        }

        dmd->msi_data = msi_data;
        dmd->msi_addr = msi_addr;

        return (0);

err5:
        bus_teardown_intr(dev, dmd->irq_res, dmd->intr_handle);
err4:
        bus_release_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq_res);
err3:
        bus_delete_resource(dev, SYS_RES_IRQ, dmd->irq_rid);
err2:
        PCIB_RELEASE_MSIX(pcib, dev, dmd->irq);
        dmd->irq = -1;
err1:
        return (error);
}

void
iommu_release_intr(struct iommu_unit *unit, int idx)
{
        device_t dev;
        struct iommu_msi_data *dmd;

        MPASS(idx >= 0 && idx < IOMMU_MAX_MSI);

        dmd = &IOMMU2X86C(unit)->intrs[idx];
        if (dmd->handler == NULL || dmd->irq == -1)
                return;
        dev = unit->dev;

        bus_teardown_intr(dev, dmd->irq_res, dmd->intr_handle);
        bus_release_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq_res);
        bus_delete_resource(dev, SYS_RES_IRQ, dmd->irq_rid);
        PCIB_RELEASE_MSIX(device_get_parent(device_get_parent(dev)),
            dev, dmd->irq);
        dmd->irq = -1;
}

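/*
 * Initialize the busdma tag of a freshly created context so that DMA
 * from the owner device goes through the IOMMU: the addressable range
 * is clamped to the domain's end address and the segment count limit
 * is lifted.
 */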
void
iommu_device_tag_init(struct iommu_ctx *ctx, device_t dev)
{
        bus_addr_t maxaddr;

        maxaddr = MIN(ctx->domain->end, BUS_SPACE_MAXADDR);
        ctx->tag->common.impl = &bus_dma_iommu_impl;
        ctx->tag->common.boundary = 0;
        ctx->tag->common.lowaddr = maxaddr;
        ctx->tag->common.highaddr = maxaddr;
        ctx->tag->common.maxsize = maxaddr;
        ctx->tag->common.nsegments = BUS_SPACE_UNRESTRICTED;
        ctx->tag->common.maxsegsz = maxaddr;
        ctx->tag->ctx = ctx;
        ctx->tag->owner = dev;
}

void
iommu_domain_free_entry(struct iommu_map_entry *entry, bool free)
{
        if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
                iommu_gas_free_region(entry);
        else
                iommu_gas_free_space(entry);
        if (free)
                iommu_gas_free_entry(entry);
        else
                entry->flags = 0;
}

/*
 * Index of the pte for the guest address base in the page table at
 * the level lvl.
 */
int
pglvl_pgtbl_pte_off(int pglvl, iommu_gaddr_t base, int lvl)
{

        base >>= IOMMU_PAGE_SHIFT + (pglvl - lvl - 1) *
            IOMMU_NPTEPGSHIFT;
        return (base & IOMMU_PTEMASK);
}

/*
 * Returns the page index of the page table page in the page table
 * object, which maps the given address base at the page table level
 * lvl.
 */
vm_pindex_t
pglvl_pgtbl_get_pindex(int pglvl, iommu_gaddr_t base, int lvl)
{
        vm_pindex_t idx, pidx;
        int i;

        KASSERT(lvl >= 0 && lvl < pglvl,
            ("wrong lvl %d %d", pglvl, lvl));

        for (pidx = idx = 0, i = 0; i < lvl; i++, pidx = idx) {
                idx = pglvl_pgtbl_pte_off(pglvl, base, i) +
                    pidx * IOMMU_NPTEPG + 1;
        }
        return (idx);
}

/*
 * Calculate the total amount of page table pages needed to map the
 * whole bus address space on the context with the selected agaw.
 */
vm_pindex_t
pglvl_max_pages(int pglvl)
{
        vm_pindex_t res;
        int i;

        for (res = 0, i = pglvl; i > 0; i--) {
                res *= IOMMU_NPTEPG;
                res++;
        }
        return (res);
}

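/*
 * Return the size of the address range mapped by a single pte at
 * level lvl of a page table with total_pglvl levels: IOMMU_PAGE_SIZE
 * shifted left by IOMMU_NPTEPGSHIFT once for every level below lvl.
 */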
iommu_gaddr_t
pglvl_page_size(int total_pglvl, int lvl)
{
        int rlvl;
        static const iommu_gaddr_t pg_sz[] = {
                (iommu_gaddr_t)IOMMU_PAGE_SIZE,
                (iommu_gaddr_t)IOMMU_PAGE_SIZE << IOMMU_NPTEPGSHIFT,
                (iommu_gaddr_t)IOMMU_PAGE_SIZE << (2 * IOMMU_NPTEPGSHIFT),
                (iommu_gaddr_t)IOMMU_PAGE_SIZE << (3 * IOMMU_NPTEPGSHIFT),
                (iommu_gaddr_t)IOMMU_PAGE_SIZE << (4 * IOMMU_NPTEPGSHIFT),
                (iommu_gaddr_t)IOMMU_PAGE_SIZE << (5 * IOMMU_NPTEPGSHIFT),
                (iommu_gaddr_t)IOMMU_PAGE_SIZE << (6 * IOMMU_NPTEPGSHIFT),
        };

        KASSERT(lvl >= 0 && lvl < total_pglvl,
            ("total %d lvl %d", total_pglvl, lvl));
        rlvl = total_pglvl - lvl - 1;
        KASSERT(rlvl < nitems(pg_sz), ("sizeof pg_sz lvl %d", lvl));
        return (pg_sz[rlvl]);
}

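/*
 * Record the iommu device as the DEV_PROP_NAME_IOMMU property of dev,
 * unless an earlier pass already set one.
 */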
void
iommu_device_set_iommu_prop(device_t dev, device_t iommu)
{
        device_t iommu_dev;
        int error;

        bus_topo_lock();
        error = device_get_prop(dev, DEV_PROP_NAME_IOMMU, (void **)&iommu_dev);
        if (error == ENOENT)
                device_set_prop(dev, DEV_PROP_NAME_IOMMU, iommu, NULL, NULL);
        bus_topo_unlock();
}

#ifdef DDB
#include <ddb/ddb.h>
#include <ddb/db_lex.h>

void
iommu_db_print_domain_entry(const struct iommu_map_entry *entry)
{
        struct iommu_map_entry *l, *r;

        db_printf(
            " start %jx end %jx first %jx last %jx free_down %jx flags %x ",
            entry->start, entry->end, entry->first, entry->last,
            entry->free_down, entry->flags);
        db_printf("left ");
        l = RB_LEFT(entry, rb_entry);
        if (l == NULL)
                db_printf("NULL ");
        else
                db_printf("%jx ", l->start);
        db_printf("right ");
        r = RB_RIGHT(entry, rb_entry);
        if (r == NULL)
                db_printf("NULL");
        else
                db_printf("%jx", r->start);
        db_printf("\n");
}

void
iommu_db_print_ctx(struct iommu_ctx *ctx)
{
        db_printf(
            " @%p pci%d:%d:%d refs %d flags %#x loads %lu unloads %lu\n",
            ctx, pci_get_bus(ctx->tag->owner),
            pci_get_slot(ctx->tag->owner),
            pci_get_function(ctx->tag->owner), ctx->refs,
            ctx->flags, ctx->loads, ctx->unloads);
}

void
iommu_db_domain_print_contexts(struct iommu_domain *iodom)
{
        struct iommu_ctx *ctx;

        if (LIST_EMPTY(&iodom->contexts))
                return;

        db_printf(" Contexts:\n");
        LIST_FOREACH(ctx, &iodom->contexts, link)
                iommu_db_print_ctx(ctx);
}

void
iommu_db_domain_print_mappings(struct iommu_domain *iodom)
{
        struct iommu_map_entry *entry;

        db_printf(" mapped:\n");
        RB_FOREACH(entry, iommu_gas_entries_tree, &iodom->rb_root) {
                iommu_db_print_domain_entry(entry);
                if (db_pager_quit)
                        break;
        }
        if (db_pager_quit)
                return;
        db_printf(" unloading:\n");
        TAILQ_FOREACH(entry, &iodom->unload_entries, dmamap_link) {
                iommu_db_print_domain_entry(entry);
                if (db_pager_quit)
                        break;
        }
}

#endif