/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domainset.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/memdesc.h>
#include <sys/msan.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/rman.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <dev/iommu/iommu.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <machine/iommu.h>
#include <dev/iommu/busdma_iommu.h>

/*
 * busdma_iommu.c, the implementation of the busdma(9) interface using
 * IOMMU units from Intel VT-d.
 */

static bool
iommu_bus_dma_is_dev_disabled(int domain, int bus, int slot, int func)
{
	char str[128], *env;
	int default_bounce;
	bool ret;
	static const char bounce_str[] = "bounce";
	static const char iommu_str[] = "iommu";
	static const char dmar_str[] = "dmar";	/* compatibility */

	default_bounce = 0;
	env = kern_getenv("hw.busdma.default");
	if (env != NULL) {
		if (strcmp(env, bounce_str) == 0)
			default_bounce = 1;
		else if (strcmp(env, iommu_str) == 0 ||
		    strcmp(env, dmar_str) == 0)
			default_bounce = 0;
		freeenv(env);
	}

	snprintf(str, sizeof(str), "hw.busdma.pci%d.%d.%d.%d",
	    domain, bus, slot, func);
	env = kern_getenv(str);
	if (env == NULL)
		return (default_bounce != 0);
	if (strcmp(env, bounce_str) == 0)
		ret = true;
	else if (strcmp(env, iommu_str) == 0 ||
	    strcmp(env, dmar_str) == 0)
		ret = false;
	else
		ret = default_bounce != 0;
	freeenv(env);
	return (ret);
}

/*
 * Given original device, find the requester ID that will be seen by
 * the IOMMU unit and used for page table lookup.  PCI bridges may take
 * ownership of transactions from downstream devices, so it may not be
 * the same as the BSF of the target device.  In those cases, all
 * devices downstream of the bridge must share a single mapping
 * domain, and must collectively be assigned to use either IOMMU or
 * bounce mapping.
 */
device_t
iommu_get_requester(device_t dev, uint16_t *rid)
{
	devclass_t pci_class;
	device_t l, pci, pcib, pcip, pcibp, requester;
	int cap_offset;
	uint16_t pcie_flags;
	bool bridge_is_pcie;

	pci_class = devclass_find("pci");
	l = requester = dev;

	pci = device_get_parent(dev);
	if (pci == NULL || device_get_devclass(pci) != pci_class) {
		*rid = 0;	/* XXXKIB: Could be ACPI HID */
		return (requester);
	}

	*rid = pci_get_rid(dev);

	/*
	 * Walk the bridge hierarchy from the target device to the
	 * host port to find the translating bridge nearest the IOMMU
	 * unit.
	 */
	for (;;) {
		pci = device_get_parent(l);
		KASSERT(pci != NULL, ("iommu_get_requester(%s): NULL parent "
		    "for %s", device_get_name(dev), device_get_name(l)));
		KASSERT(device_get_devclass(pci) == pci_class,
		    ("iommu_get_requester(%s): non-pci parent %s for %s",
		    device_get_name(dev), device_get_name(pci),
		    device_get_name(l)));

		pcib = device_get_parent(pci);
		KASSERT(pcib != NULL, ("iommu_get_requester(%s): NULL bridge "
		    "for %s", device_get_name(dev), device_get_name(pci)));

		/*
		 * The parent of our "bridge" isn't another PCI bus,
		 * so pcib isn't a PCI->PCI bridge but rather a host
		 * port, and the requester ID won't be translated
		 * further.
		 */
		pcip = device_get_parent(pcib);
		if (device_get_devclass(pcip) != pci_class)
			break;
		pcibp = device_get_parent(pcip);

		if (pci_find_cap(l, PCIY_EXPRESS, &cap_offset) == 0) {
			/*
			 * Do not stop the loop even if the target
			 * device is PCIe, because it is possible (but
			 * unlikely) to have a PCI->PCIe bridge
			 * somewhere in the hierarchy.
			 */
			l = pcib;
		} else {
			/*
			 * The device is not PCIe, so it cannot be
			 * seen as a requester by the IOMMU unit.
			 * Check whether the bridge is PCIe.
			 */
			bridge_is_pcie = pci_find_cap(pcib, PCIY_EXPRESS,
			    &cap_offset) == 0;
			requester = pcib;

			/*
			 * Check for a buggy PCIe/PCI bridge that
			 * doesn't report the express capability.  If
			 * the bridge above it is express but isn't a
			 * PCI bridge, then we know pcib is actually a
			 * PCIe/PCI bridge.
			 */
			if (!bridge_is_pcie && pci_find_cap(pcibp,
			    PCIY_EXPRESS, &cap_offset) == 0) {
				pcie_flags = pci_read_config(pcibp,
				    cap_offset + PCIER_FLAGS, 2);
				if ((pcie_flags & PCIEM_FLAGS_TYPE) !=
				    PCIEM_TYPE_PCI_BRIDGE)
					bridge_is_pcie = true;
			}

			if (bridge_is_pcie) {
				/*
				 * The current device is not PCIe, but
				 * the bridge above it is.  This is a
				 * PCIe->PCI bridge.  Assume that the
				 * requester ID will be the secondary
				 * bus number with slot and function
				 * set to zero.
				 *
				 * XXX: Doesn't handle the case where
				 * the bridge is PCIe->PCI-X, and the
				 * bridge will only take ownership of
				 * requests in some cases.  We should
				 * provide context entries with the
				 * same page tables for taken and
				 * non-taken transactions.
				 */
				*rid = PCI_RID(pci_get_bus(l), 0, 0);
				l = pcibp;
			} else {
				/*
				 * Neither the device nor the bridge
				 * above it are PCIe.  This is a
				 * conventional PCI->PCI bridge, which
				 * will use the bridge's BSF as the
				 * requester ID.
				 */
				*rid = pci_get_rid(pcib);
				l = pcib;
			}
		}
	}
	return (requester);
}

struct iommu_ctx *
iommu_instantiate_ctx(struct iommu_unit *unit, device_t dev, bool rmrr)
{
	device_t requester;
	struct iommu_ctx *ctx;
	bool disabled;
	uint16_t rid;

	requester = iommu_get_requester(dev, &rid);

	/*
	 * If the user requested that the IOMMU be disabled for the
	 * device, we cannot disable the whole IOMMU unit, because
	 * other devices on the same unit may still require
	 * translation.  Instead, provide an identity mapping for the
	 * device context.
	 */
	disabled = iommu_bus_dma_is_dev_disabled(pci_get_domain(requester),
	    pci_get_bus(requester), pci_get_slot(requester),
	    pci_get_function(requester));
	ctx = iommu_get_ctx(unit, requester, rid, disabled, rmrr);
	if (ctx == NULL)
		return (NULL);
	if (disabled) {
		/*
		 * Keep the first reference on context, release the
		 * later refs.
		 */
		IOMMU_LOCK(unit);
		if ((ctx->flags & IOMMU_CTX_DISABLED) == 0) {
			ctx->flags |= IOMMU_CTX_DISABLED;
			IOMMU_UNLOCK(unit);
		} else {
			iommu_free_ctx_locked(unit, ctx);
		}
		ctx = NULL;
	}
	return (ctx);
}

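/*
 * Find the IOMMU unit that translates DMA for the device and
 * instantiate a context on it for the device.  Returns NULL if the
 * device is outside the scope of any unit or if DMA translation is
 * administratively disabled for that unit.
 */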
struct iommu_ctx *
iommu_get_dev_ctx(device_t dev)
{
	struct iommu_unit *unit;

	unit = iommu_find(dev, bootverbose);
	/* Not in scope of any IOMMU? */
	if (unit == NULL)
		return (NULL);
	if (!unit->dma_enabled)
		return (NULL);

	iommu_unit_pre_instantiate_ctx(unit);
	return (iommu_instantiate_ctx(unit, dev, false));
}

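/*
 * Return the busdma tag of the IOMMU context created for the child
 * device, or NULL if the child is not translated by an IOMMU unit.
 */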
bus_dma_tag_t
iommu_get_dma_tag(device_t dev, device_t child)
{
	struct iommu_ctx *ctx;
	bus_dma_tag_t res;

	ctx = iommu_get_dev_ctx(child);
	if (ctx == NULL)
		return (NULL);

	res = (bus_dma_tag_t)ctx->tag;
	return (res);
}

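/*
 * Request that all devices on the bus of the given device share a
 * single IOMMU context.  Only a device at slot 0, function 0 may
 * request buswide contexts; otherwise the request is refused.
 */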
bool
bus_dma_iommu_set_buswide(device_t dev)
{
	struct iommu_unit *unit;
	device_t parent;
	u_int busno, slot, func;

	parent = device_get_parent(dev);
	if (device_get_devclass(parent) != devclass_find("pci"))
		return (false);
	unit = iommu_find(dev, bootverbose);
	if (unit == NULL)
		return (false);
	busno = pci_get_bus(dev);
	slot = pci_get_slot(dev);
	func = pci_get_function(dev);
	if (slot != 0 || func != 0) {
		if (bootverbose) {
			device_printf(dev,
			    "iommu%d pci%d:%d:%d requested buswide busdma\n",
			    unit->unit, busno, slot, func);
		}
		return (false);
	}
	iommu_set_buswide_ctx(unit, busno);
	return (true);
}

void
iommu_set_buswide_ctx(struct iommu_unit *unit, u_int busno)
{

	MPASS(busno <= PCI_BUSMAX);
	IOMMU_LOCK(unit);
	unit->buswide_ctxs[busno / NBBY / sizeof(uint32_t)] |=
	    1 << (busno % (NBBY * sizeof(uint32_t)));
	IOMMU_UNLOCK(unit);
}

bool
iommu_is_buswide_ctx(struct iommu_unit *unit, u_int busno)
{

	MPASS(busno <= PCI_BUSMAX);
	return ((unit->buswide_ctxs[busno / NBBY / sizeof(uint32_t)] &
	    (1U << (busno % (NBBY * sizeof(uint32_t))))) != 0);
}

static MALLOC_DEFINE(M_IOMMU_DMAMAP, "iommu_dmamap", "IOMMU DMA Map");

static void iommu_bus_schedule_dmamap(struct iommu_unit *unit,
    struct bus_dmamap_iommu *map);

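/*
 * Create a child busdma tag.  The new tag inherits the IOMMU context
 * and the owner device from its parent tag; the DMA restrictions are
 * handled by the common busdma tag code.
 */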
static int
iommu_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
    bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
    bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags,
    bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat)
{
	struct bus_dma_tag_iommu *newtag, *oldtag;
	int error;

	*dmat = NULL;
	error = common_bus_dma_tag_create(parent != NULL ?
	    &((struct bus_dma_tag_iommu *)parent)->common : NULL, alignment,
	    boundary, lowaddr, highaddr, maxsize, nsegments, maxsegsz, flags,
	    lockfunc, lockfuncarg, sizeof(struct bus_dma_tag_iommu),
	    (void **)&newtag);
	if (error != 0)
		goto out;

	oldtag = (struct bus_dma_tag_iommu *)parent;
	newtag->common.impl = &bus_dma_iommu_impl;
	newtag->ctx = oldtag->ctx;
	newtag->owner = oldtag->owner;

	*dmat = (bus_dma_tag_t)newtag;
out:
	CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
	    __func__, newtag, (newtag != NULL ? newtag->common.flags : 0),
	    error);
	return (error);
}

static int
iommu_bus_dma_tag_set_domain(bus_dma_tag_t dmat)
{

	return (0);
}

static int
iommu_bus_dma_tag_destroy(bus_dma_tag_t dmat1)
{
	struct bus_dma_tag_iommu *dmat;
	struct iommu_unit *iommu;
	struct iommu_ctx *ctx;
	int error;

	error = 0;
	dmat = (struct bus_dma_tag_iommu *)dmat1;

	if (dmat != NULL) {
		if (dmat->map_count != 0) {
			error = EBUSY;
			goto out;
		}
		ctx = dmat->ctx;
		if (dmat == ctx->tag) {
			iommu = ctx->domain->iommu;
			IOMMU_LOCK(iommu);
			iommu_free_ctx_locked(iommu, dmat->ctx);
		}
		free(dmat->segments, M_IOMMU_DMAMAP);
		free(dmat, M_DEVBUF);
	}
out:
	CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat, error);
	return (error);
}

static bool
iommu_bus_dma_id_mapped(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen)
{

	return (false);
}

static int
iommu_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = malloc_domainset(sizeof(*map), M_IOMMU_DMAMAP,
	    DOMAINSET_PREF(tag->common.domain), M_NOWAIT | M_ZERO);
	if (map == NULL) {
		*mapp = NULL;
		return (ENOMEM);
	}
	if (tag->segments == NULL) {
		tag->segments = malloc_domainset(sizeof(bus_dma_segment_t) *
		    tag->common.nsegments, M_IOMMU_DMAMAP,
		    DOMAINSET_PREF(tag->common.domain), M_NOWAIT);
		if (tag->segments == NULL) {
			free(map, M_IOMMU_DMAMAP);
			*mapp = NULL;
			return (ENOMEM);
		}
	}
	IOMMU_DMAMAP_INIT(map);
	TAILQ_INIT(&map->map_entries);
	map->tag = tag;
	map->locked = true;
	map->cansleep = false;
	tag->map_count++;
	*mapp = (bus_dmamap_t)map;

	return (0);
}

static int
iommu_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	if (map != NULL) {
		IOMMU_DMAMAP_LOCK(map);
		if (!TAILQ_EMPTY(&map->map_entries)) {
			IOMMU_DMAMAP_UNLOCK(map);
			return (EBUSY);
		}
		IOMMU_DMAMAP_DESTROY(map);
		free(map, M_IOMMU_DMAMAP);
	}
	tag->map_count--;
	return (0);
}

static int
iommu_bus_dmamem_alloc(bus_dma_tag_t dmat, void **vaddr, int flags,
    bus_dmamap_t *mapp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	int error, mflags;
	vm_memattr_t attr;

	error = iommu_bus_dmamap_create(dmat, flags, mapp);
	if (error != 0)
		return (error);

	mflags = (flags & BUS_DMA_NOWAIT) != 0 ? M_NOWAIT : M_WAITOK;
	mflags |= (flags & BUS_DMA_ZERO) != 0 ? M_ZERO : 0;
	attr = (flags & BUS_DMA_NOCACHE) != 0 ? VM_MEMATTR_UNCACHEABLE :
	    VM_MEMATTR_DEFAULT;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)*mapp;

	if (tag->common.maxsize < PAGE_SIZE &&
	    tag->common.alignment <= tag->common.maxsize &&
	    attr == VM_MEMATTR_DEFAULT) {
		*vaddr = malloc_domainset(tag->common.maxsize, M_DEVBUF,
		    DOMAINSET_PREF(tag->common.domain), mflags);
		map->flags |= BUS_DMAMAP_IOMMU_MALLOC;
	} else {
		*vaddr = kmem_alloc_attr_domainset(
		    DOMAINSET_PREF(tag->common.domain), tag->common.maxsize,
		    mflags, 0ul, BUS_SPACE_MAXADDR, attr);
		map->flags |= BUS_DMAMAP_IOMMU_KMEM_ALLOC;
	}
	if (*vaddr == NULL) {
		iommu_bus_dmamap_destroy(dmat, *mapp);
		*mapp = NULL;
		return (ENOMEM);
	}
	return (0);
}

static void
iommu_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map1)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;

	if ((map->flags & BUS_DMAMAP_IOMMU_MALLOC) != 0) {
		free(vaddr, M_DEVBUF);
		map->flags &= ~BUS_DMAMAP_IOMMU_MALLOC;
	} else {
		KASSERT((map->flags & BUS_DMAMAP_IOMMU_KMEM_ALLOC) != 0,
		    ("iommu_bus_dmamem_free for non alloced map %p", map));
		kmem_free(vaddr, tag->common.maxsize);
		map->flags &= ~BUS_DMAMAP_IOMMU_KMEM_ALLOC;
	}

	iommu_bus_dmamap_destroy(dmat, map1);
}

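/*
 * Carve the buffer described by the vm_page array into busdma
 * segments, allocating device address space and creating the page
 * table entries through iommu_gas_map() for each segment.  The new
 * map entries are collected on the "entries" list; on error the
 * caller is responsible for unloading them.
 */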
static int
iommu_bus_dmamap_load_something1(struct bus_dma_tag_iommu *tag,
    struct bus_dmamap_iommu *map, vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp,
    struct iommu_map_entries_tailq *entries)
{
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entry *entry;
	bus_size_t buflen1;
	int error, e_flags, idx, gas_flags, seg;

	KASSERT(offset < IOMMU_PAGE_SIZE, ("offset %d", offset));
	if (segs == NULL)
		segs = tag->segments;
	ctx = tag->ctx;
	domain = ctx->domain;
	e_flags = IOMMU_MAP_ENTRY_READ |
	    ((flags & BUS_DMA_NOWRITE) == 0 ? IOMMU_MAP_ENTRY_WRITE : 0);
	seg = *segp;
	error = 0;
	idx = 0;
	while (buflen > 0) {
		seg++;
		if (seg >= tag->common.nsegments) {
			error = EFBIG;
			break;
		}
		buflen1 = buflen > tag->common.maxsegsz ?
		    tag->common.maxsegsz : buflen;

		/*
		 * (Too) optimistically allow a split if there is more
		 * than one segment left.
		 */
		gas_flags = map->cansleep ? IOMMU_MF_CANWAIT : 0;
		if (seg + 1 < tag->common.nsegments)
			gas_flags |= IOMMU_MF_CANSPLIT;

		error = iommu_gas_map(domain, &tag->common, buflen1,
		    offset, e_flags, gas_flags, ma + idx, &entry);
		if (error != 0)
			break;
		/* Update buflen1 in case buffer split. */
		if (buflen1 > entry->end - entry->start - offset)
			buflen1 = entry->end - entry->start - offset;

		KASSERT(vm_addr_align_ok(entry->start + offset,
		    tag->common.alignment),
		    ("alignment failed: ctx %p start 0x%jx offset %x "
		    "align 0x%jx", ctx, (uintmax_t)entry->start, offset,
		    (uintmax_t)tag->common.alignment));
		KASSERT(entry->end <= tag->common.lowaddr ||
		    entry->start >= tag->common.highaddr,
		    ("entry placement failed: ctx %p start 0x%jx end 0x%jx "
		    "lowaddr 0x%jx highaddr 0x%jx", ctx,
		    (uintmax_t)entry->start, (uintmax_t)entry->end,
		    (uintmax_t)tag->common.lowaddr,
		    (uintmax_t)tag->common.highaddr));
		KASSERT(vm_addr_bound_ok(entry->start + offset, buflen1,
		    tag->common.boundary),
		    ("boundary failed: ctx %p start 0x%jx end 0x%jx "
		    "boundary 0x%jx", ctx, (uintmax_t)entry->start,
		    (uintmax_t)entry->end, (uintmax_t)tag->common.boundary));
		KASSERT(buflen1 <= tag->common.maxsegsz,
		    ("segment too large: ctx %p start 0x%jx end 0x%jx "
		    "buflen1 0x%jx maxsegsz 0x%jx", ctx,
		    (uintmax_t)entry->start, (uintmax_t)entry->end,
		    (uintmax_t)buflen1, (uintmax_t)tag->common.maxsegsz));

		KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0,
		    ("entry %p missing IOMMU_MAP_ENTRY_MAP", entry));
		TAILQ_INSERT_TAIL(entries, entry, dmamap_link);

		segs[seg].ds_addr = entry->start + offset;
		segs[seg].ds_len = buflen1;

		idx += OFF_TO_IDX(offset + buflen1);
		offset += buflen1;
		offset &= IOMMU_PAGE_MASK;
		buflen -= buflen1;
	}
	if (error == 0)
		*segp = seg;
	return (error);
}

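/*
 * Map the buffer and, on success, attach the new entries to the map.
 * Since busdma cannot report a partial load, on failure the partially
 * constructed entries are handed to the domain unload task instead.
 * An ENOMEM failure in a non-sleepable context with BUS_DMA_WAITOK is
 * converted into a deferred load on the unit taskqueue.
 */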
static int
iommu_bus_dmamap_load_something(struct bus_dma_tag_iommu *tag,
    struct bus_dmamap_iommu *map, vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp)
{
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entries_tailq entries;
	int error;

	ctx = tag->ctx;
	domain = ctx->domain;
	atomic_add_long(&ctx->loads, 1);

	TAILQ_INIT(&entries);
	error = iommu_bus_dmamap_load_something1(tag, map, ma, offset,
	    buflen, flags, segs, segp, &entries);
	if (error == 0) {
		IOMMU_DMAMAP_LOCK(map);
		TAILQ_CONCAT(&map->map_entries, &entries, dmamap_link);
		IOMMU_DMAMAP_UNLOCK(map);
	} else if (!TAILQ_EMPTY(&entries)) {
		/*
		 * The busdma interface does not allow us to report
		 * partial buffer load, so unfortunately we have to
		 * revert all work done.
		 */
		IOMMU_DOMAIN_LOCK(domain);
		TAILQ_CONCAT(&domain->unload_entries, &entries, dmamap_link);
		IOMMU_DOMAIN_UNLOCK(domain);
		taskqueue_enqueue(domain->iommu->delayed_taskqueue,
		    &domain->unload_task);
	}

	if (error == ENOMEM && (flags & BUS_DMA_NOWAIT) == 0 &&
	    !map->cansleep)
		error = EINPROGRESS;
	if (error == EINPROGRESS)
		iommu_bus_schedule_dmamap(domain->iommu, map);
	return (error);
}

static int
iommu_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map1,
    struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
    bus_dma_segment_t *segs, int *segp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	return (iommu_bus_dmamap_load_something(tag, map, ma, ma_offs, tlen,
	    flags, segs, segp));
}

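/*
 * Load a physically contiguous buffer.  Build a vm_page array
 * covering the range, substituting fake pages for addresses that are
 * not backed by real vm_page structures, and map it.
 */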
static int
iommu_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map1,
    vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	vm_page_t *ma, fma;
	vm_paddr_t pstart, pend, paddr;
	int error, i, ma_cnt, mflags, offset;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	pstart = trunc_page(buf);
	pend = round_page(buf + buflen);
	offset = buf & PAGE_MASK;
	ma_cnt = OFF_TO_IDX(pend - pstart);
	mflags = map->cansleep ? M_WAITOK : M_NOWAIT;
	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, mflags);
	if (ma == NULL)
		return (ENOMEM);
	fma = NULL;
	for (i = 0; i < ma_cnt; i++) {
		paddr = pstart + ptoa(i);
		ma[i] = PHYS_TO_VM_PAGE(paddr);
		if (ma[i] == NULL || VM_PAGE_TO_PHYS(ma[i]) != paddr) {
			/*
			 * If PHYS_TO_VM_PAGE() returned NULL or the
			 * vm_page was not initialized we'll use a
			 * fake page.
			 */
			if (fma == NULL) {
				fma = malloc(sizeof(struct vm_page) * ma_cnt,
				    M_DEVBUF, M_ZERO | mflags);
				if (fma == NULL) {
					free(ma, M_DEVBUF);
					return (ENOMEM);
				}
			}
			vm_page_initfake(&fma[i], pstart + ptoa(i),
			    VM_MEMATTR_DEFAULT);
			ma[i] = &fma[i];
		}
	}
	error = iommu_bus_dmamap_load_something(tag, map, ma, offset, buflen,
	    flags, segs, segp);
	free(fma, M_DEVBUF);
	free(ma, M_DEVBUF);
	return (error);
}

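/*
 * Load a KVA or user buffer.  Translate each page of the buffer
 * through the pmap, again falling back to fake pages where needed,
 * and map the result.
 */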
static int
iommu_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map1, void *buf,
    bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	vm_page_t *ma, fma;
	vm_paddr_t pstart, pend, paddr;
	int error, i, ma_cnt, mflags, offset;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	pstart = trunc_page((vm_offset_t)buf);
	pend = round_page((vm_offset_t)buf + buflen);
	offset = (vm_offset_t)buf & PAGE_MASK;
	ma_cnt = OFF_TO_IDX(pend - pstart);
	mflags = map->cansleep ? M_WAITOK : M_NOWAIT;
	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, mflags);
	if (ma == NULL)
		return (ENOMEM);
	fma = NULL;
	for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) {
		if (pmap == kernel_pmap)
			paddr = pmap_kextract(pstart);
		else
			paddr = pmap_extract(pmap, pstart);
		ma[i] = PHYS_TO_VM_PAGE(paddr);
		if (ma[i] == NULL || VM_PAGE_TO_PHYS(ma[i]) != paddr) {
			/*
			 * If PHYS_TO_VM_PAGE() returned NULL or the
			 * vm_page was not initialized we'll use a
			 * fake page.
			 */
			if (fma == NULL) {
				fma = malloc(sizeof(struct vm_page) * ma_cnt,
				    M_DEVBUF, M_ZERO | mflags);
				if (fma == NULL) {
					free(ma, M_DEVBUF);
					return (ENOMEM);
				}
			}
			vm_page_initfake(&fma[i], paddr, VM_MEMATTR_DEFAULT);
			ma[i] = &fma[i];
		}
	}
	error = iommu_bus_dmamap_load_something(tag, map, ma, offset, buflen,
	    flags, segs, segp);
	free(ma, M_DEVBUF);
	free(fma, M_DEVBUF);
	return (error);
}

static void
iommu_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map1,
    struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg)
{
	struct bus_dmamap_iommu *map;

	if (map1 == NULL)
		return;
	map = (struct bus_dmamap_iommu *)map1;
	map->mem = *mem;
	map->tag = (struct bus_dma_tag_iommu *)dmat;
	map->callback = callback;
	map->callback_arg = callback_arg;
}

static bus_dma_segment_t *
iommu_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map1,
    bus_dma_segment_t *segs, int nsegs, int error)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;

	if (!map->locked) {
		KASSERT(map->cansleep,
		    ("map not locked and not sleepable context %p", map));

		/*
		 * We are called from the delayed context.  Relock the
		 * driver.
		 */
		(tag->common.lockfunc)(tag->common.lockfuncarg, BUS_DMA_LOCK);
		map->locked = true;
	}

	if (segs == NULL)
		segs = tag->segments;
	return (segs);
}

/*
 * The limitations of the busdma KPI force the IOMMU to perform the
 * actual unload, i.e. the removal of the map entries from the page
 * tables, from a delayed context on i386, since mapping a page table
 * page may require a sleep to succeed.  The unfortunate consequence
 * is that DMA requests can still be served for some time after the
 * bus_dmamap_unload() call has returned.
 *
 * On amd64, we assume that sf allocation cannot fail.
 */
static void
iommu_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entries_tailq entries;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	ctx = tag->ctx;
	domain = ctx->domain;
	atomic_add_long(&ctx->unloads, 1);

	TAILQ_INIT(&entries);
	IOMMU_DMAMAP_LOCK(map);
	TAILQ_CONCAT(&entries, &map->map_entries, dmamap_link);
	IOMMU_DMAMAP_UNLOCK(map);
#if defined(IOMMU_DOMAIN_UNLOAD_SLEEP)
	IOMMU_DOMAIN_LOCK(domain);
	TAILQ_CONCAT(&domain->unload_entries, &entries, dmamap_link);
	IOMMU_DOMAIN_UNLOCK(domain);
	taskqueue_enqueue(domain->iommu->delayed_taskqueue,
	    &domain->unload_task);
#else
	THREAD_NO_SLEEPING();
	iommu_domain_unload(domain, &entries, false);
	THREAD_SLEEPING_OK();
	KASSERT(TAILQ_EMPTY(&entries), ("lazy iommu_ctx_unload %p", ctx));
#endif
}

static void
iommu_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map1,
    bus_dmasync_op_t op)
{
	struct bus_dmamap_iommu *map __unused;

	map = (struct bus_dmamap_iommu *)map1;
	kmsan_bus_dmamap_sync(&map->kmsan_mem, op);
}

#ifdef KMSAN
static void
iommu_bus_dmamap_load_kmsan(bus_dmamap_t map1, struct memdesc *mem)
{
	struct bus_dmamap_iommu *map;

	map = (struct bus_dmamap_iommu *)map1;
	if (map == NULL)
		return;
	memcpy(&map->kmsan_mem, mem, sizeof(struct memdesc));
}
#endif

struct bus_dma_impl bus_dma_iommu_impl = {
	.tag_create = iommu_bus_dma_tag_create,
	.tag_destroy = iommu_bus_dma_tag_destroy,
	.tag_set_domain = iommu_bus_dma_tag_set_domain,
	.id_mapped = iommu_bus_dma_id_mapped,
	.map_create = iommu_bus_dmamap_create,
	.map_destroy = iommu_bus_dmamap_destroy,
	.mem_alloc = iommu_bus_dmamem_alloc,
	.mem_free = iommu_bus_dmamem_free,
	.load_phys = iommu_bus_dmamap_load_phys,
	.load_buffer = iommu_bus_dmamap_load_buffer,
	.load_ma = iommu_bus_dmamap_load_ma,
	.map_waitok = iommu_bus_dmamap_waitok,
	.map_complete = iommu_bus_dmamap_complete,
	.map_unload = iommu_bus_dmamap_unload,
	.map_sync = iommu_bus_dmamap_sync,
#ifdef KMSAN
	.load_kmsan = iommu_bus_dmamap_load_kmsan,
#endif
};

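/*
 * Taskqueue handler which retries deferred map loads in a context
 * that is allowed to sleep.  When a load completes,
 * iommu_bus_dmamap_complete() re-acquires the driver lock on our
 * behalf, so drop it again before picking up the next deferred map.
 */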
static void
iommu_bus_task_dmamap(void *arg, int pending)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	struct iommu_unit *unit;

	unit = arg;
	IOMMU_LOCK(unit);
	while ((map = TAILQ_FIRST(&unit->delayed_maps)) != NULL) {
		TAILQ_REMOVE(&unit->delayed_maps, map, delay_link);
		IOMMU_UNLOCK(unit);
		tag = map->tag;
		map->cansleep = true;
		map->locked = false;
		bus_dmamap_load_mem((bus_dma_tag_t)tag, (bus_dmamap_t)map,
		    &map->mem, map->callback, map->callback_arg,
		    BUS_DMA_WAITOK);
		map->cansleep = false;
		if (map->locked) {
			(tag->common.lockfunc)(tag->common.lockfuncarg,
			    BUS_DMA_UNLOCK);
		} else
			map->locked = true;
		map->cansleep = false;
		IOMMU_LOCK(unit);
	}
	IOMMU_UNLOCK(unit);
}

static void
iommu_bus_schedule_dmamap(struct iommu_unit *unit, struct bus_dmamap_iommu *map)
{

	map->locked = false;
	IOMMU_LOCK(unit);
	TAILQ_INSERT_TAIL(&unit->delayed_maps, map, delay_link);
	IOMMU_UNLOCK(unit);
	taskqueue_enqueue(unit->delayed_taskqueue, &unit->dmamap_load_task);
}

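/*
 * Per-unit busdma initialization: fetch the hw.iommu.dma tunable (or
 * the legacy hw.dmar.dma name), export it as a read-only sysctl, and
 * create the taskqueue used for deferred map loads and delayed
 * unloads.
 */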
int
iommu_init_busdma(struct iommu_unit *unit)
{
	int error;

	unit->dma_enabled = 0;
	error = TUNABLE_INT_FETCH("hw.iommu.dma", &unit->dma_enabled);
	if (error == 0) /* compatibility */
		TUNABLE_INT_FETCH("hw.dmar.dma", &unit->dma_enabled);
	SYSCTL_ADD_INT(&unit->sysctl_ctx,
	    SYSCTL_CHILDREN(device_get_sysctl_tree(unit->dev)),
	    OID_AUTO, "dma", CTLFLAG_RD, &unit->dma_enabled, 0,
	    "DMA ops enabled");
	TAILQ_INIT(&unit->delayed_maps);
	TASK_INIT(&unit->dmamap_load_task, 0, iommu_bus_task_dmamap, unit);
	unit->delayed_taskqueue = taskqueue_create("iommu", M_WAITOK,
	    taskqueue_thread_enqueue, &unit->delayed_taskqueue);
	taskqueue_start_threads(&unit->delayed_taskqueue, 1, PI_DISK,
	    "iommu%d busdma taskq", unit->unit);
	return (0);
}

void
iommu_fini_busdma(struct iommu_unit *unit)
{

	if (unit->delayed_taskqueue == NULL)
		return;

	taskqueue_drain(unit->delayed_taskqueue, &unit->dmamap_load_task);
	taskqueue_free(unit->delayed_taskqueue);
	unit->delayed_taskqueue = NULL;
}

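/*
 * Pre-load an identity (DVA == PA) mapping of the physical range
 * [start, start + length) into the map, so that the device can reach
 * the range at its physical address while translation is enabled.
 * A no-op for tags that are not backed by the IOMMU implementation.
 */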
int
bus_dma_iommu_load_ident(bus_dma_tag_t dmat, bus_dmamap_t map1,
    vm_paddr_t start, vm_size_t length, int flags)
{
	struct bus_dma_tag_common *tc;
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entry *entry;
	vm_page_t *ma;
	vm_size_t i;
	int error;
	bool waitok;

	MPASS((start & PAGE_MASK) == 0);
	MPASS((length & PAGE_MASK) == 0);
	MPASS(length > 0);
	MPASS(start + length >= start);
	MPASS((flags & ~(BUS_DMA_NOWAIT | BUS_DMA_NOWRITE)) == 0);

	tc = (struct bus_dma_tag_common *)dmat;
	if (tc->impl != &bus_dma_iommu_impl)
		return (0);

	tag = (struct bus_dma_tag_iommu *)dmat;
	ctx = tag->ctx;
	domain = ctx->domain;
	map = (struct bus_dmamap_iommu *)map1;
	waitok = (flags & BUS_DMA_NOWAIT) != 0;

	entry = iommu_gas_alloc_entry(domain, waitok ? 0 : IOMMU_PGF_WAITOK);
	if (entry == NULL)
		return (ENOMEM);
	entry->start = start;
	entry->end = start + length;
	ma = malloc(sizeof(vm_page_t) * atop(length), M_TEMP, waitok ?
	    M_WAITOK : M_NOWAIT);
	if (ma == NULL) {
		iommu_gas_free_entry(entry);
		return (ENOMEM);
	}
	for (i = 0; i < atop(length); i++) {
		ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
		    VM_MEMATTR_DEFAULT);
	}
	error = iommu_gas_map_region(domain, entry, IOMMU_MAP_ENTRY_READ |
	    ((flags & BUS_DMA_NOWRITE) ? 0 : IOMMU_MAP_ENTRY_WRITE) |
	    IOMMU_MAP_ENTRY_MAP, waitok ? IOMMU_MF_CANWAIT : 0, ma);
	if (error == 0) {
		IOMMU_DMAMAP_LOCK(map);
		TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
		IOMMU_DMAMAP_UNLOCK(map);
	} else {
		iommu_gas_free_entry(entry);
	}
	for (i = 0; i < atop(length); i++)
		vm_page_putfake(ma[i]);
	free(ma, M_TEMP);
	return (error);
}

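/*
 * Taskqueue handler draining the domain unload queue: repeatedly
 * grab the accumulated map entries and unmap and free them, this
 * time allowing the page table operations to sleep.
 */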
static void
iommu_domain_unload_task(void *arg, int pending)
{
	struct iommu_domain *domain;
	struct iommu_map_entries_tailq entries;

	domain = arg;
	TAILQ_INIT(&entries);

	for (;;) {
		IOMMU_DOMAIN_LOCK(domain);
		TAILQ_SWAP(&domain->unload_entries, &entries,
		    iommu_map_entry, dmamap_link);
		IOMMU_DOMAIN_UNLOCK(domain);
		if (TAILQ_EMPTY(&entries))
			break;
		iommu_domain_unload(domain, &entries, true);
	}
}

void
iommu_domain_init(struct iommu_unit *unit, struct iommu_domain *domain,
    const struct iommu_domain_map_ops *ops)
{

	domain->ops = ops;
	domain->iommu = unit;

	TASK_INIT(&domain->unload_task, 0, iommu_domain_unload_task, domain);
	RB_INIT(&domain->rb_root);
	TAILQ_INIT(&domain->unload_entries);
	mtx_init(&domain->lock, "iodom", NULL, MTX_DEF);
}

void
iommu_domain_fini(struct iommu_domain *domain)
{

	mtx_destroy(&domain->lock);
}