/*-
 * Copyright (c) 1997, 1998 Justin T. Gibbs.
 * Copyright (c) 2015-2016 The FreeBSD Foundation
 * All rights reserved.
 *
 * Portions of this software were developed by Andrew Turner
 * under sponsorship of the FreeBSD Foundation.
 *
 * Portions of this software were developed by Semihalf
 * under sponsorship of the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions, and the following disclaimer,
 *    without modification, immediately at the beginning of the file.
 * 2. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domainset.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/memdesc.h>
#include <sys/msan.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/uio.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>

#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <arm64/include/bus_dma_impl.h>

#define	MAX_BPAGES 4096

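/*
 * Per-tag bounce_flags values.  BF_COULD_BOUNCE and BF_MIN_ALLOC_COMP track
 * bounce-zone state, the BF_KMEM_ALLOC_* bits record which allocator backed
 * bus_dmamem_alloc(), and BF_COHERENT marks tags whose memory needs no cache
 * maintenance.
 */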
enum {
	BF_COULD_BOUNCE		= 0x01,
	BF_MIN_ALLOC_COMP	= 0x02,
	BF_KMEM_ALLOC_PAGES	= 0x04,
	BF_KMEM_ALLOC_CONTIG	= 0x08,
	BF_KMEM_ALLOC		= BF_KMEM_ALLOC_PAGES | BF_KMEM_ALLOC_CONTIG,
	BF_COHERENT		= 0x10,
};

struct bounce_page;
struct bounce_zone;

struct bus_dma_tag {
	struct bus_dma_tag_common common;
	size_t			alloc_size;
	size_t			alloc_alignment;
	int			map_count;
	int			bounce_flags;
	bus_dma_segment_t	*segments;
	struct bounce_zone	*bounce_zone;
};

static SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "Busdma parameters");

struct sync_list {
	vm_offset_t	vaddr;		/* kva of client data */
	bus_addr_t	paddr;		/* physical address */
	vm_page_t	pages;		/* starting page of client data */
	bus_size_t	datacount;	/* client data count */
};

struct bus_dmamap {
	STAILQ_HEAD(, bounce_page) bpages;
	int		pagesneeded;
	int		pagesreserved;
	bus_dma_tag_t	dmat;
	struct memdesc	mem;
	bus_dmamap_callback_t *callback;
	void		*callback_arg;
	__sbintime_t	queued_time;
	STAILQ_ENTRY(bus_dmamap) links;
	u_int		flags;
#define	DMAMAP_COHERENT		(1 << 0)
#define	DMAMAP_FROM_DMAMEM	(1 << 1)
#define	DMAMAP_MBUF		(1 << 2)
	int		sync_count;
#ifdef KMSAN
	struct memdesc	kmsan_mem;
#endif
	struct sync_list slist[];
};

static bool _bus_dmamap_pagesneeded(bus_dma_tag_t dmat, bus_dmamap_t map,
    vm_paddr_t buf, bus_size_t buflen, int *pagesneeded);
static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map,
    pmap_t pmap, void *buf, bus_size_t buflen, int flags);
static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map,
    vm_paddr_t buf, bus_size_t buflen, int flags);

static MALLOC_DEFINE(M_BUSDMA, "busdma", "busdma metadata");

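/*
 * Accessors for bus_dma_tag fields; these are consumed by the shared
 * bounce-page code in subr_busdma_bounce.c, which is included directly below.
 */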
#define	dmat_alignment(dmat)	((dmat)->common.alignment)
#define	dmat_bounce_flags(dmat)	((dmat)->bounce_flags)
#define	dmat_boundary(dmat)	((dmat)->common.boundary)
#define	dmat_domain(dmat)	((dmat)->common.domain)
#define	dmat_flags(dmat)	((dmat)->common.flags)
#define	dmat_highaddr(dmat)	((dmat)->common.highaddr)
#define	dmat_lowaddr(dmat)	((dmat)->common.lowaddr)
#define	dmat_lockfunc(dmat)	((dmat)->common.lockfunc)
#define	dmat_lockfuncarg(dmat)	((dmat)->common.lockfuncarg)
#define	dmat_maxsegsz(dmat)	((dmat)->common.maxsegsz)
#define	dmat_nsegments(dmat)	((dmat)->common.nsegments)

#include "../../kern/subr_busdma_bounce.c"

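/*
 * Create the tag's bounce zone if it does not exist yet and grow it until it
 * can cover the tag's maximum transfer size.
 */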
static int
bounce_bus_dma_zone_setup(bus_dma_tag_t dmat)
{
	struct bounce_zone *bz;
	bus_size_t maxsize;
	int error;

	/*
	 * Round size up to a full page, and add one more page because
	 * there can always be one more boundary crossing than the
	 * number of pages in a transfer.
	 */
	maxsize = roundup2(dmat->common.maxsize, PAGE_SIZE) + PAGE_SIZE;

	/* Must bounce */
	if ((error = alloc_bounce_zone(dmat)) != 0)
		return (error);
	bz = dmat->bounce_zone;

	if (ptoa(bz->total_bpages) < maxsize) {
		int pages;

		pages = atop(maxsize) + 1 - bz->total_bpages;

		/* Add pages to our bounce pool */
		if (alloc_bounce_pages(dmat, pages) < pages)
			return (ENOMEM);
	}
	/* Performed initial allocation */
	dmat->bounce_flags |= BF_MIN_ALLOC_COMP;

	return (error);
}

/*
 * Return true if the DMA should bounce because the start or end does not fall
 * on a cacheline boundary (which would require a partial cacheline flush).
 * COHERENT memory doesn't trigger cacheline flushes.  Memory allocated by
 * bus_dmamem_alloc() is always aligned to cacheline boundaries, and there's a
 * strict rule that such memory cannot be accessed by the CPU while DMA is in
 * progress (or by multiple DMA engines at once), so that it's always safe to do
 * full cacheline flushes even if that affects memory outside the range of a
 * given DMA operation that doesn't involve the full allocated buffer.  If we're
 * mapping an mbuf, that follows the same rules as a buffer we allocated.
 */
static bool
cacheline_bounce(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t paddr,
    bus_size_t size)
{

#define	DMAMAP_CACHELINE_FLAGS	\
    (DMAMAP_FROM_DMAMEM | DMAMAP_COHERENT | DMAMAP_MBUF)
	if ((dmat->bounce_flags & BF_COHERENT) != 0)
		return (false);
	if (map != NULL && (map->flags & DMAMAP_CACHELINE_FLAGS) != 0)
		return (false);
	return (((paddr | size) & (dcache_line_size - 1)) != 0);
#undef DMAMAP_CACHELINE_FLAGS
}

/*
 * Return true if the given address does not fall on the alignment boundary.
 */
static bool
alignment_bounce(bus_dma_tag_t dmat, bus_addr_t addr)
{

	return (!vm_addr_align_ok(addr, dmat->common.alignment));
}

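/*
 * Return true if the buffer might need to bounce for any reason: the tag has
 * bouncing constraints, the transfer is not cacheline-aligned, or the address
 * violates the tag's alignment.  This is a cheap, conservative check used
 * before counting and reserving bounce pages.
 */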
static bool
might_bounce(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t paddr,
    bus_size_t size)
{

	/* Memory allocated by bounce_bus_dmamem_alloc won't bounce */
	if (map && (map->flags & DMAMAP_FROM_DMAMEM) != 0)
		return (false);

	if ((dmat->bounce_flags & BF_COULD_BOUNCE) != 0)
		return (true);

	if (cacheline_bounce(dmat, map, paddr, size))
		return (true);

	if (alignment_bounce(dmat, paddr))
		return (true);

	return (false);
}

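/*
 * Return true if this specific physical range has to be bounced, either for
 * cacheline alignment or because it falls in the tag's exclusion window.
 */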
static bool
must_bounce(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t paddr,
    bus_size_t size)
{

	if (cacheline_bounce(dmat, map, paddr, size))
		return (true);

	if ((dmat->bounce_flags & BF_COULD_BOUNCE) != 0 &&
	    addr_needs_bounce(dmat, paddr))
		return (true);

	return (false);
}

/*
 * Allocate a device specific dma_tag.
 */
static int
bounce_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
    bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
    bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags,
    bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat)
{
	bus_dma_tag_t newtag;
	int error;

	*dmat = NULL;
	error = common_bus_dma_tag_create(parent != NULL ? &parent->common :
	    NULL, alignment, boundary, lowaddr, highaddr, maxsize, nsegments,
	    maxsegsz, flags, lockfunc, lockfuncarg,
	    sizeof (struct bus_dma_tag), (void **)&newtag);
	if (error != 0)
		return (error);

	newtag->common.impl = &bus_dma_bounce_impl;
	newtag->map_count = 0;
	newtag->segments = NULL;

	if ((flags & BUS_DMA_COHERENT) != 0) {
		newtag->bounce_flags |= BF_COHERENT;
	}

	if (parent != NULL) {
		if ((parent->bounce_flags & BF_COULD_BOUNCE) != 0)
			newtag->bounce_flags |= BF_COULD_BOUNCE;

		/* Copy some flags from the parent */
		newtag->bounce_flags |= parent->bounce_flags & BF_COHERENT;
	}

	if ((newtag->bounce_flags & BF_COHERENT) != 0) {
		newtag->alloc_alignment = newtag->common.alignment;
		newtag->alloc_size = newtag->common.maxsize;
	} else {
		/*
		 * Ensure the buffer is aligned to a cacheline when allocating
		 * a non-coherent buffer.  This is so no data that another CPU
		 * may be accessing shares a cacheline with the DMA buffer,
		 * which could leave the cache dirty around it.
		 */
		newtag->alloc_alignment = MAX(newtag->common.alignment,
		    dcache_line_size);
		newtag->alloc_size = roundup2(newtag->common.maxsize,
		    dcache_line_size);
	}

	if (newtag->common.lowaddr < ptoa((vm_paddr_t)Maxmem) ||
	    newtag->common.alignment > 1)
		newtag->bounce_flags |= BF_COULD_BOUNCE;

	if ((flags & BUS_DMA_ALLOCNOW) != 0)
		error = bounce_bus_dma_zone_setup(newtag);
	else
		error = 0;

	if (error != 0)
		free(newtag, M_DEVBUF);
	else
		*dmat = newtag;
	CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
	    __func__, newtag, (newtag != NULL ? newtag->common.flags : 0),
	    error);
	return (error);
}

static int
bounce_bus_dma_tag_destroy(bus_dma_tag_t dmat)
{
	int error = 0;

	if (dmat != NULL) {
		if (dmat->map_count != 0) {
			error = EBUSY;
			goto out;
		}
		if (dmat->segments != NULL)
			free(dmat->segments, M_DEVBUF);
		free(dmat, M_DEVBUF);
	}
out:
	CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat, error);
	return (error);
}

/*
 * Update the domain for the tag.  We may need to reallocate the zone and
 * bounce pages.
 */
static int
bounce_bus_dma_tag_set_domain(bus_dma_tag_t dmat)
{

	KASSERT(dmat->map_count == 0,
	    ("bounce_bus_dma_tag_set_domain: Domain set after use.\n"));
	if ((dmat->bounce_flags & BF_COULD_BOUNCE) == 0 ||
	    dmat->bounce_zone == NULL)
		return (0);
	dmat->bounce_flags &= ~BF_MIN_ALLOC_COMP;
	return (bounce_bus_dma_zone_setup(dmat));
}

static bool
bounce_bus_dma_id_mapped(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen)
{

	if (!might_bounce(dmat, NULL, buf, buflen))
		return (true);
	return (!_bus_dmamap_pagesneeded(dmat, NULL, buf, buflen, NULL));
}

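/*
 * Allocate a map with enough trailing sync_list slots for the tag's maximum
 * number of segments, preferring memory from the tag's NUMA domain.
 */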
static bus_dmamap_t
alloc_dmamap(bus_dma_tag_t dmat, int flags)
{
	u_long mapsize;
	bus_dmamap_t map;

	mapsize = sizeof(*map);
	mapsize += sizeof(struct sync_list) * dmat->common.nsegments;
	map = malloc_domainset(mapsize, M_DEVBUF,
	    DOMAINSET_PREF(dmat->common.domain), flags | M_ZERO);
	if (map == NULL)
		return (NULL);

	/* Initialize the new map */
	STAILQ_INIT(&map->bpages);

	return (map);
}

/*
 * Allocate a handle for mapping from kva/uva/physical
 * address space into bus device space.
 */
static int
bounce_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
{
	struct bounce_zone *bz;
	int error, maxpages, pages;

	error = 0;

	if (dmat->segments == NULL) {
		dmat->segments = mallocarray_domainset(dmat->common.nsegments,
		    sizeof(bus_dma_segment_t), M_DEVBUF,
		    DOMAINSET_PREF(dmat->common.domain), M_NOWAIT);
		if (dmat->segments == NULL) {
			CTR3(KTR_BUSDMA, "%s: tag %p error %d",
			    __func__, dmat, ENOMEM);
			return (ENOMEM);
		}
	}

	*mapp = alloc_dmamap(dmat, M_NOWAIT);
	if (*mapp == NULL) {
		CTR3(KTR_BUSDMA, "%s: tag %p error %d",
		    __func__, dmat, ENOMEM);
		return (ENOMEM);
	}

	/*
	 * Bouncing might be required if the driver asks for an active
	 * exclusion region, a data alignment that is stricter than 1, and/or
	 * an active address boundary.
	 */
	if (dmat->bounce_zone == NULL) {
		if ((error = alloc_bounce_zone(dmat)) != 0) {
			free(*mapp, M_DEVBUF);
			return (error);
		}
	}
	bz = dmat->bounce_zone;

	/*
	 * Attempt to add pages to our pool on a per-instance basis up to a
	 * sane limit.  Even if the tag isn't subject to bouncing due to
	 * alignment and boundary constraints, it could still auto-bounce due
	 * to cacheline alignment, which requires at most two bounce pages.
	 */
	if (dmat->common.alignment > 1)
		maxpages = MAX_BPAGES;
	else
		maxpages = MIN(MAX_BPAGES, Maxmem -
		    atop(dmat->common.lowaddr));
	if ((dmat->bounce_flags & BF_MIN_ALLOC_COMP) == 0 ||
	    (bz->map_count > 0 && bz->total_bpages < maxpages)) {
		pages = atop(roundup2(dmat->common.maxsize, PAGE_SIZE)) + 1;
		pages = MIN(maxpages - bz->total_bpages, pages);
		pages = MAX(pages, 2);
		if (alloc_bounce_pages(dmat, pages) < pages)
			error = ENOMEM;
		if ((dmat->bounce_flags & BF_MIN_ALLOC_COMP) == 0) {
			if (error == 0) {
				dmat->bounce_flags |= BF_MIN_ALLOC_COMP;
			}
		} else
			error = 0;
	}
	bz->map_count++;

	if (error == 0) {
		dmat->map_count++;
		if ((dmat->bounce_flags & BF_COHERENT) != 0)
			(*mapp)->flags |= DMAMAP_COHERENT;
	} else {
		free(*mapp, M_DEVBUF);
	}
	CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
	    __func__, dmat, dmat->common.flags, error);
	return (error);
}

/*
 * Destroy a handle for mapping from kva/uva/physical
 * address space into bus device space.
 */
static int
bounce_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map)
{

	/* Check we are destroying the correct map type */
	if ((map->flags & DMAMAP_FROM_DMAMEM) != 0)
		panic("bounce_bus_dmamap_destroy: Invalid map freed\n");

	if (STAILQ_FIRST(&map->bpages) != NULL || map->sync_count != 0) {
		CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, EBUSY);
		return (EBUSY);
	}
	if (dmat->bounce_zone)
		dmat->bounce_zone->map_count--;
	free(map, M_DEVBUF);
	dmat->map_count--;
	CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat);
	return (0);
}

/*
 * Allocate a piece of memory that can be efficiently mapped into
 * bus device space based on the constraints listed in the dma tag.
 * A dmamap for use with dmamap_load is also allocated.
 */
static int
bounce_bus_dmamem_alloc(bus_dma_tag_t dmat, void **vaddr, int flags,
    bus_dmamap_t *mapp)
{
	vm_memattr_t attr;
	int mflags;

	if (flags & BUS_DMA_NOWAIT)
		mflags = M_NOWAIT;
	else
		mflags = M_WAITOK;

	if (dmat->segments == NULL) {
		dmat->segments = mallocarray_domainset(dmat->common.nsegments,
		    sizeof(bus_dma_segment_t), M_DEVBUF,
		    DOMAINSET_PREF(dmat->common.domain), mflags);
		if (dmat->segments == NULL) {
			CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
			    __func__, dmat, dmat->common.flags, ENOMEM);
			return (ENOMEM);
		}
	}
	if (flags & BUS_DMA_ZERO)
		mflags |= M_ZERO;
	if (flags & BUS_DMA_NOCACHE)
		attr = VM_MEMATTR_UNCACHEABLE;
	else if ((flags & BUS_DMA_COHERENT) != 0 &&
	    (dmat->bounce_flags & BF_COHERENT) == 0)
		/*
		 * If we have a non-coherent tag and are trying to allocate
		 * a coherent block of memory, it needs to be uncached.
		 */
		attr = VM_MEMATTR_UNCACHEABLE;
	else
		attr = VM_MEMATTR_DEFAULT;

	/*
	 * Create the map, but don't set the could-bounce flag, as this
	 * allocation should never bounce.
	 */
	*mapp = alloc_dmamap(dmat, mflags);
	if (*mapp == NULL) {
		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
		    __func__, dmat, dmat->common.flags, ENOMEM);
		return (ENOMEM);
	}

	/*
	 * Mark the map as coherent if we used uncacheable memory or the
	 * tag was already marked as coherent.
	 */
	if (attr == VM_MEMATTR_UNCACHEABLE ||
	    (dmat->bounce_flags & BF_COHERENT) != 0)
		(*mapp)->flags |= DMAMAP_COHERENT;

	(*mapp)->flags |= DMAMAP_FROM_DMAMEM;

	/*
	 * Allocate the buffer from the malloc(9) allocator if...
	 *  - It's small enough to fit into a single page.
	 *  - Its alignment requirement is also smaller than the page size.
	 *  - The low address requirement is fulfilled.
	 *  - Default cache attributes are requested (WB).
	 * else allocate non-contiguous pages if...
	 *  - The page count that could get allocated doesn't exceed
	 *    nsegments even when the maximum segment size is less
	 *    than PAGE_SIZE.
	 *  - The alignment constraint isn't larger than a page boundary.
	 *  - There are no boundary-crossing constraints.
	 * else allocate a block of contiguous pages because one or more of the
	 * constraints is something that only the contig allocator can fulfill.
	 *
	 * NOTE: The (dmat->common.alignment <= dmat->maxsize) check
	 * below is just a quick hack.  The exact alignment guarantees
	 * of malloc(9) need to be nailed down, and the code below
	 * should be rewritten to take that into account.
	 *
	 * In the meantime warn the user if malloc gets it wrong.
	 */
	if (dmat->alloc_size <= PAGE_SIZE &&
	    dmat->alloc_alignment <= PAGE_SIZE &&
	    dmat->common.lowaddr >= ptoa((vm_paddr_t)Maxmem) &&
	    attr == VM_MEMATTR_DEFAULT) {
		*vaddr = malloc_domainset_aligned(dmat->alloc_size,
		    dmat->alloc_alignment, M_DEVBUF,
		    DOMAINSET_PREF(dmat->common.domain), mflags);
	} else if (dmat->common.nsegments >=
	    howmany(dmat->alloc_size, MIN(dmat->common.maxsegsz, PAGE_SIZE)) &&
	    dmat->alloc_alignment <= PAGE_SIZE &&
	    (dmat->common.boundary % PAGE_SIZE) == 0) {
		/* Page-based multi-segment allocations allowed */
		*vaddr = kmem_alloc_attr_domainset(
		    DOMAINSET_PREF(dmat->common.domain), dmat->alloc_size,
		    mflags, 0ul, dmat->common.lowaddr, attr);
		dmat->bounce_flags |= BF_KMEM_ALLOC_PAGES;
	} else {
		*vaddr = kmem_alloc_contig_domainset(
		    DOMAINSET_PREF(dmat->common.domain), dmat->alloc_size,
		    mflags, 0ul, dmat->common.lowaddr,
		    dmat->alloc_alignment != 0 ? dmat->alloc_alignment : 1ul,
		    dmat->common.boundary, attr);
		dmat->bounce_flags |= BF_KMEM_ALLOC_CONTIG;
	}
	if (*vaddr == NULL) {
		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
		    __func__, dmat, dmat->common.flags, ENOMEM);
		free(*mapp, M_DEVBUF);
		return (ENOMEM);
	} else if (!vm_addr_align_ok(vtophys(*vaddr), dmat->alloc_alignment)) {
		printf("bus_dmamem_alloc failed to align memory properly.\n");
	}
	dmat->map_count++;
	CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
	    __func__, dmat, dmat->common.flags, 0);
	return (0);
}

/*
 * Free a piece of memory and its associated dmamap that were allocated
 * via bus_dmamem_alloc.
 */
static void
bounce_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map)
{

	/*
	 * Check that the map came from bounce_bus_dmamem_alloc.  The
	 * BF_KMEM_ALLOC flags are clear if malloc() was used and set if
	 * one of the kmem_alloc_*() allocators was used.
	 */
	if ((map->flags & DMAMAP_FROM_DMAMEM) == 0)
		panic("bus_dmamem_free: Invalid map freed\n");
	if ((dmat->bounce_flags & BF_KMEM_ALLOC) == 0)
		free(vaddr, M_DEVBUF);
	else
		kmem_free(vaddr, dmat->alloc_size);
	free(map, M_DEVBUF);
	dmat->map_count--;
	CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat,
	    dmat->bounce_flags);
}

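/*
 * Walk a physical buffer and count the pages that must bounce.  When
 * pagesneeded is NULL, return as soon as any bouncing is required; otherwise
 * store the full count and return whether it is non-zero.
 */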
static bool
_bus_dmamap_pagesneeded(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf,
    bus_size_t buflen, int *pagesneeded)
{
	bus_addr_t curaddr;
	bus_size_t sgsize;
	int count;

	/*
	 * Count the number of bounce pages needed in order to
	 * complete this transfer
	 */
	count = 0;
	curaddr = buf;
	while (buflen != 0) {
		sgsize = buflen;
		if (must_bounce(dmat, map, curaddr, sgsize)) {
			sgsize = MIN(sgsize,
			    PAGE_SIZE - (curaddr & PAGE_MASK));
			if (pagesneeded == NULL)
				return (true);
			count++;
		}
		curaddr += sgsize;
		buflen -= sgsize;
	}

	if (pagesneeded != NULL)
		*pagesneeded = count;
	return (count != 0);
}

static void
_bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf,
    bus_size_t buflen, int flags)
{

	if (map->pagesneeded == 0) {
		_bus_dmamap_pagesneeded(dmat, map, buf, buflen,
		    &map->pagesneeded);
		CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded);
	}
}

static void
_bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap,
    void *buf, bus_size_t buflen, int flags)
{
	vm_offset_t vaddr;
	vm_offset_t vendaddr;
	bus_addr_t paddr;
	bus_size_t sg_len;

	if (map->pagesneeded == 0) {
		CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, "
		    "alignment= %d", dmat->common.lowaddr,
		    ptoa((vm_paddr_t)Maxmem),
		    dmat->common.boundary, dmat->common.alignment);
		CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d", map,
		    map->pagesneeded);
		/*
		 * Count the number of bounce pages
		 * needed in order to complete this transfer
		 */
		vaddr = (vm_offset_t)buf;
		vendaddr = (vm_offset_t)buf + buflen;

		while (vaddr < vendaddr) {
			sg_len = MIN(vendaddr - vaddr,
			    PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK));
			if (pmap == kernel_pmap)
				paddr = pmap_kextract(vaddr);
			else
				paddr = pmap_extract(pmap, vaddr);
			if (must_bounce(dmat, map, paddr, sg_len) != 0) {
				sg_len = roundup2(sg_len,
				    dmat->common.alignment);
				map->pagesneeded++;
			}
			vaddr += sg_len;
		}
		CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded);
	}
}

/*
 * Utility function to load a physical buffer.  segp contains
 * the starting segment on entry, and the ending segment on exit.
 */
static int
bounce_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map,
    vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct sync_list *sl;
	bus_size_t sgsize;
	bus_addr_t curaddr, sl_end;
	int error;

	if (segs == NULL)
		segs = dmat->segments;

	if (might_bounce(dmat, map, (bus_addr_t)buf, buflen)) {
		_bus_dmamap_count_phys(dmat, map, buf, buflen, flags);
		if (map->pagesneeded != 0) {
			error = _bus_dmamap_reserve_pages(dmat, map, flags);
			if (error)
				return (error);
		}
	}

	sl = map->slist + map->sync_count - 1;
	sl_end = 0;

	while (buflen > 0) {
		curaddr = buf;
		sgsize = buflen;
		if (map->pagesneeded != 0 &&
		    must_bounce(dmat, map, curaddr, sgsize)) {
			/*
			 * The attempt to split a physically contiguous buffer
			 * seems very controversial; it's unclear whether we
			 * can do this in all cases.  Also, memory for bounced
			 * buffers is allocated as pages, so we cannot
			 * guarantee multipage alignment.
			 */
			KASSERT(dmat->common.alignment <= PAGE_SIZE,
			    ("bounced buffer cannot have alignment bigger "
			    "than PAGE_SIZE: %lu", dmat->common.alignment));
			sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK));
			curaddr = add_bounce_page(dmat, map, 0, curaddr,
			    sgsize);
		} else if ((map->flags & DMAMAP_COHERENT) == 0) {
			if (map->sync_count > 0)
				sl_end = sl->paddr + sl->datacount;

			if (map->sync_count == 0 || curaddr != sl_end) {
				if (++map->sync_count > dmat->common.nsegments)
					break;
				sl++;
				sl->vaddr = 0;
				sl->paddr = curaddr;
				sl->pages = PHYS_TO_VM_PAGE(curaddr);
				KASSERT(sl->pages != NULL,
				    ("%s: page at PA:0x%08lx is not in "
				    "vm_page_array", __func__, curaddr));
				sl->datacount = sgsize;
			} else
				sl->datacount += sgsize;
		}
		if (!_bus_dmamap_addsegs(dmat, map, curaddr, sgsize, segs,
		    segp))
			break;
		buf += sgsize;
		buflen -= sgsize;
	}

	/*
	 * Did we fit?
	 */
	if (buflen != 0) {
		bus_dmamap_unload(dmat, map);
		return (EFBIG); /* XXX better return value here? */
	}
	return (0);
}

/*
 * Utility function to load a linear buffer.  segp contains
 * the starting segment on entry, and the ending segment on exit.
 */
static int
bounce_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf,
    bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct sync_list *sl;
	bus_size_t sgsize;
	bus_addr_t curaddr, sl_pend;
	vm_offset_t kvaddr, vaddr, sl_vend;
	int error;

	KASSERT((map->flags & DMAMAP_FROM_DMAMEM) != 0 ||
	    dmat->common.alignment <= PAGE_SIZE,
	    ("loading user buffer with alignment bigger than PAGE_SIZE is not "
	    "supported"));

	if (segs == NULL)
		segs = dmat->segments;

	if (flags & BUS_DMA_LOAD_MBUF)
		map->flags |= DMAMAP_MBUF;

	if (might_bounce(dmat, map, (bus_addr_t)buf, buflen)) {
		_bus_dmamap_count_pages(dmat, map, pmap, buf, buflen, flags);
		if (map->pagesneeded != 0) {
			error = _bus_dmamap_reserve_pages(dmat, map, flags);
			if (error)
				return (error);
		}
	}

	/*
	 * XXX Optimally we should parse the input buffer for physically
	 * contiguous segments first and then pass these segments into the
	 * load loop.
	 */
	sl = map->slist + map->sync_count - 1;
	vaddr = (vm_offset_t)buf;
	sl_pend = 0;
	sl_vend = 0;

	while (buflen > 0) {
		/*
		 * Get the physical address for this segment.
		 */
		if (__predict_true(pmap == kernel_pmap)) {
			curaddr = pmap_kextract(vaddr);
			kvaddr = vaddr;
		} else {
			curaddr = pmap_extract(pmap, vaddr);
			kvaddr = 0;
		}

		/*
		 * Compute the segment size, and adjust counts.
		 */
		sgsize = buflen;
		if ((map->flags & DMAMAP_FROM_DMAMEM) == 0 ||
		    (dmat->bounce_flags & BF_KMEM_ALLOC_CONTIG) == 0)
			sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK));

		if (map->pagesneeded != 0 &&
		    must_bounce(dmat, map, curaddr, sgsize)) {
			/* See comment in bounce_bus_dmamap_load_phys */
			KASSERT(dmat->common.alignment <= PAGE_SIZE,
			    ("bounced buffer cannot have alignment bigger "
			    "than PAGE_SIZE: %lu", dmat->common.alignment));
			curaddr = add_bounce_page(dmat, map, kvaddr, curaddr,
			    sgsize);
		} else if ((map->flags & DMAMAP_COHERENT) == 0) {
			if (map->sync_count > 0) {
				sl_pend = sl->paddr + sl->datacount;
				sl_vend = sl->vaddr + sl->datacount;
			}

			if (map->sync_count == 0 ||
			    (kvaddr != 0 && kvaddr != sl_vend) ||
			    (curaddr != sl_pend)) {
				if (++map->sync_count > dmat->common.nsegments)
					break;
				sl++;
				sl->vaddr = kvaddr;
				sl->paddr = curaddr;
				if (kvaddr != 0) {
					sl->pages = NULL;
				} else {
					sl->pages = PHYS_TO_VM_PAGE(curaddr);
					KASSERT(sl->pages != NULL,
					    ("%s: page at PA:0x%08lx is not "
					    "in vm_page_array", __func__,
					    curaddr));
				}
				sl->datacount = sgsize;
			} else
				sl->datacount += sgsize;
		}
		if (!_bus_dmamap_addsegs(dmat, map, curaddr, sgsize, segs,
		    segp))
			break;
		vaddr += sgsize;
		buflen -= MIN(sgsize, buflen); /* avoid underflow */
	}

	/*
	 * Did we fit?
	 */
	if (buflen != 0) {
		bus_dmamap_unload(dmat, map);
		return (EFBIG); /* XXX better return value here? */
	}
	return (0);
}

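/*
 * Record the memory descriptor and callback so that a load that could not
 * reserve enough bounce pages can be retried later by the deferred-load
 * machinery in subr_busdma_bounce.c.
 */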
static void
bounce_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map,
    struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg)
{

	map->mem = *mem;
	map->dmat = dmat;
	map->callback = callback;
	map->callback_arg = callback_arg;
}

static bus_dma_segment_t *
bounce_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map,
    bus_dma_segment_t *segs, int nsegs, int error)
{

	if (segs == NULL)
		segs = dmat->segments;
	return (segs);
}

/*
 * Release the mapping held by map.
 */
static void
bounce_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map)
{
	free_bounce_pages(dmat, map);
	map->sync_count = 0;
	map->flags &= ~DMAMAP_MBUF;
}

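/*
 * Prepare a buffer for a device-to-memory (read) transfer: write back any
 * partial cachelines at the edges of the region so adjacent CPU data is not
 * lost, then invalidate the whole range ahead of the DMA.
 */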
static void
dma_preread_safe(char *va, vm_size_t size)
{
	/*
	 * Write back any partial cachelines immediately before and
	 * after the DMA region.
	 */
	if (!__is_aligned(va, dcache_line_size))
		cpu_dcache_wb_range(va, 1);
	if (!__is_aligned(va + size, dcache_line_size))
		cpu_dcache_wb_range(va + size, 1);

	cpu_dcache_inv_range(va, size);
}

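/*
 * Perform the cache maintenance required by op for one sync_list entry,
 * temporarily mapping the pages with pmap_quick_enter_page() when the entry
 * has no KVA.
 */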
static void
dma_dcache_sync(struct sync_list *sl, bus_dmasync_op_t op)
{
	uint32_t len, offset;
	vm_page_t m;
	vm_paddr_t pa;
	vm_offset_t va, tempva;
	bus_size_t size;

	offset = sl->paddr & PAGE_MASK;
	m = sl->pages;
	size = sl->datacount;
	pa = sl->paddr;

	for ( ; size != 0; size -= len, pa += len, offset = 0, ++m) {
		tempva = 0;
		if (sl->vaddr == 0) {
			len = min(PAGE_SIZE - offset, size);
			tempva = pmap_quick_enter_page(m);
			va = tempva | offset;
			KASSERT(pa == (VM_PAGE_TO_PHYS(m) | offset),
			    ("unexpected vm_page_t phys: 0x%16lx != 0x%16lx",
			    VM_PAGE_TO_PHYS(m) | offset, pa));
		} else {
			len = sl->datacount;
			va = sl->vaddr;
		}

		switch (op) {
		case BUS_DMASYNC_PREWRITE:
		case BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD:
			cpu_dcache_wb_range((void *)va, len);
			break;
		case BUS_DMASYNC_PREREAD:
			/*
			 * An mbuf may start in the middle of a cacheline. There
			 * will be no cpu writes to the beginning of that line
			 * (which contains the mbuf header) while dma is in
			 * progress.  Handle that case by doing a writeback of
			 * just the first cacheline before invalidating the
			 * overall buffer.  Any mbuf in a chain may have this
			 * misalignment.  Buffers which are not mbufs bounce if
			 * they are not aligned to a cacheline.
			 */
			dma_preread_safe((void *)va, len);
			break;
		case BUS_DMASYNC_POSTREAD:
		case BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE:
			cpu_dcache_inv_range((void *)va, len);
			break;
		default:
			panic("unsupported combination of sync operations: "
			    "0x%08x\n", op);
		}

		if (tempva != 0)
			pmap_quick_remove_page(tempva);
	}
}

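/*
 * Synchronize the CPU and device views of a mapped buffer: copy data to or
 * from any bounce pages and perform the cache maintenance recorded in the
 * map's sync_list, with barriers ordering the copies against the DMA.
 */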
static void
bounce_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map,
    bus_dmasync_op_t op)
{
	struct bounce_page *bpage;
	struct sync_list *sl, *end;
	vm_offset_t datavaddr, tempvaddr;

	if (op == BUS_DMASYNC_POSTWRITE)
		return;

	if ((op & BUS_DMASYNC_POSTREAD) != 0) {
		/*
		 * Wait for any DMA operations to complete before the bcopy.
		 */
		dsb(sy);
	}

	if ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) {
		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x "
		    "performing bounce", __func__, dmat, dmat->common.flags,
		    op);

		if ((op & BUS_DMASYNC_PREWRITE) != 0) {
			while (bpage != NULL) {
				tempvaddr = 0;
				datavaddr = bpage->datavaddr;
				if (datavaddr == 0) {
					tempvaddr = pmap_quick_enter_page(
					    bpage->datapage);
					datavaddr = tempvaddr | bpage->dataoffs;
				}

				bcopy((void *)datavaddr,
				    (void *)bpage->vaddr, bpage->datacount);
				if (tempvaddr != 0)
					pmap_quick_remove_page(tempvaddr);
				if ((map->flags & DMAMAP_COHERENT) == 0)
					cpu_dcache_wb_range((void *)bpage->vaddr,
					    bpage->datacount);
				bpage = STAILQ_NEXT(bpage, links);
			}
			dmat->bounce_zone->total_bounced++;
		} else if ((op & BUS_DMASYNC_PREREAD) != 0) {
			while (bpage != NULL) {
				if ((map->flags & DMAMAP_COHERENT) == 0)
					cpu_dcache_wbinv_range((void *)bpage->vaddr,
					    bpage->datacount);
				bpage = STAILQ_NEXT(bpage, links);
			}
		}

		if ((op & BUS_DMASYNC_POSTREAD) != 0) {
			while (bpage != NULL) {
				if ((map->flags & DMAMAP_COHERENT) == 0)
					cpu_dcache_inv_range((void *)bpage->vaddr,
					    bpage->datacount);
				tempvaddr = 0;
				datavaddr = bpage->datavaddr;
				if (datavaddr == 0) {
					tempvaddr = pmap_quick_enter_page(
					    bpage->datapage);
					datavaddr = tempvaddr | bpage->dataoffs;
				}

				bcopy((void *)bpage->vaddr,
				    (void *)datavaddr, bpage->datacount);

				if (tempvaddr != 0)
					pmap_quick_remove_page(tempvaddr);
				bpage = STAILQ_NEXT(bpage, links);
			}
			dmat->bounce_zone->total_bounced++;
		}
	}

	/*
	 * Cache maintenance for normal (non-COHERENT non-bounce) buffers.
	 */
	if (map->sync_count != 0) {
		sl = &map->slist[0];
		end = &map->slist[map->sync_count];
		CTR3(KTR_BUSDMA, "%s: tag %p op 0x%x "
		    "performing sync", __func__, dmat, op);

		for ( ; sl != end; ++sl)
			dma_dcache_sync(sl, op);
	}

	if ((op & (BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE)) != 0) {
		/*
		 * Wait for the bcopy to complete before any DMA operations.
		 */
		dsb(sy);
	}

	kmsan_bus_dmamap_sync(&map->kmsan_mem, op);
}

#ifdef KMSAN
static void
bounce_bus_dmamap_load_kmsan(bus_dmamap_t map, struct memdesc *mem)
{
	if (map == NULL)
		return;
	memcpy(&map->kmsan_mem, mem, sizeof(map->kmsan_mem));
}
#endif

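/*
 * Method table wiring the bounce implementation into the common arm64
 * bus_dma framework.
 */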
struct bus_dma_impl bus_dma_bounce_impl = {
	.tag_create = bounce_bus_dma_tag_create,
	.tag_destroy = bounce_bus_dma_tag_destroy,
	.tag_set_domain = bounce_bus_dma_tag_set_domain,
	.id_mapped = bounce_bus_dma_id_mapped,
	.map_create = bounce_bus_dmamap_create,
	.map_destroy = bounce_bus_dmamap_destroy,
	.mem_alloc = bounce_bus_dmamem_alloc,
	.mem_free = bounce_bus_dmamem_free,
	.load_phys = bounce_bus_dmamap_load_phys,
	.load_buffer = bounce_bus_dmamap_load_buffer,
	.load_ma = bus_dmamap_load_ma_triv,
	.map_waitok = bounce_bus_dmamap_waitok,
	.map_complete = bounce_bus_dmamap_complete,
	.map_unload = bounce_bus_dmamap_unload,
	.map_sync = bounce_bus_dmamap_sync,
#ifdef KMSAN
	.load_kmsan = bounce_bus_dmamap_load_kmsan,
#endif
};