/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2012-2015 Ian Lepore
 * Copyright (c) 2010 Mark Tinguely
 * Copyright (c) 2004 Olivier Houchard
 * Copyright (c) 2002 Peter Grehan
 * Copyright (c) 1997, 1998 Justin T. Gibbs.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions, and the following disclaimer,
 *    without modification, immediately at the beginning of the file.
 * 2. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * From i386/busdma_machdep.c 191438 2009-04-23 20:24:19Z jhb
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/busdma_bufalloc.h>
#include <sys/counter.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/memdesc.h>
#include <sys/proc.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/uio.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>

#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/cpu.h>
#include <machine/md_var.h>

//#define ARM_BUSDMA_MAPLOAD_STATS

#define	BUSDMA_DCACHE_ALIGN	cpuinfo.dcache_line_size
#define	BUSDMA_DCACHE_MASK	cpuinfo.dcache_line_mask

#define	MAX_BPAGES		64
#define	MAX_DMA_SEGMENTS	4096
#define	BUS_DMA_EXCL_BOUNCE	BUS_DMA_BUS2
#define	BUS_DMA_ALIGN_BOUNCE	BUS_DMA_BUS3
#define	BUS_DMA_COULD_BOUNCE	(BUS_DMA_EXCL_BOUNCE | BUS_DMA_ALIGN_BOUNCE)
#define	BUS_DMA_MIN_ALLOC_COMP	BUS_DMA_BUS4

struct bounce_page;
struct bounce_zone;

struct bus_dma_tag {
	bus_size_t	alignment;
	bus_addr_t	boundary;
	bus_addr_t	lowaddr;
	bus_addr_t	highaddr;
	bus_size_t	maxsize;
	u_int		nsegments;
	bus_size_t	maxsegsz;
	int		flags;
	int		map_count;
	bus_dma_lock_t	*lockfunc;
	void		*lockfuncarg;
	struct bounce_zone *bounce_zone;
};

struct sync_list {
	vm_offset_t	vaddr;		/* kva of client data */
	bus_addr_t	paddr;		/* physical address */
	vm_page_t	pages;		/* starting page of client data */
	bus_size_t	datacount;	/* client data count */
};

static uint32_t tags_total;
static uint32_t maps_total;
static uint32_t maps_dmamem;
static uint32_t maps_coherent;
#ifdef ARM_BUSDMA_MAPLOAD_STATS
static counter_u64_t maploads_total;
static counter_u64_t maploads_bounced;
static counter_u64_t maploads_coherent;
static counter_u64_t maploads_dmamem;
static counter_u64_t maploads_mbuf;
static counter_u64_t maploads_physmem;
#endif

SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "Busdma parameters");
SYSCTL_UINT(_hw_busdma, OID_AUTO, tags_total, CTLFLAG_RD, &tags_total, 0,
    "Number of active tags");
SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_total, CTLFLAG_RD, &maps_total, 0,
    "Number of active maps");
SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_dmamem, CTLFLAG_RD, &maps_dmamem, 0,
    "Number of active maps for bus_dmamem_alloc buffers");
SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_coherent, CTLFLAG_RD, &maps_coherent, 0,
    "Number of active maps with BUS_DMA_COHERENT flag set");
#ifdef ARM_BUSDMA_MAPLOAD_STATS
SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_total, CTLFLAG_RD,
    &maploads_total, "Number of load operations performed");
SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_bounced, CTLFLAG_RD,
    &maploads_bounced, "Number of load operations that used bounce buffers");
SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_coherent, CTLFLAG_RD,
    &maploads_coherent, "Number of load operations on BUS_DMA_COHERENT memory");
SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_dmamem, CTLFLAG_RD,
    &maploads_dmamem, "Number of load operations on bus_dmamem_alloc buffers");
SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_mbuf, CTLFLAG_RD,
    &maploads_mbuf, "Number of load operations for mbufs");
SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_physmem, CTLFLAG_RD,
    &maploads_physmem, "Number of load operations on physical buffers");
#endif

struct bus_dmamap {
	STAILQ_HEAD(, bounce_page) bpages;
	int		pagesneeded;
	int		pagesreserved;
	bus_dma_tag_t	dmat;
	struct memdesc	mem;
	bus_dmamap_callback_t *callback;
	void		*callback_arg;
	__sbintime_t	queued_time;
	int		flags;
#define	DMAMAP_COHERENT		(1 << 0)
#define	DMAMAP_DMAMEM_ALLOC	(1 << 1)
#define	DMAMAP_MBUF		(1 << 2)
	STAILQ_ENTRY(bus_dmamap) links;
	bus_dma_segment_t *segments;
	int		sync_count;
	struct sync_list slist[];
};

static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, pmap_t pmap,
    bus_dmamap_t map, void *buf, bus_size_t buflen, int flags);
static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map,
    vm_paddr_t buf, bus_size_t buflen, int flags);
static void dma_preread_safe(vm_offset_t va, vm_paddr_t pa, vm_size_t size);
static void dma_dcache_sync(struct sync_list *sl, bus_dmasync_op_t op);

static busdma_bufalloc_t coherent_allocator;	/* Cache of coherent buffers */
static busdma_bufalloc_t standard_allocator;	/* Cache of standard buffers */

MALLOC_DEFINE(M_BUSDMA, "busdma", "busdma metadata");

#define	dmat_alignment(dmat)	((dmat)->alignment)
#define	dmat_bounce_flags(dmat)	(0)
#define	dmat_boundary(dmat)	((dmat)->boundary)
#define	dmat_flags(dmat)	((dmat)->flags)
#define	dmat_highaddr(dmat)	((dmat)->highaddr)
#define	dmat_lowaddr(dmat)	((dmat)->lowaddr)
#define	dmat_lockfunc(dmat)	((dmat)->lockfunc)
#define	dmat_lockfuncarg(dmat)	((dmat)->lockfuncarg)
#define	dmat_maxsegsz(dmat)	((dmat)->maxsegsz)
#define	dmat_nsegments(dmat)	((dmat)->nsegments)

#include "../../kern/subr_busdma_bounce.c"

static void
busdma_init(void *dummy)
{
	int uma_flags;

#ifdef ARM_BUSDMA_MAPLOAD_STATS
	maploads_total = counter_u64_alloc(M_WAITOK);
	maploads_bounced = counter_u64_alloc(M_WAITOK);
	maploads_coherent = counter_u64_alloc(M_WAITOK);
	maploads_dmamem = counter_u64_alloc(M_WAITOK);
	maploads_mbuf = counter_u64_alloc(M_WAITOK);
	maploads_physmem = counter_u64_alloc(M_WAITOK);
#endif

	uma_flags = 0;

	/* Create a cache of buffers in standard (cacheable) memory. */
	standard_allocator = busdma_bufalloc_create("buffer",
	    BUSDMA_DCACHE_ALIGN,	/* minimum_alignment */
	    NULL,			/* uma_alloc func */
	    NULL,			/* uma_free func */
	    uma_flags);			/* uma_zcreate_flags */

#ifdef INVARIANTS
	/*
	 * Force UMA zone to allocate service structures like
	 * slabs using own allocator. uma_debug code performs
	 * atomic ops on uma_slab_t fields and safety of this
	 * operation is not guaranteed for write-back caches
	 */
	uma_flags = UMA_ZONE_NOTOUCH;
#endif
	/*
	 * Create a cache of buffers in uncacheable memory, to implement the
	 * BUS_DMA_COHERENT (and potentially BUS_DMA_NOCACHE) flag.
	 */
	coherent_allocator = busdma_bufalloc_create("coherent",
	    BUSDMA_DCACHE_ALIGN,	/* minimum_alignment */
	    busdma_bufalloc_alloc_uncacheable,
	    busdma_bufalloc_free_uncacheable,
	    uma_flags);			/* uma_zcreate_flags */
}

/*
 * This init historically used SI_SUB_VM, but now the init code requires
 * malloc(9) using M_BUSDMA memory and the pcpu zones for counter(9), which get
 * set up by SI_SUB_KMEM and SI_ORDER_LAST, so we'll go right after that by
 * using SI_SUB_KMEM+1.
 */
SYSINIT(busdma, SI_SUB_KMEM+1, SI_ORDER_FIRST, busdma_init, NULL);

/*
 * This routine checks the exclusion zone constraints from a tag against the
 * physical RAM available on the machine.  If a tag specifies an exclusion zone
 * but there's no RAM in that zone, then we avoid allocating resources to bounce
 * a request, and we can use any memory allocator (as opposed to needing
 * kmem_alloc_contig() just because it can allocate pages in an address range).
 *
 * Most tags have BUS_SPACE_MAXADDR or BUS_SPACE_MAXADDR_32BIT (they are the
 * same value on 32-bit architectures) as their lowaddr constraint, and we can't
 * possibly have RAM at an address higher than the highest address we can
 * express, so we take a fast out.
 */
static int
exclusion_bounce_check(bus_addr_t lowaddr, bus_addr_t highaddr)
{
	int i;

	if (lowaddr >= BUS_SPACE_MAXADDR)
		return (0);

	for (i = 0; phys_avail[i] && phys_avail[i + 1]; i += 2) {
		if ((lowaddr >= phys_avail[i] && lowaddr < phys_avail[i + 1]) ||
		    (lowaddr < phys_avail[i] && highaddr >= phys_avail[i]))
			return (1);
	}
	return (0);
}

/*
 * Return true if the tag has an exclusion zone that could lead to bouncing.
 */
static __inline int
exclusion_bounce(bus_dma_tag_t dmat)
{

	return (dmat->flags & BUS_DMA_EXCL_BOUNCE);
}

/*
 * Return true if the given address does not fall on the alignment boundary.
 */
static __inline int
alignment_bounce(bus_dma_tag_t dmat, bus_addr_t addr)
{

	return (!vm_addr_align_ok(addr, dmat->alignment));
}
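
/*
 * Illustrative example (editor's note; the numbers are hypothetical and not
 * part of this file): a tag with lowaddr = 0x1fffffff (the device can reach
 * only the low 512MB) created on a board whose only RAM chunk is 256MB at
 * physical address 0 gets exclusion_bounce_check() == 0, so BUS_DMA_EXCL_BOUNCE
 * is never set and no bounce resources are reserved for that tag.  For a tag
 * with alignment = 0x40, alignment_bounce(dmat, 0x20010) is true
 * (0x20010 & 0x3f != 0) while alignment_bounce(dmat, 0x20040) is false.
 */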

/*
 * Return true if the DMA should bounce because the start or end does not fall
 * on a cacheline boundary (which would require a partial cacheline flush).
 * COHERENT memory doesn't trigger cacheline flushes.  Memory allocated by
 * bus_dmamem_alloc() is always aligned to cacheline boundaries, and there's a
 * strict rule that such memory cannot be accessed by the CPU while DMA is in
 * progress (or by multiple DMA engines at once), so that it's always safe to do
 * full cacheline flushes even if that affects memory outside the range of a
 * given DMA operation that doesn't involve the full allocated buffer.  If we're
 * mapping an mbuf, that follows the same rules as a buffer we allocated.
 */
static __inline int
cacheline_bounce(bus_dmamap_t map, bus_addr_t addr, bus_size_t size)
{

	if (map->flags & (DMAMAP_DMAMEM_ALLOC | DMAMAP_COHERENT | DMAMAP_MBUF))
		return (0);
	return ((addr | size) & BUSDMA_DCACHE_MASK);
}

/*
 * Return true if we might need to bounce the DMA described by addr and size.
 *
 * This is used to quick-check whether we need to do the more expensive work of
 * checking the DMA page-by-page looking for alignment and exclusion bounces.
 *
 * Note that the addr argument might be either virtual or physical.  It doesn't
 * matter because we only look at the low-order bits, which are the same in both
 * address spaces, and the maximum alignment of a generic buffer is limited to
 * the page size.
 *
 * Bouncing of buffers allocated by bus_dmamem_alloc() is never necessary; those
 * buffers always comply with the required rules (alignment, boundary, and
 * address range).
 */
static __inline int
might_bounce(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t addr,
    bus_size_t size)
{

	KASSERT(map->flags & DMAMAP_DMAMEM_ALLOC ||
	    dmat->alignment <= PAGE_SIZE,
	    ("%s: unsupported alignment (0x%08lx) for buffer not "
	    "allocated by bus_dmamem_alloc()",
	    __func__, dmat->alignment));

	return (!(map->flags & DMAMAP_DMAMEM_ALLOC) &&
	    ((dmat->flags & BUS_DMA_EXCL_BOUNCE) ||
	    alignment_bounce(dmat, addr) ||
	    cacheline_bounce(map, addr, size)));
}

/*
 * Return true if we must bounce the DMA described by paddr and size.
 *
 * Bouncing can be triggered by DMA that doesn't begin and end on cacheline
 * boundaries, or doesn't begin on an alignment boundary, or falls within the
 * exclusion zone of the tag.
 */
static int
must_bounce(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t paddr,
    bus_size_t size)
{

	if (cacheline_bounce(map, paddr, size))
		return (1);

	/*
	 * Check the tag's exclusion zone.
	 */
	if (exclusion_bounce(dmat) && addr_needs_bounce(dmat, paddr))
		return (1);

	return (0);
}
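
/*
 * Worked example (editor's illustration; a 64-byte cacheline is assumed): a
 * 100-byte read into a malloc(9) buffer at physical 0x10020 neither starts nor
 * ends on a cacheline boundary, so cacheline_bounce() returns non-zero
 * ((0x10020 | 100) & 0x3f != 0) and the transfer is bounced rather than risking
 * a partial-line invalidate that could clobber adjacent data.  The same request
 * on a bus_dmamem_alloc() buffer or an mbuf is exempt, because those buffers
 * follow the "no CPU access during DMA" rule described above.
 */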

/*
 * Allocate a device specific dma_tag.
 */
int
bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
    bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
    bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
    int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
    void *lockfuncarg, bus_dma_tag_t *dmat)
{
	bus_dma_tag_t newtag;
	int error = 0;

	/* Basic sanity checking. */
	KASSERT(boundary == 0 || powerof2(boundary),
	    ("dma tag boundary %lu, must be a power of 2", boundary));
	KASSERT(boundary == 0 || boundary >= maxsegsz,
	    ("dma tag boundary %lu is < maxsegsz %lu\n", boundary, maxsegsz));
	KASSERT(alignment != 0 && powerof2(alignment),
	    ("dma tag alignment %lu, must be non-zero power of 2", alignment));
	KASSERT(maxsegsz != 0, ("dma tag maxsegsz must not be zero"));

	/* Return a NULL tag on failure */
	*dmat = NULL;

	/* Filters are no longer supported. */
	if (filter != NULL || filterarg != NULL)
		return (EINVAL);

	newtag = (bus_dma_tag_t)malloc(sizeof(*newtag), M_BUSDMA,
	    M_ZERO | M_NOWAIT);
	if (newtag == NULL) {
		CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
		    __func__, newtag, 0, error);
		return (ENOMEM);
	}

	newtag->alignment = alignment;
	newtag->boundary = boundary;
	newtag->lowaddr = trunc_page((vm_paddr_t)lowaddr) + (PAGE_SIZE - 1);
	newtag->highaddr = trunc_page((vm_paddr_t)highaddr) +
	    (PAGE_SIZE - 1);
	newtag->maxsize = maxsize;
	newtag->nsegments = nsegments;
	newtag->maxsegsz = maxsegsz;
	newtag->flags = flags;
	newtag->map_count = 0;
	if (lockfunc != NULL) {
		newtag->lockfunc = lockfunc;
		newtag->lockfuncarg = lockfuncarg;
	} else {
		newtag->lockfunc = _busdma_dflt_lock;
		newtag->lockfuncarg = NULL;
	}

	/* Take into account any restrictions imposed by our parent tag */
	if (parent != NULL) {
		newtag->lowaddr = MIN(parent->lowaddr, newtag->lowaddr);
		newtag->highaddr = MAX(parent->highaddr, newtag->highaddr);
		newtag->alignment = MAX(parent->alignment, newtag->alignment);
		newtag->flags |= parent->flags & BUS_DMA_COULD_BOUNCE;
		newtag->flags |= parent->flags & BUS_DMA_COHERENT;
		if (newtag->boundary == 0)
			newtag->boundary = parent->boundary;
		else if (parent->boundary != 0)
			newtag->boundary = MIN(parent->boundary,
			    newtag->boundary);
	}

	if (exclusion_bounce_check(newtag->lowaddr, newtag->highaddr))
		newtag->flags |= BUS_DMA_EXCL_BOUNCE;
	if (alignment_bounce(newtag, 1))
		newtag->flags |= BUS_DMA_ALIGN_BOUNCE;

	/*
	 * Any request can auto-bounce due to cacheline alignment, in addition
	 * to any alignment or boundary specifications in the tag, so if the
	 * ALLOCNOW flag is set, there's always work to do.
	 */
	if ((flags & BUS_DMA_ALLOCNOW) != 0) {
		struct bounce_zone *bz;
		/*
		 * Round size up to a full page, and add one more page because
		 * there can always be one more boundary crossing than the
		 * number of pages in a transfer.
		 */
		maxsize = roundup2(maxsize, PAGE_SIZE) + PAGE_SIZE;

		if ((error = alloc_bounce_zone(newtag)) != 0) {
			free(newtag, M_BUSDMA);
			return (error);
		}
		bz = newtag->bounce_zone;

		if (ptoa(bz->total_bpages) < maxsize) {
			int pages;

			pages = atop(maxsize) - bz->total_bpages;

			/* Add pages to our bounce pool */
			if (alloc_bounce_pages(newtag, pages) < pages)
				error = ENOMEM;
		}
		/* Performed initial allocation */
		newtag->flags |= BUS_DMA_MIN_ALLOC_COMP;
	} else
		newtag->bounce_zone = NULL;

	if (error != 0) {
		free(newtag, M_BUSDMA);
	} else {
		atomic_add_32(&tags_total, 1);
		*dmat = newtag;
	}
	CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
	    __func__, newtag, (newtag != NULL ? newtag->flags : 0), error);
	return (error);
}
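
#if 0
/*
 * Editor's sketch, not compiled: a typical way a hypothetical driver might
 * create a tag against the constraints handled above.  The example_* name and
 * the sizes are invented; only the bus_dma_tag_create() interface itself comes
 * from this file.
 */
static int
example_alloc_dma_tag(device_t dev, bus_dma_tag_t *tag)
{

	return (bus_dma_tag_create(
	    bus_get_dma_tag(dev),	/* inherit parent constraints */
	    sizeof(uint32_t), 0,	/* alignment, no boundary */
	    BUS_SPACE_MAXADDR_32BIT,	/* lowaddr: device is 32-bit only */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filters are no longer supported */
	    MCLBYTES,			/* maxsize */
	    1,				/* nsegments */
	    MCLBYTES,			/* maxsegsz */
	    0,				/* flags */
	    NULL, NULL,			/* default locking */
	    tag));
}
#endif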

void
bus_dma_template_clone(bus_dma_template_t *t, bus_dma_tag_t dmat)
{

	if (t == NULL || dmat == NULL)
		return;

	t->alignment = dmat->alignment;
	t->boundary = dmat->boundary;
	t->lowaddr = dmat->lowaddr;
	t->highaddr = dmat->highaddr;
	t->maxsize = dmat->maxsize;
	t->nsegments = dmat->nsegments;
	t->maxsegsize = dmat->maxsegsz;
	t->flags = dmat->flags;
	t->lockfunc = dmat->lockfunc;
	t->lockfuncarg = dmat->lockfuncarg;
}

int
bus_dma_tag_set_domain(bus_dma_tag_t dmat, int domain)
{

	return (0);
}

int
bus_dma_tag_destroy(bus_dma_tag_t dmat)
{
	int error = 0;

	if (dmat != NULL) {
		if (dmat->map_count != 0) {
			error = EBUSY;
			goto out;
		}
		free(dmat, M_BUSDMA);
	}
out:
	CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat, error);
	return (error);
}

static int
allocate_bz_and_pages(bus_dma_tag_t dmat, bus_dmamap_t mapp)
{
	struct bounce_zone *bz;
	int maxpages;
	int error;

	if (dmat->bounce_zone == NULL)
		if ((error = alloc_bounce_zone(dmat)) != 0)
			return (error);
	bz = dmat->bounce_zone;
	/* Initialize the new map */
	STAILQ_INIT(&(mapp->bpages));

	/*
	 * Attempt to add pages to our pool on a per-instance basis up to a sane
	 * limit.  Even if the tag isn't flagged as COULD_BOUNCE due to
	 * alignment and boundary constraints, it could still auto-bounce due to
	 * cacheline alignment, which requires at most two bounce pages.
	 */
	if (dmat->flags & BUS_DMA_COULD_BOUNCE)
		maxpages = MAX_BPAGES;
	else
		maxpages = 2 * bz->map_count;
	if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0 ||
	    (bz->map_count > 0 && bz->total_bpages < maxpages)) {
		int pages;

		pages = atop(roundup2(dmat->maxsize, PAGE_SIZE)) + 1;
		pages = MIN(maxpages - bz->total_bpages, pages);
		pages = MAX(pages, 2);
		if (alloc_bounce_pages(dmat, pages) < pages)
			return (ENOMEM);

		if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0)
			dmat->flags |= BUS_DMA_MIN_ALLOC_COMP;
	}
	bz->map_count++;
	return (0);
}

static bus_dmamap_t
allocate_map(bus_dma_tag_t dmat, int mflags)
{
	int mapsize, segsize;
	bus_dmamap_t map;

	/*
	 * Allocate the map.  The map structure ends with an embedded
	 * variable-sized array of sync_list structures.  Following that
	 * we allocate enough extra space to hold the array of bus_dma_segments.
	 */
	KASSERT(dmat->nsegments <= MAX_DMA_SEGMENTS,
	    ("cannot allocate %u dma segments (max is %u)",
	    dmat->nsegments, MAX_DMA_SEGMENTS));
	segsize = sizeof(struct bus_dma_segment) * dmat->nsegments;
	mapsize = sizeof(*map) + sizeof(struct sync_list) * dmat->nsegments;
	map = malloc(mapsize + segsize, M_BUSDMA, mflags | M_ZERO);
	if (map == NULL) {
		CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM);
		return (NULL);
	}
	map->segments = (bus_dma_segment_t *)((uintptr_t)map + mapsize);
	STAILQ_INIT(&map->bpages);
	return (map);
}

/*
 * Allocate a handle for mapping from kva/uva/physical
 * address space into bus device space.
 */
int
bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
{
	bus_dmamap_t map;
	int error = 0;

	*mapp = map = allocate_map(dmat, M_NOWAIT);
	if (map == NULL) {
		CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM);
		return (ENOMEM);
	}

	/*
	 * Bouncing might be required if the driver asks for an exclusion
	 * region, a data alignment that is stricter than 1, or DMA that begins
	 * or ends with a partial cacheline.  Whether bouncing will actually
	 * happen can't be known until mapping time, but we need to pre-allocate
	 * resources now because we might not be allowed to at mapping time.
	 */
	error = allocate_bz_and_pages(dmat, map);
	if (error != 0) {
		free(map, M_BUSDMA);
		*mapp = NULL;
		return (error);
	}
	if (map->flags & DMAMAP_COHERENT)
		atomic_add_32(&maps_coherent, 1);
	atomic_add_32(&maps_total, 1);
	dmat->map_count++;

	return (0);
}

/*
 * Destroy a handle for mapping from kva/uva/physical
 * address space into bus device space.
 */
int
bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map)
{

	if (STAILQ_FIRST(&map->bpages) != NULL || map->sync_count != 0) {
		CTR3(KTR_BUSDMA, "%s: tag %p error %d",
		    __func__, dmat, EBUSY);
		return (EBUSY);
	}
	if (dmat->bounce_zone)
		dmat->bounce_zone->map_count--;
	if (map->flags & DMAMAP_COHERENT)
		atomic_subtract_32(&maps_coherent, 1);
	atomic_subtract_32(&maps_total, 1);
	free(map, M_BUSDMA);
	dmat->map_count--;
	CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat);
	return (0);
}
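
#if 0
/*
 * Editor's sketch, not compiled: the usual create/load pairing seen from a
 * hypothetical driver's point of view.  The example_* names are invented; the
 * bus_dmamap_create()/bus_dmamap_load() calls and the callback signature are
 * the standard busdma interfaces implemented in this file.
 */
static void
example_load_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	bus_addr_t *busaddrp = arg;

	if (error == 0 && nseg == 1)
		*busaddrp = segs[0].ds_addr;	/* hand the address to the HW */
}

static int
example_map_buffer(bus_dma_tag_t tag, void *buf, bus_size_t len,
    bus_dmamap_t *mapp, bus_addr_t *busaddrp)
{
	int error;

	error = bus_dmamap_create(tag, 0, mapp);
	if (error != 0)
		return (error);
	/* May bounce; BUS_DMA_NOWAIT forbids deferring the callback. */
	error = bus_dmamap_load(tag, *mapp, buf, len, example_load_cb,
	    busaddrp, BUS_DMA_NOWAIT);
	if (error != 0)
		bus_dmamap_destroy(tag, *mapp);
	return (error);
}
#endif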

/*
 * Allocate a piece of memory that can be efficiently mapped into bus device
 * space based on the constraints listed in the dma tag.  Returns a pointer to
 * the allocated memory, and a pointer to an associated bus_dmamap.
 */
int
bus_dmamem_alloc(bus_dma_tag_t dmat, void **vaddr, int flags,
    bus_dmamap_t *mapp)
{
	busdma_bufalloc_t ba;
	struct busdma_bufzone *bufzone;
	bus_dmamap_t map;
	vm_memattr_t memattr;
	int mflags;

	if (flags & BUS_DMA_NOWAIT)
		mflags = M_NOWAIT;
	else
		mflags = M_WAITOK;
	if (flags & BUS_DMA_ZERO)
		mflags |= M_ZERO;

	*mapp = map = allocate_map(dmat, mflags);
	if (map == NULL) {
		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
		    __func__, dmat, dmat->flags, ENOMEM);
		return (ENOMEM);
	}
	map->flags = DMAMAP_DMAMEM_ALLOC;

	/* For coherent memory, set the map flag that disables sync ops. */
	if (flags & BUS_DMA_COHERENT)
		map->flags |= DMAMAP_COHERENT;

	/*
	 * Choose a busdma buffer allocator based on memory type flags.
	 * If the tag's COHERENT flag is set, that means normal memory
	 * is already coherent, use the normal allocator.
	 */
	if ((flags & BUS_DMA_COHERENT) &&
	    ((dmat->flags & BUS_DMA_COHERENT) == 0)) {
		memattr = VM_MEMATTR_UNCACHEABLE;
		ba = coherent_allocator;
	} else {
		memattr = VM_MEMATTR_DEFAULT;
		ba = standard_allocator;
	}

	/*
	 * Try to find a bufzone in the allocator that holds a cache of buffers
	 * of the right size for this request.  If the buffer is too big to be
	 * held in the allocator cache, this returns NULL.
	 */
	bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize);

	/*
	 * Allocate the buffer from the uma(9) allocator if...
	 *  - It's small enough to be in the allocator (bufzone not NULL).
	 *  - The alignment constraint isn't larger than the allocation size
	 *    (the allocator aligns buffers to their size boundaries).
	 *  - There's no need to handle lowaddr/highaddr exclusion zones.
	 * else allocate non-contiguous pages if...
	 *  - The page count that could get allocated doesn't exceed
	 *    nsegments even when the maximum segment size is less
	 *    than PAGE_SIZE.
	 *  - The alignment constraint isn't larger than a page boundary.
	 *  - There are no boundary-crossing constraints.
	 * else allocate a block of contiguous pages because one or more of the
	 * constraints is something that only the contig allocator can fulfill.
	 */
	if (bufzone != NULL && dmat->alignment <= bufzone->size &&
	    !exclusion_bounce(dmat)) {
		*vaddr = uma_zalloc(bufzone->umazone, mflags);
	} else if (dmat->nsegments >=
	    howmany(dmat->maxsize, MIN(dmat->maxsegsz, PAGE_SIZE)) &&
	    dmat->alignment <= PAGE_SIZE &&
	    (dmat->boundary % PAGE_SIZE) == 0) {
		*vaddr = kmem_alloc_attr(dmat->maxsize, mflags, 0,
		    dmat->lowaddr, memattr);
	} else {
		*vaddr = kmem_alloc_contig(dmat->maxsize, mflags, 0,
		    dmat->lowaddr, dmat->alignment, dmat->boundary, memattr);
	}
	if (*vaddr == NULL) {
		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
		    __func__, dmat, dmat->flags, ENOMEM);
		free(map, M_BUSDMA);
		*mapp = NULL;
		return (ENOMEM);
	}
	if (map->flags & DMAMAP_COHERENT)
		atomic_add_32(&maps_coherent, 1);
	atomic_add_32(&maps_dmamem, 1);
	atomic_add_32(&maps_total, 1);
	dmat->map_count++;

	CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
	    __func__, dmat, dmat->flags, 0);
	return (0);
}
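
#if 0
/*
 * Editor's sketch, not compiled: how a hypothetical driver might obtain a
 * coherent descriptor ring with the allocator above.  The example_* name is
 * invented for illustration; the flags are the standard busdma flags.
 */
static int
example_alloc_ring(bus_dma_tag_t ring_tag, void **ringp, bus_dmamap_t *mapp)
{

	/*
	 * BUS_DMA_COHERENT picks the uncacheable allocator unless the platform
	 * already provides coherent memory (BUS_DMA_COHERENT set in the tag),
	 * and marks the map so cache sync ops become no-ops.
	 */
	return (bus_dmamem_alloc(ring_tag, ringp,
	    BUS_DMA_WAITOK | BUS_DMA_ZERO | BUS_DMA_COHERENT, mapp));
}
#endif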

/*
 * Free a piece of memory that was allocated via bus_dmamem_alloc, along with
 * its associated map.
 */
void
bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map)
{
	struct busdma_bufzone *bufzone;
	busdma_bufalloc_t ba;

	if ((map->flags & DMAMAP_COHERENT) &&
	    ((dmat->flags & BUS_DMA_COHERENT) == 0))
		ba = coherent_allocator;
	else
		ba = standard_allocator;

	bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize);

	if (bufzone != NULL && dmat->alignment <= bufzone->size &&
	    !exclusion_bounce(dmat))
		uma_zfree(bufzone->umazone, vaddr);
	else
		kmem_free(vaddr, dmat->maxsize);

	dmat->map_count--;
	if (map->flags & DMAMAP_COHERENT)
		atomic_subtract_32(&maps_coherent, 1);
	atomic_subtract_32(&maps_total, 1);
	atomic_subtract_32(&maps_dmamem, 1);
	free(map, M_BUSDMA);
	CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->flags);
}

static void
_bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf,
    bus_size_t buflen, int flags)
{
	bus_addr_t curaddr;
	bus_size_t sgsize;

	if (map->pagesneeded == 0) {
		CTR5(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d"
		    " map= %p, pagesneeded= %d",
		    dmat->lowaddr, dmat->boundary, dmat->alignment,
		    map, map->pagesneeded);
		/*
		 * Count the number of bounce pages
		 * needed in order to complete this transfer
		 */
		curaddr = buf;
		while (buflen != 0) {
			sgsize = buflen;
			if (must_bounce(dmat, map, curaddr, sgsize) != 0) {
				sgsize = MIN(sgsize,
				    PAGE_SIZE - (curaddr & PAGE_MASK));
				map->pagesneeded++;
			}
			curaddr += sgsize;
			buflen -= sgsize;
		}
		CTR1(KTR_BUSDMA, "pagesneeded= %d", map->pagesneeded);
	}
}

static void
_bus_dmamap_count_pages(bus_dma_tag_t dmat, pmap_t pmap, bus_dmamap_t map,
    void *buf, bus_size_t buflen, int flags)
{
	vm_offset_t vaddr;
	vm_offset_t vendaddr;
	bus_addr_t paddr;
	bus_size_t sg_len;

	if (map->pagesneeded == 0) {
		CTR5(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d"
		    " map= %p, pagesneeded= %d",
		    dmat->lowaddr, dmat->boundary, dmat->alignment,
		    map, map->pagesneeded);
		/*
		 * Count the number of bounce pages
		 * needed in order to complete this transfer
		 */
		vaddr = (vm_offset_t)buf;
		vendaddr = (vm_offset_t)buf + buflen;

		while (vaddr < vendaddr) {
			sg_len = MIN(vendaddr - vaddr,
			    (PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK)));
			if (__predict_true(pmap == kernel_pmap))
				paddr = pmap_kextract(vaddr);
			else
				paddr = pmap_extract(pmap, vaddr);
			if (must_bounce(dmat, map, paddr, sg_len) != 0)
				map->pagesneeded++;
			vaddr += sg_len;
		}
		CTR1(KTR_BUSDMA, "pagesneeded= %d", map->pagesneeded);
	}
}
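
/*
 * Worked example (editor's illustration; 4KB pages and 64-byte cachelines
 * assumed, and no exclusion or alignment constraints in the tag): a 5000-byte
 * malloc(9) buffer whose first byte sits at offset 0xff0 within its page is
 * walked by the loops above as three chunks of 16, 4096, and 888 bytes.  The
 * 16-byte head and 888-byte tail are not whole cachelines, so must_bounce() is
 * true for them and pagesneeded becomes 2; the page-aligned middle chunk can be
 * synced in place and is not counted.
 */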

/*
 * Utility function to load a physical buffer.  segp contains
 * the starting segment on entrance, and the ending segment on exit.
 */
int
_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf,
    bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp)
{
	bus_addr_t curaddr;
	bus_addr_t sl_end = 0;
	bus_size_t sgsize;
	struct sync_list *sl;
	int error;

	if (segs == NULL)
		segs = map->segments;

#ifdef ARM_BUSDMA_MAPLOAD_STATS
	counter_u64_add(maploads_total, 1);
	counter_u64_add(maploads_physmem, 1);
#endif

	if (might_bounce(dmat, map, (bus_addr_t)buf, buflen)) {
		_bus_dmamap_count_phys(dmat, map, buf, buflen, flags);
		if (map->pagesneeded != 0) {
#ifdef ARM_BUSDMA_MAPLOAD_STATS
			counter_u64_add(maploads_bounced, 1);
#endif
			error = _bus_dmamap_reserve_pages(dmat, map, flags);
			if (error)
				return (error);
		}
	}

	sl = map->slist + map->sync_count - 1;

	while (buflen > 0) {
		curaddr = buf;
		sgsize = buflen;
		if (map->pagesneeded != 0 && must_bounce(dmat, map, curaddr,
		    sgsize)) {
			sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK));
			curaddr = add_bounce_page(dmat, map, 0, curaddr,
			    sgsize);
		} else if ((dmat->flags & BUS_DMA_COHERENT) == 0) {
			if (map->sync_count > 0)
				sl_end = sl->paddr + sl->datacount;

			if (map->sync_count == 0 || curaddr != sl_end) {
				if (++map->sync_count > dmat->nsegments)
					break;
				sl++;
				sl->vaddr = 0;
				sl->paddr = curaddr;
				sl->datacount = sgsize;
				sl->pages = PHYS_TO_VM_PAGE(curaddr);
				KASSERT(sl->pages != NULL,
				    ("%s: page at PA:0x%08lx is not in "
				    "vm_page_array", __func__, curaddr));
			} else
				sl->datacount += sgsize;
		}
		if (!_bus_dmamap_addsegs(dmat, map, curaddr, sgsize, segs,
		    segp))
			break;
		buf += sgsize;
		buflen -= sgsize;
	}

	/*
	 * Did we fit?
	 */
	if (buflen != 0) {
		bus_dmamap_unload(dmat, map);
		return (EFBIG); /* XXX better return value here? */
	}
	return (0);
}

int
_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map,
    struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
    bus_dma_segment_t *segs, int *segp)
{

	return (bus_dmamap_load_ma_triv(dmat, map, ma, tlen, ma_offs, flags,
	    segs, segp));
}

/*
 * Utility function to load a linear buffer.  segp contains
 * the starting segment on entrance, and the ending segment on exit.
 */
int
_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf,
    bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	bus_size_t sgsize;
	bus_addr_t curaddr;
	bus_addr_t sl_pend = 0;
	vm_offset_t kvaddr, vaddr, sl_vend = 0;
	struct sync_list *sl;
	int error;

#ifdef ARM_BUSDMA_MAPLOAD_STATS
	counter_u64_add(maploads_total, 1);
	if (map->flags & DMAMAP_COHERENT)
		counter_u64_add(maploads_coherent, 1);
	if (map->flags & DMAMAP_DMAMEM_ALLOC)
		counter_u64_add(maploads_dmamem, 1);
#endif

	if (segs == NULL)
		segs = map->segments;

	if (flags & BUS_DMA_LOAD_MBUF) {
#ifdef ARM_BUSDMA_MAPLOAD_STATS
		counter_u64_add(maploads_mbuf, 1);
#endif
		map->flags |= DMAMAP_MBUF;
	}

	if (might_bounce(dmat, map, (bus_addr_t)buf, buflen)) {
		_bus_dmamap_count_pages(dmat, pmap, map, buf, buflen, flags);
		if (map->pagesneeded != 0) {
#ifdef ARM_BUSDMA_MAPLOAD_STATS
			counter_u64_add(maploads_bounced, 1);
#endif
			error = _bus_dmamap_reserve_pages(dmat, map, flags);
			if (error)
				return (error);
		}
	}

	sl = map->slist + map->sync_count - 1;
	vaddr = (vm_offset_t)buf;

	while (buflen > 0) {
		/*
		 * Get the physical address for this segment.
		 */
		if (__predict_true(pmap == kernel_pmap)) {
			curaddr = pmap_kextract(vaddr);
			kvaddr = vaddr;
		} else {
			curaddr = pmap_extract(pmap, vaddr);
			kvaddr = 0;
		}

		/*
		 * Compute the segment size, and adjust counts.
		 */
		sgsize = MIN(buflen, PAGE_SIZE - (curaddr & PAGE_MASK));

		if (map->pagesneeded != 0 && must_bounce(dmat, map, curaddr,
		    sgsize)) {
			curaddr = add_bounce_page(dmat, map, kvaddr, curaddr,
			    sgsize);
		} else if ((dmat->flags & BUS_DMA_COHERENT) == 0) {
			if (map->sync_count > 0) {
				sl_pend = sl->paddr + sl->datacount;
				sl_vend = sl->vaddr + sl->datacount;
			}

			if (map->sync_count == 0 ||
			    (kvaddr != 0 && kvaddr != sl_vend) ||
			    (curaddr != sl_pend)) {
				if (++map->sync_count > dmat->nsegments)
					goto cleanup;
				sl++;
				sl->vaddr = kvaddr;
				sl->paddr = curaddr;
				if (kvaddr != 0) {
					sl->pages = NULL;
				} else {
					sl->pages = PHYS_TO_VM_PAGE(curaddr);
					KASSERT(sl->pages != NULL,
					    ("%s: page at PA:0x%08lx is not "
					    "in vm_page_array", __func__,
					    curaddr));
				}
				sl->datacount = sgsize;
			} else
				sl->datacount += sgsize;
		}
		if (!_bus_dmamap_addsegs(dmat, map, curaddr, sgsize, segs,
		    segp))
			break;
		vaddr += sgsize;
		buflen -= MIN(sgsize, buflen); /* avoid underflow */
	}

cleanup:
	/*
	 * Did we fit?
	 */
	if (buflen != 0) {
		bus_dmamap_unload(dmat, map);
		return (EFBIG); /* XXX better return value here? */
	}
	return (0);
}

void
_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem,
    bus_dmamap_callback_t *callback, void *callback_arg)
{

	map->mem = *mem;
	map->dmat = dmat;
	map->callback = callback;
	map->callback_arg = callback_arg;
}

bus_dma_segment_t *
_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map,
    bus_dma_segment_t *segs, int nsegs, int error)
{

	if (segs == NULL)
		segs = map->segments;
	return (segs);
}

/*
 * Release the mapping held by map.
 */
void
bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map)
{
	struct bounce_zone *bz;

	if ((bz = dmat->bounce_zone) != NULL) {
		free_bounce_pages(dmat, map);

		if (map->pagesreserved != 0) {
			mtx_lock(&bounce_lock);
			bz->free_bpages += map->pagesreserved;
			bz->reserved_bpages -= map->pagesreserved;
			mtx_unlock(&bounce_lock);
			map->pagesreserved = 0;
		}
		map->pagesneeded = 0;
	}
	map->sync_count = 0;
	map->flags &= ~DMAMAP_MBUF;
}

static void
dma_preread_safe(vm_offset_t va, vm_paddr_t pa, vm_size_t size)
{
	/*
	 * Write back any partial cachelines immediately before and
	 * after the DMA region.  We don't need to round the address
	 * down to the nearest cacheline or specify the exact size,
	 * as dcache_wb_poc() will do the rounding for us and works
	 * at cacheline granularity.
	 */
	if (va & BUSDMA_DCACHE_MASK)
		dcache_wb_poc(va, pa, 1);
	if ((va + size) & BUSDMA_DCACHE_MASK)
		dcache_wb_poc(va + size, pa + size, 1);

	dcache_inv_poc_dma(va, pa, size);
}

static void
dma_dcache_sync(struct sync_list *sl, bus_dmasync_op_t op)
{
	uint32_t len, offset;
	vm_page_t m;
	vm_paddr_t pa;
	vm_offset_t va, tempva;
	bus_size_t size;

	offset = sl->paddr & PAGE_MASK;
	m = sl->pages;
	size = sl->datacount;
	pa = sl->paddr;

	for ( ; size != 0; size -= len, pa += len, offset = 0, ++m) {
		tempva = 0;
		if (sl->vaddr == 0) {
			len = min(PAGE_SIZE - offset, size);
			tempva = pmap_quick_enter_page(m);
			va = tempva | offset;
			KASSERT(pa == (VM_PAGE_TO_PHYS(m) | offset),
			    ("unexpected vm_page_t phys: 0x%08x != 0x%08x",
			    VM_PAGE_TO_PHYS(m) | offset, pa));
		} else {
			len = sl->datacount;
			va = sl->vaddr;
		}

		switch (op) {
		case BUS_DMASYNC_PREWRITE:
		case BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD:
			dcache_wb_poc(va, pa, len);
			break;
		case BUS_DMASYNC_PREREAD:
			/*
			 * An mbuf may start in the middle of a cacheline.
			 * There will be no cpu writes to the beginning of
			 * that line (which contains the mbuf header) while
			 * dma is in progress.  Handle that case by doing a
			 * writeback of just the first cacheline before
			 * invalidating the overall buffer.  Any mbuf in a
			 * chain may have this misalignment.  Buffers which
			 * are not mbufs bounce if they are not aligned to a
			 * cacheline.
			 */
			dma_preread_safe(va, pa, len);
			break;
		case BUS_DMASYNC_POSTREAD:
		case BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE:
			dcache_inv_poc(va, pa, len);
			break;
		default:
			panic("unsupported combination of sync operations: "
			    "0x%08x\n", op);
		}

		if (tempva != 0)
			pmap_quick_remove_page(tempva);
	}
}

void
bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op)
{
	struct bounce_page *bpage;
	struct sync_list *sl, *end;
	vm_offset_t datavaddr, tempvaddr;

	if (op == BUS_DMASYNC_POSTWRITE)
		return;

	/*
	 * If the buffer was from user space, it is possible that this is not
	 * the same vm map, especially on a POST operation.  It's not clear that
	 * dma on userland buffers can work at all right now.  To be safe, until
	 * we're able to test direct userland dma, panic on a map mismatch.
	 */
	if ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) {
		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x "
		    "performing bounce", __func__, dmat, dmat->flags, op);

		/*
		 * For PREWRITE do a writeback.  Clean the caches from the
		 * innermost to the outermost levels.
		 */
		if (op & BUS_DMASYNC_PREWRITE) {
			while (bpage != NULL) {
				tempvaddr = 0;
				datavaddr = bpage->datavaddr;
				if (datavaddr == 0) {
					tempvaddr = pmap_quick_enter_page(
					    bpage->datapage);
					datavaddr = tempvaddr | bpage->dataoffs;
				}
				bcopy((void *)datavaddr, (void *)bpage->vaddr,
				    bpage->datacount);
				if (tempvaddr != 0)
					pmap_quick_remove_page(tempvaddr);
				if ((dmat->flags & BUS_DMA_COHERENT) == 0)
					dcache_wb_poc(bpage->vaddr,
					    bpage->busaddr, bpage->datacount);
				bpage = STAILQ_NEXT(bpage, links);
			}
			dmat->bounce_zone->total_bounced++;
		}

		/*
		 * Do an invalidate for PREREAD unless a writeback was already
		 * done above due to PREWRITE also being set.  The reason for a
		 * PREREAD invalidate is to prevent dirty lines currently in the
		 * cache from being evicted during the DMA.  If a writeback was
		 * done due to PREWRITE also being set there will be no dirty
		 * lines and the POSTREAD invalidate handles the rest.  The
		 * invalidate is done from the innermost to outermost level.  If
		 * L2 were done first, a dirty cacheline could be automatically
		 * evicted from L1 before we invalidated it, re-dirtying the L2.
		 */
		if ((op & BUS_DMASYNC_PREREAD) && !(op & BUS_DMASYNC_PREWRITE)) {
			bpage = STAILQ_FIRST(&map->bpages);
			while (bpage != NULL) {
				if ((dmat->flags & BUS_DMA_COHERENT) == 0)
					dcache_inv_poc_dma(bpage->vaddr,
					    bpage->busaddr, bpage->datacount);
				bpage = STAILQ_NEXT(bpage, links);
			}
		}

		/*
		 * Re-invalidate the caches on a POSTREAD, even though they were
		 * already invalidated at PREREAD time.  Aggressive prefetching
		 * due to accesses to other data near the dma buffer could have
		 * brought buffer data into the caches which is now stale.  The
		 * caches are invalidated from the outermost to innermost; the
		 * prefetches could be happening right now, and if L1 were
		 * invalidated first, stale L2 data could be prefetched into L1.
		 */
		if (op & BUS_DMASYNC_POSTREAD) {
			while (bpage != NULL) {
				if ((dmat->flags & BUS_DMA_COHERENT) == 0)
					dcache_inv_poc(bpage->vaddr,
					    bpage->busaddr, bpage->datacount);
				tempvaddr = 0;
				datavaddr = bpage->datavaddr;
				if (datavaddr == 0) {
					tempvaddr = pmap_quick_enter_page(
					    bpage->datapage);
					datavaddr = tempvaddr | bpage->dataoffs;
				}
				bcopy((void *)bpage->vaddr, (void *)datavaddr,
				    bpage->datacount);
				if (tempvaddr != 0)
					pmap_quick_remove_page(tempvaddr);
				bpage = STAILQ_NEXT(bpage, links);
			}
			dmat->bounce_zone->total_bounced++;
		}
	}

	/*
	 * For COHERENT memory no cache maintenance is necessary, but ensure all
	 * writes have reached memory for the PREWRITE case.  No action is
	 * needed for a PREREAD without PREWRITE also set, because that would
	 * imply that the cpu had written to the COHERENT buffer and expected
	 * the dma device to see that change, and by definition a PREWRITE sync
	 * is required to make that happen.
	 */
	if (map->flags & DMAMAP_COHERENT) {
		if (op & BUS_DMASYNC_PREWRITE) {
			dsb();
			if ((dmat->flags & BUS_DMA_COHERENT) == 0)
				cpu_l2cache_drain_writebuf();
		}
		return;
	}

	/*
	 * Cache maintenance for normal (non-COHERENT non-bounce) buffers.  All
	 * the comments about the sequences for flushing cache levels in the
	 * bounce buffer code above apply here as well.  In particular, the fact
	 * that the sequence is inner-to-outer for PREREAD invalidation and
	 * outer-to-inner for POSTREAD invalidation is not a mistake.
	 */
	if (map->sync_count != 0) {
		sl = &map->slist[0];
		end = &map->slist[map->sync_count];
		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x "
		    "performing sync", __func__, dmat, dmat->flags, op);

		for ( ; sl != end; ++sl)
			dma_dcache_sync(sl, op);
	}
}
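
#if 0
/*
 * Editor's sketch, not compiled: the sync protocol as a hypothetical driver
 * would apply it around a device-read (DMA into memory) transaction.  The
 * example_* name and the "start hardware / wait" steps are invented; the
 * bus_dmamap_sync() and bus_dmamap_unload() calls are the interfaces
 * implemented above.
 */
static void
example_rx_cycle(bus_dma_tag_t tag, bus_dmamap_t map)
{

	/* Before handing the buffer to the device: writeback/invalidate. */
	bus_dmamap_sync(tag, map, BUS_DMASYNC_PREREAD);

	/* ... start the hardware DMA and wait for its completion ... */

	/*
	 * After the device is done: invalidate again and copy any bounce
	 * pages back, then the CPU may safely read the received data.
	 */
	bus_dmamap_sync(tag, map, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(tag, map);
}
#endif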