/*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2013 Daisuke Aoyama * Copyright (c) 2013 Oleksandr Tymoshenko * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "bcm2835_dma.h" #include "bcm2835_vcbus.h" #define MAX_REG 9 /* private flags */ #define BCM_DMA_CH_USED 0x00000001 #define BCM_DMA_CH_FREE 0x40000000 #define BCM_DMA_CH_UNMAP 0x80000000 /* Register Map (4.2.1.2) */ #define BCM_DMA_CS(n) (0x100*(n) + 0x00) #define CS_ACTIVE (1 << 0) #define CS_END (1 << 1) #define CS_INT (1 << 2) #define CS_DREQ (1 << 3) #define CS_ISPAUSED (1 << 4) #define CS_ISHELD (1 << 5) #define CS_ISWAIT (1 << 6) #define CS_ERR (1 << 8) #define CS_WAITWRT (1 << 28) #define CS_DISDBG (1 << 29) #define CS_ABORT (1 << 30) #define CS_RESET (1U << 31) #define BCM_DMA_CBADDR(n) (0x100*(n) + 0x04) #define BCM_DMA_INFO(n) (0x100*(n) + 0x08) #define INFO_INT_EN (1 << 0) #define INFO_TDMODE (1 << 1) #define INFO_WAIT_RESP (1 << 3) #define INFO_D_INC (1 << 4) #define INFO_D_WIDTH (1 << 5) #define INFO_D_DREQ (1 << 6) #define INFO_S_INC (1 << 8) #define INFO_S_WIDTH (1 << 9) #define INFO_S_DREQ (1 << 10) #define INFO_WAITS_SHIFT (21) #define INFO_PERMAP_SHIFT (16) #define INFO_PERMAP_MASK (0x1f << INFO_PERMAP_SHIFT) #define BCM_DMA_SRC(n) (0x100*(n) + 0x0C) #define BCM_DMA_DST(n) (0x100*(n) + 0x10) #define BCM_DMA_LEN(n) (0x100*(n) + 0x14) #define BCM_DMA_STRIDE(n) (0x100*(n) + 0x18) #define BCM_DMA_CBNEXT(n) (0x100*(n) + 0x1C) #define BCM_DMA_DEBUG(n) (0x100*(n) + 0x20) #define DEBUG_ERROR_MASK (7) #define BCM_DMA_INT_STATUS 0xfe0 #define BCM_DMA_ENABLE 0xff0 /* relative offset from BCM_VC_DMA0_BASE (p.39) */ #define BCM_DMA_CH(n) (0x100*(n)) /* channels used by GPU */ #define BCM_DMA_CH_BULK 0 #define BCM_DMA_CH_FAST1 2 #define BCM_DMA_CH_FAST2 3 #define BCM_DMA_CH_GPU_MASK ((1 << BCM_DMA_CH_BULK) | \ (1 << BCM_DMA_CH_FAST1) | \ (1 << BCM_DMA_CH_FAST2)) /* DMA Control Block - 256bit aligned (p.40) */ struct bcm_dma_cb { uint32_t info; /* Transfer Information */ uint32_t src; /* Source Address */ uint32_t dst; /* Destination Address */ uint32_t len; /* Transfer Length */ uint32_t stride; /* 2D Mode Stride */ uint32_t next; /* Next Control Block Address */ uint32_t rsvd1; /* Reserved */ uint32_t rsvd2; /* Reserved */ }; #ifdef DEBUG static void bcm_dma_cb_dump(struct bcm_dma_cb *cb); static void bcm_dma_reg_dump(int ch); #endif /* DMA channel private info */ struct bcm_dma_ch { int ch; uint32_t flags; struct bcm_dma_cb * cb; uint32_t vc_cb; bus_dmamap_t dma_map; void (*intr_func)(int, void *); void * intr_arg; }; struct bcm_dma_softc { device_t sc_dev; struct mtx sc_mtx; struct resource * sc_mem; struct resource * sc_irq[BCM_DMA_CH_MAX]; void * sc_intrhand[BCM_DMA_CH_MAX]; struct bcm_dma_ch sc_dma_ch[BCM_DMA_CH_MAX]; bus_dma_tag_t sc_dma_tag; }; static struct bcm_dma_softc *bcm_dma_sc = NULL; static uint32_t bcm_dma_channel_mask; static struct ofw_compat_data compat_data[] = { {"broadcom,bcm2835-dma", 1}, {"brcm,bcm2835-dma", 1}, {NULL, 0} }; static void bcm_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int err) { bus_addr_t *addr; if (err) return; addr = (bus_addr_t*)arg; *addr = ARMC_TO_VCBUS(segs[0].ds_addr); } static void bcm_dma_reset(device_t dev, int ch) { struct bcm_dma_softc *sc = device_get_softc(dev); struct bcm_dma_cb *cb; uint32_t cs; int count; if (ch < 0 || ch >= BCM_DMA_CH_MAX) return; cs = bus_read_4(sc->sc_mem, BCM_DMA_CS(ch)); if (cs & CS_ACTIVE) { /* pause current task */ bus_write_4(sc->sc_mem, BCM_DMA_CS(ch), 0); count = 1000; do { cs = bus_read_4(sc->sc_mem, BCM_DMA_CS(ch)); } while (!(cs & CS_ISPAUSED) && (count-- > 0)); if (!(cs & CS_ISPAUSED)) { device_printf(dev, "Can't abort DMA transfer at channel %d\n", ch); } bus_write_4(sc->sc_mem, BCM_DMA_CBNEXT(ch), 0); /* Complete everything, clear interrupt */ bus_write_4(sc->sc_mem, BCM_DMA_CS(ch), CS_ABORT | CS_INT | CS_END| CS_ACTIVE); } /* clear control blocks */ bus_write_4(sc->sc_mem, BCM_DMA_CBADDR(ch), 0); bus_write_4(sc->sc_mem, BCM_DMA_CBNEXT(ch), 0); /* Reset control block */ cb = sc->sc_dma_ch[ch].cb; bzero(cb, sizeof(*cb)); cb->info = INFO_WAIT_RESP; } static int bcm_dma_init(device_t dev) { struct bcm_dma_softc *sc = device_get_softc(dev); uint32_t reg; struct bcm_dma_ch *ch; void *cb_virt; vm_paddr_t cb_phys; int err; int i; /* * Only channels set in bcm_dma_channel_mask can be controlled by us. * The others are out of our control as well as the corresponding bits * in both BCM_DMA_ENABLE and BCM_DMA_INT_STATUS global registers. As * these registers are RW ones, there is no safe way how to write only * the bits which can be controlled by us. * * Fortunately, after reset, all channels are enabled in BCM_DMA_ENABLE * register and all statuses are cleared in BCM_DMA_INT_STATUS one. * Not touching these registers is a trade off between correct * initialization which does not count on anything and not messing up * something we have no control over. */ reg = bus_read_4(sc->sc_mem, BCM_DMA_ENABLE); if ((reg & bcm_dma_channel_mask) != bcm_dma_channel_mask) device_printf(dev, "channels are not enabled\n"); reg = bus_read_4(sc->sc_mem, BCM_DMA_INT_STATUS); if ((reg & bcm_dma_channel_mask) != 0) device_printf(dev, "statuses are not cleared\n"); /* * Allocate DMA chunks control blocks based on p.40 of the peripheral * spec - control block should be 32-bit aligned. The DMA controller * has a full 32-bit register dedicated to this address, so we do not * need to bother with the per-SoC peripheral restrictions. */ err = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, sizeof(struct bcm_dma_cb), 1, sizeof(struct bcm_dma_cb), BUS_DMA_ALLOCNOW, NULL, NULL, &sc->sc_dma_tag); if (err) { device_printf(dev, "failed allocate DMA tag\n"); return (err); } /* setup initial settings */ for (i = 0; i < BCM_DMA_CH_MAX; i++) { ch = &sc->sc_dma_ch[i]; bzero(ch, sizeof(struct bcm_dma_ch)); ch->ch = i; ch->flags = BCM_DMA_CH_UNMAP; if ((bcm_dma_channel_mask & (1 << i)) == 0) continue; err = bus_dmamem_alloc(sc->sc_dma_tag, &cb_virt, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &ch->dma_map); if (err) { device_printf(dev, "cannot allocate DMA memory\n"); break; } /* * Least alignment for busdma-allocated stuff is cache * line size, so just make sure nothing stupid happened * and we got properly aligned address */ if ((uintptr_t)cb_virt & 0x1f) { device_printf(dev, "DMA address is not 32-bytes aligned: %p\n", (void*)cb_virt); break; } err = bus_dmamap_load(sc->sc_dma_tag, ch->dma_map, cb_virt, sizeof(struct bcm_dma_cb), bcm_dmamap_cb, &cb_phys, BUS_DMA_WAITOK); if (err) { device_printf(dev, "cannot load DMA memory\n"); break; } ch->cb = cb_virt; ch->vc_cb = cb_phys; ch->flags = BCM_DMA_CH_FREE; ch->cb->info = INFO_WAIT_RESP; /* reset DMA engine */ bus_write_4(sc->sc_mem, BCM_DMA_CS(i), CS_RESET); } return (0); } /* * Allocate DMA channel for further use, returns channel # or * BCM_DMA_CH_INVALID */ int bcm_dma_allocate(int req_ch) { struct bcm_dma_softc *sc = bcm_dma_sc; int ch = BCM_DMA_CH_INVALID; int i; if (sc == NULL) return (BCM_DMA_CH_INVALID); if (req_ch >= BCM_DMA_CH_MAX) return (BCM_DMA_CH_INVALID); /* Auto(req_ch < 0) or CH specified */ mtx_lock(&sc->sc_mtx); if (req_ch < 0) { for (i = 0; i < BCM_DMA_CH_MAX; i++) { if (sc->sc_dma_ch[i].flags & BCM_DMA_CH_FREE) { ch = i; sc->sc_dma_ch[ch].flags &= ~BCM_DMA_CH_FREE; sc->sc_dma_ch[ch].flags |= BCM_DMA_CH_USED; break; } } } else if (sc->sc_dma_ch[req_ch].flags & BCM_DMA_CH_FREE) { ch = req_ch; sc->sc_dma_ch[ch].flags &= ~BCM_DMA_CH_FREE; sc->sc_dma_ch[ch].flags |= BCM_DMA_CH_USED; } mtx_unlock(&sc->sc_mtx); return (ch); } /* * Frees allocated channel. Returns 0 on success, -1 otherwise */ int bcm_dma_free(int ch) { struct bcm_dma_softc *sc = bcm_dma_sc; if (sc == NULL) return (-1); if (ch < 0 || ch >= BCM_DMA_CH_MAX) return (-1); mtx_lock(&sc->sc_mtx); if (sc->sc_dma_ch[ch].flags & BCM_DMA_CH_USED) { sc->sc_dma_ch[ch].flags |= BCM_DMA_CH_FREE; sc->sc_dma_ch[ch].flags &= ~BCM_DMA_CH_USED; sc->sc_dma_ch[ch].intr_func = NULL; sc->sc_dma_ch[ch].intr_arg = NULL; /* reset DMA engine */ bcm_dma_reset(sc->sc_dev, ch); } mtx_unlock(&sc->sc_mtx); return (0); } /* * Assign handler function for channel interrupt * Returns 0 on success, -1 otherwise */ int bcm_dma_setup_intr(int ch, void (*func)(int, void *), void *arg) { struct bcm_dma_softc *sc = bcm_dma_sc; struct bcm_dma_cb *cb; if (sc == NULL) return (-1); if (ch < 0 || ch >= BCM_DMA_CH_MAX) return (-1); if (!(sc->sc_dma_ch[ch].flags & BCM_DMA_CH_USED)) return (-1); sc->sc_dma_ch[ch].intr_func = func; sc->sc_dma_ch[ch].intr_arg = arg; cb = sc->sc_dma_ch[ch].cb; cb->info |= INFO_INT_EN; return (0); } /* * Setup DMA source parameters * ch - channel number * dreq - hardware DREQ # or BCM_DMA_DREQ_NONE if * source is physical memory * inc_addr - BCM_DMA_INC_ADDR if source address * should be increased after each access or * BCM_DMA_SAME_ADDR if address should remain * the same * width - size of read operation, BCM_DMA_32BIT * for 32bit bursts, BCM_DMA_128BIT for 128 bits * * Returns 0 on success, -1 otherwise */ int bcm_dma_setup_src(int ch, int dreq, int inc_addr, int width) { struct bcm_dma_softc *sc = bcm_dma_sc; uint32_t info; if (ch < 0 || ch >= BCM_DMA_CH_MAX) return (-1); if (!(sc->sc_dma_ch[ch].flags & BCM_DMA_CH_USED)) return (-1); info = sc->sc_dma_ch[ch].cb->info; info &= ~INFO_PERMAP_MASK; info |= (dreq << INFO_PERMAP_SHIFT) & INFO_PERMAP_MASK; if (dreq) info |= INFO_S_DREQ; else info &= ~INFO_S_DREQ; if (width == BCM_DMA_128BIT) info |= INFO_S_WIDTH; else info &= ~INFO_S_WIDTH; if (inc_addr == BCM_DMA_INC_ADDR) info |= INFO_S_INC; else info &= ~INFO_S_INC; sc->sc_dma_ch[ch].cb->info = info; return (0); } /* * Setup DMA destination parameters * ch - channel number * dreq - hardware DREQ # or BCM_DMA_DREQ_NONE if * destination is physical memory * inc_addr - BCM_DMA_INC_ADDR if source address * should be increased after each access or * BCM_DMA_SAME_ADDR if address should remain * the same * width - size of write operation, BCM_DMA_32BIT * for 32bit bursts, BCM_DMA_128BIT for 128 bits * * Returns 0 on success, -1 otherwise */ int bcm_dma_setup_dst(int ch, int dreq, int inc_addr, int width) { struct bcm_dma_softc *sc = bcm_dma_sc; uint32_t info; if (ch < 0 || ch >= BCM_DMA_CH_MAX) return (-1); if (!(sc->sc_dma_ch[ch].flags & BCM_DMA_CH_USED)) return (-1); info = sc->sc_dma_ch[ch].cb->info; info &= ~INFO_PERMAP_MASK; info |= (dreq << INFO_PERMAP_SHIFT) & INFO_PERMAP_MASK; if (dreq) info |= INFO_D_DREQ; else info &= ~INFO_D_DREQ; if (width == BCM_DMA_128BIT) info |= INFO_D_WIDTH; else info &= ~INFO_D_WIDTH; if (inc_addr == BCM_DMA_INC_ADDR) info |= INFO_D_INC; else info &= ~INFO_D_INC; sc->sc_dma_ch[ch].cb->info = info; return (0); } #ifdef DEBUG void bcm_dma_cb_dump(struct bcm_dma_cb *cb) { printf("DMA CB "); printf("INFO: %8.8x ", cb->info); printf("SRC: %8.8x ", cb->src); printf("DST: %8.8x ", cb->dst); printf("LEN: %8.8x ", cb->len); printf("\n"); printf("STRIDE: %8.8x ", cb->stride); printf("NEXT: %8.8x ", cb->next); printf("RSVD1: %8.8x ", cb->rsvd1); printf("RSVD2: %8.8x ", cb->rsvd2); printf("\n"); } void bcm_dma_reg_dump(int ch) { struct bcm_dma_softc *sc = bcm_dma_sc; int i; uint32_t reg; if (sc == NULL) return; if (ch < 0 || ch >= BCM_DMA_CH_MAX) return; printf("DMA%d: ", ch); for (i = 0; i < MAX_REG; i++) { reg = bus_read_4(sc->sc_mem, BCM_DMA_CH(ch) + i*4); printf("%8.8x ", reg); } printf("\n"); } #endif /* * Start DMA transaction * ch - channel number * src, dst - source and destination address in * ARM physical memory address space. * len - amount of bytes to be transferred * * Returns 0 on success, -1 otherwise */ int bcm_dma_start(int ch, vm_paddr_t src, vm_paddr_t dst, int len) { struct bcm_dma_softc *sc = bcm_dma_sc; struct bcm_dma_cb *cb; if (sc == NULL) return (-1); if (ch < 0 || ch >= BCM_DMA_CH_MAX) return (-1); if (!(sc->sc_dma_ch[ch].flags & BCM_DMA_CH_USED)) return (-1); cb = sc->sc_dma_ch[ch].cb; cb->src = ARMC_TO_VCBUS(src); cb->dst = ARMC_TO_VCBUS(dst); cb->len = len; bus_dmamap_sync(sc->sc_dma_tag, sc->sc_dma_ch[ch].dma_map, BUS_DMASYNC_PREWRITE); bus_write_4(sc->sc_mem, BCM_DMA_CBADDR(ch), sc->sc_dma_ch[ch].vc_cb); bus_write_4(sc->sc_mem, BCM_DMA_CS(ch), CS_ACTIVE); #ifdef DEBUG bcm_dma_cb_dump(sc->sc_dma_ch[ch].cb); bcm_dma_reg_dump(ch); #endif return (0); } /* * Get length requested for DMA transaction * ch - channel number * * Returns size of transaction, 0 if channel is invalid */ uint32_t bcm_dma_length(int ch) { struct bcm_dma_softc *sc = bcm_dma_sc; struct bcm_dma_cb *cb; if (sc == NULL) return (0); if (ch < 0 || ch >= BCM_DMA_CH_MAX) return (0); if (!(sc->sc_dma_ch[ch].flags & BCM_DMA_CH_USED)) return (0); cb = sc->sc_dma_ch[ch].cb; return (cb->len); } static void bcm_dma_intr(void *arg) { struct bcm_dma_softc *sc = bcm_dma_sc; struct bcm_dma_ch *ch = (struct bcm_dma_ch *)arg; uint32_t cs, debug; /* my interrupt? */ cs = bus_read_4(sc->sc_mem, BCM_DMA_CS(ch->ch)); /* * Is it an active channel? Our diagnostics could be better here, but * it's not necessarily an easy task to resolve a rid/resource to an * actual irq number. We'd want to do this to set a flag indicating * whether the irq is shared or not, so we know to complain. */ if (!(ch->flags & BCM_DMA_CH_USED)) return; /* Again, we can't complain here. The same logic applies. */ if (!(cs & (CS_INT | CS_ERR))) return; if (cs & CS_ERR) { debug = bus_read_4(sc->sc_mem, BCM_DMA_DEBUG(ch->ch)); device_printf(sc->sc_dev, "DMA error %d on CH%d\n", debug & DEBUG_ERROR_MASK, ch->ch); bus_write_4(sc->sc_mem, BCM_DMA_DEBUG(ch->ch), debug & DEBUG_ERROR_MASK); bcm_dma_reset(sc->sc_dev, ch->ch); } if (cs & CS_INT) { /* acknowledge interrupt */ bus_write_4(sc->sc_mem, BCM_DMA_CS(ch->ch), CS_INT | CS_END); /* Prepare for possible access to len field */ bus_dmamap_sync(sc->sc_dma_tag, ch->dma_map, BUS_DMASYNC_POSTWRITE); /* save callback function and argument */ if (ch->intr_func) ch->intr_func(ch->ch, ch->intr_arg); } } static int bcm_dma_probe(device_t dev) { if (!ofw_bus_status_okay(dev)) return (ENXIO); if (ofw_bus_search_compatible(dev, compat_data)->ocd_data == 0) return (ENXIO); device_set_desc(dev, "BCM2835 DMA Controller"); return (BUS_PROBE_DEFAULT); } static int bcm_dma_attach(device_t dev) { struct bcm_dma_softc *sc = device_get_softc(dev); phandle_t node; int rid, err = 0; int i; sc->sc_dev = dev; if (bcm_dma_sc) return (ENXIO); for (i = 0; i < BCM_DMA_CH_MAX; i++) { sc->sc_irq[i] = NULL; sc->sc_intrhand[i] = NULL; } /* Get DMA channel mask. */ node = ofw_bus_get_node(sc->sc_dev); if (OF_getencprop(node, "brcm,dma-channel-mask", &bcm_dma_channel_mask, sizeof(bcm_dma_channel_mask)) == -1 && OF_getencprop(node, "broadcom,channels", &bcm_dma_channel_mask, sizeof(bcm_dma_channel_mask)) == -1) { device_printf(dev, "could not get channel mask property\n"); return (ENXIO); } /* Mask out channels used by GPU. */ bcm_dma_channel_mask &= ~BCM_DMA_CH_GPU_MASK; /* DMA0 - DMA14 */ rid = 0; sc->sc_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->sc_mem == NULL) { device_printf(dev, "could not allocate memory resource\n"); return (ENXIO); } /* IRQ DMA0 - DMA11 XXX NOT USE DMA12(spurious?) */ for (rid = 0; rid < BCM_DMA_CH_MAX; rid++) { if ((bcm_dma_channel_mask & (1 << rid)) == 0) continue; sc->sc_irq[rid] = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE | RF_SHAREABLE); if (sc->sc_irq[rid] == NULL) { device_printf(dev, "cannot allocate interrupt\n"); err = ENXIO; goto fail; } if (bus_setup_intr(dev, sc->sc_irq[rid], INTR_TYPE_MISC | INTR_MPSAFE, NULL, bcm_dma_intr, &sc->sc_dma_ch[rid], &sc->sc_intrhand[rid])) { device_printf(dev, "cannot setup interrupt handler\n"); err = ENXIO; goto fail; } } mtx_init(&sc->sc_mtx, "bcmdma", "bcmdma", MTX_DEF); bcm_dma_sc = sc; err = bcm_dma_init(dev); if (err) goto fail; return (err); fail: if (sc->sc_mem) bus_release_resource(dev, SYS_RES_MEMORY, 0, sc->sc_mem); for (i = 0; i < BCM_DMA_CH_MAX; i++) { if (sc->sc_intrhand[i]) bus_teardown_intr(dev, sc->sc_irq[i], sc->sc_intrhand[i]); if (sc->sc_irq[i]) bus_release_resource(dev, SYS_RES_IRQ, 0, sc->sc_irq[i]); } return (err); } static device_method_t bcm_dma_methods[] = { DEVMETHOD(device_probe, bcm_dma_probe), DEVMETHOD(device_attach, bcm_dma_attach), { 0, 0 } }; static driver_t bcm_dma_driver = { "bcm_dma", bcm_dma_methods, sizeof(struct bcm_dma_softc), }; EARLY_DRIVER_MODULE(bcm_dma, simplebus, bcm_dma_driver, 0, 0, BUS_PASS_SUPPORTDEV + BUS_PASS_ORDER_MIDDLE); MODULE_VERSION(bcm_dma, 1);