xref: /freebsd/sys/dev/qcom_ess_edma/qcom_ess_edma_tx.c (revision 4b15965daa99044daf184221b7c283bf7f2d7e66)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2021 Adrian Chadd <adrian@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice unmodified, this list of conditions, and the following
11  *    disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/bus.h>
35 
36 #include <sys/kernel.h>
37 #include <sys/module.h>
38 #include <sys/rman.h>
39 #include <sys/lock.h>
40 #include <sys/malloc.h>
41 #include <sys/mutex.h>
42 #include <sys/mbuf.h>
43 #include <sys/endian.h>
44 #include <sys/smp.h>
45 #include <sys/socket.h>
46 #include <sys/sockio.h>
47 
48 #include <net/if.h>
49 #include <net/if_var.h>
50 #include <net/if_media.h>
51 #include <net/ethernet.h>
52 
53 #include <machine/bus.h>
54 #include <machine/resource.h>
55 
56 #include <dev/qcom_ess_edma/qcom_ess_edma_var.h>
57 #include <dev/qcom_ess_edma/qcom_ess_edma_reg.h>
58 #include <dev/qcom_ess_edma/qcom_ess_edma_hw.h>
59 #include <dev/qcom_ess_edma/qcom_ess_edma_desc.h>
60 #include <dev/qcom_ess_edma/qcom_ess_edma_tx.h>
61 #include <dev/qcom_ess_edma/qcom_ess_edma_debug.h>
62 
63 /*
64  * Map the given TX queue to a given CPU.
65  *
66  * The current mapping in the if_transmit() path
67  * will map mp_ncpu groups of flowids to the TXQs.
68  * So for a 4 CPU system the first four will be CPU 0,
69  * the second four will be CPU 1, etc.
70  */
71 int
72 qcom_ess_edma_tx_queue_to_cpu(struct qcom_ess_edma_softc *sc, int queue)
73 {
74 
75 	return (queue / mp_ncpus);
76 }
77 
78 int
79 qcom_ess_edma_tx_ring_setup(struct qcom_ess_edma_softc *sc,
80     struct qcom_ess_edma_desc_ring *ring)
81 {
82 	struct qcom_ess_edma_sw_desc_tx *txd;
83 	int i, ret;
84 
85 	for (i = 0; i < EDMA_TX_RING_SIZE; i++) {
86 		txd = qcom_ess_edma_desc_ring_get_sw_desc(sc, ring, i);
87 		if (txd == NULL) {
88 			device_printf(sc->sc_dev,
89 			    "ERROR; couldn't get sw desc (idx %d)\n", i);
90 			return (EINVAL);
91 		}
92 		txd->m = NULL;
93 		ret = bus_dmamap_create(ring->buffer_dma_tag,
94 		    BUS_DMA_NOWAIT,
95 		    &txd->m_dmamap);
96 		if (ret != 0) {
97 			device_printf(sc->sc_dev,
98 			    "%s: failed to create dmamap (%d)\n",
99 			    __func__, ret);
100 		}
101 	}
102 
103 	return (0);
104 }
105 
106 int
107 qcom_ess_edma_tx_ring_clean(struct qcom_ess_edma_softc *sc,
108     struct qcom_ess_edma_desc_ring *ring)
109 {
110 	device_printf(sc->sc_dev, "%s: TODO\n", __func__);
111 	return (0);
112 }
113 
114 /*
115  * Clear the sw/hw descriptor entries, unmap/free the mbuf chain that's
116  * part of this.
117  */
118 static int
119 qcom_ess_edma_tx_unmap_and_clean(struct qcom_ess_edma_softc *sc,
120     struct qcom_ess_edma_desc_ring *ring, uint16_t idx)
121 {
122 	struct qcom_ess_edma_sw_desc_tx *txd;
123 	struct qcom_ess_edma_tx_desc *ds;
124 
125 	/* Get the software/hardware descriptors we're going to update */
126 	txd = qcom_ess_edma_desc_ring_get_sw_desc(sc, ring, idx);
127 	if (txd == NULL) {
128 		device_printf(sc->sc_dev,
129 		    "ERROR; couldn't get sw desc (idx %d)\n", idx);
130 		return (EINVAL);
131 	}
132 
133 	ds = qcom_ess_edma_desc_ring_get_hw_desc(sc, ring, idx);
134 	if (ds == NULL) {
135 		device_printf(sc->sc_dev,
136 		    "ERROR; couldn't get hw desc (idx %d)\n", idx);
137 		return (EINVAL);
138 	}
139 
140 	if (txd->m != NULL) {
141 		QCOM_ESS_EDMA_DPRINTF(sc, QCOM_ESS_EDMA_DBG_TX_RING,
142 		    "%s:   idx %d, unmap/free\n", __func__, idx);
143 		bus_dmamap_unload(ring->buffer_dma_tag, txd->m_dmamap);
144 		m_freem(txd->m);
145 		txd->m = NULL;
146 		txd->is_first = txd->is_last = 0;
147 	}
148 
149 #ifdef	ESS_EDMA_DEBUG_CLEAR_DESC
150 	/* This is purely for debugging/testing right now; it's slow! */
151 	memset(ds, 0, sizeof(struct qcom_ess_edma_tx_desc));
152 #endif
153 
154 	return (0);
155 }
156 
157 /*
158  * Run through the TX ring, complete/free frames.
159  */
160 int
161 qcom_ess_edma_tx_ring_complete(struct qcom_ess_edma_softc *sc, int queue)
162 {
163 	struct qcom_ess_edma_desc_ring *ring;
164 	uint32_t n;
165 	uint16_t sw_next_to_clean, hw_next_to_clean;
166 
167 	ring = &sc->sc_tx_ring[queue];
168 
169 	EDMA_RING_LOCK_ASSERT(ring);
170 
171 	qcom_ess_edma_desc_ring_flush_postupdate(sc, ring);
172 
173 	sw_next_to_clean = ring->next_to_clean;
174 	hw_next_to_clean = 0;
175 	n = 0;
176 
177 	/* Get the current hardware completion index */
178 	(void) qcom_ess_edma_hw_tx_read_tpd_cons_idx(sc, queue,
179 	    &hw_next_to_clean);
180 
181 	QCOM_ESS_EDMA_DPRINTF(sc, QCOM_ESS_EDMA_DBG_TX_RING,
182 	    "%s: called; sw=%d, hw=%d\n", __func__,
183 	    sw_next_to_clean, hw_next_to_clean);
184 
185 	/* clean the buffer chain and descriptor(s) here */
186 	while (sw_next_to_clean != hw_next_to_clean) {
187 		qcom_ess_edma_tx_unmap_and_clean(sc, ring, sw_next_to_clean);
188 		QCOM_ESS_EDMA_DPRINTF(sc, QCOM_ESS_EDMA_DBG_TX_RING,
189 		    "%s  cleaning %d\n", __func__, sw_next_to_clean);
190 		sw_next_to_clean++;
191 		if (sw_next_to_clean >= ring->ring_count)
192 			sw_next_to_clean = 0;
193 		n++;
194 	}
195 
196 	ring->stats.num_cleaned += n;
197 	ring->stats.num_tx_complete++;
198 
199 	ring->next_to_clean = sw_next_to_clean;
200 
201 	/* update the TPD consumer index register */
202 	qcom_ess_edma_hw_tx_update_cons_idx(sc, queue, sw_next_to_clean);
203 
204 	QCOM_ESS_EDMA_DPRINTF(sc, QCOM_ESS_EDMA_DBG_TX_RING_COMPLETE,
205 	    "%s: cleaned %d descriptors\n", __func__, n);
206 
207 	return (0);
208 }
209 
210 /*
211  * Attempt to enqueue a single frame.
212  *
213  * This is the MVP required to send a single ethernet mbuf / mbuf chain.
214  * VLAN tags are added/required as the default switch configuration
215  * from device-tree uses both the port bitmap and VLAN IDs for
216  * controlling LAN/WAN/etc interface traffic.
217  *
218  * Note, this does NOT update the transmit pointer to the hardware;
219  * that must be done after calling this function one or more times.
220  *
221  * The mbuf is either consumed into the ring or it is returned
222  * unsent.  If we've modifide it in any way then the caller should
223  * use what's returned back in m0 (eg to pushback.)
224  */
225 int
226 qcom_ess_edma_tx_ring_frame(struct qcom_ess_edma_softc *sc, int queue,
227     struct mbuf **m0, uint16_t port_bitmap, int default_vlan)
228 {
229 	struct qcom_ess_edma_sw_desc_tx *txd_first;
230 	struct qcom_ess_edma_desc_ring *ring;
231 	struct ether_vlan_header *eh;
232 	bus_dma_segment_t txsegs[QCOM_ESS_EDMA_MAX_TXFRAGS];
233 	uint32_t word1, word3;
234 	uint32_t eop;
235 	int vlan_id;
236 	int num_left, ret, nsegs, i;
237 	uint16_t next_to_fill;
238 	uint16_t svlan_tag;
239 	struct mbuf *m;
240 
241 	ring = &sc->sc_tx_ring[queue];
242 
243 	EDMA_RING_LOCK_ASSERT(ring);
244 
245 	m = *m0;
246 
247 	/*
248 	 * Do we have ANY space? If not, return ENOBUFS, let the
249 	 * caller decide what to do with the mbuf.
250 	 */
251 	num_left = qcom_ess_edma_desc_ring_get_num_available(sc, ring);
252 	if (num_left < 2) {
253 		QCOM_ESS_EDMA_DPRINTF(sc, QCOM_ESS_EDMA_DBG_TX_FRAME,
254 		    "%s: num_left=%d\n", __func__, num_left);
255 		ring->stats.num_enqueue_full++;
256 		return (ENOBUFS);
257 	}
258 
259 	/*
260 	 * Get the current sw/hw descriptor offset; we'll use its
261 	 * dmamap and then switch it out with the last one when
262 	 * the mbuf is put there.
263 	 */
264 	next_to_fill = ring->next_to_fill;
265 	txd_first = qcom_ess_edma_desc_ring_get_sw_desc(sc, ring,
266 	    next_to_fill);
267 	QCOM_ESS_EDMA_DPRINTF(sc, QCOM_ESS_EDMA_DBG_TX_FRAME,
268 	    "%s: starting at idx %d\n", __func__, next_to_fill);
269 
270 	/*
271 	 * Do the initial mbuf load; see how many fragments we
272 	 * have.  If we don't have enough descriptors available
273 	 * then immediately unmap and return an error.
274 	 */
275 	ret = bus_dmamap_load_mbuf_sg(ring->buffer_dma_tag,
276 	    txd_first->m_dmamap,
277 	    m,
278 	    txsegs,
279 	    &nsegs,
280 	    BUS_DMA_NOWAIT);
281 	if (ret != 0) {
282 		ring->stats.num_tx_mapfail++;
283 		QCOM_ESS_EDMA_DPRINTF(sc, QCOM_ESS_EDMA_DBG_TX_FRAME,
284 		    "%s: map failed (%d)\n", __func__, ret);
285 		return (ENOBUFS);
286 	}
287 	if (nsegs == 0) {
288 		ring->stats.num_tx_maxfrags++;
289 		QCOM_ESS_EDMA_DPRINTF(sc, QCOM_ESS_EDMA_DBG_TX_FRAME,
290 		    "%s: too many segs\n", __func__);
291 		return (ENOBUFS);
292 	}
293 
294 	if (nsegs + 2 > num_left) {
295 		QCOM_ESS_EDMA_DPRINTF(sc, QCOM_ESS_EDMA_DBG_TX_FRAME,
296 		    "%s: nsegs=%d, num_left=%d\n", __func__, nsegs, num_left);
297 		bus_dmamap_unload(ring->buffer_dma_tag, txd_first->m_dmamap);
298 		ring->stats.num_enqueue_full++;
299 		return (ENOBUFS);
300 	}
301 
302 	bus_dmamap_sync(ring->buffer_dma_tag, txd_first->m_dmamap,
303 	    BUS_DMASYNC_PREWRITE);
304 
305 	/*
306 	 * At this point we're committed to sending the frame.
307 	 *
308 	 * Get rid of the rcvif that is being used to track /send/ ifnet.
309 	 */
310 	m->m_pkthdr.rcvif = NULL;
311 
312 	/*
313 	 *
314 	 * Configure up the various header fields that are shared
315 	 * between descriptors.
316 	 */
317 	svlan_tag = 0; /* 802.3ad tag? */
318 	/* word1 - tx checksum, v4/v6 TSO, pppoe, 802.3ad vlan flag */
319 	word1 = 0;
320 	/*
321 	 * word3 - insert default vlan; vlan tag/flag, CPU/STP/RSTP stuff,
322 	 * port map
323 	 */
324 	word3 = 0;
325 	word3 |= (port_bitmap << EDMA_TPD_PORT_BITMAP_SHIFT);
326 
327 	/*
328 	 * If VLAN offload is enabled, we can enable inserting a CVLAN
329 	 * tag here for the default VLAN, or the VLAN interface.
330 	 * The default switch configuration requires both a port_bitmap
331 	 * and 802.1q VLANs configured.
332 	 *
333 	 * If there's a VLAN tag on the mbuf then we leave it alone.
334 	 * I don't want to try and strip out the VLAN header from a packet
335 	 * here.
336 	 *
337 	 * There's no 802.1ad support in here yet.
338 	 */
339 	eh = mtod(m, struct ether_vlan_header *);
340 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
341 		/* Don't add a tag, just use what's here */
342 		vlan_id = -1;
343 		QCOM_ESS_EDMA_DPRINTF(sc, QCOM_ESS_EDMA_DBG_TX_FRAME,
344 		    "%s:   no vlan id\n", __func__);
345 
346 	} else if ((m->m_flags & M_VLANTAG) != 0) {
347 		/* We have an offload VLAN tag, use it */
348 		vlan_id = m->m_pkthdr.ether_vtag & 0x0fff;
349 		QCOM_ESS_EDMA_DPRINTF(sc, QCOM_ESS_EDMA_DBG_TX_FRAME,
350 		    "%s:   header tag vlan id=%d\n", __func__, vlan_id);
351 	} else {
352 		/* No VLAN tag, no VLAN header; default VLAN */
353 		vlan_id = default_vlan;
354 		QCOM_ESS_EDMA_DPRINTF(sc, QCOM_ESS_EDMA_DBG_TX_FRAME,
355 		    "%s:   no vlan tag/hdr; vlan id=%d\n", __func__,
356 		    vlan_id);
357 	}
358 
359 	/*
360 	 * Only add the offload tag if we need to.
361 	 */
362 	if (vlan_id != -1) {
363 		word3 |= (1U << EDMA_TX_INS_CVLAN);
364 		word3 |= (vlan_id << EDMA_TX_CVLAN_TAG_SHIFT);
365 	}
366 
367 	/* End of frame flag */
368 	eop = 0;
369 
370 	/*
371 	 * Walk the mbuf segment list, and allocate descriptor
372 	 * entries.  Put the mbuf in the last descriptor entry
373 	 * and then switch out the first/last dmamap entries.
374 	 */
375 	for (i = 0; i < nsegs; i++) {
376 		struct qcom_ess_edma_sw_desc_tx *txd;
377 		struct qcom_ess_edma_tx_desc *ds;
378 		QCOM_ESS_EDMA_DPRINTF(sc, QCOM_ESS_EDMA_DBG_TX_FRAME,
379 		    "%s:   filling idx %d\n", __func__, next_to_fill);
380 		txd = qcom_ess_edma_desc_ring_get_sw_desc(sc, ring, next_to_fill);
381 		ds = qcom_ess_edma_desc_ring_get_hw_desc(sc, ring, next_to_fill);
382 		txd->m = NULL;
383 		if (i == 0) {
384 			txd->is_first = 1;
385 		}
386 		if (i == (nsegs - 1)) {
387 			bus_dmamap_t dm;
388 
389 			txd->is_last = 1;
390 			eop = EDMA_TPD_EOP;
391 			/*
392 			 * Put the txmap and the mbuf in the last swdesc.
393 			 * That way it isn't freed until we've transmitted
394 			 * all the descriptors of this frame, in case the
395 			 * hardware decides to notify us of some half-sent
396 			 * stuff.
397 			 *
398 			 * Moving the pointers around here sucks a little
399 			 * but it DOES beat not freeing the dmamap entries
400 			 * correctly.
401 			 */
402 			txd->m = m;
403 			dm = txd_first->m_dmamap;
404 			txd_first->m_dmamap = txd->m_dmamap;
405 			txd->m_dmamap = dm;
406 		}
407 		ds->word1 = word1 | eop;
408 		ds->word3 = word3;
409 		ds->svlan_tag = svlan_tag;
410 		ds->addr = htole32(txsegs[i].ds_addr);
411 		ds->len = htole16(txsegs[i].ds_len);
412 
413 		QCOM_ESS_EDMA_DPRINTF(sc, QCOM_ESS_EDMA_DBG_TX_FRAME,
414 		    "%s:   addr=0x%lx len=%ld eop=0x%x\n",
415 		    __func__,
416 		    txsegs[i].ds_addr,
417 		    txsegs[i].ds_len,
418 		    eop);
419 
420 		next_to_fill++;
421 		if (next_to_fill >= ring->ring_count)
422 			next_to_fill = 0;
423 	}
424 
425 	ring->stats.num_added += nsegs;
426 
427 	/* Finish, update ring tracking */
428 	ring->next_to_fill = next_to_fill;
429 
430 	ring->stats.num_tx_ok++;
431 
432 	return (0);
433 }
434 
435 /*
436  * Update the hardware with the new state of the transmit ring.
437  */
438 int
439 qcom_ess_edma_tx_ring_frame_update(struct qcom_ess_edma_softc *sc, int queue)
440 {
441 	struct qcom_ess_edma_desc_ring *ring;
442 
443 	ring = &sc->sc_tx_ring[queue];
444 
445 	EDMA_RING_LOCK_ASSERT(ring);
446 
447 	qcom_ess_edma_desc_ring_flush_preupdate(sc, ring);
448 
449 	(void) qcom_ess_edma_hw_tx_update_tpd_prod_idx(sc, queue,
450 	    ring->next_to_fill);
451 
452 	/* XXX keep stats for this specific call? */
453 	return (0);
454 }
455