xref: /linux/drivers/net/ethernet/broadcom/bnxt/bnxt_gso.c (revision cc5d90667db81474ed7a92a1b2fa3daec5559307)
10440e27eSJoe Damato // SPDX-License-Identifier: GPL-2.0-or-later
20440e27eSJoe Damato /* Broadcom NetXtreme-C/E network driver.
30440e27eSJoe Damato  *
40440e27eSJoe Damato  * This program is free software; you can redistribute it and/or modify
50440e27eSJoe Damato  * it under the terms of the GNU General Public License as published by
60440e27eSJoe Damato  * the Free Software Foundation.
70440e27eSJoe Damato  */
80440e27eSJoe Damato 
90440e27eSJoe Damato #include <linux/pci.h>
100440e27eSJoe Damato #include <linux/netdevice.h>
110440e27eSJoe Damato #include <linux/skbuff.h>
120440e27eSJoe Damato #include <net/netdev_queues.h>
130440e27eSJoe Damato #include <net/ip.h>
140440e27eSJoe Damato #include <net/ipv6.h>
150440e27eSJoe Damato #include <net/udp.h>
160440e27eSJoe Damato #include <net/tso.h>
170440e27eSJoe Damato #include <linux/bnxt/hsi.h>
180440e27eSJoe Damato 
190440e27eSJoe Damato #include "bnxt.h"
200440e27eSJoe Damato #include "bnxt_gso.h"
210440e27eSJoe Damato 
22*cc5d9066SJoe Damato static u32 bnxt_sw_gso_lhint(unsigned int len)
23*cc5d9066SJoe Damato {
24*cc5d9066SJoe Damato 	if (len <= 512)
25*cc5d9066SJoe Damato 		return TX_BD_FLAGS_LHINT_512_AND_SMALLER;
26*cc5d9066SJoe Damato 	else if (len <= 1023)
27*cc5d9066SJoe Damato 		return TX_BD_FLAGS_LHINT_512_TO_1023;
28*cc5d9066SJoe Damato 	else if (len <= 2047)
29*cc5d9066SJoe Damato 		return TX_BD_FLAGS_LHINT_1024_TO_2047;
30*cc5d9066SJoe Damato 	else
31*cc5d9066SJoe Damato 		return TX_BD_FLAGS_LHINT_2048_AND_LARGER;
32*cc5d9066SJoe Damato }
33*cc5d9066SJoe Damato 
/* bnxt_sw_udp_gso_xmit - segment a UDP GSO skb in software and post it.
 * @bp:  driver private state
 * @txr: TX ring to post buffer descriptors on
 * @txq: netdev queue backing @txr (flow control and BQL accounting)
 * @skb: UDP GSO skb; skb_shinfo(skb)->gso_size is the per-segment MSS
 *
 * Splits @skb into num_segs wire packets.  For each segment a fresh copy
 * of the protocol headers is built (tso_build_hdr()) into one slot of the
 * ring's pre-mapped inline header buffer, and the payload is described by
 * DMA chunks obtained from the tso_dma_map_* helpers.  Each segment uses
 * one long TX BD (pointing at the inline header slot), one extension BD
 * (checksum/VLAN/CFA metadata) and one BD per payload chunk.
 *
 * Return: NETDEV_TX_OK (also when the skb is dropped) or NETDEV_TX_BUSY
 * when the BD ring or the inline header buffer is too full.
 */
netdev_tx_t bnxt_sw_udp_gso_xmit(struct bnxt *bp,
				 struct bnxt_tx_ring_info *txr,
				 struct netdev_queue *txq,
				 struct sk_buff *skb)
{
	/* Deferred-unmap bookkeeping (see the comment above the payload
	 * loop below).  __maybe_unused: presumably because the only uses
	 * are via dma_unmap_addr_set()/dma_unmap_len_set(), which compile
	 * away when DMA unmap state is not needed — TODO confirm.
	 */
	unsigned int last_unmap_len __maybe_unused = 0;
	dma_addr_t last_unmap_addr __maybe_unused = 0;
	struct bnxt_sw_tx_bd *last_unmap_buf = NULL;
	unsigned int hdr_len, mss, num_segs;
	struct pci_dev *pdev = bp->pdev;
	unsigned int total_payload;
	struct tso_dma_map map;
	u32 vlan_tag_flags = 0;
	int i, bds_needed;
	struct tso_t tso;
	u16 cfa_action;
	__le32 csum;
	u16 prod;

	/* tso_start() parses the headers and returns their total length;
	 * everything past hdr_len is payload to be split across segments.
	 */
	hdr_len = tso_start(skb, &tso);
	mss = skb_shinfo(skb)->gso_size;
	total_payload = skb->len - hdr_len;
	num_segs = DIV_ROUND_UP(total_payload, mss);

	/* A GSO skb that fits in a single segment should not reach this
	 * path; treat it as malformed and drop.
	 */
	if (unlikely(num_segs <= 1))
		goto drop;

	/* Upper bound on the number of descriptors needed.
	 *
	 * Each segment uses 1 long BD + 1 ext BD + payload BDs, which is
	 * at most num_segs + nr_frags (each frag boundary crossing adds at
	 * most 1 extra BD).
	 */
	bds_needed = 3 * num_segs + skb_shinfo(skb)->nr_frags + 1;

	/* Not enough BDs: stop the queue and let the stack requeue. */
	if (unlikely(bnxt_tx_avail(bp, txr) < bds_needed)) {
		netif_txq_try_stop(txq, bnxt_tx_avail(bp, txr),
				   bp->tx_wake_thresh);
		return NETDEV_TX_BUSY;
	}

	/* BD backpressure alone cannot prevent overwriting in-flight
	 * headers in the inline buffer. Check slot availability directly.
	 */
	if (!netif_txq_maybe_stop(txq, bnxt_inline_avail(txr),
				  num_segs, num_segs))
		return NETDEV_TX_BUSY;

	/* DMA-map the payload; nothing to unwind on failure yet. */
	if (unlikely(tso_dma_map_init(&map, &pdev->dev, skb, hdr_len)))
		goto drop;

	cfa_action = bnxt_xmit_get_cfa_action(skb);
	if (skb_vlan_tag_present(skb)) {
		vlan_tag_flags = TX_BD_CFA_META_KEY_VLAN |
				 skb_vlan_tag_get(skb);
		if (skb->vlan_proto == htons(ETH_P_8021Q))
			vlan_tag_flags |= 1 << TX_BD_CFA_META_TPID_SHIFT;
	}

	/* Have HW compute the L4 checksum per segment; for IPv4 also the
	 * IP header checksum.
	 */
	csum = cpu_to_le32(TX_BD_FLAGS_TCP_UDP_CHKSUM);
	if (!tso.ipv6)
		csum |= cpu_to_le32(TX_BD_FLAGS_IP_CKSUM);

	prod = txr->tx_prod;

	for (i = 0; i < num_segs; i++) {
		/* Last segment may carry less than a full MSS. */
		unsigned int seg_payload = min_t(unsigned int, mss,
						 total_payload - i * mss);
		/* Inline header slots are used round-robin; the mask
		 * assumes BNXT_SW_USO_MAX_SEGS is a power of two.  Slot
		 * availability was verified above.
		 */
		u16 slot = (txr->tx_inline_prod + i) &
			   (BNXT_SW_USO_MAX_SEGS - 1);
		struct bnxt_sw_tx_bd *tx_buf;
		unsigned int mapping_len;
		dma_addr_t this_hdr_dma;
		unsigned int chunk_len;
		unsigned int offset;
		dma_addr_t dma_addr;
		struct tx_bd *txbd;
		struct udphdr *uh;
		void *this_hdr;
		int bd_count;
		bool last;
		u32 flags;

		last = (i == num_segs - 1);
		offset = slot * TSO_HEADER_SIZE;
		this_hdr = txr->tx_inline_buf + offset;
		this_hdr_dma = txr->tx_inline_dma + offset;

		/* Build this segment's headers (IP ids/lengths, UDP len)
		 * into the inline slot.
		 */
		tso_build_hdr(skb, this_hdr, &tso, seg_payload, last);

		/* Zero stale csum fields copied from the original skb;
		 * HW offload recomputes from scratch.
		 */
		uh = this_hdr + skb_transport_offset(skb);
		uh->check = 0;
		if (!tso.ipv6) {
			struct iphdr *iph = this_hdr + skb_network_offset(skb);

			iph->check = 0;
		}

		/* Inline buffer is streaming-mapped: flush the freshly
		 * built header to the device before posting the BD.
		 */
		dma_sync_single_for_device(&pdev->dev, this_hdr_dma,
					   hdr_len, DMA_TO_DEVICE);

		/* How many payload BDs this segment's payload spans. */
		bd_count = tso_dma_map_count(&map, seg_payload);

		tx_buf = &txr->tx_buf_ring[RING_TX(bp, prod)];
		txbd = &txr->tx_desc_ring[TX_RING(bp, prod)][TX_IDX(prod)];

		/* NOTE(review): skb is attached to every segment's first
		 * BD; presumably the completion path frees it only for the
		 * BNXT_SW_GSO_LAST one — confirm against the TX completion
		 * handler.
		 */
		tx_buf->skb = skb;
		tx_buf->nr_frags = bd_count;
		tx_buf->is_push = 0;
		tx_buf->is_ts_pkt = 0;

		/* len 0: the inline header mapping is long-lived, so the
		 * header BD itself has nothing to unmap at completion.
		 */
		dma_unmap_addr_set(tx_buf, mapping, this_hdr_dma);
		dma_unmap_len_set(tx_buf, len, 0);

		if (last) {
			/* Final segment carries the DMA-map completion
			 * state so TX completion can finish the unmap.
			 */
			tx_buf->is_sw_gso = BNXT_SW_GSO_LAST;
			tso_dma_map_completion_save(&map, &tx_buf->sw_gso_cstate);
		} else {
			tx_buf->is_sw_gso = BNXT_SW_GSO_MID;
		}

		/* Long BD: header bytes inline; BD count = this BD + the
		 * ext BD + the payload BDs.
		 */
		flags = (hdr_len << TX_BD_LEN_SHIFT) |
			TX_BD_TYPE_LONG_TX_BD |
			TX_BD_CNT(2 + bd_count);

		/* Length hint reflects the full wire-packet size. */
		flags |= bnxt_sw_gso_lhint(hdr_len + seg_payload);

		txbd->tx_bd_len_flags_type = cpu_to_le32(flags);
		txbd->tx_bd_haddr = cpu_to_le64(this_hdr_dma);
		txbd->tx_bd_opaque = SET_TX_OPAQUE(bp, txr, prod,
						   2 + bd_count);

		/* Extension BD with csum/VLAN/CFA metadata follows. */
		prod = NEXT_TX(prod);
		bnxt_init_ext_bd(bp, txr, prod, csum,
				 vlan_tag_flags, cfa_action);

		/* set dma_unmap_len on the LAST BD touching each
		 * region. Since completions are in-order, the last segment
		 * completes after all earlier ones, so the unmap is safe.
		 */
		while (tso_dma_map_next(&map, &dma_addr, &chunk_len,
					&mapping_len, seg_payload)) {
			prod = NEXT_TX(prod);
			txbd = &txr->tx_desc_ring[TX_RING(bp, prod)][TX_IDX(prod)];
			tx_buf = &txr->tx_buf_ring[RING_TX(bp, prod)];

			txbd->tx_bd_haddr = cpu_to_le64(dma_addr);
			dma_unmap_addr_set(tx_buf, mapping, dma_addr);
			dma_unmap_len_set(tx_buf, len, 0);
			tx_buf->skb = NULL;
			tx_buf->is_sw_gso = 0;

			/* mapping_len != 0 marks the first chunk of a new
			 * mapped region: retroactively stamp the previous
			 * region's unmap info on the last BD that touched
			 * it, then start tracking the new region.
			 */
			if (mapping_len) {
				if (last_unmap_buf) {
					dma_unmap_addr_set(last_unmap_buf,
							   mapping,
							   last_unmap_addr);
					dma_unmap_len_set(last_unmap_buf,
							  len,
							  last_unmap_len);
				}
				last_unmap_addr = dma_addr;
				last_unmap_len = mapping_len;
			}
			last_unmap_buf = tx_buf;

			flags = chunk_len << TX_BD_LEN_SHIFT;
			txbd->tx_bd_len_flags_type = cpu_to_le32(flags);
			txbd->tx_bd_opaque = 0;

			seg_payload -= chunk_len;
		}

		/* txbd now points at this segment's final payload BD;
		 * mark the end of the wire packet.
		 */
		txbd->tx_bd_len_flags_type |=
			cpu_to_le32(TX_BD_FLAGS_PACKET_END);

		/* Advance past the segment's last BD. */
		prod = NEXT_TX(prod);
	}

	/* Stamp unmap info for the final mapped region. */
	if (last_unmap_buf) {
		dma_unmap_addr_set(last_unmap_buf, mapping, last_unmap_addr);
		dma_unmap_len_set(last_unmap_buf, len, last_unmap_len);
	}

	/* Consume the inline header slots used by this skb. */
	txr->tx_inline_prod += num_segs;

	/* BQL: account the original skb once, not per segment. */
	netdev_tx_sent_queue(txq, skb->len);

	WRITE_ONCE(txr->tx_prod, prod);
	/* Sync BDs before doorbell */
	wmb();
	bnxt_db_write(bp, &txr->tx_db, prod);

	/* Stop the queue now if the ring is close to full. */
	if (unlikely(bnxt_tx_avail(bp, txr) <= bp->tx_wake_thresh))
		netif_txq_try_stop(txq, bnxt_tx_avail(bp, txr),
				   bp->tx_wake_thresh);

	return NETDEV_TX_OK;

drop:
	dev_kfree_skb_any(skb);
	dev_core_stats_tx_dropped_inc(bp->dev);
	return NETDEV_TX_OK;
}
241