xref: /linux/drivers/net/ethernet/broadcom/bnxt/bnxt_gso.c (revision 91a4855d6c03e770e42f17c798a36a3c46e63de2)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* Broadcom NetXtreme-C/E network driver.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation.
7  */
8 
9 #include <linux/pci.h>
10 #include <linux/netdevice.h>
11 #include <linux/skbuff.h>
12 #include <net/netdev_queues.h>
13 #include <net/ip.h>
14 #include <net/ipv6.h>
15 #include <net/udp.h>
16 #include <net/tso.h>
17 #include <linux/bnxt/hsi.h>
18 
19 #include "bnxt.h"
20 #include "bnxt_gso.h"
21 
22 static u32 bnxt_sw_gso_lhint(unsigned int len)
23 {
24 	if (len <= 512)
25 		return TX_BD_FLAGS_LHINT_512_AND_SMALLER;
26 	else if (len <= 1023)
27 		return TX_BD_FLAGS_LHINT_512_TO_1023;
28 	else if (len <= 2047)
29 		return TX_BD_FLAGS_LHINT_1024_TO_2047;
30 	else
31 		return TX_BD_FLAGS_LHINT_2048_AND_LARGER;
32 }
33 
34 netdev_tx_t bnxt_sw_udp_gso_xmit(struct bnxt *bp,
35 				 struct bnxt_tx_ring_info *txr,
36 				 struct netdev_queue *txq,
37 				 struct sk_buff *skb)
38 {
39 	unsigned int last_unmap_len __maybe_unused = 0;
40 	dma_addr_t last_unmap_addr __maybe_unused = 0;
41 	struct bnxt_sw_tx_bd *last_unmap_buf = NULL;
42 	unsigned int hdr_len, mss, num_segs;
43 	struct pci_dev *pdev = bp->pdev;
44 	unsigned int total_payload;
45 	struct tso_dma_map map;
46 	u32 vlan_tag_flags = 0;
47 	int i, bds_needed;
48 	struct tso_t tso;
49 	u16 cfa_action;
50 	__le32 csum;
51 	u16 prod;
52 
53 	hdr_len = tso_start(skb, &tso);
54 	mss = skb_shinfo(skb)->gso_size;
55 	total_payload = skb->len - hdr_len;
56 	num_segs = DIV_ROUND_UP(total_payload, mss);
57 
58 	if (unlikely(num_segs <= 1))
59 		goto drop;
60 
61 	/* Upper bound on the number of descriptors needed.
62 	 *
63 	 * Each segment uses 1 long BD + 1 ext BD + payload BDs, which is
64 	 * at most num_segs + nr_frags (each frag boundary crossing adds at
65 	 * most 1 extra BD).
66 	 */
67 	bds_needed = 3 * num_segs + skb_shinfo(skb)->nr_frags + 1;
68 
69 	if (unlikely(bnxt_tx_avail(bp, txr) < bds_needed)) {
70 		netif_txq_try_stop(txq, bnxt_tx_avail(bp, txr),
71 				   bp->tx_wake_thresh);
72 		return NETDEV_TX_BUSY;
73 	}
74 
75 	/* BD backpressure alone cannot prevent overwriting in-flight
76 	 * headers in the inline buffer. Check slot availability directly.
77 	 */
78 	if (!netif_txq_maybe_stop(txq, bnxt_inline_avail(txr),
79 				  num_segs, num_segs))
80 		return NETDEV_TX_BUSY;
81 
82 	if (unlikely(tso_dma_map_init(&map, &pdev->dev, skb, hdr_len)))
83 		goto drop;
84 
85 	cfa_action = bnxt_xmit_get_cfa_action(skb);
86 	if (skb_vlan_tag_present(skb)) {
87 		vlan_tag_flags = TX_BD_CFA_META_KEY_VLAN |
88 				 skb_vlan_tag_get(skb);
89 		if (skb->vlan_proto == htons(ETH_P_8021Q))
90 			vlan_tag_flags |= 1 << TX_BD_CFA_META_TPID_SHIFT;
91 	}
92 
93 	csum = cpu_to_le32(TX_BD_FLAGS_TCP_UDP_CHKSUM);
94 	if (!tso.ipv6)
95 		csum |= cpu_to_le32(TX_BD_FLAGS_IP_CKSUM);
96 
97 	prod = txr->tx_prod;
98 
99 	for (i = 0; i < num_segs; i++) {
100 		unsigned int seg_payload = min_t(unsigned int, mss,
101 						 total_payload - i * mss);
102 		u16 slot = (txr->tx_inline_prod + i) &
103 			   (BNXT_SW_USO_MAX_SEGS - 1);
104 		struct bnxt_sw_tx_bd *tx_buf;
105 		unsigned int mapping_len;
106 		dma_addr_t this_hdr_dma;
107 		unsigned int chunk_len;
108 		unsigned int offset;
109 		dma_addr_t dma_addr;
110 		struct tx_bd *txbd;
111 		struct udphdr *uh;
112 		void *this_hdr;
113 		int bd_count;
114 		bool last;
115 		u32 flags;
116 
117 		last = (i == num_segs - 1);
118 		offset = slot * TSO_HEADER_SIZE;
119 		this_hdr = txr->tx_inline_buf + offset;
120 		this_hdr_dma = txr->tx_inline_dma + offset;
121 
122 		tso_build_hdr(skb, this_hdr, &tso, seg_payload, last);
123 
124 		/* Zero stale csum fields copied from the original skb;
125 		 * HW offload recomputes from scratch.
126 		 */
127 		uh = this_hdr + skb_transport_offset(skb);
128 		uh->check = 0;
129 		if (!tso.ipv6) {
130 			struct iphdr *iph = this_hdr + skb_network_offset(skb);
131 
132 			iph->check = 0;
133 		}
134 
135 		dma_sync_single_for_device(&pdev->dev, this_hdr_dma,
136 					   hdr_len, DMA_TO_DEVICE);
137 
138 		bd_count = tso_dma_map_count(&map, seg_payload);
139 
140 		tx_buf = &txr->tx_buf_ring[RING_TX(bp, prod)];
141 		txbd = &txr->tx_desc_ring[TX_RING(bp, prod)][TX_IDX(prod)];
142 
143 		tx_buf->skb = skb;
144 		tx_buf->nr_frags = bd_count;
145 		tx_buf->is_push = 0;
146 		tx_buf->is_ts_pkt = 0;
147 
148 		dma_unmap_addr_set(tx_buf, mapping, this_hdr_dma);
149 		dma_unmap_len_set(tx_buf, len, 0);
150 
151 		if (last) {
152 			tx_buf->is_sw_gso = BNXT_SW_GSO_LAST;
153 			tso_dma_map_completion_save(&map, &tx_buf->sw_gso_cstate);
154 		} else {
155 			tx_buf->is_sw_gso = BNXT_SW_GSO_MID;
156 		}
157 
158 		flags = (hdr_len << TX_BD_LEN_SHIFT) |
159 			TX_BD_TYPE_LONG_TX_BD |
160 			TX_BD_CNT(2 + bd_count);
161 
162 		flags |= bnxt_sw_gso_lhint(hdr_len + seg_payload);
163 
164 		txbd->tx_bd_len_flags_type = cpu_to_le32(flags);
165 		txbd->tx_bd_haddr = cpu_to_le64(this_hdr_dma);
166 		txbd->tx_bd_opaque = SET_TX_OPAQUE(bp, txr, prod,
167 						   2 + bd_count);
168 
169 		prod = NEXT_TX(prod);
170 		bnxt_init_ext_bd(bp, txr, prod, csum,
171 				 vlan_tag_flags, cfa_action);
172 
173 		/* set dma_unmap_len on the LAST BD touching each
174 		 * region. Since completions are in-order, the last segment
175 		 * completes after all earlier ones, so the unmap is safe.
176 		 */
177 		while (tso_dma_map_next(&map, &dma_addr, &chunk_len,
178 					&mapping_len, seg_payload)) {
179 			prod = NEXT_TX(prod);
180 			txbd = &txr->tx_desc_ring[TX_RING(bp, prod)][TX_IDX(prod)];
181 			tx_buf = &txr->tx_buf_ring[RING_TX(bp, prod)];
182 
183 			txbd->tx_bd_haddr = cpu_to_le64(dma_addr);
184 			dma_unmap_addr_set(tx_buf, mapping, dma_addr);
185 			dma_unmap_len_set(tx_buf, len, 0);
186 			tx_buf->skb = NULL;
187 			tx_buf->is_sw_gso = 0;
188 
189 			if (mapping_len) {
190 				if (last_unmap_buf) {
191 					dma_unmap_addr_set(last_unmap_buf,
192 							   mapping,
193 							   last_unmap_addr);
194 					dma_unmap_len_set(last_unmap_buf,
195 							  len,
196 							  last_unmap_len);
197 				}
198 				last_unmap_addr = dma_addr;
199 				last_unmap_len = mapping_len;
200 			}
201 			last_unmap_buf = tx_buf;
202 
203 			flags = chunk_len << TX_BD_LEN_SHIFT;
204 			txbd->tx_bd_len_flags_type = cpu_to_le32(flags);
205 			txbd->tx_bd_opaque = 0;
206 
207 			seg_payload -= chunk_len;
208 		}
209 
210 		txbd->tx_bd_len_flags_type |=
211 			cpu_to_le32(TX_BD_FLAGS_PACKET_END);
212 
213 		prod = NEXT_TX(prod);
214 	}
215 
216 	if (last_unmap_buf) {
217 		dma_unmap_addr_set(last_unmap_buf, mapping, last_unmap_addr);
218 		dma_unmap_len_set(last_unmap_buf, len, last_unmap_len);
219 	}
220 
221 	txr->tx_inline_prod += num_segs;
222 
223 	netdev_tx_sent_queue(txq, skb->len);
224 
225 	WRITE_ONCE(txr->tx_prod, prod);
226 	/* Sync BDs before doorbell */
227 	wmb();
228 	bnxt_db_write(bp, &txr->tx_db, prod);
229 
230 	if (unlikely(bnxt_tx_avail(bp, txr) <= bp->tx_wake_thresh))
231 		netif_txq_try_stop(txq, bnxt_tx_avail(bp, txr),
232 				   bp->tx_wake_thresh);
233 
234 	return NETDEV_TX_OK;
235 
236 drop:
237 	dev_kfree_skb_any(skb);
238 	dev_core_stats_tx_dropped_inc(bp->dev);
239 	return NETDEV_TX_OK;
240 }
241