xref: /freebsd/sys/dev/ice/ice_common_txrx.h (revision 13d826ff947d9026f98e317e7385b22abfc0eace)
1 /* SPDX-License-Identifier: BSD-3-Clause */
2 /*  Copyright (c) 2024, Intel Corporation
3  *  All rights reserved.
4  *
5  *  Redistribution and use in source and binary forms, with or without
6  *  modification, are permitted provided that the following conditions are met:
7  *
8  *   1. Redistributions of source code must retain the above copyright notice,
9  *      this list of conditions and the following disclaimer.
10  *
11  *   2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *
15  *   3. Neither the name of the Intel Corporation nor the names of its
16  *      contributors may be used to endorse or promote products derived from
17  *      this software without specific prior written permission.
18  *
19  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *  POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /**
33  * @file ice_common_txrx.h
34  * @brief common Tx/Rx utility functions
35  *
36  * Contains common utility functions for the Tx/Rx hot path.
37  *
38  * The functions do depend on the if_pkt_info_t structure. A suitable
39  * implementation of this structure must be provided if these functions are to
40  * be used without the iflib networking stack.
41  */
42 
43 #ifndef _ICE_COMMON_TXRX_H_
44 #define _ICE_COMMON_TXRX_H_
45 
46 #include <netinet/udp.h>
47 #include <netinet/sctp.h>
48 
49 /**
50  * ice_tso_detect_sparse - detect TSO packets with too many segments
51  * @pi: packet information
52  *
53  * Hardware only transmits packets with a maximum of 8 descriptors. For TSO
54  * packets, hardware needs to be able to build the split packets using 8 or
55  * fewer descriptors. Additionally, the header must be contained within at
56  * most 3 descriptors.
57  *
58  * To verify this, we walk the headers to find out how many descriptors the
59  * headers require (usually 1). Then we ensure that, for each TSO segment, its
60  * data plus the headers are contained within 8 or fewer descriptors.
61  */
62 static inline int
63 ice_tso_detect_sparse(if_pkt_info_t pi)
64 {
65 	int count, curseg, i, hlen, segsz, seglen, tsolen, hdrs, maxsegs;
66 	bus_dma_segment_t *segs = pi->ipi_segs;
67 	int nsegs = pi->ipi_nsegs;
68 
69 	curseg = hdrs = 0;
70 
71 	hlen = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen;
72 	tsolen = pi->ipi_len - hlen;
73 
74 	/* First, count the number of descriptors for the header.
75 	 * Additionally, make sure it does not span more than 3 segments.
76 	 */
77 	i = 0;
78 	curseg = segs[0].ds_len;
79 	while (hlen > 0) {
80 		hdrs++;
81 		if (hdrs > ICE_MAX_TSO_HDR_SEGS)
82 			return (1);
83 		if (curseg == 0) {
84 			i++;
85 			if (__predict_false(i == nsegs))
86 				return (1);
87 
88 			curseg = segs[i].ds_len;
89 		}
90 		seglen = min(curseg, hlen);
91 		curseg -= seglen;
92 		hlen -= seglen;
93 	}
94 
95 	maxsegs = ICE_MAX_TX_SEGS - hdrs;
96 
97 	/* We must count the headers, in order to verify that they take up
98 	 * 3 or fewer descriptors. However, we don't need to check the data
99 	 * if the total segments is small.
100 	 */
101 	if (nsegs <= maxsegs)
102 		return (0);
103 
104 	count = 0;
105 
106 	/* Now check the data to make sure that each TSO segment is made up of
107 	 * no more than maxsegs descriptors. This ensures that hardware will
108 	 * be capable of performing TSO offload.
109 	 */
110 	while (tsolen > 0) {
111 		segsz = pi->ipi_tso_segsz;
112 		while (segsz > 0 && tsolen != 0) {
113 			count++;
114 			if (count > maxsegs) {
115 				return (1);
116 			}
117 			if (curseg == 0) {
118 				i++;
119 				if (__predict_false(i == nsegs)) {
120 					return (1);
121 				}
122 				curseg = segs[i].ds_len;
123 			}
124 			seglen = min(curseg, segsz);
125 			segsz -= seglen;
126 			curseg -= seglen;
127 			tsolen -= seglen;
128 		}
129 		count = 0;
130 	}
131 
132 	return (0);
133 }
134 
135 /**
136  * ice_tso_setup - Setup a context descriptor to prepare for a TSO packet
137  * @txq: the Tx queue to use
138  * @pi: the packet info to prepare for
139  *
140  * Setup a context descriptor in preparation for sending a Tx packet that
141  * requires the TSO offload. Returns the index of the descriptor to use when
142  * encapsulating the Tx packet data into descriptors.
143  */
144 static inline int
145 ice_tso_setup(struct ice_tx_queue *txq, if_pkt_info_t pi)
146 {
147 	struct ice_tx_ctx_desc		*txd;
148 	u32				cmd, mss, type, tsolen;
149 	int				idx;
150 	u64				type_cmd_tso_mss;
151 
152 	idx = pi->ipi_pidx;
153 	txd = (struct ice_tx_ctx_desc *)&txq->tx_base[idx];
154 	tsolen = pi->ipi_len - (pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen);
155 
156 	type = ICE_TX_DESC_DTYPE_CTX;
157 	cmd = ICE_TX_CTX_DESC_TSO;
158 	/* TSO MSS must not be less than 64 */
159 	if (pi->ipi_tso_segsz < ICE_MIN_TSO_MSS) {
160 		txq->stats.mss_too_small++;
161 		pi->ipi_tso_segsz = ICE_MIN_TSO_MSS;
162 	}
163 	mss = pi->ipi_tso_segsz;
164 
165 	type_cmd_tso_mss = ((u64)type << ICE_TXD_CTX_QW1_DTYPE_S) |
166 	    ((u64)cmd << ICE_TXD_CTX_QW1_CMD_S) |
167 	    ((u64)tsolen << ICE_TXD_CTX_QW1_TSO_LEN_S) |
168 	    ((u64)mss << ICE_TXD_CTX_QW1_MSS_S);
169 	txd->qw1 = htole64(type_cmd_tso_mss);
170 
171 	txd->tunneling_params = htole32(0);
172 	txq->tso++;
173 
174 	return ((idx + 1) & (txq->desc_count-1));
175 }
176 
177 /**
178  * ice_tx_setup_offload - Setup register values for performing a Tx offload
179  * @txq: The Tx queue, used to track checksum offload stats
180  * @pi: the packet info to program for
181  * @cmd: the cmd register value to update
182  * @off: the off register value to update
183  *
184  * Based on the packet info provided, update the cmd and off values for
185  * enabling Tx offloads. This depends on the packet type and which offloads
186  * have been requested.
187  *
188  * We also track the total number of times that we've requested hardware
189  * offload a particular type of checksum for debugging purposes.
190  */
191 static inline void
192 ice_tx_setup_offload(struct ice_tx_queue *txq, if_pkt_info_t pi, u32 *cmd, u32 *off)
193 {
194 	u32 remaining_csum_flags = pi->ipi_csum_flags;
195 
196 	switch (pi->ipi_etype) {
197 #ifdef INET
198 		case ETHERTYPE_IP:
199 			if (pi->ipi_csum_flags & ICE_CSUM_IP) {
200 				*cmd |= ICE_TX_DESC_CMD_IIPT_IPV4_CSUM;
201 				txq->stats.cso[ICE_CSO_STAT_TX_IP4]++;
202 				remaining_csum_flags &= ~CSUM_IP;
203 			} else
204 				*cmd |= ICE_TX_DESC_CMD_IIPT_IPV4;
205 			break;
206 #endif
207 #ifdef INET6
208 		case ETHERTYPE_IPV6:
209 			*cmd |= ICE_TX_DESC_CMD_IIPT_IPV6;
210 			/*
211 			 * This indicates that the IIPT flag was set to the IPV6 value;
212 			 * there's no checksum for IPv6 packets.
213 			 */
214 			txq->stats.cso[ICE_CSO_STAT_TX_IP6]++;
215 			break;
216 #endif
217 		default:
218 			txq->stats.cso[ICE_CSO_STAT_TX_L3_ERR]++;
219 			break;
220 	}
221 
222 	*off |= (pi->ipi_ehdrlen >> 1) << ICE_TX_DESC_LEN_MACLEN_S;
223 	*off |= (pi->ipi_ip_hlen >> 2) << ICE_TX_DESC_LEN_IPLEN_S;
224 
225 	if (!(remaining_csum_flags & ~ICE_RX_CSUM_FLAGS))
226 		return;
227 
228 	switch (pi->ipi_ipproto) {
229 		case IPPROTO_TCP:
230 			if (pi->ipi_csum_flags & ICE_CSUM_TCP) {
231 				*cmd |= ICE_TX_DESC_CMD_L4T_EOFT_TCP;
232 				*off |= (pi->ipi_tcp_hlen >> 2) <<
233 				    ICE_TX_DESC_LEN_L4_LEN_S;
234 				txq->stats.cso[ICE_CSO_STAT_TX_TCP]++;
235 			}
236 			break;
237 		case IPPROTO_UDP:
238 			if (pi->ipi_csum_flags & ICE_CSUM_UDP) {
239 				*cmd |= ICE_TX_DESC_CMD_L4T_EOFT_UDP;
240 				*off |= (sizeof(struct udphdr) >> 2) <<
241 				    ICE_TX_DESC_LEN_L4_LEN_S;
242 				txq->stats.cso[ICE_CSO_STAT_TX_UDP]++;
243 			}
244 			break;
245 		case IPPROTO_SCTP:
246 			if (pi->ipi_csum_flags & ICE_CSUM_SCTP) {
247 				*cmd |= ICE_TX_DESC_CMD_L4T_EOFT_SCTP;
248 				*off |= (sizeof(struct sctphdr) >> 2) <<
249 				    ICE_TX_DESC_LEN_L4_LEN_S;
250 				txq->stats.cso[ICE_CSO_STAT_TX_SCTP]++;
251 			}
252 			break;
253 		default:
254 			txq->stats.cso[ICE_CSO_STAT_TX_L4_ERR]++;
255 			break;
256 	}
257 }
258 
259 /**
260  * ice_rx_checksum - verify hardware checksum is valid or not
261  * @rxq: the Rx queue structure
262  * @flags: checksum flags to update
263  * @data: checksum data to update
264  * @status0: descriptor status data
265  * @ptype: packet type
266  *
267  * Determine whether the hardware indicated that the Rx checksum is valid. If
268  * so, update the checksum flags and data, informing the stack of the status
269  * of the checksum so that it does not spend time verifying it manually.
270  */
271 static void
272 ice_rx_checksum(struct ice_rx_queue *rxq, uint32_t *flags, uint32_t *data,
273 		u16 status0, u16 ptype)
274 {
275 	const u16 l3_error = (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |
276 			      BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S));
277 	const u16 l4_error = (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S) |
278 			      BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S));
279 	const u16 xsum_errors = (l3_error | l4_error |
280 				 BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S));
281 	struct ice_rx_ptype_decoded decoded;
282 	bool is_ipv4, is_ipv6;
283 
284 	/* No L3 or L4 checksum was calculated */
285 	if (!(status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S))) {
286 		return;
287 	}
288 
289 	decoded = ice_decode_rx_desc_ptype(ptype);
290 	*flags = 0;
291 
292 	if (!(decoded.known && decoded.outer_ip))
293 		return;
294 
295 	is_ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
296 	    (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4);
297 	is_ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
298 	    (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6);
299 
300 	/* No checksum errors were reported */
301 	if (!(status0 & xsum_errors)) {
302 		if (is_ipv4)
303 			*flags |= CSUM_L3_CALC | CSUM_L3_VALID;
304 
305 		switch (decoded.inner_prot) {
306 		case ICE_RX_PTYPE_INNER_PROT_TCP:
307 		case ICE_RX_PTYPE_INNER_PROT_UDP:
308 		case ICE_RX_PTYPE_INNER_PROT_SCTP:
309 			*flags |= CSUM_L4_CALC | CSUM_L4_VALID;
310 			*data |= htons(0xffff);
311 			break;
312 		default:
313 			break;
314 		}
315 
316 		return;
317 	}
318 
319 	/*
320 	 * Certain IPv6 extension headers impact the validity of L4 checksums.
321 	 * If one of these headers exist, hardware will set the IPV6EXADD bit
322 	 * in the descriptor. If the bit is set then pretend like hardware
323 	 * didn't checksum this packet.
324 	 */
325 	if (is_ipv6 && (status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S))) {
326 		rxq->stats.cso[ICE_CSO_STAT_RX_IP6_ERR]++;
327 		return;
328 	}
329 
330 	/*
331 	 * At this point, status0 must have at least one of the l3_error or
332 	 * l4_error bits set.
333 	 */
334 
335 	if (status0 & l3_error) {
336 		if (is_ipv4) {
337 			rxq->stats.cso[ICE_CSO_STAT_RX_IP4_ERR]++;
338 			*flags |= CSUM_L3_CALC;
339 		} else {
340 			/* Hardware indicated L3 error but this isn't IPv4? */
341 			rxq->stats.cso[ICE_CSO_STAT_RX_L3_ERR]++;
342 		}
343 		/* don't bother reporting L4 errors if we got an L3 error */
344 		return;
345 	} else if (is_ipv4) {
346 		*flags |= CSUM_L3_CALC | CSUM_L3_VALID;
347 	}
348 
349 	if (status0 & l4_error) {
350 		switch (decoded.inner_prot) {
351 		case ICE_RX_PTYPE_INNER_PROT_TCP:
352 			rxq->stats.cso[ICE_CSO_STAT_RX_TCP_ERR]++;
353 			*flags |= CSUM_L4_CALC;
354 			break;
355 		case ICE_RX_PTYPE_INNER_PROT_UDP:
356 			rxq->stats.cso[ICE_CSO_STAT_RX_UDP_ERR]++;
357 			*flags |= CSUM_L4_CALC;
358 			break;
359 		case ICE_RX_PTYPE_INNER_PROT_SCTP:
360 			rxq->stats.cso[ICE_CSO_STAT_RX_SCTP_ERR]++;
361 			*flags |= CSUM_L4_CALC;
362 			break;
363 		default:
364 			/*
365 			 * Hardware indicated L4 error, but this isn't one of
366 			 * the expected protocols.
367 			 */
368 			rxq->stats.cso[ICE_CSO_STAT_RX_L4_ERR]++;
369 		}
370 	}
371 }
372 
373 /**
374  * ice_ptype_to_hash - Convert packet type to a hash value
375  * @ptype: the packet type to convert
376  *
377  * Given the packet type, convert to a suitable hashtype to report to the
378  * upper stack via the iri_rsstype value of the if_rxd_info_t structure.
379  *
380  * If the hash type is unknown we'll report M_HASHTYPE_OPAQUE.
381  */
382 static inline int
383 ice_ptype_to_hash(u16 ptype)
384 {
385 	struct ice_rx_ptype_decoded decoded;
386 
387 	if (ptype >= ARRAY_SIZE(ice_ptype_lkup))
388 		return M_HASHTYPE_OPAQUE;
389 
390 	decoded = ice_decode_rx_desc_ptype(ptype);
391 
392 	if (!decoded.known)
393 		return M_HASHTYPE_OPAQUE;
394 
395 	if (decoded.outer_ip == ICE_RX_PTYPE_OUTER_L2)
396 		return M_HASHTYPE_OPAQUE;
397 
398 	/* Note: anything that gets to this point is IP */
399 	if (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6) {
400 		switch (decoded.inner_prot) {
401 		case ICE_RX_PTYPE_INNER_PROT_TCP:
402 			return M_HASHTYPE_RSS_TCP_IPV6;
403 		case ICE_RX_PTYPE_INNER_PROT_UDP:
404 			return M_HASHTYPE_RSS_UDP_IPV6;
405 		default:
406 			return M_HASHTYPE_RSS_IPV6;
407 		}
408 	}
409 	if (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4) {
410 		switch (decoded.inner_prot) {
411 		case ICE_RX_PTYPE_INNER_PROT_TCP:
412 			return M_HASHTYPE_RSS_TCP_IPV4;
413 		case ICE_RX_PTYPE_INNER_PROT_UDP:
414 			return M_HASHTYPE_RSS_UDP_IPV4;
415 		default:
416 			return M_HASHTYPE_RSS_IPV4;
417 		}
418 	}
419 
420 	/* We should never get here!! */
421 	return M_HASHTYPE_OPAQUE;
422 }
423 #endif
424