/* SPDX-License-Identifier: BSD-3-Clause */
/* Copyright (c) 2024, Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * @file ice_common_txrx.h
 * @brief common Tx/Rx utility functions
 *
 * Contains common utility functions for the Tx/Rx hot path.
 *
 * These functions depend on the if_pkt_info_t structure. A suitable
 * implementation of this structure must be provided if these functions are to
 * be used without the iflib networking stack.
 */

#ifndef _ICE_COMMON_TXRX_H_
#define _ICE_COMMON_TXRX_H_

#include <netinet/udp.h>
#include <netinet/sctp.h>
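
/*
 * Porting note (summarized from the code below, not from the iflib headers
 * themselves): the if_pkt_info_t fields these functions reference are:
 *
 *	ipi_segs	DMA segment array describing the packet
 *	ipi_nsegs	number of valid entries in ipi_segs
 *	ipi_len		total packet length, in bytes
 *	ipi_ehdrlen	Ethernet (L2) header length
 *	ipi_ip_hlen	IP (L3) header length
 *	ipi_tcp_hlen	TCP (L4) header length
 *	ipi_tso_segsz	TSO maximum segment size (MSS)
 *	ipi_pidx	index of the first Tx descriptor to use
 *	ipi_csum_flags	requested CSUM_* offload flags
 *	ipi_etype	Ethertype of the L3 payload (ETHERTYPE_*)
 *	ipi_ipproto	L4 protocol number (IPPROTO_*)
 *
 * Any replacement structure must provide equivalent information.
 */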

/**
 * ice_tso_detect_sparse - detect TSO packets with too many segments
 * @pi: packet information
 *
 * Hardware can only transmit packets that use 8 or fewer descriptors. For TSO
 * packets, hardware needs to be able to build the split packets using 8 or
 * fewer descriptors. Additionally, the header must be contained within at
 * most 3 descriptors.
 *
 * To verify this, we walk the headers to find out how many descriptors the
 * headers require (usually 1). Then we ensure that, for each TSO segment, its
 * data plus the headers are contained within 8 or fewer descriptors.
 */
static inline int
ice_tso_detect_sparse(if_pkt_info_t pi)
{
	int count, curseg, i, hlen, segsz, seglen, tsolen, hdrs, maxsegs;
	bus_dma_segment_t *segs = pi->ipi_segs;
	int nsegs = pi->ipi_nsegs;

	curseg = hdrs = 0;

	hlen = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen;
	tsolen = pi->ipi_len - hlen;

	/* First, count the number of descriptors for the header.
	 * Additionally, make sure it does not span more than 3 segments.
	 */
	i = 0;
	curseg = segs[0].ds_len;
	while (hlen > 0) {
		hdrs++;
		if (hdrs > ICE_MAX_TSO_HDR_SEGS)
			return (1);
		if (curseg == 0) {
			i++;
			if (__predict_false(i == nsegs))
				return (1);

			curseg = segs[i].ds_len;
		}
		seglen = min(curseg, hlen);
		curseg -= seglen;
		hlen -= seglen;
	}

	maxsegs = ICE_MAX_TX_SEGS - hdrs;

	/* We must count the headers, in order to verify that they take up
	 * 3 or fewer descriptors. However, we don't need to check the data
	 * if the total segment count is small.
	 */
	if (nsegs <= maxsegs)
		return (0);

	count = 0;

	/* Now check the data to make sure that each TSO segment is made up of
	 * no more than maxsegs descriptors. This ensures that hardware will
	 * be capable of performing TSO offload.
	 */
	while (tsolen > 0) {
		segsz = pi->ipi_tso_segsz;
		while (segsz > 0 && tsolen != 0) {
			count++;
			if (count > maxsegs) {
				return (1);
			}
			if (curseg == 0) {
				i++;
				if (__predict_false(i == nsegs)) {
					return (1);
				}
				curseg = segs[i].ds_len;
			}
			seglen = min(curseg, segsz);
			segsz -= seglen;
			curseg -= seglen;
			tsolen -= seglen;
		}
		count = 0;
	}

	return (0);
}

/**
 * ice_tso_setup - Setup a context descriptor to prepare for a TSO packet
 * @txq: the Tx queue to use
 * @pi: the packet info to prepare for
 *
 * Setup a context descriptor in preparation for sending a Tx packet that
 * requires TSO offload. Returns the index of the descriptor to use when
 * encapsulating the Tx packet data into descriptors.
 */
static inline int
ice_tso_setup(struct ice_tx_queue *txq, if_pkt_info_t pi)
{
	struct ice_tx_ctx_desc *txd;
	u32 cmd, mss, type, tsolen;
	int idx;
	u64 type_cmd_tso_mss;

	idx = pi->ipi_pidx;
	txd = (struct ice_tx_ctx_desc *)&txq->tx_base[idx];
	tsolen = pi->ipi_len - (pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen);

	type = ICE_TX_DESC_DTYPE_CTX;
	cmd = ICE_TX_CTX_DESC_TSO;
	/* TSO MSS must not be less than 64 */
	if (pi->ipi_tso_segsz < ICE_MIN_TSO_MSS) {
		txq->stats.mss_too_small++;
		pi->ipi_tso_segsz = ICE_MIN_TSO_MSS;
	}
	mss = pi->ipi_tso_segsz;

	type_cmd_tso_mss = ((u64)type << ICE_TXD_CTX_QW1_DTYPE_S) |
	    ((u64)cmd << ICE_TXD_CTX_QW1_CMD_S) |
	    ((u64)tsolen << ICE_TXD_CTX_QW1_TSO_LEN_S) |
	    ((u64)mss << ICE_TXD_CTX_QW1_MSS_S);
	txd->qw1 = htole64(type_cmd_tso_mss);

	txd->tunneling_params = htole32(0);
	txq->stats.tso++;

	return ((idx + 1) & (txq->desc_count - 1));
}
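
/*
 * Sketch of the expected calling sequence from a Tx encapsulation routine
 * (illustrative only; in the FreeBSD driver this logic lives in the iflib
 * ift_txd_encap implementation):
 *
 *	if (pi->ipi_csum_flags & CSUM_TSO) {
 *		if (ice_tso_detect_sparse(pi))
 *			return (EFBIG);
 *		i = ice_tso_setup(txq, pi);
 *	}
 *
 * Returning EFBIG prompts iflib to defragment the mbuf chain and retry,
 * which collapses a sparse packet into fewer, larger DMA segments.
 */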

/**
 * ice_tx_setup_offload - Setup register values for performing a Tx offload
 * @txq: The Tx queue, used to track checksum offload stats
 * @pi: the packet info to program for
 * @cmd: the cmd register value to update
 * @off: the off register value to update
 *
 * Based on the packet info provided, update the cmd and off values for
 * enabling Tx offloads. This depends on the packet type and which offloads
 * have been requested.
 *
 * We also track the total number of times that we've requested that hardware
 * offload a particular type of checksum, for debugging purposes.
 */
static inline void
ice_tx_setup_offload(struct ice_tx_queue *txq, if_pkt_info_t pi, u32 *cmd, u32 *off)
{
	u32 remaining_csum_flags = pi->ipi_csum_flags;

	switch (pi->ipi_etype) {
#ifdef INET
	case ETHERTYPE_IP:
		if (pi->ipi_csum_flags & ICE_CSUM_IP) {
			*cmd |= ICE_TX_DESC_CMD_IIPT_IPV4_CSUM;
			txq->stats.cso[ICE_CSO_STAT_TX_IP4]++;
			remaining_csum_flags &= ~CSUM_IP;
		} else
			*cmd |= ICE_TX_DESC_CMD_IIPT_IPV4;
		break;
#endif
#ifdef INET6
	case ETHERTYPE_IPV6:
		*cmd |= ICE_TX_DESC_CMD_IIPT_IPV6;
		/*
		 * This indicates that the IIPT flag was set to the IPV6 value;
		 * there's no checksum for IPv6 packets.
		 */
		txq->stats.cso[ICE_CSO_STAT_TX_IP6]++;
		break;
#endif
	default:
		txq->stats.cso[ICE_CSO_STAT_TX_L3_ERR]++;
		break;
	}

	*off |= (pi->ipi_ehdrlen >> 1) << ICE_TX_DESC_LEN_MACLEN_S;
	*off |= (pi->ipi_ip_hlen >> 2) << ICE_TX_DESC_LEN_IPLEN_S;

	if (!(remaining_csum_flags & ~ICE_RX_CSUM_FLAGS))
		return;

	switch (pi->ipi_ipproto) {
	case IPPROTO_TCP:
		if (pi->ipi_csum_flags & ICE_CSUM_TCP) {
			*cmd |= ICE_TX_DESC_CMD_L4T_EOFT_TCP;
			*off |= (pi->ipi_tcp_hlen >> 2) <<
			    ICE_TX_DESC_LEN_L4_LEN_S;
			txq->stats.cso[ICE_CSO_STAT_TX_TCP]++;
		}
		break;
	case IPPROTO_UDP:
		if (pi->ipi_csum_flags & ICE_CSUM_UDP) {
			*cmd |= ICE_TX_DESC_CMD_L4T_EOFT_UDP;
			*off |= (sizeof(struct udphdr) >> 2) <<
			    ICE_TX_DESC_LEN_L4_LEN_S;
			txq->stats.cso[ICE_CSO_STAT_TX_UDP]++;
		}
		break;
	case IPPROTO_SCTP:
		if (pi->ipi_csum_flags & ICE_CSUM_SCTP) {
			*cmd |= ICE_TX_DESC_CMD_L4T_EOFT_SCTP;
			*off |= (sizeof(struct sctphdr) >> 2) <<
			    ICE_TX_DESC_LEN_L4_LEN_S;
			txq->stats.cso[ICE_CSO_STAT_TX_SCTP]++;
		}
		break;
	default:
		txq->stats.cso[ICE_CSO_STAT_TX_L4_ERR]++;
		break;
	}
}
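
/*
 * The MACLEN/IPLEN/L4LEN fields built above are expressed in words, not
 * bytes: MACLEN in 2-byte words (ehdrlen >> 1), IPLEN and L4LEN in 4-byte
 * words (hlen >> 2). For example (worked numbers, not from the source), a
 * plain Ethernet frame carrying IPv4/TCP with no options encodes as:
 *
 *	MACLEN = 14 >> 1 = 7	(14-byte Ethernet header)
 *	IPLEN  = 20 >> 2 = 5	(20-byte IPv4 header)
 *	L4LEN  = 20 >> 2 = 5	(20-byte TCP header)
 */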

/**
 * ice_rx_checksum - determine whether a hardware-calculated checksum is valid
 * @rxq: the Rx queue structure
 * @flags: checksum flags to update
 * @data: checksum data to update
 * @status0: descriptor status data
 * @ptype: packet type
 *
 * Determine whether the hardware indicated that the Rx checksum is valid. If
 * so, update the checksum flags and data, informing the stack of the status
 * of the checksum so that it does not spend time verifying it manually.
 */
static void
ice_rx_checksum(struct ice_rx_queue *rxq, uint32_t *flags, uint32_t *data,
		u16 status0, u16 ptype)
{
	const u16 l3_error = (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |
			      BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S));
	const u16 l4_error = (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S) |
			      BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S));
	const u16 xsum_errors = (l3_error | l4_error |
				 BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S));
	struct ice_rx_ptype_decoded decoded;
	bool is_ipv4, is_ipv6;

	/* No L3 or L4 checksum was calculated */
	if (!(status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S))) {
		return;
	}

	decoded = ice_decode_rx_desc_ptype(ptype);
	*flags = 0;

	if (!(decoded.known && decoded.outer_ip))
		return;

	is_ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
	    (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4);
	is_ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
	    (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6);

	/* No checksum errors were reported */
	if (!(status0 & xsum_errors)) {
		if (is_ipv4)
			*flags |= CSUM_L3_CALC | CSUM_L3_VALID;

		switch (decoded.inner_prot) {
		case ICE_RX_PTYPE_INNER_PROT_TCP:
		case ICE_RX_PTYPE_INNER_PROT_UDP:
		case ICE_RX_PTYPE_INNER_PROT_SCTP:
			*flags |= CSUM_L4_CALC | CSUM_L4_VALID;
			*data |= htons(0xffff);
			break;
		default:
			break;
		}

		return;
	}

	/*
	 * Certain IPv6 extension headers impact the validity of L4 checksums.
	 * If one of these headers exists, hardware will set the IPV6EXADD bit
	 * in the descriptor. If the bit is set then pretend like hardware
	 * didn't checksum this packet.
	 */
	if (is_ipv6 && (status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S))) {
		rxq->stats.cso[ICE_CSO_STAT_RX_IP6_ERR]++;
		return;
	}

	/*
	 * At this point, status0 must have at least one of the l3_error or
	 * l4_error bits set.
	 */

	if (status0 & l3_error) {
		if (is_ipv4) {
			rxq->stats.cso[ICE_CSO_STAT_RX_IP4_ERR]++;
			*flags |= CSUM_L3_CALC;
		} else {
			/* Hardware indicated an L3 error but this isn't IPv4? */
			rxq->stats.cso[ICE_CSO_STAT_RX_L3_ERR]++;
		}
		/* don't bother reporting L4 errors if we got an L3 error */
		return;
	} else if (is_ipv4) {
		*flags |= CSUM_L3_CALC | CSUM_L3_VALID;
	}

	if (status0 & l4_error) {
		switch (decoded.inner_prot) {
		case ICE_RX_PTYPE_INNER_PROT_TCP:
			rxq->stats.cso[ICE_CSO_STAT_RX_TCP_ERR]++;
			*flags |= CSUM_L4_CALC;
			break;
		case ICE_RX_PTYPE_INNER_PROT_UDP:
			rxq->stats.cso[ICE_CSO_STAT_RX_UDP_ERR]++;
			*flags |= CSUM_L4_CALC;
			break;
		case ICE_RX_PTYPE_INNER_PROT_SCTP:
			rxq->stats.cso[ICE_CSO_STAT_RX_SCTP_ERR]++;
			*flags |= CSUM_L4_CALC;
			break;
		default:
			/*
			 * Hardware indicated an L4 error, but this isn't one
			 * of the expected protocols.
			 */
			rxq->stats.cso[ICE_CSO_STAT_RX_L4_ERR]++;
		}
	}
}
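
/*
 * Sketch of the expected Rx-path usage (illustrative only; in the FreeBSD
 * driver the caller is the iflib ift_rxd_pkt_get implementation, which
 * stores the results in the if_rxd_info_t so that iflib can populate the
 * mbuf's csum_flags and csum_data):
 *
 *	ice_rx_checksum(rxq, &ri->iri_csum_flags, &ri->iri_csum_data,
 *	    status0, ptype);
 */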

/**
 * ice_ptype_to_hash - Convert packet type to a hash value
 * @ptype: the packet type to convert
 *
 * Given the packet type, convert to a suitable hashtype to report to the
 * upper stack via the iri_rsstype value of the if_rxd_info_t structure.
 *
 * If the hash type is unknown we'll report M_HASHTYPE_OPAQUE.
 */
static inline int
ice_ptype_to_hash(u16 ptype)
{
	struct ice_rx_ptype_decoded decoded;

	if (ptype >= ARRAY_SIZE(ice_ptype_lkup))
		return M_HASHTYPE_OPAQUE;

	decoded = ice_decode_rx_desc_ptype(ptype);

	if (!decoded.known)
		return M_HASHTYPE_OPAQUE;

	if (decoded.outer_ip == ICE_RX_PTYPE_OUTER_L2)
		return M_HASHTYPE_OPAQUE;

	/* Note: anything that gets to this point is IP */
	if (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6) {
		switch (decoded.inner_prot) {
		case ICE_RX_PTYPE_INNER_PROT_TCP:
			return M_HASHTYPE_RSS_TCP_IPV6;
		case ICE_RX_PTYPE_INNER_PROT_UDP:
			return M_HASHTYPE_RSS_UDP_IPV6;
		default:
			return M_HASHTYPE_RSS_IPV6;
		}
	}
	if (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4) {
		switch (decoded.inner_prot) {
		case ICE_RX_PTYPE_INNER_PROT_TCP:
			return M_HASHTYPE_RSS_TCP_IPV4;
		case ICE_RX_PTYPE_INNER_PROT_UDP:
			return M_HASHTYPE_RSS_UDP_IPV4;
		default:
			return M_HASHTYPE_RSS_IPV4;
		}
	}

	/* We should never get here!! */
	return M_HASHTYPE_OPAQUE;
}
#endif /* _ICE_COMMON_TXRX_H_ */