/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (C) 2024-2025 Intel Corporation */

#ifndef __LIBETH_RX_H
#define __LIBETH_RX_H

#include <linux/if_vlan.h>

#include <net/page_pool/helpers.h>
#include <net/xdp.h>

/* Rx buffer management */

/* Space reserved in front of each frame */
#define LIBETH_SKB_HEADROOM	(NET_SKB_PAD + NET_IP_ALIGN)
#define LIBETH_XDP_HEADROOM	(ALIGN(XDP_PACKET_HEADROOM, NET_SKB_PAD) + \
				 NET_IP_ALIGN)
/* Maximum headroom for worst-case calculations */
#define LIBETH_MAX_HEADROOM	LIBETH_XDP_HEADROOM
/* Link layer / L2 overhead: Ethernet, 2 VLAN tags (C + S), FCS */
#define LIBETH_RX_LL_LEN	(ETH_HLEN + 2 * VLAN_HLEN + ETH_FCS_LEN)
/* Maximum supported L2-L4 header length */
#define LIBETH_MAX_HEAD		roundup_pow_of_two(max(MAX_HEADER, 256))

/* Always use order-0 pages */
#define LIBETH_RX_PAGE_ORDER	0
/* Pick a sane buffer stride and align it to a cacheline boundary */
#define LIBETH_RX_BUF_STRIDE	SKB_DATA_ALIGN(128)
/* HW-writeable space in one buffer: truesize - headroom/tailroom, aligned */
#define LIBETH_RX_PAGE_LEN(hr)					\
	ALIGN_DOWN(SKB_MAX_ORDER(hr, LIBETH_RX_PAGE_ORDER),	\
		   LIBETH_RX_BUF_STRIDE)

/**
 * struct libeth_fqe - structure representing an Rx buffer (fill queue element)
 * @netmem: network memory reference holding the buffer
 * @offset: offset from the page start (to the headroom)
 * @truesize: total space occupied by the buffer (w/ headroom and tailroom)
 *
 * Depending on the MTU, the API switches between the one-page-per-frame and
 * the shared page model (to conserve memory on bigger-page platforms). In the
 * former case, @offset is always 0 and @truesize is always ``PAGE_SIZE``.
 */
struct libeth_fqe {
	netmem_ref netmem;
	u32 offset;
	u32 truesize;
} __aligned_largest;

/**
 * enum libeth_fqe_type - enum representing types of Rx buffers
 * @LIBETH_FQE_MTU: buffer size is determined by MTU
 * @LIBETH_FQE_SHORT: buffer size is smaller than MTU, for short frames
 * @LIBETH_FQE_HDR: buffer size is ``LIBETH_MAX_HEAD``-sized, for headers
 */
enum libeth_fqe_type {
	LIBETH_FQE_MTU = 0U,
	LIBETH_FQE_SHORT,
	LIBETH_FQE_HDR,
};

/**
 * struct libeth_fq - structure representing a buffer (fill) queue
 * @fp: hotpath part of the structure
 * @pp: &page_pool for buffer management
 * @fqes: array of Rx buffers
 * @truesize: size to allocate per buffer, w/ overhead
 * @count: number of descriptors/buffers the queue has
 * @type: type of the buffers this queue has
 * @hsplit: flag whether header split is enabled
 * @xdp: flag indicating whether XDP is enabled
 * @buf_len: HW-writeable length per each buffer
 * @nid: ID of the closest NUMA node with memory
 */
struct libeth_fq {
	struct_group_tagged(libeth_fq_fp, fp,
		struct page_pool *pp;
		struct libeth_fqe *fqes;

		u32 truesize;
		u32 count;
	);

	/* Cold fields */
	enum libeth_fqe_type type:2;
	bool hsplit:1;
	bool xdp:1;

	u32 buf_len;
	int nid;
};

int libeth_rx_fq_create(struct libeth_fq *fq, struct napi_struct *napi);
void libeth_rx_fq_destroy(struct libeth_fq *fq);

/**
 * libeth_rx_alloc - allocate a new Rx buffer
 * @fq: fill queue to allocate for
 * @i: index of the buffer within the queue
 *
 * Return: DMA address to be passed to HW for Rx on successful allocation,
 * ``DMA_MAPPING_ERROR`` otherwise.
 */
static inline dma_addr_t libeth_rx_alloc(const struct libeth_fq_fp *fq, u32 i)
{
	struct libeth_fqe *buf = &fq->fqes[i];

	buf->truesize = fq->truesize;
	buf->netmem = page_pool_dev_alloc_netmem(fq->pp, &buf->offset,
						 &buf->truesize);
	if (unlikely(!buf->netmem))
		return DMA_MAPPING_ERROR;

	return page_pool_get_dma_addr_netmem(buf->netmem) + buf->offset +
	       fq->pp->p.offset;
}
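
/* Example usage (illustrative sketch only, not part of the API): a driver
 * could create a fill queue and populate its Rx descriptors roughly as
 * follows. The "rxq"/"xyz_*()" names and the descriptor programming are
 * hypothetical; only the libeth_*() calls and kernel constants are real.
 * Error unwinding via libeth_rx_fq_destroy() is omitted for brevity.
 *
 *	struct libeth_fq fq = {
 *		.count	= rxq->desc_count,
 *		.type	= LIBETH_FQE_MTU,
 *		.hsplit	= false,
 *		.nid	= NUMA_NO_NODE,
 *	};
 *	int err;
 *
 *	err = libeth_rx_fq_create(&fq, &rxq->napi);
 *	if (err)
 *		return err;
 *
 *	// Cache the hotpath fields and the HW-writeable buffer length
 *	rxq->pp = fq.pp;
 *	rxq->fqes = fq.fqes;
 *	rxq->truesize = fq.truesize;
 *	rxq->buf_len = fq.buf_len;
 *
 *	for (u32 i = 0; i < fq.count; i++) {
 *		dma_addr_t dma = libeth_rx_alloc(&fq.fp, i);
 *
 *		if (dma == DMA_MAPPING_ERROR)
 *			return -ENOMEM;
 *
 *		xyz_fill_rx_desc(rxq, i, dma);
 *	}
 */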

void libeth_rx_recycle_slow(netmem_ref netmem);

/**
 * libeth_rx_sync_for_cpu - synchronize or recycle buffer post DMA
 * @fqe: buffer to process
 * @len: frame length from the descriptor
 *
 * Process the buffer after it's written by HW. The regular path is to
 * synchronize DMA for CPU, but when there is no data, the buffer is
 * immediately recycled back to its PP.
 *
 * Return: true when there's data to process, false otherwise.
 */
static inline bool libeth_rx_sync_for_cpu(const struct libeth_fqe *fqe,
					  u32 len)
{
	netmem_ref netmem = fqe->netmem;

	/* Very rare, but possible case. The most common reason:
	 * the last fragment contained FCS only, which was then
	 * stripped by the HW.
	 */
	if (unlikely(!len)) {
		libeth_rx_recycle_slow(netmem);
		return false;
	}

	page_pool_dma_sync_netmem_for_cpu(netmem_get_pp(netmem), netmem,
					  fqe->offset, len);

	return true;
}

/* Converting abstract packet type numbers into a software structure with
 * the packet parameters to do O(1) lookup on Rx.
 */

enum {
	LIBETH_RX_PT_OUTER_L2 = 0U,
	LIBETH_RX_PT_OUTER_IPV4,
	LIBETH_RX_PT_OUTER_IPV6,
};

enum {
	LIBETH_RX_PT_NOT_FRAG = 0U,
	LIBETH_RX_PT_FRAG,
};

enum {
	LIBETH_RX_PT_TUNNEL_IP_NONE = 0U,
	LIBETH_RX_PT_TUNNEL_IP_IP,
	LIBETH_RX_PT_TUNNEL_IP_GRENAT,
	LIBETH_RX_PT_TUNNEL_IP_GRENAT_MAC,
	LIBETH_RX_PT_TUNNEL_IP_GRENAT_MAC_VLAN,
};

enum {
	LIBETH_RX_PT_TUNNEL_END_NONE = 0U,
	LIBETH_RX_PT_TUNNEL_END_IPV4,
	LIBETH_RX_PT_TUNNEL_END_IPV6,
};

enum {
	LIBETH_RX_PT_INNER_NONE = 0U,
	LIBETH_RX_PT_INNER_UDP,
	LIBETH_RX_PT_INNER_TCP,
	LIBETH_RX_PT_INNER_SCTP,
	LIBETH_RX_PT_INNER_ICMP,
	LIBETH_RX_PT_INNER_TIMESYNC,
};

#define LIBETH_RX_PT_PAYLOAD_NONE	PKT_HASH_TYPE_NONE
#define LIBETH_RX_PT_PAYLOAD_L2		PKT_HASH_TYPE_L2
#define LIBETH_RX_PT_PAYLOAD_L3		PKT_HASH_TYPE_L3
#define LIBETH_RX_PT_PAYLOAD_L4		PKT_HASH_TYPE_L4

struct libeth_rx_pt {
	u32 outer_ip:2;
	u32 outer_frag:1;
	u32 tunnel_type:3;
	u32 tunnel_end_prot:2;
	u32 tunnel_end_frag:1;
	u32 inner_prot:3;
	enum pkt_hash_types payload_layer:2;

	u32 pad:2;
	enum xdp_rss_hash_type hash_type:16;
};

/**
 * struct libeth_rx_csum - checksum offload bits decoded from the Rx descriptor
 * @l3l4p: detectable L3 and L4 integrity check is processed by the hardware
 * @ipe: IP checksum error
 * @eipe: external (outermost) IP header checksum error (only for tunnels)
 * @eudpe: external (outermost) UDP checksum error (only for tunnels)
 * @ipv6exadd: IPv6 header with extension headers
 * @l4e: L4 integrity error
 * @pprs: set for packets that skip checksum calculation in the HW pre-parser
 * @nat: the packet is a UDP tunneled packet
 * @raw_csum_valid: set if raw checksum is valid
 * @pad: padding to naturally align the @raw_csum field
 * @raw_csum: raw checksum
 */
struct libeth_rx_csum {
	u32 l3l4p:1;
	u32 ipe:1;
	u32 eipe:1;
	u32 eudpe:1;
	u32 ipv6exadd:1;
	u32 l4e:1;
	u32 pprs:1;
	u32 nat:1;

	u32 raw_csum_valid:1;
	u32 pad:7;
	u32 raw_csum:16;
};

/**
 * struct libeth_rqe_info - receive queue element info
 * @len: packet length
 * @ptype: packet type based on types programmed into the device
 * @eop: whether it's the last fragment of the packet
 * @rxe: MAC errors: CRC, Alignment, Oversize, Undersize, Length error
 * @vlan: C-VLAN or S-VLAN tag depending on the VLAN offload configuration
 */
struct libeth_rqe_info {
	u32 len;

	u32 ptype:14;
	u32 eop:1;
	u32 rxe:1;

	u32 vlan:16;
};

void libeth_rx_pt_gen_hash_type(struct libeth_rx_pt *pt);

/**
 * libeth_rx_pt_get_ip_ver - get IP version from a packet type structure
 * @pt: packet type params
 *
 * Wrapper to compile out the IPv6 code from the drivers when not supported
 * by the kernel.
 *
 * Return: @pt.outer_ip or stub for IPv6 when not compiled-in.
 */
static inline u32 libeth_rx_pt_get_ip_ver(struct libeth_rx_pt pt)
{
#if !IS_ENABLED(CONFIG_IPV6)
	switch (pt.outer_ip) {
	case LIBETH_RX_PT_OUTER_IPV4:
		return LIBETH_RX_PT_OUTER_IPV4;
	default:
		return LIBETH_RX_PT_OUTER_L2;
	}
#else
	return pt.outer_ip;
#endif
}

/* libeth_rx_pt_has_*() can be used to quickly check whether the HW metadata
 * is available, to avoid further expensive processing such as descriptor
 * reads. They already check for the corresponding netdev feature to be
 * enabled, thus can be used as drop-in replacements for plain feature checks.
 */

static inline bool libeth_rx_pt_has_checksum(const struct net_device *dev,
					     struct libeth_rx_pt pt)
{
	/* Non-zero _INNER* is only possible when _OUTER_IPV* is set,
	 * it is enough to check only for the L4 type.
	 */
	return likely(pt.inner_prot > LIBETH_RX_PT_INNER_NONE &&
		      (dev->features & NETIF_F_RXCSUM));
}

static inline bool libeth_rx_pt_has_hash(const struct net_device *dev,
					 struct libeth_rx_pt pt)
{
	return likely(pt.payload_layer > LIBETH_RX_PT_PAYLOAD_NONE &&
		      (dev->features & NETIF_F_RXHASH));
}

/**
 * libeth_rx_pt_set_hash - fill in skb hash value based on the PT
 * @skb: skb to fill the hash in
 * @hash: 32-bit hash value from the descriptor
 * @pt: packet type
 */
static inline void libeth_rx_pt_set_hash(struct sk_buff *skb, u32 hash,
					 struct libeth_rx_pt pt)
{
	skb_set_hash(skb, hash, pt.payload_layer);
}
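
/* Example usage (illustrative sketch only, not part of the API): on the Rx
 * completion path, a driver could consume a filled buffer and apply the HW
 * metadata roughly as follows. "rxq", "rx_desc", the descriptor field names,
 * the xyz_*() helper and the per-queue packet type table "pt_lkup" (assumed
 * to have been run through libeth_rx_pt_gen_hash_type() at init time) are
 * hypothetical; the libeth, skb and netmem calls are existing kernel API.
 *
 *	struct libeth_fqe *fqe = &rxq->fqes[buf_id];
 *	u32 len = le16_to_cpu(rx_desc->pkt_len);
 *	struct libeth_rx_pt pt;
 *
 *	if (!libeth_rx_sync_for_cpu(fqe, len))
 *		return;		// FCS-only fragment, already recycled
 *
 *	skb_add_rx_frag_netmem(skb, skb_shinfo(skb)->nr_frags, fqe->netmem,
 *			       fqe->offset +
 *			       netmem_get_pp(fqe->netmem)->p.offset,
 *			       len, fqe->truesize);
 *
 *	pt = rxq->pt_lkup[le16_to_cpu(rx_desc->ptype)];
 *	if (libeth_rx_pt_has_hash(rxq->netdev, pt))
 *		libeth_rx_pt_set_hash(skb, le32_to_cpu(rx_desc->hash), pt);
 *	if (libeth_rx_pt_has_checksum(rxq->netdev, pt) &&
 *	    xyz_csum_ok(rx_desc))
 *		skb->ip_summed = CHECKSUM_UNNECESSARY;
 */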

#endif /* __LIBETH_RX_H */