/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (C) 2024-2025 Intel Corporation */

#ifndef __LIBETH_RX_H
#define __LIBETH_RX_H

#include <linux/if_vlan.h>

#include <net/page_pool/helpers.h>
#include <net/xdp.h>

/* Rx buffer management */

/* Space reserved in front of each frame */
#define LIBETH_SKB_HEADROOM	(NET_SKB_PAD + NET_IP_ALIGN)
/* Maximum headroom for worst-case calculations */
#define LIBETH_MAX_HEADROOM	LIBETH_SKB_HEADROOM
/* Link layer / L2 overhead: Ethernet, 2 VLAN tags (C + S), FCS */
#define LIBETH_RX_LL_LEN	(ETH_HLEN + 2 * VLAN_HLEN + ETH_FCS_LEN)
/* Maximum supported L2-L4 header length */
#define LIBETH_MAX_HEAD		roundup_pow_of_two(max(MAX_HEADER, 256))

/* Always use order-0 pages */
#define LIBETH_RX_PAGE_ORDER	0
/* Pick a sane buffer stride and align to a cacheline boundary */
#define LIBETH_RX_BUF_STRIDE	SKB_DATA_ALIGN(128)
/* HW-writeable space in one buffer: truesize - headroom/tailroom, aligned */
#define LIBETH_RX_PAGE_LEN(hr)						  \
	ALIGN_DOWN(SKB_MAX_ORDER(hr, LIBETH_RX_PAGE_ORDER),		  \
		   LIBETH_RX_BUF_STRIDE)
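
/* Worked example (illustrative, not part of the API): on a typical x86_64
 * build with 4-KiB pages, 64-byte cachelines and a 320-byte
 * struct skb_shared_info, LIBETH_SKB_HEADROOM is 64 (NET_SKB_PAD 64 +
 * NET_IP_ALIGN 0), so:
 *
 *	LIBETH_RX_PAGE_LEN(64)
 *		= ALIGN_DOWN(SKB_MAX_ORDER(64, 0), LIBETH_RX_BUF_STRIDE)
 *		= ALIGN_DOWN(4096 - 64 - 320, 128)
 *		= 3712
 *
 * i.e. roughly 3.6 KiB of HW-writeable space per order-0 page once the
 * headroom and the tailroom reserved for struct skb_shared_info are
 * subtracted. The exact figures depend on the kernel config.
 */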

/**
 * struct libeth_fqe - structure representing an Rx buffer (fill queue element)
 * @netmem: network memory reference holding the buffer
 * @offset: offset from the page start (to the headroom)
 * @truesize: total space occupied by the buffer (w/ headroom and tailroom)
 *
 * Depending on the MTU, the API switches between the one-page-per-frame and
 * the shared-page model (to conserve memory on bigger-page platforms). In the
 * former case, @offset is always 0 and @truesize is always ``PAGE_SIZE``.
 */
struct libeth_fqe {
	netmem_ref		netmem;
	u32			offset;
	u32			truesize;
} __aligned_largest;

/**
 * enum libeth_fqe_type - enum representing types of Rx buffers
 * @LIBETH_FQE_MTU: buffer size is determined by MTU
 * @LIBETH_FQE_SHORT: buffer size is smaller than MTU, for short frames
 * @LIBETH_FQE_HDR: buffer size is ``LIBETH_MAX_HEAD``-sized, for headers
 */
enum libeth_fqe_type {
	LIBETH_FQE_MTU		= 0U,
	LIBETH_FQE_SHORT,
	LIBETH_FQE_HDR,
};

/**
 * struct libeth_fq - structure representing a buffer (fill) queue
 * @fp: hotpath part of the structure
 * @pp: &page_pool for buffer management
 * @fqes: array of Rx buffers
 * @truesize: size to allocate per buffer, w/ overhead
 * @count: number of descriptors/buffers the queue has
 * @type: type of the buffers this queue has
 * @hsplit: flag whether header split is enabled
 * @buf_len: HW-writeable length per buffer
 * @nid: ID of the closest NUMA node with memory
 */
struct libeth_fq {
	struct_group_tagged(libeth_fq_fp, fp,
		struct page_pool	*pp;
		struct libeth_fqe	*fqes;

		u32			truesize;
		u32			count;
	);

	/* Cold fields */
	enum libeth_fqe_type	type:2;
	bool			hsplit:1;

	u32			buf_len;
	int			nid;
};

int libeth_rx_fq_create(struct libeth_fq *fq, struct napi_struct *napi);
void libeth_rx_fq_destroy(struct libeth_fq *fq);
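
/* Setup sketch (hypothetical driver code, not part of this API): the caller
 * fills in the cold fields below and lets libeth_rx_fq_create() pick the
 * buffer parameters and create the &page_pool. The queue size, NUMA node and
 * the plain-MTU buffer type are illustrative choices:
 *
 *	struct libeth_fq fq = {
 *		.count	= 1024,
 *		.type	= LIBETH_FQE_MTU,
 *		.hsplit	= false,
 *		.nid	= NUMA_NO_NODE,
 *	};
 *	int err;
 *
 *	err = libeth_rx_fq_create(&fq, &q_vector->napi);
 *	if (err)
 *		return err;
 *
 * On success, the hotpath fields (@pp, @fqes, @truesize) and @buf_len should
 * be populated and the queue can be filled via libeth_rx_alloc() below.
 * Teardown mirrors this with libeth_rx_fq_destroy(&fq).
 */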

/**
 * libeth_rx_alloc - allocate a new Rx buffer
 * @fq: fill queue to allocate for
 * @i: index of the buffer within the queue
 *
 * Return: DMA address to be passed to HW for Rx on successful allocation,
 * %DMA_MAPPING_ERROR otherwise.
 */
static inline dma_addr_t libeth_rx_alloc(const struct libeth_fq_fp *fq, u32 i)
{
	struct libeth_fqe *buf = &fq->fqes[i];

	buf->truesize = fq->truesize;
	buf->netmem = page_pool_dev_alloc_netmem(fq->pp, &buf->offset,
						 &buf->truesize);
	if (unlikely(!buf->netmem))
		return DMA_MAPPING_ERROR;

	return page_pool_get_dma_addr_netmem(buf->netmem) + buf->offset +
	       fq->pp->p.offset;
}
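
/* Refill sketch (hypothetical, the descriptor layout is driver-specific):
 * a driver would typically walk its ring, allocate one buffer per descriptor
 * and stop on the first failure so the remainder can be refilled later:
 *
 *	for (u32 i = 0; i < fq->fp.count; i++) {
 *		dma_addr_t addr = libeth_rx_alloc(&fq->fp, i);
 *
 *		if (addr == DMA_MAPPING_ERROR)
 *			break;
 *
 *		hw_desc[i].addr = cpu_to_le64(addr);
 *	}
 *
 * "hw_desc" and its layout are made up for illustration; only the DMA address
 * returned here is what the HW actually needs.
 */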

void libeth_rx_recycle_slow(netmem_ref netmem);

/**
 * libeth_rx_sync_for_cpu - synchronize or recycle buffer post DMA
 * @fqe: buffer to process
 * @len: frame length from the descriptor
 *
 * Process the buffer after it's written by HW. The regular path is to
 * synchronize DMA for CPU, but in case of no data it will be immediately
 * recycled back to its PP.
 *
 * Return: true when there's data to process, false otherwise.
 */
static inline bool libeth_rx_sync_for_cpu(const struct libeth_fqe *fqe,
					  u32 len)
{
	netmem_ref netmem = fqe->netmem;

	/* Very rare, but possible case. The most common reason:
	 * the last fragment contained FCS only, which was then
	 * stripped by the HW.
	 */
	if (unlikely(!len)) {
		libeth_rx_recycle_slow(netmem);
		return false;
	}

	page_pool_dma_sync_netmem_for_cpu(netmem_get_pp(netmem), netmem,
					  fqe->offset, len);

	return true;
}
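
/* Completion sketch (hypothetical, simplified): on the clean path, a driver
 * syncs the fragment for the CPU and, only when there is data, attaches it to
 * the skb being built. "desc", "skb" and "ntc" are illustrative:
 *
 *	struct libeth_fqe *rx_buf = &fq->fp.fqes[ntc];
 *	u32 len = le16_to_cpu(desc->pkt_len);
 *
 *	if (!libeth_rx_sync_for_cpu(rx_buf, len))
 *		return;
 *
 *	skb_add_rx_frag_netmem(skb, skb_shinfo(skb)->nr_frags, rx_buf->netmem,
 *			       rx_buf->offset + fq->fp.pp->p.offset, len,
 *			       rx_buf->truesize);
 *
 * Note the payload starts past the pool-level headroom (pp->p.offset), the
 * same offset libeth_rx_alloc() added to the DMA address handed to the HW.
 */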

/* Converting abstract packet type numbers into a software structure with
 * the packet parameters to do O(1) lookup on Rx.
 */

enum {
	LIBETH_RX_PT_OUTER_L2			= 0U,
	LIBETH_RX_PT_OUTER_IPV4,
	LIBETH_RX_PT_OUTER_IPV6,
};

enum {
	LIBETH_RX_PT_NOT_FRAG			= 0U,
	LIBETH_RX_PT_FRAG,
};

enum {
	LIBETH_RX_PT_TUNNEL_IP_NONE		= 0U,
	LIBETH_RX_PT_TUNNEL_IP_IP,
	LIBETH_RX_PT_TUNNEL_IP_GRENAT,
	LIBETH_RX_PT_TUNNEL_IP_GRENAT_MAC,
	LIBETH_RX_PT_TUNNEL_IP_GRENAT_MAC_VLAN,
};

enum {
	LIBETH_RX_PT_TUNNEL_END_NONE		= 0U,
	LIBETH_RX_PT_TUNNEL_END_IPV4,
	LIBETH_RX_PT_TUNNEL_END_IPV6,
};

enum {
	LIBETH_RX_PT_INNER_NONE			= 0U,
	LIBETH_RX_PT_INNER_UDP,
	LIBETH_RX_PT_INNER_TCP,
	LIBETH_RX_PT_INNER_SCTP,
	LIBETH_RX_PT_INNER_ICMP,
	LIBETH_RX_PT_INNER_TIMESYNC,
};

#define LIBETH_RX_PT_PAYLOAD_NONE		PKT_HASH_TYPE_NONE
#define LIBETH_RX_PT_PAYLOAD_L2			PKT_HASH_TYPE_L2
#define LIBETH_RX_PT_PAYLOAD_L3			PKT_HASH_TYPE_L3
#define LIBETH_RX_PT_PAYLOAD_L4			PKT_HASH_TYPE_L4

struct libeth_rx_pt {
	u32					outer_ip:2;
	u32					outer_frag:1;
	u32					tunnel_type:3;
	u32					tunnel_end_prot:2;
	u32					tunnel_end_frag:1;
	u32					inner_prot:3;
	enum pkt_hash_types			payload_layer:2;

	u32					pad:2;
	enum xdp_rss_hash_type			hash_type:16;
};

/**
 * struct libeth_rx_csum - checksum offload bits decoded from the Rx descriptor
 * @l3l4p: detectable L3 and L4 integrity check is processed by the hardware
 * @ipe: IP checksum error
 * @eipe: external (outermost) IP header checksum error (only for tunnels)
 * @eudpe: external (outermost) UDP checksum error (only for tunnels)
 * @ipv6exadd: IPv6 header with extension headers
 * @l4e: L4 integrity error
 * @pprs: set for packets that skip checksum calculation in the HW pre-parser
 * @nat: the packet is a UDP tunneled packet
 * @raw_csum_valid: set if raw checksum is valid
 * @pad: padding to naturally align raw_csum field
 * @raw_csum: raw checksum
 */
struct libeth_rx_csum {
	u32					l3l4p:1;
	u32					ipe:1;
	u32					eipe:1;
	u32					eudpe:1;
	u32					ipv6exadd:1;
	u32					l4e:1;
	u32					pprs:1;
	u32					nat:1;

	u32					raw_csum_valid:1;
	u32					pad:7;
	u32					raw_csum:16;
};
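
/* Consumption sketch (hypothetical, simplified): once a driver has decoded
 * its descriptor into struct libeth_rx_csum (the decode itself is
 * device-specific), the usual policy is "any error -> leave CHECKSUM_NONE,
 * otherwise trust the HW". The exact rules differ per driver:
 *
 *	if (!libeth_rx_pt_has_checksum(dev, pt) || !csum.l3l4p)
 *		return;
 *
 *	if (unlikely(csum.ipe || csum.eipe || csum.l4e || csum.eudpe))
 *		return;
 *
 *	skb->ip_summed = CHECKSUM_UNNECESSARY;
 *	if (csum.nat)
 *		skb->csum_level = 1;
 */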

/**
 * struct libeth_rqe_info - receive queue element info
 * @len: packet length
 * @ptype: packet type based on types programmed into the device
 * @eop: whether it's the last fragment of the packet
 * @rxe: MAC errors: CRC, alignment, oversize, undersize, length error
 * @vlan: C-VLAN or S-VLAN tag depending on the VLAN offload configuration
 */
struct libeth_rqe_info {
	u32					len;

	u32					ptype:14;
	u32					eop:1;
	u32					rxe:1;

	u32					vlan:16;
};

void libeth_rx_pt_gen_hash_type(struct libeth_rx_pt *pt);
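
/* Lookup-table sketch (hypothetical): the "abstract packet type numbers"
 * mentioned above are usually expanded into struct libeth_rx_pt entries once
 * at init time, so the hotpath only indexes an array. The table size and the
 * field values are illustrative:
 *
 *	static struct libeth_rx_pt my_rx_ptypes[1024];
 *
 *	my_rx_ptypes[id] = (struct libeth_rx_pt){
 *		.outer_ip	= LIBETH_RX_PT_OUTER_IPV4,
 *		.inner_prot	= LIBETH_RX_PT_INNER_TCP,
 *		.payload_layer	= LIBETH_RX_PT_PAYLOAD_L4,
 *	};
 *	libeth_rx_pt_gen_hash_type(&my_rx_ptypes[id]);
 *
 * On Rx, the driver then fetches pt = my_rx_ptypes[hw_ptype_id] and feeds it
 * into the helpers below.
 */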

/**
 * libeth_rx_pt_get_ip_ver - get IP version from a packet type structure
 * @pt: packet type params
 *
 * Wrapper to compile out the IPv6 code from the drivers when not supported
 * by the kernel.
 *
 * Return: @pt.outer_ip or stub for IPv6 when not compiled-in.
 */
static inline u32 libeth_rx_pt_get_ip_ver(struct libeth_rx_pt pt)
{
#if !IS_ENABLED(CONFIG_IPV6)
	switch (pt.outer_ip) {
	case LIBETH_RX_PT_OUTER_IPV4:
		return LIBETH_RX_PT_OUTER_IPV4;
	default:
		return LIBETH_RX_PT_OUTER_L2;
	}
#else
	return pt.outer_ip;
#endif
}
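
/* Usage sketch (hypothetical): because the IPv6 case collapses to
 * LIBETH_RX_PT_OUTER_L2 when CONFIG_IPV6 is disabled, a switch on the return
 * value lets the compiler drop the IPv6 branch entirely in that config:
 *
 *	switch (libeth_rx_pt_get_ip_ver(pt)) {
 *	case LIBETH_RX_PT_OUTER_IPV4:
 *		// IPv4-specific handling
 *		break;
 *	case LIBETH_RX_PT_OUTER_IPV6:
 *		// IPv6-specific handling, compiled out w/o CONFIG_IPV6
 *		break;
 *	default:
 *		break;
 *	}
 */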

/* The libeth_rx_pt_has_*() helpers below can be used to quickly check whether
 * the HW metadata is available, to avoid further expensive processing such as
 * descriptor reads. They already check that the corresponding netdev feature
 * is enabled, thus can be used as drop-in replacements for explicit feature
 * checks in drivers.
 */

static inline bool libeth_rx_pt_has_checksum(const struct net_device *dev,
					     struct libeth_rx_pt pt)
{
	/* Non-zero _INNER* is only possible when _OUTER_IPV* is set,
	 * it is enough to check only for the L4 type.
	 */
	return likely(pt.inner_prot > LIBETH_RX_PT_INNER_NONE &&
		      (dev->features & NETIF_F_RXCSUM));
}

static inline bool libeth_rx_pt_has_hash(const struct net_device *dev,
					 struct libeth_rx_pt pt)
{
	return likely(pt.payload_layer > LIBETH_RX_PT_PAYLOAD_NONE &&
		      (dev->features & NETIF_F_RXHASH));
}

/**
 * libeth_rx_pt_set_hash - fill in skb hash value based on the PT
 * @skb: skb to fill the hash in
 * @hash: 32-bit hash value from the descriptor
 * @pt: packet type
 */
static inline void libeth_rx_pt_set_hash(struct sk_buff *skb, u32 hash,
					 struct libeth_rx_pt pt)
{
	skb_set_hash(skb, hash, pt.payload_layer);
}
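
/* Hash sketch (hypothetical): the hash helpers are meant to be combined when
 * populating an skb from a completed descriptor; "hash" would come from a
 * driver-specific descriptor field:
 *
 *	if (libeth_rx_pt_has_hash(dev, pt))
 *		libeth_rx_pt_set_hash(skb, hash, pt);
 */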

#endif /* __LIBETH_RX_H */