/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (C) 2024-2025 Intel Corporation */

#ifndef __LIBETH_RX_H
#define __LIBETH_RX_H

#include <linux/if_vlan.h>

#include <net/page_pool/helpers.h>
#include <net/xdp.h>

/* Rx buffer management */

/* Space reserved in front of each frame */
#define LIBETH_SKB_HEADROOM	(NET_SKB_PAD + NET_IP_ALIGN)
#define LIBETH_XDP_HEADROOM	(ALIGN(XDP_PACKET_HEADROOM, NET_SKB_PAD) + \
				 NET_IP_ALIGN)
/* Maximum headroom for worst-case calculations */
#define LIBETH_MAX_HEADROOM	LIBETH_XDP_HEADROOM
/* Link layer / L2 overhead: Ethernet, 2 VLAN tags (C + S), FCS */
#define LIBETH_RX_LL_LEN	(ETH_HLEN + 2 * VLAN_HLEN + ETH_FCS_LEN)
/* Maximum supported L2-L4 header length */
#define LIBETH_MAX_HEAD		roundup_pow_of_two(max(MAX_HEADER, 256))

/* Always use order-0 pages */
#define LIBETH_RX_PAGE_ORDER	0
/* Pick a sane buffer stride and align to a cacheline boundary */
#define LIBETH_RX_BUF_STRIDE	SKB_DATA_ALIGN(128)
/* HW-writeable space in one buffer: truesize - headroom/tailroom, aligned */
#define LIBETH_RX_PAGE_LEN(hr)						\
	ALIGN_DOWN(SKB_MAX_ORDER(hr, LIBETH_RX_PAGE_ORDER),		\
		   LIBETH_RX_BUF_STRIDE)
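
/*
 * Worked example (a sketch only: the exact numbers depend on the architecture
 * and kernel configuration). Assuming 4-KiB pages, NET_SKB_PAD == 64,
 * NET_IP_ALIGN == 2 and a 320-byte, cacheline-aligned skb_shared_info, which
 * SKB_MAX_ORDER() subtracts together with the headroom:
 *
 *	LIBETH_SKB_HEADROOM			== 64 + 2 == 66
 *	SKB_MAX_ORDER(66, 0)			== 4096 - 66 - 320 == 3710
 *	LIBETH_RX_PAGE_LEN(LIBETH_SKB_HEADROOM)	== ALIGN_DOWN(3710, 128) == 3584
 *
 * i.e. roughly 3.5 KiB of each page stay HW-writeable once the headroom and
 * the skb_shared_info tailroom are reserved.
 */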

/**
 * struct libeth_fqe - structure representing an Rx buffer (fill queue element)
 * @netmem: network memory reference holding the buffer
 * @offset: offset from the page start (to the headroom)
 * @truesize: total space occupied by the buffer (w/ headroom and tailroom)
 *
 * Depending on the MTU, the API switches between the one-page-per-frame and
 * the shared-page model (to conserve memory on bigger-page platforms). In case
 * of the former, @offset is always 0 and @truesize is always ``PAGE_SIZE``.
 */
struct libeth_fqe {
	netmem_ref		netmem;
	u32			offset;
	u32			truesize;
} __aligned_largest;

/**
 * enum libeth_fqe_type - enum representing types of Rx buffers
 * @LIBETH_FQE_MTU: buffer size is determined by MTU
 * @LIBETH_FQE_SHORT: buffer size is smaller than MTU, for short frames
 * @LIBETH_FQE_HDR: buffer is ``LIBETH_MAX_HEAD``-sized, for headers
 */
enum libeth_fqe_type {
	LIBETH_FQE_MTU = 0U,
	LIBETH_FQE_SHORT,
	LIBETH_FQE_HDR,
};

/**
 * struct libeth_fq - structure representing a buffer (fill) queue
 * @fp: hotpath part of the structure
 * @pp: &page_pool for buffer management
 * @fqes: array of Rx buffers
 * @truesize: size to allocate per buffer, w/ overhead
 * @count: number of descriptors/buffers the queue has
 * @type: type of the buffers this queue has
 * @hsplit: flag whether header split is enabled
 * @xdp: flag indicating whether XDP is enabled
 * @buf_len: HW-writeable length per buffer
 * @nid: ID of the closest NUMA node with memory
 */
struct libeth_fq {
	struct_group_tagged(libeth_fq_fp, fp,
		struct page_pool	*pp;
		struct libeth_fqe	*fqes;

		u32			truesize;
		u32			count;
	);

	/* Cold fields */
	enum libeth_fqe_type	type:2;
	bool			hsplit:1;
	bool			xdp:1;

	u32			buf_len;
	int			nid;
};

int libeth_rx_fq_create(struct libeth_fq *fq, struct napi_struct *napi);
void libeth_rx_fq_destroy(struct libeth_fq *fq);
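
/*
 * Minimal setup sketch (an assumption of typical usage, not copied from any
 * driver; "example_napi" is hypothetical): the caller describes the queue and
 * libeth_rx_fq_create() is expected to provide the page_pool, the FQE array
 * and the final buffer parameters on success.
 *
 *	struct libeth_fq fq = {
 *		.count	= 512,
 *		.type	= LIBETH_FQE_MTU,
 *		.hsplit	= false,
 *		.xdp	= false,
 *		.nid	= NUMA_NO_NODE,
 *	};
 *	int err;
 *
 *	err = libeth_rx_fq_create(&fq, example_napi);
 *	if (err)
 *		return err;
 *
 *	... use fq.pp, fq.fqes, fq.buf_len ...
 *
 *	libeth_rx_fq_destroy(&fq);
 */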

/**
 * libeth_rx_alloc - allocate a new Rx buffer
 * @fq: fill queue to allocate for
 * @i: index of the buffer within the queue
 *
 * Return: DMA address to be passed to HW for Rx on successful allocation,
 * ``DMA_MAPPING_ERROR`` otherwise.
 */
static inline dma_addr_t libeth_rx_alloc(const struct libeth_fq_fp *fq, u32 i)
{
	struct libeth_fqe *buf = &fq->fqes[i];

	buf->truesize = fq->truesize;
	buf->netmem = page_pool_dev_alloc_netmem(fq->pp, &buf->offset,
						 &buf->truesize);
	if (unlikely(!buf->netmem))
		return DMA_MAPPING_ERROR;

	return page_pool_get_dma_addr_netmem(buf->netmem) + buf->offset +
	       fq->pp->p.offset;
}
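
/*
 * Refill sketch (hypothetical: "example_rx_desc" and its "pkt_addr" field are
 * made up, the descriptor layout is whatever the HW defines): walk the ring,
 * allocate one buffer per slot and hand the DMA address to the HW.
 *
 *	for (u32 i = 0; i < fq->count; i++) {
 *		dma_addr_t addr = libeth_rx_alloc(&fq->fp, i);
 *
 *		if (addr == DMA_MAPPING_ERROR)
 *			break;		// out of buffers, stop refilling
 *
 *		example_rx_desc[i].pkt_addr = cpu_to_le64(addr);
 *	}
 */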

void libeth_rx_recycle_slow(netmem_ref netmem);

/**
 * libeth_rx_sync_for_cpu - synchronize or recycle buffer post DMA
 * @fqe: buffer to process
 * @len: frame length from the descriptor
 *
 * Process the buffer after it's written by HW. The regular path is to
 * synchronize DMA for CPU, but in case of no data it will be immediately
 * recycled back to its PP.
 *
 * Return: true when there's data to process, false otherwise.
 */
static inline bool libeth_rx_sync_for_cpu(const struct libeth_fqe *fqe,
					  u32 len)
{
	netmem_ref netmem = fqe->netmem;

	/* Very rare, but possible case. The most common reason:
	 * the last fragment contained FCS only, which was then
	 * stripped by the HW.
	 */
	if (unlikely(!len)) {
		libeth_rx_recycle_slow(netmem);
		return false;
	}

	page_pool_dma_sync_netmem_for_cpu(netmem_get_pp(netmem), netmem,
					  fqe->offset, len);

	return true;
}
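
/*
 * Completion-path sketch (hypothetical: "example_rx_desc" and
 * example_add_rx_frag() are made up, only the libeth calls are real): take the
 * length from the descriptor, let libeth either sync or recycle the buffer,
 * and only touch the data when there is any.
 *
 *	u32 len = le16_to_cpu(example_rx_desc->pkt_len);
 *	struct libeth_fqe *fqe = &fq->fqes[example_rx_desc->buf_id];
 *
 *	if (!libeth_rx_sync_for_cpu(fqe, len))
 *		continue;	// empty fragment, already recycled
 *
 *	example_add_rx_frag(skb, fqe, len);
 */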

/* Converting abstract packet type numbers into a software structure with
 * the packet parameters to do O(1) lookup on Rx.
 */

enum {
	LIBETH_RX_PT_OUTER_L2 = 0U,
	LIBETH_RX_PT_OUTER_IPV4,
	LIBETH_RX_PT_OUTER_IPV6,
};

enum {
	LIBETH_RX_PT_NOT_FRAG = 0U,
	LIBETH_RX_PT_FRAG,
};

enum {
	LIBETH_RX_PT_TUNNEL_IP_NONE = 0U,
	LIBETH_RX_PT_TUNNEL_IP_IP,
	LIBETH_RX_PT_TUNNEL_IP_GRENAT,
	LIBETH_RX_PT_TUNNEL_IP_GRENAT_MAC,
	LIBETH_RX_PT_TUNNEL_IP_GRENAT_MAC_VLAN,
};

enum {
	LIBETH_RX_PT_TUNNEL_END_NONE = 0U,
	LIBETH_RX_PT_TUNNEL_END_IPV4,
	LIBETH_RX_PT_TUNNEL_END_IPV6,
};

enum {
	LIBETH_RX_PT_INNER_NONE = 0U,
	LIBETH_RX_PT_INNER_UDP,
	LIBETH_RX_PT_INNER_TCP,
	LIBETH_RX_PT_INNER_SCTP,
	LIBETH_RX_PT_INNER_ICMP,
	LIBETH_RX_PT_INNER_TIMESYNC,
};

#define LIBETH_RX_PT_PAYLOAD_NONE	PKT_HASH_TYPE_NONE
#define LIBETH_RX_PT_PAYLOAD_L2		PKT_HASH_TYPE_L2
#define LIBETH_RX_PT_PAYLOAD_L3		PKT_HASH_TYPE_L3
#define LIBETH_RX_PT_PAYLOAD_L4		PKT_HASH_TYPE_L4

struct libeth_rx_pt {
	u32			outer_ip:2;
	u32			outer_frag:1;
	u32			tunnel_type:3;
	u32			tunnel_end_prot:2;
	u32			tunnel_end_frag:1;
	u32			inner_prot:3;
	enum pkt_hash_types	payload_layer:2;

	u32			pad:2;
	enum xdp_rss_hash_type	hash_type:16;
};

/**
 * struct libeth_rx_csum - checksum offload bits decoded from the Rx descriptor
 * @l3l4p: detectable L3 and L4 integrity check is processed by the hardware
 * @ipe: IP checksum error
 * @eipe: external (outermost) IP header checksum error (only for tunnels)
 * @eudpe: external (outermost) UDP checksum error (only for tunnels)
 * @ipv6exadd: IPv6 header with extension headers
 * @l4e: L4 integrity error
 * @pprs: set for packets that skip checksum calculation in the HW pre-parser
 * @nat: the packet is a UDP tunneled packet
 * @raw_csum_valid: set if raw checksum is valid
 * @pad: padding to naturally align raw_csum field
 * @raw_csum: raw checksum
 */
struct libeth_rx_csum {
	u32			l3l4p:1;
	u32			ipe:1;
	u32			eipe:1;
	u32			eudpe:1;
	u32			ipv6exadd:1;
	u32			l4e:1;
	u32			pprs:1;
	u32			nat:1;

	u32			raw_csum_valid:1;
	u32			pad:7;
	u32			raw_csum:16;
};
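
/*
 * Decoding sketch (an assumption about one simplified policy; real drivers
 * also consult @nat, @pprs, @ipv6exadd and @raw_csum and follow their HW
 * datasheet): trust the packet only when the HW processed the checks and none
 * of the error bits are set.
 *
 *	if (csum.l3l4p && !csum.ipe && !csum.eipe && !csum.eudpe && !csum.l4e)
 *		skb->ip_summed = CHECKSUM_UNNECESSARY;
 *	else
 *		skb->ip_summed = CHECKSUM_NONE;
 */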

/**
 * struct libeth_rqe_info - receive queue element info
 * @len: packet length
 * @ptype: packet type based on types programmed into the device
 * @eop: whether it's the last fragment of the packet
 * @rxe: MAC errors: CRC, Alignment, Oversize, Undersize, Length error
 * @vlan: C-VLAN or S-VLAN tag depending on the VLAN offload configuration
 */
struct libeth_rqe_info {
	u32			len;

	u32			ptype:14;
	u32			eop:1;
	u32			rxe:1;

	u32			vlan:16;
};

void libeth_rx_pt_gen_hash_type(struct libeth_rx_pt *pt);
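
/*
 * Table-build sketch (hypothetical: "example_pt_lut", its size and
 * "example_ptype" are made up; the field values come from this header):
 * drivers typically describe each HW packet type number once at init time and
 * let libeth_rx_pt_gen_hash_type() derive @hash_type from the other fields.
 *
 *	static struct libeth_rx_pt example_pt_lut[256];
 *
 *	example_pt_lut[example_ptype] = (struct libeth_rx_pt){
 *		.outer_ip	= LIBETH_RX_PT_OUTER_IPV4,
 *		.tunnel_type	= LIBETH_RX_PT_TUNNEL_IP_NONE,
 *		.inner_prot	= LIBETH_RX_PT_INNER_TCP,
 *		.payload_layer	= LIBETH_RX_PT_PAYLOAD_L4,
 *	};
 *	libeth_rx_pt_gen_hash_type(&example_pt_lut[example_ptype]);
 */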

/**
 * libeth_rx_pt_get_ip_ver - get IP version from a packet type structure
 * @pt: packet type params
 *
 * Wrapper to compile out the IPv6 code from the drivers when not supported
 * by the kernel.
 *
 * Return: @pt.outer_ip or stub for IPv6 when not compiled-in.
 */
static inline u32 libeth_rx_pt_get_ip_ver(struct libeth_rx_pt pt)
{
#if !IS_ENABLED(CONFIG_IPV6)
	switch (pt.outer_ip) {
	case LIBETH_RX_PT_OUTER_IPV4:
		return LIBETH_RX_PT_OUTER_IPV4;
	default:
		return LIBETH_RX_PT_OUTER_L2;
	}
#else
	return pt.outer_ip;
#endif
}

/* libeth_rx_pt_has_*() can be used to quickly check whether the HW metadata is
 * available to avoid further expensive processing such as descriptor reads.
 * They already check for the corresponding netdev feature to be enabled,
 * thus can be used as drop-in replacements.
 */

static inline bool libeth_rx_pt_has_checksum(const struct net_device *dev,
					     struct libeth_rx_pt pt)
{
	/* Non-zero _INNER* is only possible when _OUTER_IPV* is set,
	 * it is enough to check only for the L4 type.
	 */
	return likely(pt.inner_prot > LIBETH_RX_PT_INNER_NONE &&
		      (dev->features & NETIF_F_RXCSUM));
}

static inline bool libeth_rx_pt_has_hash(const struct net_device *dev,
					 struct libeth_rx_pt pt)
{
	return likely(pt.payload_layer > LIBETH_RX_PT_PAYLOAD_NONE &&
		      (dev->features & NETIF_F_RXHASH));
}

/**
 * libeth_rx_pt_set_hash - fill in skb hash value based on the PT
 * @skb: skb to fill the hash in
 * @hash: 32-bit hash value from the descriptor
 * @pt: packet type
 */
static inline void libeth_rx_pt_set_hash(struct sk_buff *skb, u32 hash,
					 struct libeth_rx_pt pt)
{
	skb_set_hash(skb, hash, pt.payload_layer);
}
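
/*
 * Hotpath sketch (hypothetical: "example_pt_lut", "example_ptype",
 * "example_netdev" and the descriptor field are made up): look the HW ptype up
 * in the table built at init time, then fill the skb hash only when the
 * feature check passes.
 *
 *	struct libeth_rx_pt pt = example_pt_lut[example_ptype];
 *
 *	if (libeth_rx_pt_has_hash(example_netdev, pt))
 *		libeth_rx_pt_set_hash(skb,
 *				      le32_to_cpu(example_rx_desc->rss_hash),
 *				      pt);
 */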

#endif /* __LIBETH_RX_H */