xref: /linux/include/net/gro.h (revision bea00fab2b0e5359ee88a2b127f15a35cd48872b)
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 
3 #ifndef _NET_GRO_H
4 #define _NET_GRO_H
5 
6 #include <linux/indirect_call_wrapper.h>
7 #include <linux/ip.h>
8 #include <linux/ipv6.h>
9 #include <net/ip6_checksum.h>
10 #include <linux/skbuff.h>
11 #include <net/udp.h>
12 #include <net/hotdata.h>
13 
/*
 * Per-packet GRO state. NAPI_GRO_CB() yields a pointer to this structure by
 * casting the skb's cb[] area.
 */
struct napi_gro_cb {
	/*
	 * The two anonymous structs below overlay the same storage, so only
	 * one pair of fields is meaningful at any given time.
	 */
	union {
		struct {
			/* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */
			void	*frag0;

			/* Length of frag0; skb_gro_may_pull() compares against it. */
			unsigned int frag0_len;
		};

		struct {
			/* used in skb_gro_receive() slow path */
			struct sk_buff *last;

			/* jiffies when first packet was created/queued */
			unsigned long age;
		};
	};

	/*
	 * This indicates where we are processing relative to skb->data.
	 * Read by skb_gro_offset(), advanced by skb_gro_pull().
	 */
	int	data_offset;

	/* This is non-zero if the packet cannot be merged with the new skb. */
	u16	flush;

	/* Save the IP ID here and check when we get to the transport layer */
	u16	flush_id;

	/* Number of segments aggregated. */
	u16	count;

	/* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */
	u16	proto;

/* Values stored in napi_gro_cb::free */
#define NAPI_GRO_FREE             1
#define NAPI_GRO_FREE_STOLEN_HEAD 2
	/* portion of the cb set to zero at every gro iteration */
	struct_group(zeroed,

		/* Start offset for remote checksum offload */
		u16	gro_remcsum_start;

		/* This is non-zero if the packet may be of the same flow. */
		u8	same_flow:1;

		/* Used in tunnel GRO receive */
		u8	encap_mark:1;

		/* GRO checksum is valid, i.e. ->csum below may be used */
		u8	csum_valid:1;

		/* Number of checksums via CHECKSUM_UNNECESSARY */
		u8	csum_cnt:3;

		/* Free the skb? Holds one of the NAPI_GRO_FREE* values above. */
		u8	free:2;

		/* Used in foo-over-udp, set in udp[46]_gro_receive */
		u8	is_ipv6:1;

		/* Used in GRE, set in fou/gue_gro_receive */
		u8	is_fou:1;

		/* Used to determine if flush_id can be ignored */
		u8	is_atomic:1;

		/*
		 * Number of gro_receive callbacks this packet already went
		 * through. Four bits wide; gro_recursion_inc_test() compares
		 * it against GRO_RECURSION_LIMIT.
		 */
		u8 recursion_counter:4;

		/* GRO is done by frag_list pointer chaining. */
		u8	is_flist:1;
	);

	/* used to support CHECKSUM_COMPLETE for tunneling protocols */
	__wsum	csum;
};
91 
/* Reinterpret the control buffer (skb->cb) of @skb as a struct napi_gro_cb. */
#define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb)
93 
94 #define GRO_RECURSION_LIMIT 15
95 static inline int gro_recursion_inc_test(struct sk_buff *skb)
96 {
97 	return ++NAPI_GRO_CB(skb)->recursion_counter == GRO_RECURSION_LIMIT;
98 }
99 
100 typedef struct sk_buff *(*gro_receive_t)(struct list_head *, struct sk_buff *);
101 static inline struct sk_buff *call_gro_receive(gro_receive_t cb,
102 					       struct list_head *head,
103 					       struct sk_buff *skb)
104 {
105 	if (unlikely(gro_recursion_inc_test(skb))) {
106 		NAPI_GRO_CB(skb)->flush |= 1;
107 		return NULL;
108 	}
109 
110 	return cb(head, skb);
111 }
112 
113 typedef struct sk_buff *(*gro_receive_sk_t)(struct sock *, struct list_head *,
114 					    struct sk_buff *);
115 static inline struct sk_buff *call_gro_receive_sk(gro_receive_sk_t cb,
116 						  struct sock *sk,
117 						  struct list_head *head,
118 						  struct sk_buff *skb)
119 {
120 	if (unlikely(gro_recursion_inc_test(skb))) {
121 		NAPI_GRO_CB(skb)->flush |= 1;
122 		return NULL;
123 	}
124 
125 	return cb(sk, head, skb);
126 }
127 
128 static inline unsigned int skb_gro_offset(const struct sk_buff *skb)
129 {
130 	return NAPI_GRO_CB(skb)->data_offset;
131 }
132 
133 static inline unsigned int skb_gro_len(const struct sk_buff *skb)
134 {
135 	return skb->len - NAPI_GRO_CB(skb)->data_offset;
136 }
137 
138 static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len)
139 {
140 	NAPI_GRO_CB(skb)->data_offset += len;
141 }
142 
143 static inline void *skb_gro_header_fast(const struct sk_buff *skb,
144 					unsigned int offset)
145 {
146 	return NAPI_GRO_CB(skb)->frag0 + offset;
147 }
148 
149 static inline bool skb_gro_may_pull(const struct sk_buff *skb,
150 				    unsigned int hlen)
151 {
152 	return likely(hlen <= NAPI_GRO_CB(skb)->frag0_len);
153 }
154 
155 static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen,
156 					unsigned int offset)
157 {
158 	if (!pskb_may_pull(skb, hlen))
159 		return NULL;
160 
161 	return skb->data + offset;
162 }
163 
/*
 * Return a pointer to the header at @offset, of which @hlen bytes (counted
 * from the start of the packet) must be accessible. Uses the frag0 fast path
 * when skb_gro_may_pull() allows it, otherwise the slow path, which may
 * return NULL.
 */
static inline void *skb_gro_header(struct sk_buff *skb, unsigned int hlen,
				   unsigned int offset)
{
	if (skb_gro_may_pull(skb, hlen))
		return skb_gro_header_fast(skb, offset);

	return skb_gro_header_slow(skb, hlen, offset);
}
174 
/*
 * Return the network header of @skb: taken from frag0 when everything up to
 * the current GRO offset is covered by frag0, otherwise from the regular
 * skb_network_header().
 */
static inline void *skb_gro_network_header(const struct sk_buff *skb)
{
	if (!skb_gro_may_pull(skb, skb_gro_offset(skb)))
		return skb_network_header(skb);

	return skb_gro_header_fast(skb, skb_network_offset(skb));
}
182 
183 static inline __wsum inet_gro_compute_pseudo(const struct sk_buff *skb,
184 					     int proto)
185 {
186 	const struct iphdr *iph = skb_gro_network_header(skb);
187 
188 	return csum_tcpudp_nofold(iph->saddr, iph->daddr,
189 				  skb_gro_len(skb), proto, 0);
190 }
191 
192 static inline void skb_gro_postpull_rcsum(struct sk_buff *skb,
193 					const void *start, unsigned int len)
194 {
195 	if (NAPI_GRO_CB(skb)->csum_valid)
196 		NAPI_GRO_CB(skb)->csum = wsum_negate(csum_partial(start, len,
197 						wsum_negate(NAPI_GRO_CB(skb)->csum)));
198 }
199 
/* GRO checksum functions. These are logical equivalents of the normal
 * checksum functions (in skbuff.h) except that they operate on the GRO
 * offsets and fields in sk_buff.
 */

/*
 * Defined outside this header. Called by
 * __skb_gro_checksum_validate_complete() after it has stored the pseudo
 * checksum in NAPI_GRO_CB(skb)->csum; its return value is handed back to
 * the caller unchanged.
 */
__sum16 __skb_gro_checksum_complete(struct sk_buff *skb);
206 
207 static inline bool skb_at_gro_remcsum_start(struct sk_buff *skb)
208 {
209 	return (NAPI_GRO_CB(skb)->gro_remcsum_start == skb_gro_offset(skb));
210 }
211 
212 static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb,
213 						      bool zero_okay,
214 						      __sum16 check)
215 {
216 	return ((skb->ip_summed != CHECKSUM_PARTIAL ||
217 		skb_checksum_start_offset(skb) <
218 		 skb_gro_offset(skb)) &&
219 		!skb_at_gro_remcsum_start(skb) &&
220 		NAPI_GRO_CB(skb)->csum_cnt == 0 &&
221 		(!zero_okay || check));
222 }
223 
224 static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb,
225 							   __wsum psum)
226 {
227 	if (NAPI_GRO_CB(skb)->csum_valid &&
228 	    !csum_fold(csum_add(psum, NAPI_GRO_CB(skb)->csum)))
229 		return 0;
230 
231 	NAPI_GRO_CB(skb)->csum = psum;
232 
233 	return __skb_gro_checksum_complete(skb);
234 }
235 
236 static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb)
237 {
238 	if (NAPI_GRO_CB(skb)->csum_cnt > 0) {
239 		/* Consume a checksum from CHECKSUM_UNNECESSARY */
240 		NAPI_GRO_CB(skb)->csum_cnt--;
241 	} else {
242 		/* Update skb for CHECKSUM_UNNECESSARY and csum_level when we
243 		 * verified a new top level checksum or an encapsulated one
244 		 * during GRO. This saves work if we fallback to normal path.
245 		 */
246 		__skb_incr_checksum_unnecessary(skb);
247 	}
248 }
249 
/*
 * Core GRO checksum validation. Evaluates to a __sum16:
 *  - 0 when __skb_gro_checksum_validate_needed() says no validation is
 *    required, or when __skb_gro_checksum_validate_complete() returns 0;
 *  - otherwise the non-zero result of the latter.
 * Whenever the result is 0, skb_gro_incr_csum_unnecessary() is called.
 *
 * @compute_pseudo is invoked as compute_pseudo(skb, proto), and only when
 * validation is needed. Note that @skb is expanded more than once.
 */
#define __skb_gro_checksum_validate(skb, proto, zero_okay, check,	\
				    compute_pseudo)			\
({									\
	__sum16 __ret = 0;						\
	if (__skb_gro_checksum_validate_needed(skb, zero_okay, check))	\
		__ret = __skb_gro_checksum_validate_complete(skb,	\
				compute_pseudo(skb, proto));		\
	if (!__ret)							\
		skb_gro_incr_csum_unnecessary(skb);			\
	__ret;								\
})
261 
/* Validate with zero_okay = false and check = 0. */
#define skb_gro_checksum_validate(skb, proto, compute_pseudo)		\
	__skb_gro_checksum_validate(skb, proto, false, 0, compute_pseudo)

/* Validate with zero_okay = true, so a zero @check skips validation. */
#define skb_gro_checksum_validate_zero_check(skb, proto, check,		\
					     compute_pseudo)		\
	__skb_gro_checksum_validate(skb, proto, true, check, compute_pseudo)

/*
 * Validate with proto 0 and null_compute_pseudo as the pseudo-header
 * helper (defined outside this header).
 */
#define skb_gro_checksum_simple_validate(skb)				\
	__skb_gro_checksum_validate(skb, 0, false, 0, null_compute_pseudo)
271 
272 static inline bool __skb_gro_checksum_convert_check(struct sk_buff *skb)
273 {
274 	return (NAPI_GRO_CB(skb)->csum_cnt == 0 &&
275 		!NAPI_GRO_CB(skb)->csum_valid);
276 }
277 
278 static inline void __skb_gro_checksum_convert(struct sk_buff *skb,
279 					      __wsum pseudo)
280 {
281 	NAPI_GRO_CB(skb)->csum = ~pseudo;
282 	NAPI_GRO_CB(skb)->csum_valid = 1;
283 }
284 
/*
 * Convert the GRO checksum using compute_pseudo(skb, proto), but only when
 * __skb_gro_checksum_convert_check() permits it. @compute_pseudo is not
 * evaluated otherwise. Note that @skb is expanded more than once.
 */
#define skb_gro_checksum_try_convert(skb, proto, compute_pseudo)	\
do {									\
	if (__skb_gro_checksum_convert_check(skb))			\
		__skb_gro_checksum_convert(skb, 			\
					   compute_pseudo(skb, proto));	\
} while (0)
291 
/*
 * Bookkeeping for a remote checksum offload adjustment, filled in by
 * skb_gro_remcsum_process() and consumed by skb_gro_remcsum_cleanup().
 */
struct gro_remcsum {
	/* Offset handed to skb_gro_header() to locate the adjusted field. */
	int offset;
	/* Adjustment that was applied; zero means there is nothing to undo. */
	__wsum delta;
};
296 
297 static inline void skb_gro_remcsum_init(struct gro_remcsum *grc)
298 {
299 	grc->offset = 0;
300 	grc->delta = 0;
301 }
302 
303 static inline void *skb_gro_remcsum_process(struct sk_buff *skb, void *ptr,
304 					    unsigned int off, size_t hdrlen,
305 					    int start, int offset,
306 					    struct gro_remcsum *grc,
307 					    bool nopartial)
308 {
309 	__wsum delta;
310 	size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start);
311 
312 	BUG_ON(!NAPI_GRO_CB(skb)->csum_valid);
313 
314 	if (!nopartial) {
315 		NAPI_GRO_CB(skb)->gro_remcsum_start = off + hdrlen + start;
316 		return ptr;
317 	}
318 
319 	ptr = skb_gro_header(skb, off + plen, off);
320 	if (!ptr)
321 		return NULL;
322 
323 	delta = remcsum_adjust(ptr + hdrlen, NAPI_GRO_CB(skb)->csum,
324 			       start, offset);
325 
326 	/* Adjust skb->csum since we changed the packet */
327 	NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta);
328 
329 	grc->offset = off + hdrlen + offset;
330 	grc->delta = delta;
331 
332 	return ptr;
333 }
334 
335 static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb,
336 					   struct gro_remcsum *grc)
337 {
338 	void *ptr;
339 	size_t plen = grc->offset + sizeof(u16);
340 
341 	if (!grc->delta)
342 		return;
343 
344 	ptr = skb_gro_header(skb, plen, grc->offset);
345 	if (!ptr)
346 		return;
347 
348 	remcsum_unadjust((__sum16 *)ptr, grc->delta);
349 }
350 
351 #ifdef CONFIG_XFRM_OFFLOAD
352 static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush)
353 {
354 	if (PTR_ERR(pp) != -EINPROGRESS)
355 		NAPI_GRO_CB(skb)->flush |= flush;
356 }
357 static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb,
358 					       struct sk_buff *pp,
359 					       int flush,
360 					       struct gro_remcsum *grc)
361 {
362 	if (PTR_ERR(pp) != -EINPROGRESS) {
363 		NAPI_GRO_CB(skb)->flush |= flush;
364 		skb_gro_remcsum_cleanup(skb, grc);
365 		skb->remcsum_offload = 0;
366 	}
367 }
368 #else
369 static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush)
370 {
371 	NAPI_GRO_CB(skb)->flush |= flush;
372 }
373 static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb,
374 					       struct sk_buff *pp,
375 					       int flush,
376 					       struct gro_remcsum *grc)
377 {
378 	NAPI_GRO_CB(skb)->flush |= flush;
379 	skb_gro_remcsum_cleanup(skb, grc);
380 	skb->remcsum_offload = 0;
381 }
382 #endif
383 
/*
 * Declarations of the IPv4/IPv6 and UDP GRO receive/complete handlers,
 * wrapped in INDIRECT_CALLABLE_DECLARE (see linux/indirect_call_wrapper.h).
 * NOTE(review): presumably declared here so that they can be named as
 * candidates in the INDIRECT_CALL_*() helpers - confirm against callers.
 */
INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *,
							   struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int));
INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *,
							   struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int));

INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *,
							   struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int));

INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *,
							   struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int));
398 
/*
 * Same recursion guard as call_gro_receive(), but dispatching through
 * INDIRECT_CALL_INET(cb, f2, f1, head, skb). When the recursion test trips,
 * bit 0 of the skb's GRO flush field is set and the expression evaluates to
 * NULL without invoking @cb. Note that @skb is expanded more than once.
 */
#define indirect_call_gro_receive_inet(cb, f2, f1, head, skb)	\
({								\
	unlikely(gro_recursion_inc_test(skb)) ?			\
		NAPI_GRO_CB(skb)->flush |= 1, NULL :		\
		INDIRECT_CALL_INET(cb, f2, f1, head, skb);	\
})
405 
/*
 * Common UDP GRO entry points; both are defined outside this header.
 * NOTE(review): @uh is presumably the header obtained via udp_gro_udphdr()
 * and @sk the matching socket, if any - confirm against the callers.
 */
struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
				struct udphdr *uh, struct sock *sk);
int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
409 
410 static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
411 {
412 	struct udphdr *uh;
413 	unsigned int hlen, off;
414 
415 	off  = skb_gro_offset(skb);
416 	hlen = off + sizeof(*uh);
417 	uh   = skb_gro_header(skb, hlen, off);
418 
419 	return uh;
420 }
421 
422 static inline __wsum ip6_gro_compute_pseudo(const struct sk_buff *skb,
423 					    int proto)
424 {
425 	const struct ipv6hdr *iph = skb_gro_network_header(skb);
426 
427 	return ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr,
428 					    skb_gro_len(skb), proto, 0));
429 }
430 
/*
 * Defined outside this header.
 * NOTE(review): presumably merges @skb into the held packet @p and returns
 * 0 or a negative errno - confirm against the definition.
 */
int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb);
432 
433 /* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
434 static inline void gro_normal_list(struct napi_struct *napi)
435 {
436 	if (!napi->rx_count)
437 		return;
438 	netif_receive_skb_list_internal(&napi->rx_list);
439 	INIT_LIST_HEAD(&napi->rx_list);
440 	napi->rx_count = 0;
441 }
442 
443 /* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded,
444  * pass the whole batch up to the stack.
445  */
446 static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, int segs)
447 {
448 	list_add_tail(&skb->list, &napi->rx_list);
449 	napi->rx_count += segs;
450 	if (napi->rx_count >= READ_ONCE(net_hotdata.gro_normal_batch))
451 		gro_normal_list(napi);
452 }
453 
454 /* This function is the alternative of 'inet_iif' and 'inet_sdif'
455  * functions in case we can not rely on fields of IPCB.
456  *
457  * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized.
458  * The caller must hold the RCU read lock.
459  */
460 static inline void inet_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif)
461 {
462 	*iif = inet_iif(skb) ?: skb->dev->ifindex;
463 	*sdif = 0;
464 
465 #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
466 	if (netif_is_l3_slave(skb->dev)) {
467 		struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev);
468 
469 		*sdif = *iif;
470 		*iif = master ? master->ifindex : 0;
471 	}
472 #endif
473 }
474 
475 /* This function is the alternative of 'inet6_iif' and 'inet6_sdif'
476  * functions in case we can not rely on fields of IP6CB.
477  *
478  * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized.
479  * The caller must hold the RCU read lock.
480  */
481 static inline void inet6_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif)
482 {
483 	/* using skb->dev->ifindex because skb_dst(skb) is not initialized */
484 	*iif = skb->dev->ifindex;
485 	*sdif = 0;
486 
487 #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
488 	if (netif_is_l3_slave(skb->dev)) {
489 		struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev);
490 
491 		*sdif = *iif;
492 		*iif = master ? master->ifindex : 0;
493 	}
494 #endif
495 }
496 
/*
 * Both are defined outside this header.
 * NOTE(review): presumably look up the packet_offload registered for the
 * given protocol @type that provides a gro_receive / gro_complete callback
 * respectively - confirm against the definitions.
 */
struct packet_offload *gro_find_receive_by_type(__be16 type);
struct packet_offload *gro_find_complete_by_type(__be16 type);
499 
500 #endif /* _NET_GRO_H */
501