/* SPDX-License-Identifier: GPL-2.0-or-later */

#ifndef _NET_GRO_H
#define _NET_GRO_H

#include <linux/indirect_call_wrapper.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <net/ip6_checksum.h>
#include <linux/skbuff.h>
#include <net/udp.h>
#include <net/hotdata.h>

struct napi_gro_cb {
	union {
		struct {
			/* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */
			void	*frag0;

			/* Length of frag0. */
			unsigned int frag0_len;
		};

		struct {
			/* used in skb_gro_receive() slow path */
			struct sk_buff *last;

			/* jiffies when first packet was created/queued */
			unsigned long age;
		};
	};

	/* This indicates where we are processing relative to skb->data. */
	int	data_offset;

	/* This is non-zero if the packet cannot be merged with the new skb. */
	u16	flush;

	/* Save the IP ID here and check when we get to the transport layer */
	u16	flush_id;

	/* Number of segments aggregated. */
	u16	count;

	/* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */
	u16	proto;

/* Used in napi_gro_cb::free */
#define NAPI_GRO_FREE             1
#define NAPI_GRO_FREE_STOLEN_HEAD 2
	/* portion of the cb set to zero at every gro iteration */
	struct_group(zeroed,

		/* Start offset for remote checksum offload */
		u16	gro_remcsum_start;

		/* This is non-zero if the packet may be of the same flow. */
		u8	same_flow:1;

		/* Used in tunnel GRO receive */
		u8	encap_mark:1;

		/* GRO checksum is valid */
		u8	csum_valid:1;

		/* Number of checksums via CHECKSUM_UNNECESSARY */
		u8	csum_cnt:3;

		/* Free the skb? */
		u8	free:2;

		/* Used in foo-over-udp, set in udp[46]_gro_receive */
		u8	is_ipv6:1;

		/* Used in GRE, set in fou/gue_gro_receive */
		u8	is_fou:1;

		/* Used to determine if flush_id can be ignored */
		u8	is_atomic:1;

		/* Number of gro_receive callbacks this packet already went through */
		u8 recursion_counter:4;

		/* GRO is done by frag_list pointer chaining. */
		u8	is_flist:1;
	);

	/* used to support CHECKSUM_COMPLETE for tunneling protocols */
	__wsum	csum;

	/* L3 offsets */
	union {
		struct {
			u16 network_offset;
			u16 inner_network_offset;
		};
		u16 network_offsets[2];
	};
};

#define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb)
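
/*
 * Illustrative note, not part of the API: NAPI_GRO_CB() is how every GRO
 * handler reads and updates the per-skb state above while the skb sits on
 * the GRO lists. A typical fragment inside a gro_receive callback (the
 * condition here is hypothetical) marks an skb as non-mergeable with:
 *
 *	if (unlikely(hdr_unsupported))
 *		NAPI_GRO_CB(skb)->flush |= 1;
 */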

#define GRO_RECURSION_LIMIT 15
static inline int gro_recursion_inc_test(struct sk_buff *skb)
{
	return ++NAPI_GRO_CB(skb)->recursion_counter == GRO_RECURSION_LIMIT;
}

typedef struct sk_buff *(*gro_receive_t)(struct list_head *, struct sk_buff *);
static inline struct sk_buff *call_gro_receive(gro_receive_t cb,
					       struct list_head *head,
					       struct sk_buff *skb)
{
	if (unlikely(gro_recursion_inc_test(skb))) {
		NAPI_GRO_CB(skb)->flush |= 1;
		return NULL;
	}

	return cb(head, skb);
}
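
/*
 * Sketch of the intended use, with names borrowed from the GRE-style
 * tunnel paths (offload lookup and error handling elided): an
 * encapsulation gro_receive pulls its own header and then chains to the
 * inner protocol through call_gro_receive(), so the recursion counter
 * bounds the nesting depth at GRO_RECURSION_LIMIT:
 *
 *	skb_gro_pull(skb, hdrlen);
 *	pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
 */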

typedef struct sk_buff *(*gro_receive_sk_t)(struct sock *, struct list_head *,
					    struct sk_buff *);
static inline struct sk_buff *call_gro_receive_sk(gro_receive_sk_t cb,
						  struct sock *sk,
						  struct list_head *head,
						  struct sk_buff *skb)
{
	if (unlikely(gro_recursion_inc_test(skb))) {
		NAPI_GRO_CB(skb)->flush |= 1;
		return NULL;
	}

	return cb(sk, head, skb);
}

static inline unsigned int skb_gro_offset(const struct sk_buff *skb)
{
	return NAPI_GRO_CB(skb)->data_offset;
}

static inline unsigned int skb_gro_len(const struct sk_buff *skb)
{
	return skb->len - NAPI_GRO_CB(skb)->data_offset;
}

static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len)
{
	NAPI_GRO_CB(skb)->data_offset += len;
}

static inline void *skb_gro_header_fast(const struct sk_buff *skb,
					unsigned int offset)
{
	return NAPI_GRO_CB(skb)->frag0 + offset;
}

static inline bool skb_gro_may_pull(const struct sk_buff *skb,
				    unsigned int hlen)
{
	return likely(hlen <= NAPI_GRO_CB(skb)->frag0_len);
}

static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen,
					unsigned int offset)
{
	if (!pskb_may_pull(skb, hlen))
		return NULL;

	return skb->data + offset;
}

static inline void *skb_gro_header(struct sk_buff *skb, unsigned int hlen,
				   unsigned int offset)
{
	void *ptr;

	ptr = skb_gro_header_fast(skb, offset);
	if (!skb_gro_may_pull(skb, hlen))
		ptr = skb_gro_header_slow(skb, hlen, offset);
	return ptr;
}
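
/*
 * The canonical header-access pattern during GRO: try the frag0 fast path
 * and fall back to the slow pskb_may_pull() path only when the header is
 * not fully contained in frag0. udp_gro_udphdr() below is a real instance
 * of this; a generic sketch for a hypothetical header type 'foohdr' reads:
 *
 *	unsigned int off = skb_gro_offset(skb);
 *	struct foohdr *fh = skb_gro_header(skb, off + sizeof(*fh), off);
 *
 *	if (unlikely(!fh))
 *		goto out;
 */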

static inline void *skb_gro_network_header(const struct sk_buff *skb)
{
	if (skb_gro_may_pull(skb, skb_gro_offset(skb)))
		return skb_gro_header_fast(skb, skb_network_offset(skb));

	return skb_network_header(skb);
}

static inline __wsum inet_gro_compute_pseudo(const struct sk_buff *skb,
					     int proto)
{
	const struct iphdr *iph = skb_gro_network_header(skb);

	return csum_tcpudp_nofold(iph->saddr, iph->daddr,
				  skb_gro_len(skb), proto, 0);
}

static inline void skb_gro_postpull_rcsum(struct sk_buff *skb,
					const void *start, unsigned int len)
{
	if (NAPI_GRO_CB(skb)->csum_valid)
		NAPI_GRO_CB(skb)->csum = wsum_negate(csum_partial(start, len,
						wsum_negate(NAPI_GRO_CB(skb)->csum)));
}
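
/*
 * Usage note: a handler that advances past bytes already folded into a
 * CHECKSUM_COMPLETE value must subtract them again, or the stored csum no
 * longer matches the remaining payload. The usual pairing (as in the IPv6
 * GRO receive path) is:
 *
 *	skb_gro_pull(skb, hdrlen);
 *	skb_gro_postpull_rcsum(skb, hdr, hdrlen);
 */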

/* GRO checksum functions. These are logical equivalents of the normal
 * checksum functions (in skbuff.h) except that they operate on the GRO
 * offsets and fields in sk_buff.
 */

__sum16 __skb_gro_checksum_complete(struct sk_buff *skb);

static inline bool skb_at_gro_remcsum_start(struct sk_buff *skb)
{
	return (NAPI_GRO_CB(skb)->gro_remcsum_start == skb_gro_offset(skb));
}

static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb,
						      bool zero_okay,
						      __sum16 check)
{
	return ((skb->ip_summed != CHECKSUM_PARTIAL ||
		skb_checksum_start_offset(skb) <
		 skb_gro_offset(skb)) &&
		!skb_at_gro_remcsum_start(skb) &&
		NAPI_GRO_CB(skb)->csum_cnt == 0 &&
		(!zero_okay || check));
}

static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb,
							   __wsum psum)
{
	if (NAPI_GRO_CB(skb)->csum_valid &&
	    !csum_fold(csum_add(psum, NAPI_GRO_CB(skb)->csum)))
		return 0;

	NAPI_GRO_CB(skb)->csum = psum;

	return __skb_gro_checksum_complete(skb);
}
static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb)
{
	if (NAPI_GRO_CB(skb)->csum_cnt > 0) {
		/* Consume a checksum from CHECKSUM_UNNECESSARY */
		NAPI_GRO_CB(skb)->csum_cnt--;
	} else {
		/* Update skb for CHECKSUM_UNNECESSARY and csum_level when we
		 * have verified a new top-level or encapsulated checksum
		 * during GRO. This saves work if we fall back to the normal
		 * path.
		 */
		__skb_incr_checksum_unnecessary(skb);
	}
}

#define __skb_gro_checksum_validate(skb, proto, zero_okay, check,	\
				    compute_pseudo)			\
({									\
	__sum16 __ret = 0;						\
	if (__skb_gro_checksum_validate_needed(skb, zero_okay, check))	\
		__ret = __skb_gro_checksum_validate_complete(skb,	\
				compute_pseudo(skb, proto));		\
	if (!__ret)							\
		skb_gro_incr_csum_unnecessary(skb);			\
	__ret;								\
})

#define skb_gro_checksum_validate(skb, proto, compute_pseudo)		\
	__skb_gro_checksum_validate(skb, proto, false, 0, compute_pseudo)

#define skb_gro_checksum_validate_zero_check(skb, proto, check,		\
					     compute_pseudo)		\
	__skb_gro_checksum_validate(skb, proto, true, check, compute_pseudo)

#define skb_gro_checksum_simple_validate(skb)				\
	__skb_gro_checksum_validate(skb, 0, false, 0, null_compute_pseudo)
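
/*
 * A sketch of how a transport gro_receive is expected to use these
 * macros; the UDPv4 path validates roughly like this (zero is a legal
 * UDP checksum, hence the zero_check variant):
 *
 *	if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check,
 *						 inet_gro_compute_pseudo))
 *		goto flush;
 */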

static inline bool __skb_gro_checksum_convert_check(struct sk_buff *skb)
{
	return (NAPI_GRO_CB(skb)->csum_cnt == 0 &&
		!NAPI_GRO_CB(skb)->csum_valid);
}

static inline void __skb_gro_checksum_convert(struct sk_buff *skb,
					      __wsum pseudo)
{
	NAPI_GRO_CB(skb)->csum = ~pseudo;
	NAPI_GRO_CB(skb)->csum_valid = 1;
}

#define skb_gro_checksum_try_convert(skb, proto, compute_pseudo)	\
do {									\
	if (__skb_gro_checksum_convert_check(skb))			\
		__skb_gro_checksum_convert(skb,				\
					   compute_pseudo(skb, proto));	\
} while (0)
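
/*
 * Illustrative follow-up: after a successful validation, a handler may
 * convert CHECKSUM_UNNECESSARY into a CHECKSUM_COMPLETE-style value so
 * that nested tunnel layers can reuse it, e.g. in a UDPv4-style path:
 *
 *	skb_gro_checksum_try_convert(skb, IPPROTO_UDP,
 *				     inet_gro_compute_pseudo);
 */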

struct gro_remcsum {
	int offset;
	__wsum delta;
};

static inline void skb_gro_remcsum_init(struct gro_remcsum *grc)
{
	grc->offset = 0;
	grc->delta = 0;
}

static inline void *skb_gro_remcsum_process(struct sk_buff *skb, void *ptr,
					    unsigned int off, size_t hdrlen,
					    int start, int offset,
					    struct gro_remcsum *grc,
					    bool nopartial)
{
	__wsum delta;
	size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start);

	BUG_ON(!NAPI_GRO_CB(skb)->csum_valid);

	if (!nopartial) {
		NAPI_GRO_CB(skb)->gro_remcsum_start = off + hdrlen + start;
		return ptr;
	}

	ptr = skb_gro_header(skb, off + plen, off);
	if (!ptr)
		return NULL;

	delta = remcsum_adjust(ptr + hdrlen, NAPI_GRO_CB(skb)->csum,
			       start, offset);

	/* Adjust skb->csum since we changed the packet */
	NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta);

	grc->offset = off + hdrlen + offset;
	grc->delta = delta;

	return ptr;
}

static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb,
					   struct gro_remcsum *grc)
{
	void *ptr;
	size_t plen = grc->offset + sizeof(u16);

	if (!grc->delta)
		return;

	ptr = skb_gro_header(skb, plen, grc->offset);
	if (!ptr)
		return;

	remcsum_unadjust((__sum16 *)ptr, grc->delta);
}
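
/*
 * Remote checksum offload lifecycle, sketched under the usual caller
 * shape (a GUE/VXLAN-like handler; locals and flow are illustrative):
 * init a gro_remcsum on the stack, let skb_gro_remcsum_process() patch
 * the inner checksum, and rely on the *_remcsum flush helper below to
 * undo the patch if the packet ends up leaving GRO unmerged:
 *
 *	struct gro_remcsum grc;
 *
 *	skb_gro_remcsum_init(&grc);
 *	hdr = skb_gro_remcsum_process(skb, hdr, off, hdrlen,
 *				      start, offset, &grc, nopartial);
 *	...
 *	skb_gro_flush_final_remcsum(skb, pp, flush, &grc);
 */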

#ifdef CONFIG_XFRM_OFFLOAD
static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush)
{
	if (PTR_ERR(pp) != -EINPROGRESS)
		NAPI_GRO_CB(skb)->flush |= flush;
}
static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb,
					       struct sk_buff *pp,
					       int flush,
					       struct gro_remcsum *grc)
{
	if (PTR_ERR(pp) != -EINPROGRESS) {
		NAPI_GRO_CB(skb)->flush |= flush;
		skb_gro_remcsum_cleanup(skb, grc);
		skb->remcsum_offload = 0;
	}
}
#else
static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush)
{
	NAPI_GRO_CB(skb)->flush |= flush;
}
static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb,
					       struct sk_buff *pp,
					       int flush,
					       struct gro_remcsum *grc)
{
	NAPI_GRO_CB(skb)->flush |= flush;
	skb_gro_remcsum_cleanup(skb, grc);
	skb->remcsum_offload = 0;
}
#endif

INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *,
							   struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int));
INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *,
							   struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int));

INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *,
							   struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int));

INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *,
							   struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int));

#define indirect_call_gro_receive_inet(cb, f2, f1, head, skb)	\
({								\
	unlikely(gro_recursion_inc_test(skb)) ?			\
		NAPI_GRO_CB(skb)->flush |= 1, NULL :		\
		INDIRECT_CALL_INET(cb, f2, f1, head, skb);	\
})
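
/*
 * A plausible use of the macro above (illustrative; it mirrors
 * call_gro_receive() but lets the compiler emit direct calls to the two
 * inet receivers instead of a retpoline-penalized indirect call):
 *
 *	pp = indirect_call_gro_receive_inet(ptype->callbacks.gro_receive,
 *					    ipv6_gro_receive,
 *					    inet_gro_receive, head, skb);
 */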

struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
				struct udphdr *uh, struct sock *sk);
int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);

static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
{
	struct udphdr *uh;
	unsigned int hlen, off;

	off  = skb_gro_offset(skb);
	hlen = off + sizeof(*uh);
	uh   = skb_gro_header(skb, hlen, off);

	return uh;
}

static inline __wsum ip6_gro_compute_pseudo(const struct sk_buff *skb,
					    int proto)
{
	const struct ipv6hdr *iph = skb_gro_network_header(skb);

	return ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr,
					    skb_gro_len(skb), proto, 0));
}

int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb);

/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
static inline void gro_normal_list(struct napi_struct *napi)
{
	if (!napi->rx_count)
		return;
	netif_receive_skb_list_internal(&napi->rx_list);
	INIT_LIST_HEAD(&napi->rx_list);
	napi->rx_count = 0;
}

/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded,
 * pass the whole batch up to the stack.
 */
static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, int segs)
{
	list_add_tail(&skb->list, &napi->rx_list);
	napi->rx_count += segs;
	if (napi->rx_count >= READ_ONCE(net_hotdata.gro_normal_batch))
		gro_normal_list(napi);
}
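
/*
 * Batching sketch: skbs queued here are only pushed up the stack once
 * net_hotdata.gro_normal_batch segments have accumulated or the NAPI
 * poll completes. A coalesced skb counts for every segment it carries,
 * so a flush path would deliver it as:
 *
 *	gro_normal_one(napi, skb, NAPI_GRO_CB(skb)->count);
 */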

/* This function is an alternative to 'inet_iif' and 'inet_sdif' for
 * cases where we cannot rely on the fields of IPCB.
 *
 * The caller must verify that skb_valid_dst(skb) is false and that
 * skb->dev is initialized. The caller must hold the RCU read lock.
 */
static inline void inet_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif)
{
	*iif = inet_iif(skb) ?: skb->dev->ifindex;
	*sdif = 0;

#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
	if (netif_is_l3_slave(skb->dev)) {
		struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev);

		*sdif = *iif;
		*iif = master ? master->ifindex : 0;
	}
#endif
}

/* This function is an alternative to 'inet6_iif' and 'inet6_sdif' for
 * cases where we cannot rely on the fields of IP6CB.
 *
 * The caller must verify that skb_valid_dst(skb) is false and that
 * skb->dev is initialized. The caller must hold the RCU read lock.
 */
static inline void inet6_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif)
{
	/* using skb->dev->ifindex because skb_dst(skb) is not initialized */
	*iif = skb->dev->ifindex;
	*sdif = 0;

#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
	if (netif_is_l3_slave(skb->dev)) {
		struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev);

		*sdif = *iif;
		*iif = master ? master->ifindex : 0;
	}
#endif
}
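
/*
 * Hedged usage example: UDP GRO needs iif/sdif for its socket lookup
 * before IP6CB is valid, which is what these helpers are for. A
 * udp6-style lookup (shape borrowed from the UDPv6 GRO path; locals are
 * illustrative) might read:
 *
 *	int iif, sdif;
 *
 *	inet6_get_iif_sdif(skb, &iif, &sdif);
 *	sk = __udp6_lib_lookup(net, &ip6h->saddr, uh->source,
 *			       &ip6h->daddr, uh->dest, iif, sdif,
 *			       net->ipv4.udp_table, NULL);
 */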

struct packet_offload *gro_find_receive_by_type(__be16 type);
struct packet_offload *gro_find_complete_by_type(__be16 type);

#endif /* _NET_GRO_H */