xref: /linux/include/net/gro.h (revision 07fdad3a93756b872da7b53647715c48d0f4a2d0)
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 
3 #ifndef _NET_GRO_H
4 #define _NET_GRO_H
5 
6 #include <linux/indirect_call_wrapper.h>
7 #include <linux/ip.h>
8 #include <linux/ipv6.h>
9 #include <net/ip6_checksum.h>
10 #include <linux/skbuff.h>
11 #include <net/udp.h>
12 #include <net/hotdata.h>
13 
/* Head-size budget for GRO: MAX_HEADER plus 128 bytes.
 * NOTE(review): the users of this limit are outside this header.
 * This should be increased if a protocol with a bigger head is added.
 */
#define GRO_MAX_HEAD (MAX_HEADER + 128)
16 
/* Per-skb GRO state. NAPI_GRO_CB() overlays this structure on skb->cb,
 * so field order and sizes are part of the layout contract.
 */
struct napi_gro_cb {
	/* The two anonymous structs below share the same storage. */
	union {
		struct {
			/* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */
			void	*frag0;

			/* Length of frag0. */
			unsigned int frag0_len;
		};

		struct {
			/* used in skb_gro_receive() slow path */
			struct sk_buff *last;

			/* jiffies when first packet was created/queued */
			unsigned long age;
		};
	};

	/* This indicates where we are processing relative to skb->data. */
	int	data_offset;

	/* This is non-zero if the packet cannot be merged with the new skb. */
	u16	flush;

	/* Number of segments aggregated. */
	u16	count;

	/* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */
	u16	proto;

	/* Explicit padding; not referenced anywhere in this header. */
	u16	pad;

/* Used in napi_gro_cb::free */
#define NAPI_GRO_FREE             1
#define NAPI_GRO_FREE_STOLEN_HEAD 2
	/* portion of the cb set to zero at every gro iteration */
	struct_group(zeroed,

		/* Start offset for remote checksum offload */
		u16	gro_remcsum_start;

		/* This is non-zero if the packet may be of the same flow. */
		u8	same_flow:1;

		/* Used in tunnel GRO receive; also selects the entry of
		 * network_offsets[] in skb_gro_receive_network_offset().
		 */
		u8	encap_mark:1;

		/* GRO checksum is valid */
		u8	csum_valid:1;

		/* Number of checksums via CHECKSUM_UNNECESSARY */
		u8	csum_cnt:3;

		/* Free the skb? Holds 0 or one of the NAPI_GRO_FREE* values. */
		u8	free:2;

		/* Used in GRE, set in fou/gue_gro_receive */
		u8	is_fou:1;

		/* Used to determine if ipid_offset can be ignored.
		 * inet_gro_flush() uses bit 0 for the outer header and
		 * bit 1 for the inner header.
		 */
		u8	ip_fixedid:2;

		/* Number of gro_receive callbacks this packet already went through.
		 * Four bits wide; compared against GRO_RECURSION_LIMIT.
		 */
		u8 recursion_counter:4;

		/* GRO is done by frag_list pointer chaining. */
		u8	is_flist:1;
	);

	/* used to support CHECKSUM_COMPLETE for tunneling protocols */
	__wsum	csum;

	/* L3 offsets: named members and network_offsets[] alias each other,
	 * index 0 being the outer and index 1 the inner network header.
	 */
	union {
		struct {
			u16 network_offset;
			u16 inner_network_offset;
		};
		u16 network_offsets[2];
	};
};
99 
/* View the control buffer of @skb (skb->cb) as a struct napi_gro_cb. */
#define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb)
101 
102 #define GRO_RECURSION_LIMIT 15
103 static inline int gro_recursion_inc_test(struct sk_buff *skb)
104 {
105 	return ++NAPI_GRO_CB(skb)->recursion_counter == GRO_RECURSION_LIMIT;
106 }
107 
108 typedef struct sk_buff *(*gro_receive_t)(struct list_head *, struct sk_buff *);
109 static inline struct sk_buff *call_gro_receive(gro_receive_t cb,
110 					       struct list_head *head,
111 					       struct sk_buff *skb)
112 {
113 	if (unlikely(gro_recursion_inc_test(skb))) {
114 		NAPI_GRO_CB(skb)->flush |= 1;
115 		return NULL;
116 	}
117 
118 	return cb(head, skb);
119 }
120 
121 typedef struct sk_buff *(*gro_receive_sk_t)(struct sock *, struct list_head *,
122 					    struct sk_buff *);
123 static inline struct sk_buff *call_gro_receive_sk(gro_receive_sk_t cb,
124 						  struct sock *sk,
125 						  struct list_head *head,
126 						  struct sk_buff *skb)
127 {
128 	if (unlikely(gro_recursion_inc_test(skb))) {
129 		NAPI_GRO_CB(skb)->flush |= 1;
130 		return NULL;
131 	}
132 
133 	return cb(sk, head, skb);
134 }
135 
136 static inline unsigned int skb_gro_offset(const struct sk_buff *skb)
137 {
138 	return NAPI_GRO_CB(skb)->data_offset;
139 }
140 
141 static inline unsigned int skb_gro_len(const struct sk_buff *skb)
142 {
143 	return skb->len - NAPI_GRO_CB(skb)->data_offset;
144 }
145 
146 static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len)
147 {
148 	NAPI_GRO_CB(skb)->data_offset += len;
149 }
150 
151 static inline void *skb_gro_header_fast(const struct sk_buff *skb,
152 					unsigned int offset)
153 {
154 	return NAPI_GRO_CB(skb)->frag0 + offset;
155 }
156 
157 static inline bool skb_gro_may_pull(const struct sk_buff *skb,
158 				    unsigned int hlen)
159 {
160 	return likely(hlen <= NAPI_GRO_CB(skb)->frag0_len);
161 }
162 
163 static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen,
164 					unsigned int offset)
165 {
166 	if (!pskb_may_pull(skb, hlen))
167 		return NULL;
168 
169 	return skb->data + offset;
170 }
171 
/* Return a pointer to the header at @offset, requiring @hlen bytes to be
 * accessible. Uses frag0 when it covers @hlen bytes, otherwise falls back
 * to skb_gro_header_slow(), which may return NULL.
 */
static inline void *skb_gro_header(struct sk_buff *skb, unsigned int hlen,
				   unsigned int offset)
{
	if (skb_gro_may_pull(skb, hlen))
		return skb_gro_header_fast(skb, offset);

	return skb_gro_header_slow(skb, hlen, offset);
}
182 
183 static inline int skb_gro_receive_network_offset(const struct sk_buff *skb)
184 {
185 	return NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark];
186 }
187 
188 static inline void *skb_gro_network_header(const struct sk_buff *skb)
189 {
190 	if (skb_gro_may_pull(skb, skb_gro_offset(skb)))
191 		return skb_gro_header_fast(skb, skb_gro_receive_network_offset(skb));
192 
193 	return skb->data + skb_gro_receive_network_offset(skb);
194 }
195 
196 static inline __wsum inet_gro_compute_pseudo(const struct sk_buff *skb,
197 					     int proto)
198 {
199 	const struct iphdr *iph = skb_gro_network_header(skb);
200 
201 	return csum_tcpudp_nofold(iph->saddr, iph->daddr,
202 				  skb_gro_len(skb), proto, 0);
203 }
204 
205 static inline void skb_gro_postpull_rcsum(struct sk_buff *skb,
206 					const void *start, unsigned int len)
207 {
208 	if (NAPI_GRO_CB(skb)->csum_valid)
209 		NAPI_GRO_CB(skb)->csum = wsum_negate(csum_partial(start, len,
210 						wsum_negate(NAPI_GRO_CB(skb)->csum)));
211 }
212 
213 /* GRO checksum functions. These are logical equivalents of the normal
214  * checksum functions (in skbuff.h) except that they operate on the GRO
215  * offsets and fields in sk_buff.
216  */
217 
/* Defined outside this header. Called by
 * __skb_gro_checksum_validate_complete() when its fast check does not
 * already yield a zero result.
 */
__sum16 __skb_gro_checksum_complete(struct sk_buff *skb);
219 
220 static inline bool skb_at_gro_remcsum_start(struct sk_buff *skb)
221 {
222 	return (NAPI_GRO_CB(skb)->gro_remcsum_start == skb_gro_offset(skb));
223 }
224 
225 static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb,
226 						      bool zero_okay,
227 						      __sum16 check)
228 {
229 	return ((skb->ip_summed != CHECKSUM_PARTIAL ||
230 		skb_checksum_start_offset(skb) <
231 		 skb_gro_offset(skb)) &&
232 		!skb_at_gro_remcsum_start(skb) &&
233 		NAPI_GRO_CB(skb)->csum_cnt == 0 &&
234 		(!zero_okay || check));
235 }
236 
237 static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb,
238 							   __wsum psum)
239 {
240 	if (NAPI_GRO_CB(skb)->csum_valid &&
241 	    !csum_fold(csum_add(psum, NAPI_GRO_CB(skb)->csum)))
242 		return 0;
243 
244 	NAPI_GRO_CB(skb)->csum = psum;
245 
246 	return __skb_gro_checksum_complete(skb);
247 }
248 
249 static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb)
250 {
251 	if (NAPI_GRO_CB(skb)->csum_cnt > 0) {
252 		/* Consume a checksum from CHECKSUM_UNNECESSARY */
253 		NAPI_GRO_CB(skb)->csum_cnt--;
254 	} else {
255 		/* Update skb for CHECKSUM_UNNECESSARY and csum_level when we
256 		 * verified a new top level checksum or an encapsulated one
257 		 * during GRO. This saves work if we fallback to normal path.
258 		 */
259 		__skb_incr_checksum_unnecessary(skb);
260 	}
261 }
262 
/* Core of the GRO checksum validation macros.
 *
 * Evaluates to a __sum16 that is 0 when no validation was needed or the
 * validation succeeded. @compute_pseudo(skb, proto) is only evaluated when
 * __skb_gro_checksum_validate_needed() says a check is required. On a zero
 * result skb_gro_incr_csum_unnecessary() is called.
 *
 * Note that @skb is expanded several times.
 */
#define __skb_gro_checksum_validate(skb, proto, zero_okay, check,	\
				    compute_pseudo)			\
({									\
	__sum16 __ret = 0;						\
	if (__skb_gro_checksum_validate_needed(skb, zero_okay, check))	\
		__ret = __skb_gro_checksum_validate_complete(skb,	\
				compute_pseudo(skb, proto));		\
	if (!__ret)							\
		skb_gro_incr_csum_unnecessary(skb);			\
	__ret;								\
})
274 
/* Validate with zero_okay == false, i.e. a zero checksum field gets no
 * special treatment.
 */
#define skb_gro_checksum_validate(skb, proto, compute_pseudo)		\
	__skb_gro_checksum_validate(skb, proto, false, 0, compute_pseudo)

/* Validate with zero_okay == true: validation is skipped when @check is 0. */
#define skb_gro_checksum_validate_zero_check(skb, proto, check,		\
					     compute_pseudo)		\
	__skb_gro_checksum_validate(skb, proto, true, check, compute_pseudo)

/* Validate with proto 0 and null_compute_pseudo as pseudo-header callback. */
#define skb_gro_checksum_simple_validate(skb)				\
	__skb_gro_checksum_validate(skb, 0, false, 0, null_compute_pseudo)
284 
285 static inline bool __skb_gro_checksum_convert_check(struct sk_buff *skb)
286 {
287 	return (NAPI_GRO_CB(skb)->csum_cnt == 0 &&
288 		!NAPI_GRO_CB(skb)->csum_valid);
289 }
290 
291 static inline void __skb_gro_checksum_convert(struct sk_buff *skb,
292 					      __wsum pseudo)
293 {
294 	NAPI_GRO_CB(skb)->csum = ~pseudo;
295 	NAPI_GRO_CB(skb)->csum_valid = 1;
296 }
297 
/* If __skb_gro_checksum_convert_check() allows it, make the GRO checksum
 * valid using @compute_pseudo(skb, proto). The callback is only evaluated
 * when the conversion takes place. @skb is expanded more than once.
 */
#define skb_gro_checksum_try_convert(skb, proto, compute_pseudo)	\
do {									\
	if (__skb_gro_checksum_convert_check(skb))			\
		__skb_gro_checksum_convert(skb, 			\
					   compute_pseudo(skb, proto));	\
} while (0)
304 
/* State kept between skb_gro_remcsum_process() and
 * skb_gro_remcsum_cleanup().
 */
struct gro_remcsum {
	/* Offset of the checksum field that was adjusted. */
	int offset;
	/* Value returned by remcsum_adjust(); zero means nothing to undo. */
	__wsum delta;
};
309 
310 static inline void skb_gro_remcsum_init(struct gro_remcsum *grc)
311 {
312 	grc->offset = 0;
313 	grc->delta = 0;
314 }
315 
/* Apply remote checksum offload handling to the header at @ptr.
 *
 * @off is the offset of that header and @hdrlen its length; @start and
 * @offset are passed on to remcsum_adjust() and are both relative to
 * @off + @hdrlen.
 *
 * When @nopartial is false only gro_remcsum_start is recorded and @ptr is
 * returned unchanged. Otherwise the header is re-obtained with enough bytes
 * to cover the checksum field, the checksum is adjusted, and the adjustment
 * is remembered in @grc for skb_gro_remcsum_cleanup().
 *
 * Returns the (possibly new) header pointer, or NULL if the header could
 * not be made accessible. The GRO checksum must already be valid.
 */
static inline void *skb_gro_remcsum_process(struct sk_buff *skb, void *ptr,
					    unsigned int off, size_t hdrlen,
					    int start, int offset,
					    struct gro_remcsum *grc,
					    bool nopartial)
{
	__wsum delta;
	/* Bytes needed past @off: the header plus whichever reaches further,
	 * the end of the 16-bit checksum field or @start.
	 */
	size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start);

	BUG_ON(!NAPI_GRO_CB(skb)->csum_valid);

	if (!nopartial) {
		NAPI_GRO_CB(skb)->gro_remcsum_start = off + hdrlen + start;
		return ptr;
	}

	/* The slow path may move the data, so take a fresh pointer. */
	ptr = skb_gro_header(skb, off + plen, off);
	if (!ptr)
		return NULL;

	delta = remcsum_adjust(ptr + hdrlen, NAPI_GRO_CB(skb)->csum,
			       start, offset);

	/* Adjust the GRO checksum (NAPI_GRO_CB(skb)->csum) since we changed
	 * the packet
	 */
	NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta);

	/* Remember where and by how much, so the change can be undone. */
	grc->offset = off + hdrlen + offset;
	grc->delta = delta;

	return ptr;
}
347 
348 static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb,
349 					   struct gro_remcsum *grc)
350 {
351 	void *ptr;
352 	size_t plen = grc->offset + sizeof(u16);
353 
354 	if (!grc->delta)
355 		return;
356 
357 	ptr = skb_gro_header(skb, plen, grc->offset);
358 	if (!ptr)
359 		return;
360 
361 	remcsum_unadjust((__sum16 *)ptr, grc->delta);
362 }
363 
364 #ifdef CONFIG_XFRM_OFFLOAD
365 static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush)
366 {
367 	if (PTR_ERR(pp) != -EINPROGRESS)
368 		NAPI_GRO_CB(skb)->flush |= flush;
369 }
370 static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb,
371 					       struct sk_buff *pp,
372 					       int flush,
373 					       struct gro_remcsum *grc)
374 {
375 	if (PTR_ERR(pp) != -EINPROGRESS) {
376 		NAPI_GRO_CB(skb)->flush |= flush;
377 		skb_gro_remcsum_cleanup(skb, grc);
378 		skb->remcsum_offload = 0;
379 	}
380 }
381 #else
382 static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush)
383 {
384 	NAPI_GRO_CB(skb)->flush |= flush;
385 }
386 static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb,
387 					       struct sk_buff *pp,
388 					       int flush,
389 					       struct gro_remcsum *grc)
390 {
391 	NAPI_GRO_CB(skb)->flush |= flush;
392 	skb_gro_remcsum_cleanup(skb, grc);
393 	skb->remcsum_offload = 0;
394 }
395 #endif
396 
/* Declarations of the IPv6/IPv4 network-layer and UDP GRO receive and
 * complete callbacks.
 * NOTE(review): presumably wrapped in INDIRECT_CALLABLE_DECLARE() so they
 * can be named as INDIRECT_CALL_*() targets - confirm against
 * linux/indirect_call_wrapper.h.
 */
INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *,
							   struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int));
INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *,
							   struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int));

INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *,
							   struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int));

INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *,
							   struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int));
411 
/* Same recursion guard as call_gro_receive(), but the callback is
 * dispatched through INDIRECT_CALL_INET(cb, f2, f1, head, skb). When the
 * recursion limit is hit, @skb is flagged for flushing and the expression
 * evaluates to NULL. @skb is expanded more than once.
 */
#define indirect_call_gro_receive_inet(cb, f2, f1, head, skb)	\
({								\
	unlikely(gro_recursion_inc_test(skb)) ?			\
		NAPI_GRO_CB(skb)->flush |= 1, NULL :		\
		INDIRECT_CALL_INET(cb, f2, f1, head, skb);	\
})
418 
/* Generic UDP GRO entry points, defined outside this header.
 * NOTE(review): presumably shared by the udp4/udp6 callbacks declared
 * above - confirm against the UDP offload code.
 */
struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
				struct udphdr *uh, struct sock *sk);
int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
422 
423 static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
424 {
425 	struct udphdr *uh;
426 	unsigned int hlen, off;
427 
428 	off  = skb_gro_offset(skb);
429 	hlen = off + sizeof(*uh);
430 	uh   = skb_gro_header(skb, hlen, off);
431 
432 	return uh;
433 }
434 
435 static inline __wsum ip6_gro_compute_pseudo(const struct sk_buff *skb,
436 					    int proto)
437 {
438 	const struct ipv6hdr *iph = skb_gro_network_header(skb);
439 
440 	return ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr,
441 					    skb_gro_len(skb), proto, 0));
442 }
443 
/* Compare two IPv4 headers and tell whether they prevent merging into the
 * held packet @p.
 *
 * @inner selects the bit of NAPI_GRO_CB(p)->ip_fixedid that is used: bit 0
 * when false, bit 1 when true.
 *
 * Returns non-zero when TTL, TOS or the DF bit differ, or when the IP ID
 * distance between the headers is not the expected one; zero otherwise.
 */
static inline int inet_gro_flush(const struct iphdr *iph, const struct iphdr *iph2,
				 struct sk_buff *p, bool inner)
{
	/* 32-bit word starting at the id field, in host order: the IP ID ends
	 * up in the upper 16 bits, the lower 16 bits are where IP_DF is tested.
	 */
	const u32 id = ntohl(*(__be32 *)&iph->id);
	const u32 id2 = ntohl(*(__be32 *)&iph2->id);
	/* IP ID distance between the two headers, modulo 2^16. */
	const u16 ipid_offset = (id >> 16) - (id2 >> 16);
	const u16 count = NAPI_GRO_CB(p)->count;

	/* All fields must match except length and checksum. */
	if ((iph->ttl ^ iph2->ttl) | (iph->tos ^ iph2->tos) | ((id ^ id2) & IP_DF))
		return true;

	/* When we receive our second frame we can make a decision on if we
	 * continue this flow as an atomic flow with a fixed ID or if we use
	 * an incrementing ID.
	 */
	if (count == 1 && !ipid_offset)
		NAPI_GRO_CB(p)->ip_fixedid |= 1 << inner;

	/* Expected distance is 0 for a fixed-ID flow and @count otherwise;
	 * the XOR is zero only when the actual distance matches.
	 */
	return ipid_offset ^ (count * !(NAPI_GRO_CB(p)->ip_fixedid & (1 << inner)));
}
465 
466 static inline int ipv6_gro_flush(const struct ipv6hdr *iph, const struct ipv6hdr *iph2)
467 {
468 	/* <Version:4><Traffic_Class:8><Flow_Label:20> */
469 	__be32 first_word = *(__be32 *)iph ^ *(__be32 *)iph2;
470 
471 	/* Flush if Traffic Class fields are different. */
472 	return !!((first_word & htonl(0x0FF00000)) |
473 		(__force __be32)(iph->hop_limit ^ iph2->hop_limit));
474 }
475 
476 static inline int __gro_receive_network_flush(const void *th, const void *th2,
477 					      struct sk_buff *p, const u16 diff,
478 					      bool inner)
479 {
480 	const void *nh = th - diff;
481 	const void *nh2 = th2 - diff;
482 
483 	if (((struct iphdr *)nh)->version == 6)
484 		return ipv6_gro_flush(nh, nh2);
485 	else
486 		return inet_gro_flush(nh, nh2, p, inner);
487 }
488 
489 static inline int gro_receive_network_flush(const void *th, const void *th2,
490 					    struct sk_buff *p)
491 {
492 	int off = skb_transport_offset(p);
493 	int flush;
494 
495 	flush = __gro_receive_network_flush(th, th2, p, off - NAPI_GRO_CB(p)->network_offset, false);
496 	if (NAPI_GRO_CB(p)->encap_mark)
497 		flush |= __gro_receive_network_flush(th, th2, p, off - NAPI_GRO_CB(p)->inner_network_offset, true);
498 
499 	return flush;
500 }
501 
/* GRO merge and flush primitives, defined outside this header.
 * NOTE(review): skb_gro_receive*() presumably merge @skb into @p - confirm
 * against the implementation. __gro_flush() is the worker behind
 * gro_flush().
 */
int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb);
int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb);
void __gro_flush(struct gro_node *gro, bool flush_old);
505 
506 static inline void gro_flush(struct gro_node *gro, bool flush_old)
507 {
508 	if (!gro->bitmask)
509 		return;
510 
511 	__gro_flush(gro, flush_old);
512 }
513 
514 static inline void napi_gro_flush(struct napi_struct *napi, bool flush_old)
515 {
516 	gro_flush(&napi->gro, flush_old);
517 }
518 
519 /* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
520 static inline void gro_normal_list(struct gro_node *gro)
521 {
522 	if (!gro->rx_count)
523 		return;
524 	netif_receive_skb_list_internal(&gro->rx_list);
525 	INIT_LIST_HEAD(&gro->rx_list);
526 	gro->rx_count = 0;
527 }
528 
529 static inline void gro_flush_normal(struct gro_node *gro, bool flush_old)
530 {
531 	gro_flush(gro, flush_old);
532 	gro_normal_list(gro);
533 }
534 
535 /* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded,
536  * pass the whole batch up to the stack.
537  */
538 static inline void gro_normal_one(struct gro_node *gro, struct sk_buff *skb,
539 				  int segs)
540 {
541 	list_add_tail(&skb->list, &gro->rx_list);
542 	gro->rx_count += segs;
543 	if (gro->rx_count >= READ_ONCE(net_hotdata.gro_normal_batch))
544 		gro_normal_list(gro);
545 }
546 
/* Set up / tear down a struct gro_node; both are defined outside this
 * header.
 */
void gro_init(struct gro_node *gro);
void gro_cleanup(struct gro_node *gro);
549 
550 /* This function is the alternative of 'inet_iif' and 'inet_sdif'
551  * functions in case we can not rely on fields of IPCB.
552  *
553  * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized.
554  * The caller must hold the RCU read lock.
555  */
556 static inline void inet_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif)
557 {
558 	*iif = inet_iif(skb) ?: skb->dev->ifindex;
559 	*sdif = 0;
560 
561 #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
562 	if (netif_is_l3_slave(skb->dev)) {
563 		struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev);
564 
565 		*sdif = *iif;
566 		*iif = master ? master->ifindex : 0;
567 	}
568 #endif
569 }
570 
571 /* This function is the alternative of 'inet6_iif' and 'inet6_sdif'
572  * functions in case we can not rely on fields of IP6CB.
573  *
574  * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized.
575  * The caller must hold the RCU read lock.
576  */
577 static inline void inet6_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif)
578 {
579 	/* using skb->dev->ifindex because skb_dst(skb) is not initialized */
580 	*iif = skb->dev->ifindex;
581 	*sdif = 0;
582 
583 #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
584 	if (netif_is_l3_slave(skb->dev)) {
585 		struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev);
586 
587 		*sdif = *iif;
588 		*iif = master ? master->ifindex : 0;
589 	}
590 #endif
591 }
592 
/* Lookup helpers keyed by @type, defined outside this header.
 * NOTE(review): presumably return the packet_offload that provides a
 * gro_receive / gro_complete callback for that type, or NULL - confirm
 * against the implementation.
 */
struct packet_offload *gro_find_receive_by_type(__be16 type);
struct packet_offload *gro_find_complete_by_type(__be16 type);
595 
596 #endif /* _NET_GRO_H */
597