1 /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 3 #ifndef _NET_GRO_H 4 #define _NET_GRO_H 5 6 #include <linux/indirect_call_wrapper.h> 7 #include <linux/ip.h> 8 #include <linux/ipv6.h> 9 #include <net/ip6_checksum.h> 10 #include <linux/skbuff.h> 11 #include <net/udp.h> 12 #include <net/hotdata.h> 13 14 /* This should be increased if a protocol with a bigger head is added. */ 15 #define GRO_MAX_HEAD (MAX_HEADER + 128) 16 17 struct napi_gro_cb { 18 union { 19 struct { 20 /* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */ 21 void *frag0; 22 23 /* Length of frag0. */ 24 unsigned int frag0_len; 25 }; 26 27 struct { 28 /* used in skb_gro_receive() slow path */ 29 struct sk_buff *last; 30 31 /* jiffies when first packet was created/queued */ 32 unsigned long age; 33 }; 34 }; 35 36 /* This indicates where we are processing relative to skb->data. */ 37 int data_offset; 38 39 /* This is non-zero if the packet cannot be merged with the new skb. */ 40 u16 flush; 41 42 /* Number of segments aggregated. */ 43 u16 count; 44 45 /* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */ 46 u16 proto; 47 48 u16 pad; 49 50 /* Used in napi_gro_cb::free */ 51 #define NAPI_GRO_FREE 1 52 #define NAPI_GRO_FREE_STOLEN_HEAD 2 53 /* portion of the cb set to zero at every gro iteration */ 54 struct_group(zeroed, 55 56 /* Start offset for remote checksum offload */ 57 u16 gro_remcsum_start; 58 59 /* This is non-zero if the packet may be of the same flow. */ 60 u8 same_flow:1; 61 62 /* Used in tunnel GRO receive */ 63 u8 encap_mark:1; 64 65 /* GRO checksum is valid */ 66 u8 csum_valid:1; 67 68 /* Number of checksums via CHECKSUM_UNNECESSARY */ 69 u8 csum_cnt:3; 70 71 /* Free the skb? */ 72 u8 free:2; 73 74 /* Used in GRE, set in fou/gue_gro_receive */ 75 u8 is_fou:1; 76 77 /* Used to determine if ipid_offset can be ignored */ 78 u8 ip_fixedid:2; 79 80 /* Number of gro_receive callbacks this packet already went through */ 81 u8 recursion_counter:4; 82 83 /* GRO is done by frag_list pointer chaining. */ 84 u8 is_flist:1; 85 ); 86 87 /* used to support CHECKSUM_COMPLETE for tunneling protocols */ 88 __wsum csum; 89 90 /* L3 offsets */ 91 union { 92 struct { 93 u16 network_offset; 94 u16 inner_network_offset; 95 }; 96 u16 network_offsets[2]; 97 }; 98 }; 99 100 #define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) 101 102 #define GRO_RECURSION_LIMIT 15 103 static inline int gro_recursion_inc_test(struct sk_buff *skb) 104 { 105 return ++NAPI_GRO_CB(skb)->recursion_counter == GRO_RECURSION_LIMIT; 106 } 107 108 typedef struct sk_buff *(*gro_receive_t)(struct list_head *, struct sk_buff *); 109 static inline struct sk_buff *call_gro_receive(gro_receive_t cb, 110 struct list_head *head, 111 struct sk_buff *skb) 112 { 113 if (unlikely(gro_recursion_inc_test(skb))) { 114 NAPI_GRO_CB(skb)->flush |= 1; 115 return NULL; 116 } 117 118 return cb(head, skb); 119 } 120 121 typedef struct sk_buff *(*gro_receive_sk_t)(struct sock *, struct list_head *, 122 struct sk_buff *); 123 static inline struct sk_buff *call_gro_receive_sk(gro_receive_sk_t cb, 124 struct sock *sk, 125 struct list_head *head, 126 struct sk_buff *skb) 127 { 128 if (unlikely(gro_recursion_inc_test(skb))) { 129 NAPI_GRO_CB(skb)->flush |= 1; 130 return NULL; 131 } 132 133 return cb(sk, head, skb); 134 } 135 136 static inline unsigned int skb_gro_offset(const struct sk_buff *skb) 137 { 138 return NAPI_GRO_CB(skb)->data_offset; 139 } 140 141 static inline unsigned int skb_gro_len(const struct sk_buff *skb) 142 { 143 return skb->len - NAPI_GRO_CB(skb)->data_offset; 144 } 145 146 static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len) 147 { 148 NAPI_GRO_CB(skb)->data_offset += len; 149 } 150 151 static inline void *skb_gro_header_fast(const struct sk_buff *skb, 152 unsigned int offset) 153 { 154 return NAPI_GRO_CB(skb)->frag0 + offset; 155 } 156 157 static inline bool skb_gro_may_pull(const struct sk_buff *skb, 158 unsigned int hlen) 159 { 160 return likely(hlen <= NAPI_GRO_CB(skb)->frag0_len); 161 } 162 163 static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, 164 unsigned int offset) 165 { 166 if (!pskb_may_pull(skb, hlen)) 167 return NULL; 168 169 return skb->data + offset; 170 } 171 172 static inline void *skb_gro_header(struct sk_buff *skb, unsigned int hlen, 173 unsigned int offset) 174 { 175 void *ptr; 176 177 ptr = skb_gro_header_fast(skb, offset); 178 if (!skb_gro_may_pull(skb, hlen)) 179 ptr = skb_gro_header_slow(skb, hlen, offset); 180 return ptr; 181 } 182 183 static inline int skb_gro_receive_network_offset(const struct sk_buff *skb) 184 { 185 return NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark]; 186 } 187 188 static inline void *skb_gro_network_header(const struct sk_buff *skb) 189 { 190 if (skb_gro_may_pull(skb, skb_gro_offset(skb))) 191 return skb_gro_header_fast(skb, skb_gro_receive_network_offset(skb)); 192 193 return skb->data + skb_gro_receive_network_offset(skb); 194 } 195 196 static inline __wsum inet_gro_compute_pseudo(const struct sk_buff *skb, 197 int proto) 198 { 199 const struct iphdr *iph = skb_gro_network_header(skb); 200 201 return csum_tcpudp_nofold(iph->saddr, iph->daddr, 202 skb_gro_len(skb), proto, 0); 203 } 204 205 static inline void skb_gro_postpull_rcsum(struct sk_buff *skb, 206 const void *start, unsigned int len) 207 { 208 if (NAPI_GRO_CB(skb)->csum_valid) 209 NAPI_GRO_CB(skb)->csum = wsum_negate(csum_partial(start, len, 210 wsum_negate(NAPI_GRO_CB(skb)->csum))); 211 } 212 213 /* GRO checksum functions. These are logical equivalents of the normal 214 * checksum functions (in skbuff.h) except that they operate on the GRO 215 * offsets and fields in sk_buff. 216 */ 217 218 __sum16 __skb_gro_checksum_complete(struct sk_buff *skb); 219 220 static inline bool skb_at_gro_remcsum_start(struct sk_buff *skb) 221 { 222 return (NAPI_GRO_CB(skb)->gro_remcsum_start == skb_gro_offset(skb)); 223 } 224 225 static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb, 226 bool zero_okay, 227 __sum16 check) 228 { 229 return ((skb->ip_summed != CHECKSUM_PARTIAL || 230 skb_checksum_start_offset(skb) < 231 skb_gro_offset(skb)) && 232 !skb_at_gro_remcsum_start(skb) && 233 NAPI_GRO_CB(skb)->csum_cnt == 0 && 234 (!zero_okay || check)); 235 } 236 237 static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb, 238 __wsum psum) 239 { 240 if (NAPI_GRO_CB(skb)->csum_valid && 241 !csum_fold(csum_add(psum, NAPI_GRO_CB(skb)->csum))) 242 return 0; 243 244 NAPI_GRO_CB(skb)->csum = psum; 245 246 return __skb_gro_checksum_complete(skb); 247 } 248 249 static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb) 250 { 251 if (NAPI_GRO_CB(skb)->csum_cnt > 0) { 252 /* Consume a checksum from CHECKSUM_UNNECESSARY */ 253 NAPI_GRO_CB(skb)->csum_cnt--; 254 } else { 255 /* Update skb for CHECKSUM_UNNECESSARY and csum_level when we 256 * verified a new top level checksum or an encapsulated one 257 * during GRO. This saves work if we fallback to normal path. 258 */ 259 __skb_incr_checksum_unnecessary(skb); 260 } 261 } 262 263 #define __skb_gro_checksum_validate(skb, proto, zero_okay, check, \ 264 compute_pseudo) \ 265 ({ \ 266 __sum16 __ret = 0; \ 267 if (__skb_gro_checksum_validate_needed(skb, zero_okay, check)) \ 268 __ret = __skb_gro_checksum_validate_complete(skb, \ 269 compute_pseudo(skb, proto)); \ 270 if (!__ret) \ 271 skb_gro_incr_csum_unnecessary(skb); \ 272 __ret; \ 273 }) 274 275 #define skb_gro_checksum_validate(skb, proto, compute_pseudo) \ 276 __skb_gro_checksum_validate(skb, proto, false, 0, compute_pseudo) 277 278 #define skb_gro_checksum_validate_zero_check(skb, proto, check, \ 279 compute_pseudo) \ 280 __skb_gro_checksum_validate(skb, proto, true, check, compute_pseudo) 281 282 #define skb_gro_checksum_simple_validate(skb) \ 283 __skb_gro_checksum_validate(skb, 0, false, 0, null_compute_pseudo) 284 285 static inline bool __skb_gro_checksum_convert_check(struct sk_buff *skb) 286 { 287 return (NAPI_GRO_CB(skb)->csum_cnt == 0 && 288 !NAPI_GRO_CB(skb)->csum_valid); 289 } 290 291 static inline void __skb_gro_checksum_convert(struct sk_buff *skb, 292 __wsum pseudo) 293 { 294 NAPI_GRO_CB(skb)->csum = ~pseudo; 295 NAPI_GRO_CB(skb)->csum_valid = 1; 296 } 297 298 #define skb_gro_checksum_try_convert(skb, proto, compute_pseudo) \ 299 do { \ 300 if (__skb_gro_checksum_convert_check(skb)) \ 301 __skb_gro_checksum_convert(skb, \ 302 compute_pseudo(skb, proto)); \ 303 } while (0) 304 305 struct gro_remcsum { 306 int offset; 307 __wsum delta; 308 }; 309 310 static inline void skb_gro_remcsum_init(struct gro_remcsum *grc) 311 { 312 grc->offset = 0; 313 grc->delta = 0; 314 } 315 316 static inline void *skb_gro_remcsum_process(struct sk_buff *skb, void *ptr, 317 unsigned int off, size_t hdrlen, 318 int start, int offset, 319 struct gro_remcsum *grc, 320 bool nopartial) 321 { 322 __wsum delta; 323 size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start); 324 325 BUG_ON(!NAPI_GRO_CB(skb)->csum_valid); 326 327 if (!nopartial) { 328 NAPI_GRO_CB(skb)->gro_remcsum_start = off + hdrlen + start; 329 return ptr; 330 } 331 332 ptr = skb_gro_header(skb, off + plen, off); 333 if (!ptr) 334 return NULL; 335 336 delta = remcsum_adjust(ptr + hdrlen, NAPI_GRO_CB(skb)->csum, 337 start, offset); 338 339 /* Adjust skb->csum since we changed the packet */ 340 NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta); 341 342 grc->offset = off + hdrlen + offset; 343 grc->delta = delta; 344 345 return ptr; 346 } 347 348 static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb, 349 struct gro_remcsum *grc) 350 { 351 void *ptr; 352 size_t plen = grc->offset + sizeof(u16); 353 354 if (!grc->delta) 355 return; 356 357 ptr = skb_gro_header(skb, plen, grc->offset); 358 if (!ptr) 359 return; 360 361 remcsum_unadjust((__sum16 *)ptr, grc->delta); 362 } 363 364 #ifdef CONFIG_XFRM_OFFLOAD 365 static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush) 366 { 367 if (PTR_ERR(pp) != -EINPROGRESS) 368 NAPI_GRO_CB(skb)->flush |= flush; 369 } 370 static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb, 371 struct sk_buff *pp, 372 int flush, 373 struct gro_remcsum *grc) 374 { 375 if (PTR_ERR(pp) != -EINPROGRESS) { 376 NAPI_GRO_CB(skb)->flush |= flush; 377 skb_gro_remcsum_cleanup(skb, grc); 378 skb->remcsum_offload = 0; 379 } 380 } 381 #else 382 static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush) 383 { 384 NAPI_GRO_CB(skb)->flush |= flush; 385 } 386 static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb, 387 struct sk_buff *pp, 388 int flush, 389 struct gro_remcsum *grc) 390 { 391 NAPI_GRO_CB(skb)->flush |= flush; 392 skb_gro_remcsum_cleanup(skb, grc); 393 skb->remcsum_offload = 0; 394 } 395 #endif 396 397 INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *, 398 struct sk_buff *)); 399 INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int)); 400 INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *, 401 struct sk_buff *)); 402 INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int)); 403 404 INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *, 405 struct sk_buff *)); 406 INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int)); 407 408 struct sk_buff *udp6_gro_receive(struct list_head *, struct sk_buff *); 409 int udp6_gro_complete(struct sk_buff *, int); 410 411 #define indirect_call_gro_receive_inet(cb, f2, f1, head, skb) \ 412 ({ \ 413 unlikely(gro_recursion_inc_test(skb)) ? \ 414 NAPI_GRO_CB(skb)->flush |= 1, NULL : \ 415 INDIRECT_CALL_INET(cb, f2, f1, head, skb); \ 416 }) 417 418 struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, 419 struct udphdr *uh, struct sock *sk); 420 int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup); 421 422 static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb) 423 { 424 struct udphdr *uh; 425 unsigned int hlen, off; 426 427 off = skb_gro_offset(skb); 428 hlen = off + sizeof(*uh); 429 uh = skb_gro_header(skb, hlen, off); 430 431 return uh; 432 } 433 434 static inline __wsum ip6_gro_compute_pseudo(const struct sk_buff *skb, 435 int proto) 436 { 437 const struct ipv6hdr *iph = skb_gro_network_header(skb); 438 439 return ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr, 440 skb_gro_len(skb), proto, 0)); 441 } 442 443 static inline int inet_gro_flush(const struct iphdr *iph, const struct iphdr *iph2, 444 struct sk_buff *p, bool inner) 445 { 446 const u32 id = ntohl(*(__be32 *)&iph->id); 447 const u32 id2 = ntohl(*(__be32 *)&iph2->id); 448 const u16 ipid_offset = (id >> 16) - (id2 >> 16); 449 const u16 count = NAPI_GRO_CB(p)->count; 450 451 /* All fields must match except length and checksum. */ 452 if ((iph->ttl ^ iph2->ttl) | (iph->tos ^ iph2->tos) | ((id ^ id2) & IP_DF)) 453 return true; 454 455 /* When we receive our second frame we can make a decision on if we 456 * continue this flow as an atomic flow with a fixed ID or if we use 457 * an incrementing ID. 458 */ 459 if (count == 1 && !ipid_offset) 460 NAPI_GRO_CB(p)->ip_fixedid |= 1 << inner; 461 462 return ipid_offset ^ (count * !(NAPI_GRO_CB(p)->ip_fixedid & (1 << inner))); 463 } 464 465 static inline int ipv6_gro_flush(const struct ipv6hdr *iph, const struct ipv6hdr *iph2) 466 { 467 /* <Version:4><Traffic_Class:8><Flow_Label:20> */ 468 __be32 first_word = *(__be32 *)iph ^ *(__be32 *)iph2; 469 470 /* Flush if Traffic Class fields are different. */ 471 return !!((first_word & htonl(0x0FF00000)) | 472 (__force __be32)(iph->hop_limit ^ iph2->hop_limit)); 473 } 474 475 static inline int __gro_receive_network_flush(const void *th, const void *th2, 476 struct sk_buff *p, const u16 diff, 477 bool inner) 478 { 479 const void *nh = th - diff; 480 const void *nh2 = th2 - diff; 481 482 if (((struct iphdr *)nh)->version == 6) 483 return ipv6_gro_flush(nh, nh2); 484 else 485 return inet_gro_flush(nh, nh2, p, inner); 486 } 487 488 static inline int gro_receive_network_flush(const void *th, const void *th2, 489 struct sk_buff *p) 490 { 491 int off = skb_transport_offset(p); 492 int flush; 493 494 flush = __gro_receive_network_flush(th, th2, p, off - NAPI_GRO_CB(p)->network_offset, false); 495 if (NAPI_GRO_CB(p)->encap_mark) 496 flush |= __gro_receive_network_flush(th, th2, p, off - NAPI_GRO_CB(p)->inner_network_offset, true); 497 498 return flush; 499 } 500 501 int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb); 502 int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb); 503 void __gro_flush(struct gro_node *gro, bool flush_old); 504 505 static inline void gro_flush(struct gro_node *gro, bool flush_old) 506 { 507 if (!gro->bitmask) 508 return; 509 510 __gro_flush(gro, flush_old); 511 } 512 513 static inline void napi_gro_flush(struct napi_struct *napi, bool flush_old) 514 { 515 gro_flush(&napi->gro, flush_old); 516 } 517 518 /* Pass the currently batched GRO_NORMAL SKBs up to the stack. */ 519 static inline void gro_normal_list(struct gro_node *gro) 520 { 521 if (!gro->rx_count) 522 return; 523 netif_receive_skb_list_internal(&gro->rx_list); 524 INIT_LIST_HEAD(&gro->rx_list); 525 gro->rx_count = 0; 526 } 527 528 static inline void gro_flush_normal(struct gro_node *gro, bool flush_old) 529 { 530 gro_flush(gro, flush_old); 531 gro_normal_list(gro); 532 } 533 534 /* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded, 535 * pass the whole batch up to the stack. 536 */ 537 static inline void gro_normal_one(struct gro_node *gro, struct sk_buff *skb, 538 int segs) 539 { 540 list_add_tail(&skb->list, &gro->rx_list); 541 gro->rx_count += segs; 542 if (gro->rx_count >= READ_ONCE(net_hotdata.gro_normal_batch)) 543 gro_normal_list(gro); 544 } 545 546 void gro_init(struct gro_node *gro); 547 void gro_cleanup(struct gro_node *gro); 548 549 /* This function is the alternative of 'inet_iif' and 'inet_sdif' 550 * functions in case we can not rely on fields of IPCB. 551 * 552 * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized. 553 * The caller must hold the RCU read lock. 554 */ 555 static inline void inet_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif) 556 { 557 *iif = inet_iif(skb) ?: skb->dev->ifindex; 558 *sdif = 0; 559 560 #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) 561 if (netif_is_l3_slave(skb->dev)) { 562 struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev); 563 564 *sdif = *iif; 565 *iif = master ? master->ifindex : 0; 566 } 567 #endif 568 } 569 570 /* This function is the alternative of 'inet6_iif' and 'inet6_sdif' 571 * functions in case we can not rely on fields of IP6CB. 572 * 573 * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized. 574 * The caller must hold the RCU read lock. 575 */ 576 static inline void inet6_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif) 577 { 578 /* using skb->dev->ifindex because skb_dst(skb) is not initialized */ 579 *iif = skb->dev->ifindex; 580 *sdif = 0; 581 582 #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) 583 if (netif_is_l3_slave(skb->dev)) { 584 struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev); 585 586 *sdif = *iif; 587 *iif = master ? master->ifindex : 0; 588 } 589 #endif 590 } 591 592 struct packet_offload *gro_find_receive_by_type(__be16 type); 593 struct packet_offload *gro_find_complete_by_type(__be16 type); 594 595 static inline struct tcphdr *tcp_gro_pull_header(struct sk_buff *skb) 596 { 597 unsigned int thlen, hlen, off; 598 struct tcphdr *th; 599 600 off = skb_gro_offset(skb); 601 hlen = off + sizeof(*th); 602 th = skb_gro_header(skb, hlen, off); 603 if (unlikely(!th)) 604 return NULL; 605 606 thlen = th->doff * 4; 607 if (unlikely(thlen < sizeof(*th))) 608 return NULL; 609 610 hlen = off + thlen; 611 if (!skb_gro_may_pull(skb, hlen)) { 612 th = skb_gro_header_slow(skb, hlen, off); 613 if (unlikely(!th)) 614 return NULL; 615 } 616 617 skb_gro_pull(skb, thlen); 618 619 return th; 620 } 621 622 #endif /* _NET_GRO_H */ 623