1 /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 3 #ifndef _NET_GRO_H 4 #define _NET_GRO_H 5 6 #include <linux/indirect_call_wrapper.h> 7 #include <linux/ip.h> 8 #include <linux/ipv6.h> 9 #include <net/ip6_checksum.h> 10 #include <linux/skbuff.h> 11 #include <net/udp.h> 12 #include <net/hotdata.h> 13 14 /* This should be increased if a protocol with a bigger head is added. */ 15 #define GRO_MAX_HEAD (MAX_HEADER + 128) 16 17 struct napi_gro_cb { 18 union { 19 struct { 20 /* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */ 21 void *frag0; 22 23 /* Length of frag0. */ 24 unsigned int frag0_len; 25 }; 26 27 struct { 28 /* used in skb_gro_receive() slow path */ 29 struct sk_buff *last; 30 31 /* jiffies when first packet was created/queued */ 32 unsigned long age; 33 }; 34 }; 35 36 /* This indicates where we are processing relative to skb->data. */ 37 int data_offset; 38 39 /* This is non-zero if the packet cannot be merged with the new skb. */ 40 u16 flush; 41 42 /* Number of segments aggregated. */ 43 u16 count; 44 45 /* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */ 46 u16 proto; 47 48 u16 pad; 49 50 /* Used in napi_gro_cb::free */ 51 #define NAPI_GRO_FREE 1 52 #define NAPI_GRO_FREE_STOLEN_HEAD 2 53 /* portion of the cb set to zero at every gro iteration */ 54 struct_group(zeroed, 55 56 /* Start offset for remote checksum offload */ 57 u16 gro_remcsum_start; 58 59 /* This is non-zero if the packet may be of the same flow. */ 60 u8 same_flow:1; 61 62 /* Used in tunnel GRO receive */ 63 u8 encap_mark:1; 64 65 /* GRO checksum is valid */ 66 u8 csum_valid:1; 67 68 /* Number of checksums via CHECKSUM_UNNECESSARY */ 69 u8 csum_cnt:3; 70 71 /* Free the skb? */ 72 u8 free:2; 73 74 /* Used in GRE, set in fou/gue_gro_receive */ 75 u8 is_fou:1; 76 77 /* Used to determine if ipid_offset can be ignored */ 78 u8 ip_fixedid:2; 79 80 /* Number of gro_receive callbacks this packet already went through */ 81 u8 recursion_counter:4; 82 83 /* GRO is done by frag_list pointer chaining. */ 84 u8 is_flist:1; 85 ); 86 87 /* used to support CHECKSUM_COMPLETE for tunneling protocols */ 88 __wsum csum; 89 90 /* L3 offsets */ 91 union { 92 struct { 93 u16 network_offset; 94 u16 inner_network_offset; 95 }; 96 u16 network_offsets[2]; 97 }; 98 }; 99 100 #define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) 101 102 #define GRO_RECURSION_LIMIT 15 103 static inline int gro_recursion_inc_test(struct sk_buff *skb) 104 { 105 return ++NAPI_GRO_CB(skb)->recursion_counter == GRO_RECURSION_LIMIT; 106 } 107 108 typedef struct sk_buff *(*gro_receive_t)(struct list_head *, struct sk_buff *); 109 static inline struct sk_buff *call_gro_receive(gro_receive_t cb, 110 struct list_head *head, 111 struct sk_buff *skb) 112 { 113 if (unlikely(gro_recursion_inc_test(skb))) { 114 NAPI_GRO_CB(skb)->flush |= 1; 115 return NULL; 116 } 117 118 return cb(head, skb); 119 } 120 121 typedef struct sk_buff *(*gro_receive_sk_t)(struct sock *, struct list_head *, 122 struct sk_buff *); 123 static inline struct sk_buff *call_gro_receive_sk(gro_receive_sk_t cb, 124 struct sock *sk, 125 struct list_head *head, 126 struct sk_buff *skb) 127 { 128 if (unlikely(gro_recursion_inc_test(skb))) { 129 NAPI_GRO_CB(skb)->flush |= 1; 130 return NULL; 131 } 132 133 return cb(sk, head, skb); 134 } 135 136 static inline unsigned int skb_gro_offset(const struct sk_buff *skb) 137 { 138 return NAPI_GRO_CB(skb)->data_offset; 139 } 140 141 static inline unsigned int skb_gro_len(const struct sk_buff *skb) 142 { 143 return skb->len - NAPI_GRO_CB(skb)->data_offset; 144 } 145 146 static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len) 147 { 148 NAPI_GRO_CB(skb)->data_offset += len; 149 } 150 151 static inline void *skb_gro_header_fast(const struct sk_buff *skb, 152 unsigned int offset) 153 { 154 return NAPI_GRO_CB(skb)->frag0 + offset; 155 } 156 157 static inline bool skb_gro_may_pull(const struct sk_buff *skb, 158 unsigned int hlen) 159 { 160 return likely(hlen <= NAPI_GRO_CB(skb)->frag0_len); 161 } 162 163 static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, 164 unsigned int offset) 165 { 166 if (!pskb_may_pull(skb, hlen)) 167 return NULL; 168 169 return skb->data + offset; 170 } 171 172 static inline void *skb_gro_header(struct sk_buff *skb, unsigned int hlen, 173 unsigned int offset) 174 { 175 void *ptr; 176 177 ptr = skb_gro_header_fast(skb, offset); 178 if (!skb_gro_may_pull(skb, hlen)) 179 ptr = skb_gro_header_slow(skb, hlen, offset); 180 return ptr; 181 } 182 183 static inline int skb_gro_receive_network_offset(const struct sk_buff *skb) 184 { 185 return NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark]; 186 } 187 188 static inline void *skb_gro_network_header(const struct sk_buff *skb) 189 { 190 if (skb_gro_may_pull(skb, skb_gro_offset(skb))) 191 return skb_gro_header_fast(skb, skb_gro_receive_network_offset(skb)); 192 193 return skb->data + skb_gro_receive_network_offset(skb); 194 } 195 196 static inline __wsum inet_gro_compute_pseudo(const struct sk_buff *skb, 197 int proto) 198 { 199 const struct iphdr *iph = skb_gro_network_header(skb); 200 201 return csum_tcpudp_nofold(iph->saddr, iph->daddr, 202 skb_gro_len(skb), proto, 0); 203 } 204 205 static inline void skb_gro_postpull_rcsum(struct sk_buff *skb, 206 const void *start, unsigned int len) 207 { 208 if (NAPI_GRO_CB(skb)->csum_valid) 209 NAPI_GRO_CB(skb)->csum = wsum_negate(csum_partial(start, len, 210 wsum_negate(NAPI_GRO_CB(skb)->csum))); 211 } 212 213 /* GRO checksum functions. These are logical equivalents of the normal 214 * checksum functions (in skbuff.h) except that they operate on the GRO 215 * offsets and fields in sk_buff. 216 */ 217 218 __sum16 __skb_gro_checksum_complete(struct sk_buff *skb); 219 220 static inline bool skb_at_gro_remcsum_start(struct sk_buff *skb) 221 { 222 return (NAPI_GRO_CB(skb)->gro_remcsum_start == skb_gro_offset(skb)); 223 } 224 225 static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb, 226 bool zero_okay, 227 __sum16 check) 228 { 229 return ((skb->ip_summed != CHECKSUM_PARTIAL || 230 skb_checksum_start_offset(skb) < 231 skb_gro_offset(skb)) && 232 !skb_at_gro_remcsum_start(skb) && 233 NAPI_GRO_CB(skb)->csum_cnt == 0 && 234 (!zero_okay || check)); 235 } 236 237 static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb, 238 __wsum psum) 239 { 240 if (NAPI_GRO_CB(skb)->csum_valid && 241 !csum_fold(csum_add(psum, NAPI_GRO_CB(skb)->csum))) 242 return 0; 243 244 NAPI_GRO_CB(skb)->csum = psum; 245 246 return __skb_gro_checksum_complete(skb); 247 } 248 249 static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb) 250 { 251 if (NAPI_GRO_CB(skb)->csum_cnt > 0) { 252 /* Consume a checksum from CHECKSUM_UNNECESSARY */ 253 NAPI_GRO_CB(skb)->csum_cnt--; 254 } else { 255 /* Update skb for CHECKSUM_UNNECESSARY and csum_level when we 256 * verified a new top level checksum or an encapsulated one 257 * during GRO. This saves work if we fallback to normal path. 258 */ 259 __skb_incr_checksum_unnecessary(skb); 260 } 261 } 262 263 #define __skb_gro_checksum_validate(skb, proto, zero_okay, check, \ 264 compute_pseudo) \ 265 ({ \ 266 __sum16 __ret = 0; \ 267 if (__skb_gro_checksum_validate_needed(skb, zero_okay, check)) \ 268 __ret = __skb_gro_checksum_validate_complete(skb, \ 269 compute_pseudo(skb, proto)); \ 270 if (!__ret) \ 271 skb_gro_incr_csum_unnecessary(skb); \ 272 __ret; \ 273 }) 274 275 #define skb_gro_checksum_validate(skb, proto, compute_pseudo) \ 276 __skb_gro_checksum_validate(skb, proto, false, 0, compute_pseudo) 277 278 #define skb_gro_checksum_validate_zero_check(skb, proto, check, \ 279 compute_pseudo) \ 280 __skb_gro_checksum_validate(skb, proto, true, check, compute_pseudo) 281 282 #define skb_gro_checksum_simple_validate(skb) \ 283 __skb_gro_checksum_validate(skb, 0, false, 0, null_compute_pseudo) 284 285 static inline bool __skb_gro_checksum_convert_check(struct sk_buff *skb) 286 { 287 return (NAPI_GRO_CB(skb)->csum_cnt == 0 && 288 !NAPI_GRO_CB(skb)->csum_valid); 289 } 290 291 static inline void __skb_gro_checksum_convert(struct sk_buff *skb, 292 __wsum pseudo) 293 { 294 NAPI_GRO_CB(skb)->csum = ~pseudo; 295 NAPI_GRO_CB(skb)->csum_valid = 1; 296 } 297 298 #define skb_gro_checksum_try_convert(skb, proto, compute_pseudo) \ 299 do { \ 300 if (__skb_gro_checksum_convert_check(skb)) \ 301 __skb_gro_checksum_convert(skb, \ 302 compute_pseudo(skb, proto)); \ 303 } while (0) 304 305 struct gro_remcsum { 306 int offset; 307 __wsum delta; 308 }; 309 310 static inline void skb_gro_remcsum_init(struct gro_remcsum *grc) 311 { 312 grc->offset = 0; 313 grc->delta = 0; 314 } 315 316 static inline void *skb_gro_remcsum_process(struct sk_buff *skb, void *ptr, 317 unsigned int off, size_t hdrlen, 318 int start, int offset, 319 struct gro_remcsum *grc, 320 bool nopartial) 321 { 322 __wsum delta; 323 size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start); 324 325 BUG_ON(!NAPI_GRO_CB(skb)->csum_valid); 326 327 if (!nopartial) { 328 NAPI_GRO_CB(skb)->gro_remcsum_start = off + hdrlen + start; 329 return ptr; 330 } 331 332 ptr = skb_gro_header(skb, off + plen, off); 333 if (!ptr) 334 return NULL; 335 336 delta = remcsum_adjust(ptr + hdrlen, NAPI_GRO_CB(skb)->csum, 337 start, offset); 338 339 /* Adjust skb->csum since we changed the packet */ 340 NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta); 341 342 grc->offset = off + hdrlen + offset; 343 grc->delta = delta; 344 345 return ptr; 346 } 347 348 static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb, 349 struct gro_remcsum *grc) 350 { 351 void *ptr; 352 size_t plen = grc->offset + sizeof(u16); 353 354 if (!grc->delta) 355 return; 356 357 ptr = skb_gro_header(skb, plen, grc->offset); 358 if (!ptr) 359 return; 360 361 remcsum_unadjust((__sum16 *)ptr, grc->delta); 362 } 363 364 #ifdef CONFIG_XFRM_OFFLOAD 365 static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush) 366 { 367 if (PTR_ERR(pp) != -EINPROGRESS) 368 NAPI_GRO_CB(skb)->flush |= flush; 369 } 370 static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb, 371 struct sk_buff *pp, 372 int flush, 373 struct gro_remcsum *grc) 374 { 375 if (PTR_ERR(pp) != -EINPROGRESS) { 376 NAPI_GRO_CB(skb)->flush |= flush; 377 skb_gro_remcsum_cleanup(skb, grc); 378 skb->remcsum_offload = 0; 379 } 380 } 381 #else 382 static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush) 383 { 384 NAPI_GRO_CB(skb)->flush |= flush; 385 } 386 static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb, 387 struct sk_buff *pp, 388 int flush, 389 struct gro_remcsum *grc) 390 { 391 NAPI_GRO_CB(skb)->flush |= flush; 392 skb_gro_remcsum_cleanup(skb, grc); 393 skb->remcsum_offload = 0; 394 } 395 #endif 396 397 INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *, 398 struct sk_buff *)); 399 INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int)); 400 INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *, 401 struct sk_buff *)); 402 INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int)); 403 404 INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *, 405 struct sk_buff *)); 406 INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int)); 407 408 INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *, 409 struct sk_buff *)); 410 INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int)); 411 412 #define indirect_call_gro_receive_inet(cb, f2, f1, head, skb) \ 413 ({ \ 414 unlikely(gro_recursion_inc_test(skb)) ? \ 415 NAPI_GRO_CB(skb)->flush |= 1, NULL : \ 416 INDIRECT_CALL_INET(cb, f2, f1, head, skb); \ 417 }) 418 419 struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, 420 struct udphdr *uh, struct sock *sk); 421 int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup); 422 423 static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb) 424 { 425 struct udphdr *uh; 426 unsigned int hlen, off; 427 428 off = skb_gro_offset(skb); 429 hlen = off + sizeof(*uh); 430 uh = skb_gro_header(skb, hlen, off); 431 432 return uh; 433 } 434 435 static inline __wsum ip6_gro_compute_pseudo(const struct sk_buff *skb, 436 int proto) 437 { 438 const struct ipv6hdr *iph = skb_gro_network_header(skb); 439 440 return ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr, 441 skb_gro_len(skb), proto, 0)); 442 } 443 444 static inline int inet_gro_flush(const struct iphdr *iph, const struct iphdr *iph2, 445 struct sk_buff *p, bool inner) 446 { 447 const u32 id = ntohl(*(__be32 *)&iph->id); 448 const u32 id2 = ntohl(*(__be32 *)&iph2->id); 449 const u16 ipid_offset = (id >> 16) - (id2 >> 16); 450 const u16 count = NAPI_GRO_CB(p)->count; 451 452 /* All fields must match except length and checksum. */ 453 if ((iph->ttl ^ iph2->ttl) | (iph->tos ^ iph2->tos) | ((id ^ id2) & IP_DF)) 454 return true; 455 456 /* When we receive our second frame we can make a decision on if we 457 * continue this flow as an atomic flow with a fixed ID or if we use 458 * an incrementing ID. 459 */ 460 if (count == 1 && !ipid_offset) 461 NAPI_GRO_CB(p)->ip_fixedid |= 1 << inner; 462 463 return ipid_offset ^ (count * !(NAPI_GRO_CB(p)->ip_fixedid & (1 << inner))); 464 } 465 466 static inline int ipv6_gro_flush(const struct ipv6hdr *iph, const struct ipv6hdr *iph2) 467 { 468 /* <Version:4><Traffic_Class:8><Flow_Label:20> */ 469 __be32 first_word = *(__be32 *)iph ^ *(__be32 *)iph2; 470 471 /* Flush if Traffic Class fields are different. */ 472 return !!((first_word & htonl(0x0FF00000)) | 473 (__force __be32)(iph->hop_limit ^ iph2->hop_limit)); 474 } 475 476 static inline int __gro_receive_network_flush(const void *th, const void *th2, 477 struct sk_buff *p, const u16 diff, 478 bool inner) 479 { 480 const void *nh = th - diff; 481 const void *nh2 = th2 - diff; 482 483 if (((struct iphdr *)nh)->version == 6) 484 return ipv6_gro_flush(nh, nh2); 485 else 486 return inet_gro_flush(nh, nh2, p, inner); 487 } 488 489 static inline int gro_receive_network_flush(const void *th, const void *th2, 490 struct sk_buff *p) 491 { 492 int off = skb_transport_offset(p); 493 int flush; 494 495 flush = __gro_receive_network_flush(th, th2, p, off - NAPI_GRO_CB(p)->network_offset, false); 496 if (NAPI_GRO_CB(p)->encap_mark) 497 flush |= __gro_receive_network_flush(th, th2, p, off - NAPI_GRO_CB(p)->inner_network_offset, true); 498 499 return flush; 500 } 501 502 int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb); 503 int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb); 504 void __gro_flush(struct gro_node *gro, bool flush_old); 505 506 static inline void gro_flush(struct gro_node *gro, bool flush_old) 507 { 508 if (!gro->bitmask) 509 return; 510 511 __gro_flush(gro, flush_old); 512 } 513 514 static inline void napi_gro_flush(struct napi_struct *napi, bool flush_old) 515 { 516 gro_flush(&napi->gro, flush_old); 517 } 518 519 /* Pass the currently batched GRO_NORMAL SKBs up to the stack. */ 520 static inline void gro_normal_list(struct gro_node *gro) 521 { 522 if (!gro->rx_count) 523 return; 524 netif_receive_skb_list_internal(&gro->rx_list); 525 INIT_LIST_HEAD(&gro->rx_list); 526 gro->rx_count = 0; 527 } 528 529 static inline void gro_flush_normal(struct gro_node *gro, bool flush_old) 530 { 531 gro_flush(gro, flush_old); 532 gro_normal_list(gro); 533 } 534 535 /* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded, 536 * pass the whole batch up to the stack. 537 */ 538 static inline void gro_normal_one(struct gro_node *gro, struct sk_buff *skb, 539 int segs) 540 { 541 list_add_tail(&skb->list, &gro->rx_list); 542 gro->rx_count += segs; 543 if (gro->rx_count >= READ_ONCE(net_hotdata.gro_normal_batch)) 544 gro_normal_list(gro); 545 } 546 547 void gro_init(struct gro_node *gro); 548 void gro_cleanup(struct gro_node *gro); 549 550 /* This function is the alternative of 'inet_iif' and 'inet_sdif' 551 * functions in case we can not rely on fields of IPCB. 552 * 553 * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized. 554 * The caller must hold the RCU read lock. 555 */ 556 static inline void inet_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif) 557 { 558 *iif = inet_iif(skb) ?: skb->dev->ifindex; 559 *sdif = 0; 560 561 #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) 562 if (netif_is_l3_slave(skb->dev)) { 563 struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev); 564 565 *sdif = *iif; 566 *iif = master ? master->ifindex : 0; 567 } 568 #endif 569 } 570 571 /* This function is the alternative of 'inet6_iif' and 'inet6_sdif' 572 * functions in case we can not rely on fields of IP6CB. 573 * 574 * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized. 575 * The caller must hold the RCU read lock. 576 */ 577 static inline void inet6_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif) 578 { 579 /* using skb->dev->ifindex because skb_dst(skb) is not initialized */ 580 *iif = skb->dev->ifindex; 581 *sdif = 0; 582 583 #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) 584 if (netif_is_l3_slave(skb->dev)) { 585 struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev); 586 587 *sdif = *iif; 588 *iif = master ? master->ifindex : 0; 589 } 590 #endif 591 } 592 593 struct packet_offload *gro_find_receive_by_type(__be16 type); 594 struct packet_offload *gro_find_complete_by_type(__be16 type); 595 596 #endif /* _NET_GRO_H */ 597