1 /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 3 #ifndef _NET_GRO_H 4 #define _NET_GRO_H 5 6 #include <linux/indirect_call_wrapper.h> 7 #include <linux/ip.h> 8 #include <linux/ipv6.h> 9 #include <net/ip6_checksum.h> 10 #include <linux/skbuff.h> 11 #include <net/udp.h> 12 #include <net/hotdata.h> 13 14 struct napi_gro_cb { 15 union { 16 struct { 17 /* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */ 18 void *frag0; 19 20 /* Length of frag0. */ 21 unsigned int frag0_len; 22 }; 23 24 struct { 25 /* used in skb_gro_receive() slow path */ 26 struct sk_buff *last; 27 28 /* jiffies when first packet was created/queued */ 29 unsigned long age; 30 }; 31 }; 32 33 /* This indicates where we are processing relative to skb->data. */ 34 int data_offset; 35 36 /* This is non-zero if the packet cannot be merged with the new skb. */ 37 u16 flush; 38 39 /* Number of segments aggregated. */ 40 u16 count; 41 42 /* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */ 43 u16 proto; 44 45 u16 pad; 46 47 /* Used in napi_gro_cb::free */ 48 #define NAPI_GRO_FREE 1 49 #define NAPI_GRO_FREE_STOLEN_HEAD 2 50 /* portion of the cb set to zero at every gro iteration */ 51 struct_group(zeroed, 52 53 /* Start offset for remote checksum offload */ 54 u16 gro_remcsum_start; 55 56 /* This is non-zero if the packet may be of the same flow. */ 57 u8 same_flow:1; 58 59 /* Used in tunnel GRO receive */ 60 u8 encap_mark:1; 61 62 /* GRO checksum is valid */ 63 u8 csum_valid:1; 64 65 /* Number of checksums via CHECKSUM_UNNECESSARY */ 66 u8 csum_cnt:3; 67 68 /* Free the skb? */ 69 u8 free:2; 70 71 /* Used in foo-over-udp, set in udp[46]_gro_receive */ 72 u8 is_ipv6:1; 73 74 /* Used in GRE, set in fou/gue_gro_receive */ 75 u8 is_fou:1; 76 77 /* Used to determine if ipid_offset can be ignored */ 78 u8 ip_fixedid:1; 79 80 /* Number of gro_receive callbacks this packet already went through */ 81 u8 recursion_counter:4; 82 83 /* GRO is done by frag_list pointer chaining. */ 84 u8 is_flist:1; 85 ); 86 87 /* used to support CHECKSUM_COMPLETE for tunneling protocols */ 88 __wsum csum; 89 90 /* L3 offsets */ 91 union { 92 struct { 93 u16 network_offset; 94 u16 inner_network_offset; 95 }; 96 u16 network_offsets[2]; 97 }; 98 }; 99 100 #define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) 101 102 #define GRO_RECURSION_LIMIT 15 103 static inline int gro_recursion_inc_test(struct sk_buff *skb) 104 { 105 return ++NAPI_GRO_CB(skb)->recursion_counter == GRO_RECURSION_LIMIT; 106 } 107 108 typedef struct sk_buff *(*gro_receive_t)(struct list_head *, struct sk_buff *); 109 static inline struct sk_buff *call_gro_receive(gro_receive_t cb, 110 struct list_head *head, 111 struct sk_buff *skb) 112 { 113 if (unlikely(gro_recursion_inc_test(skb))) { 114 NAPI_GRO_CB(skb)->flush |= 1; 115 return NULL; 116 } 117 118 return cb(head, skb); 119 } 120 121 typedef struct sk_buff *(*gro_receive_sk_t)(struct sock *, struct list_head *, 122 struct sk_buff *); 123 static inline struct sk_buff *call_gro_receive_sk(gro_receive_sk_t cb, 124 struct sock *sk, 125 struct list_head *head, 126 struct sk_buff *skb) 127 { 128 if (unlikely(gro_recursion_inc_test(skb))) { 129 NAPI_GRO_CB(skb)->flush |= 1; 130 return NULL; 131 } 132 133 return cb(sk, head, skb); 134 } 135 136 static inline unsigned int skb_gro_offset(const struct sk_buff *skb) 137 { 138 return NAPI_GRO_CB(skb)->data_offset; 139 } 140 141 static inline unsigned int skb_gro_len(const struct sk_buff *skb) 142 { 143 return skb->len - NAPI_GRO_CB(skb)->data_offset; 144 } 145 146 static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len) 147 { 148 NAPI_GRO_CB(skb)->data_offset += len; 149 } 150 151 static inline void *skb_gro_header_fast(const struct sk_buff *skb, 152 unsigned int offset) 153 { 154 return NAPI_GRO_CB(skb)->frag0 + offset; 155 } 156 157 static inline bool skb_gro_may_pull(const struct sk_buff *skb, 158 unsigned int hlen) 159 { 160 return likely(hlen <= NAPI_GRO_CB(skb)->frag0_len); 161 } 162 163 static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, 164 unsigned int offset) 165 { 166 if (!pskb_may_pull(skb, hlen)) 167 return NULL; 168 169 return skb->data + offset; 170 } 171 172 static inline void *skb_gro_header(struct sk_buff *skb, unsigned int hlen, 173 unsigned int offset) 174 { 175 void *ptr; 176 177 ptr = skb_gro_header_fast(skb, offset); 178 if (!skb_gro_may_pull(skb, hlen)) 179 ptr = skb_gro_header_slow(skb, hlen, offset); 180 return ptr; 181 } 182 183 static inline int skb_gro_receive_network_offset(const struct sk_buff *skb) 184 { 185 return NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark]; 186 } 187 188 static inline void *skb_gro_network_header(const struct sk_buff *skb) 189 { 190 if (skb_gro_may_pull(skb, skb_gro_offset(skb))) 191 return skb_gro_header_fast(skb, skb_gro_receive_network_offset(skb)); 192 193 return skb->data + skb_gro_receive_network_offset(skb); 194 } 195 196 static inline __wsum inet_gro_compute_pseudo(const struct sk_buff *skb, 197 int proto) 198 { 199 const struct iphdr *iph = skb_gro_network_header(skb); 200 201 return csum_tcpudp_nofold(iph->saddr, iph->daddr, 202 skb_gro_len(skb), proto, 0); 203 } 204 205 static inline void skb_gro_postpull_rcsum(struct sk_buff *skb, 206 const void *start, unsigned int len) 207 { 208 if (NAPI_GRO_CB(skb)->csum_valid) 209 NAPI_GRO_CB(skb)->csum = wsum_negate(csum_partial(start, len, 210 wsum_negate(NAPI_GRO_CB(skb)->csum))); 211 } 212 213 /* GRO checksum functions. These are logical equivalents of the normal 214 * checksum functions (in skbuff.h) except that they operate on the GRO 215 * offsets and fields in sk_buff. 216 */ 217 218 __sum16 __skb_gro_checksum_complete(struct sk_buff *skb); 219 220 static inline bool skb_at_gro_remcsum_start(struct sk_buff *skb) 221 { 222 return (NAPI_GRO_CB(skb)->gro_remcsum_start == skb_gro_offset(skb)); 223 } 224 225 static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb, 226 bool zero_okay, 227 __sum16 check) 228 { 229 return ((skb->ip_summed != CHECKSUM_PARTIAL || 230 skb_checksum_start_offset(skb) < 231 skb_gro_offset(skb)) && 232 !skb_at_gro_remcsum_start(skb) && 233 NAPI_GRO_CB(skb)->csum_cnt == 0 && 234 (!zero_okay || check)); 235 } 236 237 static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb, 238 __wsum psum) 239 { 240 if (NAPI_GRO_CB(skb)->csum_valid && 241 !csum_fold(csum_add(psum, NAPI_GRO_CB(skb)->csum))) 242 return 0; 243 244 NAPI_GRO_CB(skb)->csum = psum; 245 246 return __skb_gro_checksum_complete(skb); 247 } 248 249 static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb) 250 { 251 if (NAPI_GRO_CB(skb)->csum_cnt > 0) { 252 /* Consume a checksum from CHECKSUM_UNNECESSARY */ 253 NAPI_GRO_CB(skb)->csum_cnt--; 254 } else { 255 /* Update skb for CHECKSUM_UNNECESSARY and csum_level when we 256 * verified a new top level checksum or an encapsulated one 257 * during GRO. This saves work if we fallback to normal path. 258 */ 259 __skb_incr_checksum_unnecessary(skb); 260 } 261 } 262 263 #define __skb_gro_checksum_validate(skb, proto, zero_okay, check, \ 264 compute_pseudo) \ 265 ({ \ 266 __sum16 __ret = 0; \ 267 if (__skb_gro_checksum_validate_needed(skb, zero_okay, check)) \ 268 __ret = __skb_gro_checksum_validate_complete(skb, \ 269 compute_pseudo(skb, proto)); \ 270 if (!__ret) \ 271 skb_gro_incr_csum_unnecessary(skb); \ 272 __ret; \ 273 }) 274 275 #define skb_gro_checksum_validate(skb, proto, compute_pseudo) \ 276 __skb_gro_checksum_validate(skb, proto, false, 0, compute_pseudo) 277 278 #define skb_gro_checksum_validate_zero_check(skb, proto, check, \ 279 compute_pseudo) \ 280 __skb_gro_checksum_validate(skb, proto, true, check, compute_pseudo) 281 282 #define skb_gro_checksum_simple_validate(skb) \ 283 __skb_gro_checksum_validate(skb, 0, false, 0, null_compute_pseudo) 284 285 static inline bool __skb_gro_checksum_convert_check(struct sk_buff *skb) 286 { 287 return (NAPI_GRO_CB(skb)->csum_cnt == 0 && 288 !NAPI_GRO_CB(skb)->csum_valid); 289 } 290 291 static inline void __skb_gro_checksum_convert(struct sk_buff *skb, 292 __wsum pseudo) 293 { 294 NAPI_GRO_CB(skb)->csum = ~pseudo; 295 NAPI_GRO_CB(skb)->csum_valid = 1; 296 } 297 298 #define skb_gro_checksum_try_convert(skb, proto, compute_pseudo) \ 299 do { \ 300 if (__skb_gro_checksum_convert_check(skb)) \ 301 __skb_gro_checksum_convert(skb, \ 302 compute_pseudo(skb, proto)); \ 303 } while (0) 304 305 struct gro_remcsum { 306 int offset; 307 __wsum delta; 308 }; 309 310 static inline void skb_gro_remcsum_init(struct gro_remcsum *grc) 311 { 312 grc->offset = 0; 313 grc->delta = 0; 314 } 315 316 static inline void *skb_gro_remcsum_process(struct sk_buff *skb, void *ptr, 317 unsigned int off, size_t hdrlen, 318 int start, int offset, 319 struct gro_remcsum *grc, 320 bool nopartial) 321 { 322 __wsum delta; 323 size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start); 324 325 BUG_ON(!NAPI_GRO_CB(skb)->csum_valid); 326 327 if (!nopartial) { 328 NAPI_GRO_CB(skb)->gro_remcsum_start = off + hdrlen + start; 329 return ptr; 330 } 331 332 ptr = skb_gro_header(skb, off + plen, off); 333 if (!ptr) 334 return NULL; 335 336 delta = remcsum_adjust(ptr + hdrlen, NAPI_GRO_CB(skb)->csum, 337 start, offset); 338 339 /* Adjust skb->csum since we changed the packet */ 340 NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta); 341 342 grc->offset = off + hdrlen + offset; 343 grc->delta = delta; 344 345 return ptr; 346 } 347 348 static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb, 349 struct gro_remcsum *grc) 350 { 351 void *ptr; 352 size_t plen = grc->offset + sizeof(u16); 353 354 if (!grc->delta) 355 return; 356 357 ptr = skb_gro_header(skb, plen, grc->offset); 358 if (!ptr) 359 return; 360 361 remcsum_unadjust((__sum16 *)ptr, grc->delta); 362 } 363 364 #ifdef CONFIG_XFRM_OFFLOAD 365 static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush) 366 { 367 if (PTR_ERR(pp) != -EINPROGRESS) 368 NAPI_GRO_CB(skb)->flush |= flush; 369 } 370 static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb, 371 struct sk_buff *pp, 372 int flush, 373 struct gro_remcsum *grc) 374 { 375 if (PTR_ERR(pp) != -EINPROGRESS) { 376 NAPI_GRO_CB(skb)->flush |= flush; 377 skb_gro_remcsum_cleanup(skb, grc); 378 skb->remcsum_offload = 0; 379 } 380 } 381 #else 382 static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush) 383 { 384 NAPI_GRO_CB(skb)->flush |= flush; 385 } 386 static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb, 387 struct sk_buff *pp, 388 int flush, 389 struct gro_remcsum *grc) 390 { 391 NAPI_GRO_CB(skb)->flush |= flush; 392 skb_gro_remcsum_cleanup(skb, grc); 393 skb->remcsum_offload = 0; 394 } 395 #endif 396 397 INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *, 398 struct sk_buff *)); 399 INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int)); 400 INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *, 401 struct sk_buff *)); 402 INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int)); 403 404 INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *, 405 struct sk_buff *)); 406 INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int)); 407 408 INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *, 409 struct sk_buff *)); 410 INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int)); 411 412 #define indirect_call_gro_receive_inet(cb, f2, f1, head, skb) \ 413 ({ \ 414 unlikely(gro_recursion_inc_test(skb)) ? \ 415 NAPI_GRO_CB(skb)->flush |= 1, NULL : \ 416 INDIRECT_CALL_INET(cb, f2, f1, head, skb); \ 417 }) 418 419 struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, 420 struct udphdr *uh, struct sock *sk); 421 int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup); 422 423 static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb) 424 { 425 struct udphdr *uh; 426 unsigned int hlen, off; 427 428 off = skb_gro_offset(skb); 429 hlen = off + sizeof(*uh); 430 uh = skb_gro_header(skb, hlen, off); 431 432 return uh; 433 } 434 435 static inline __wsum ip6_gro_compute_pseudo(const struct sk_buff *skb, 436 int proto) 437 { 438 const struct ipv6hdr *iph = skb_gro_network_header(skb); 439 440 return ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr, 441 skb_gro_len(skb), proto, 0)); 442 } 443 444 static inline int inet_gro_flush(const struct iphdr *iph, const struct iphdr *iph2, 445 struct sk_buff *p, bool outer) 446 { 447 const u32 id = ntohl(*(__be32 *)&iph->id); 448 const u32 id2 = ntohl(*(__be32 *)&iph2->id); 449 const u16 ipid_offset = (id >> 16) - (id2 >> 16); 450 const u16 count = NAPI_GRO_CB(p)->count; 451 const u32 df = id & IP_DF; 452 int flush; 453 454 /* All fields must match except length and checksum. */ 455 flush = (iph->ttl ^ iph2->ttl) | (iph->tos ^ iph2->tos) | (df ^ (id2 & IP_DF)); 456 457 if (flush | (outer && df)) 458 return flush; 459 460 /* When we receive our second frame we can make a decision on if we 461 * continue this flow as an atomic flow with a fixed ID or if we use 462 * an incrementing ID. 463 */ 464 if (count == 1 && df && !ipid_offset) 465 NAPI_GRO_CB(p)->ip_fixedid = true; 466 467 return ipid_offset ^ (count * !NAPI_GRO_CB(p)->ip_fixedid); 468 } 469 470 static inline int ipv6_gro_flush(const struct ipv6hdr *iph, const struct ipv6hdr *iph2) 471 { 472 /* <Version:4><Traffic_Class:8><Flow_Label:20> */ 473 __be32 first_word = *(__be32 *)iph ^ *(__be32 *)iph2; 474 475 /* Flush if Traffic Class fields are different. */ 476 return !!((first_word & htonl(0x0FF00000)) | 477 (__force __be32)(iph->hop_limit ^ iph2->hop_limit)); 478 } 479 480 static inline int __gro_receive_network_flush(const void *th, const void *th2, 481 struct sk_buff *p, const u16 diff, 482 bool outer) 483 { 484 const void *nh = th - diff; 485 const void *nh2 = th2 - diff; 486 487 if (((struct iphdr *)nh)->version == 6) 488 return ipv6_gro_flush(nh, nh2); 489 else 490 return inet_gro_flush(nh, nh2, p, outer); 491 } 492 493 static inline int gro_receive_network_flush(const void *th, const void *th2, 494 struct sk_buff *p) 495 { 496 const bool encap_mark = NAPI_GRO_CB(p)->encap_mark; 497 int off = skb_transport_offset(p); 498 int flush; 499 500 flush = __gro_receive_network_flush(th, th2, p, off - NAPI_GRO_CB(p)->network_offset, encap_mark); 501 if (encap_mark) 502 flush |= __gro_receive_network_flush(th, th2, p, off - NAPI_GRO_CB(p)->inner_network_offset, false); 503 504 return flush; 505 } 506 507 int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb); 508 int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb); 509 510 /* Pass the currently batched GRO_NORMAL SKBs up to the stack. */ 511 static inline void gro_normal_list(struct napi_struct *napi) 512 { 513 if (!napi->rx_count) 514 return; 515 netif_receive_skb_list_internal(&napi->rx_list); 516 INIT_LIST_HEAD(&napi->rx_list); 517 napi->rx_count = 0; 518 } 519 520 /* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded, 521 * pass the whole batch up to the stack. 522 */ 523 static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, int segs) 524 { 525 list_add_tail(&skb->list, &napi->rx_list); 526 napi->rx_count += segs; 527 if (napi->rx_count >= READ_ONCE(net_hotdata.gro_normal_batch)) 528 gro_normal_list(napi); 529 } 530 531 /* This function is the alternative of 'inet_iif' and 'inet_sdif' 532 * functions in case we can not rely on fields of IPCB. 533 * 534 * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized. 535 * The caller must hold the RCU read lock. 536 */ 537 static inline void inet_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif) 538 { 539 *iif = inet_iif(skb) ?: skb->dev->ifindex; 540 *sdif = 0; 541 542 #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) 543 if (netif_is_l3_slave(skb->dev)) { 544 struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev); 545 546 *sdif = *iif; 547 *iif = master ? master->ifindex : 0; 548 } 549 #endif 550 } 551 552 /* This function is the alternative of 'inet6_iif' and 'inet6_sdif' 553 * functions in case we can not rely on fields of IP6CB. 554 * 555 * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized. 556 * The caller must hold the RCU read lock. 557 */ 558 static inline void inet6_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif) 559 { 560 /* using skb->dev->ifindex because skb_dst(skb) is not initialized */ 561 *iif = skb->dev->ifindex; 562 *sdif = 0; 563 564 #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) 565 if (netif_is_l3_slave(skb->dev)) { 566 struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev); 567 568 *sdif = *iif; 569 *iif = master ? master->ifindex : 0; 570 } 571 #endif 572 } 573 574 struct packet_offload *gro_find_receive_by_type(__be16 type); 575 struct packet_offload *gro_find_complete_by_type(__be16 type); 576 577 #endif /* _NET_GRO_H */ 578