12874c5fdSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later 228850dc7SDaniel Borkmann /* 328850dc7SDaniel Borkmann * IPV4 GSO/GRO offload support 428850dc7SDaniel Borkmann * Linux INET implementation 528850dc7SDaniel Borkmann * 628850dc7SDaniel Borkmann * TCPv4 GSO/GRO support 728850dc7SDaniel Borkmann */ 828850dc7SDaniel Borkmann 9028e0a47SPaolo Abeni #include <linux/indirect_call_wrapper.h> 1028850dc7SDaniel Borkmann #include <linux/skbuff.h> 114721031cSEric Dumazet #include <net/gro.h> 12d457a0e3SEric Dumazet #include <net/gso.h> 1328850dc7SDaniel Borkmann #include <net/tcp.h> 1428850dc7SDaniel Borkmann #include <net/protocol.h> 1528850dc7SDaniel Borkmann 16f066e2b0SWillem de Bruijn static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, 17f066e2b0SWillem de Bruijn unsigned int seq, unsigned int mss) 184ed2d765SWillem de Bruijn { 194ed2d765SWillem de Bruijn while (skb) { 20f066e2b0SWillem de Bruijn if (before(ts_seq, seq + mss)) { 21f066e2b0SWillem de Bruijn skb_shinfo(skb)->tx_flags |= SKBTX_SW_TSTAMP; 224ed2d765SWillem de Bruijn skb_shinfo(skb)->tskey = ts_seq; 234ed2d765SWillem de Bruijn return; 244ed2d765SWillem de Bruijn } 254ed2d765SWillem de Bruijn 264ed2d765SWillem de Bruijn skb = skb->next; 274ed2d765SWillem de Bruijn seq += mss; 284ed2d765SWillem de Bruijn } 294ed2d765SWillem de Bruijn } 304ed2d765SWillem de Bruijn 3174abc20cSEric Dumazet static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb, 32d020f8f7STom Herbert netdev_features_t features) 33d020f8f7STom Herbert { 34121d57afSWillem de Bruijn if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)) 35121d57afSWillem de Bruijn return ERR_PTR(-EINVAL); 36121d57afSWillem de Bruijn 37d020f8f7STom Herbert if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 38d020f8f7STom Herbert return ERR_PTR(-EINVAL); 39d020f8f7STom Herbert 40d020f8f7STom Herbert if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { 41d020f8f7STom Herbert const struct iphdr *iph = ip_hdr(skb); 42d020f8f7STom Herbert struct tcphdr *th = tcp_hdr(skb); 43d020f8f7STom Herbert 44d020f8f7STom Herbert /* Set up checksum pseudo header, usually expect stack to 45d020f8f7STom Herbert * have done this already. 46d020f8f7STom Herbert */ 47d020f8f7STom Herbert 48d020f8f7STom Herbert th->check = 0; 49d020f8f7STom Herbert skb->ip_summed = CHECKSUM_PARTIAL; 50d020f8f7STom Herbert __tcp_v4_send_check(skb, iph->saddr, iph->daddr); 51d020f8f7STom Herbert } 52d020f8f7STom Herbert 53d020f8f7STom Herbert return tcp_gso_segment(skb, features); 54d020f8f7STom Herbert } 55d020f8f7STom Herbert 5628be6e07SEric Dumazet struct sk_buff *tcp_gso_segment(struct sk_buff *skb, 5728850dc7SDaniel Borkmann netdev_features_t features) 5828850dc7SDaniel Borkmann { 5928850dc7SDaniel Borkmann struct sk_buff *segs = ERR_PTR(-EINVAL); 600d08c42cSEric Dumazet unsigned int sum_truesize = 0; 6128850dc7SDaniel Borkmann struct tcphdr *th; 6228850dc7SDaniel Borkmann unsigned int thlen; 6328850dc7SDaniel Borkmann unsigned int seq; 6428850dc7SDaniel Borkmann unsigned int oldlen; 6528850dc7SDaniel Borkmann unsigned int mss; 6628850dc7SDaniel Borkmann struct sk_buff *gso_skb = skb; 6728850dc7SDaniel Borkmann __sum16 newcheck; 6828850dc7SDaniel Borkmann bool ooo_okay, copy_destructor; 6982a01ab3SEric Dumazet __wsum delta; 7028850dc7SDaniel Borkmann 7128850dc7SDaniel Borkmann th = tcp_hdr(skb); 7228850dc7SDaniel Borkmann thlen = th->doff * 4; 7328850dc7SDaniel Borkmann if (thlen < sizeof(*th)) 7428850dc7SDaniel Borkmann goto out; 7528850dc7SDaniel Borkmann 7628850dc7SDaniel Borkmann if (!pskb_may_pull(skb, thlen)) 7728850dc7SDaniel Borkmann goto out; 7828850dc7SDaniel Borkmann 7982a01ab3SEric Dumazet oldlen = ~skb->len; 8028850dc7SDaniel Borkmann __skb_pull(skb, thlen); 8128850dc7SDaniel Borkmann 82a7eea416SEric Dumazet mss = skb_shinfo(skb)->gso_size; 8328850dc7SDaniel Borkmann if (unlikely(skb->len <= mss)) 8428850dc7SDaniel Borkmann goto out; 8528850dc7SDaniel Borkmann 8628850dc7SDaniel Borkmann if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { 8728850dc7SDaniel Borkmann /* Packet is from an untrusted source, reset gso_segs. */ 8828850dc7SDaniel Borkmann 8928850dc7SDaniel Borkmann skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); 9028850dc7SDaniel Borkmann 9128850dc7SDaniel Borkmann segs = NULL; 9228850dc7SDaniel Borkmann goto out; 9328850dc7SDaniel Borkmann } 9428850dc7SDaniel Borkmann 9528850dc7SDaniel Borkmann copy_destructor = gso_skb->destructor == tcp_wfree; 9628850dc7SDaniel Borkmann ooo_okay = gso_skb->ooo_okay; 9728850dc7SDaniel Borkmann /* All segments but the first should have ooo_okay cleared */ 9828850dc7SDaniel Borkmann skb->ooo_okay = 0; 9928850dc7SDaniel Borkmann 10028850dc7SDaniel Borkmann segs = skb_segment(skb, features); 10128850dc7SDaniel Borkmann if (IS_ERR(segs)) 10228850dc7SDaniel Borkmann goto out; 10328850dc7SDaniel Borkmann 10428850dc7SDaniel Borkmann /* Only first segment might have ooo_okay set */ 10528850dc7SDaniel Borkmann segs->ooo_okay = ooo_okay; 10628850dc7SDaniel Borkmann 10707b26c94SSteffen Klassert /* GSO partial and frag_list segmentation only requires splitting 10807b26c94SSteffen Klassert * the frame into an MSS multiple and possibly a remainder, both 10907b26c94SSteffen Klassert * cases return a GSO skb. So update the mss now. 11007b26c94SSteffen Klassert */ 11107b26c94SSteffen Klassert if (skb_is_gso(segs)) 11207b26c94SSteffen Klassert mss *= skb_shinfo(segs)->gso_segs; 11307b26c94SSteffen Klassert 11482a01ab3SEric Dumazet delta = (__force __wsum)htonl(oldlen + thlen + mss); 11528850dc7SDaniel Borkmann 11628850dc7SDaniel Borkmann skb = segs; 11728850dc7SDaniel Borkmann th = tcp_hdr(skb); 11828850dc7SDaniel Borkmann seq = ntohl(th->seq); 11928850dc7SDaniel Borkmann 1204ed2d765SWillem de Bruijn if (unlikely(skb_shinfo(gso_skb)->tx_flags & SKBTX_SW_TSTAMP)) 1214ed2d765SWillem de Bruijn tcp_gso_tstamp(segs, skb_shinfo(gso_skb)->tskey, seq, mss); 1224ed2d765SWillem de Bruijn 12382a01ab3SEric Dumazet newcheck = ~csum_fold(csum_add(csum_unfold(th->check), delta)); 12428850dc7SDaniel Borkmann 125802ab55aSAlexander Duyck while (skb->next) { 12628850dc7SDaniel Borkmann th->fin = th->psh = 0; 12728850dc7SDaniel Borkmann th->check = newcheck; 12828850dc7SDaniel Borkmann 12908b64fccSAlexander Duyck if (skb->ip_summed == CHECKSUM_PARTIAL) 13008b64fccSAlexander Duyck gso_reset_checksum(skb, ~th->check); 13108b64fccSAlexander Duyck else 132e9c3a24bSTom Herbert th->check = gso_make_checksum(skb, ~th->check); 13328850dc7SDaniel Borkmann 13428850dc7SDaniel Borkmann seq += mss; 13528850dc7SDaniel Borkmann if (copy_destructor) { 13628850dc7SDaniel Borkmann skb->destructor = gso_skb->destructor; 13728850dc7SDaniel Borkmann skb->sk = gso_skb->sk; 1380d08c42cSEric Dumazet sum_truesize += skb->truesize; 13928850dc7SDaniel Borkmann } 14028850dc7SDaniel Borkmann skb = skb->next; 14128850dc7SDaniel Borkmann th = tcp_hdr(skb); 14228850dc7SDaniel Borkmann 14328850dc7SDaniel Borkmann th->seq = htonl(seq); 14428850dc7SDaniel Borkmann th->cwr = 0; 145802ab55aSAlexander Duyck } 14628850dc7SDaniel Borkmann 14728850dc7SDaniel Borkmann /* Following permits TCP Small Queues to work well with GSO : 14828850dc7SDaniel Borkmann * The callback to TCP stack will be called at the time last frag 14928850dc7SDaniel Borkmann * is freed at TX completion, and not right now when gso_skb 15028850dc7SDaniel Borkmann * is freed by GSO engine 15128850dc7SDaniel Borkmann */ 15228850dc7SDaniel Borkmann if (copy_destructor) { 1537ec318feSEric Dumazet int delta; 1547ec318feSEric Dumazet 15528850dc7SDaniel Borkmann swap(gso_skb->sk, skb->sk); 15628850dc7SDaniel Borkmann swap(gso_skb->destructor, skb->destructor); 1570d08c42cSEric Dumazet sum_truesize += skb->truesize; 1587ec318feSEric Dumazet delta = sum_truesize - gso_skb->truesize; 1597ec318feSEric Dumazet /* In some pathological cases, delta can be negative. 1607ec318feSEric Dumazet * We need to either use refcount_add() or refcount_sub_and_test() 1617ec318feSEric Dumazet */ 1627ec318feSEric Dumazet if (likely(delta >= 0)) 1637ec318feSEric Dumazet refcount_add(delta, &skb->sk->sk_wmem_alloc); 1647ec318feSEric Dumazet else 1657ec318feSEric Dumazet WARN_ON_ONCE(refcount_sub_and_test(-delta, &skb->sk->sk_wmem_alloc)); 16628850dc7SDaniel Borkmann } 16728850dc7SDaniel Borkmann 16882a01ab3SEric Dumazet delta = (__force __wsum)htonl(oldlen + 16982a01ab3SEric Dumazet (skb_tail_pointer(skb) - 17028850dc7SDaniel Borkmann skb_transport_header(skb)) + 17128850dc7SDaniel Borkmann skb->data_len); 17282a01ab3SEric Dumazet th->check = ~csum_fold(csum_add(csum_unfold(th->check), delta)); 17308b64fccSAlexander Duyck if (skb->ip_summed == CHECKSUM_PARTIAL) 17408b64fccSAlexander Duyck gso_reset_checksum(skb, ~th->check); 17508b64fccSAlexander Duyck else 176e9c3a24bSTom Herbert th->check = gso_make_checksum(skb, ~th->check); 17728850dc7SDaniel Borkmann out: 17828850dc7SDaniel Borkmann return segs; 17928850dc7SDaniel Borkmann } 18028850dc7SDaniel Borkmann 181d4546c25SDavid Miller struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb) 18228850dc7SDaniel Borkmann { 183d4546c25SDavid Miller struct sk_buff *pp = NULL; 18428850dc7SDaniel Borkmann struct sk_buff *p; 18528850dc7SDaniel Borkmann struct tcphdr *th; 18628850dc7SDaniel Borkmann struct tcphdr *th2; 18728850dc7SDaniel Borkmann unsigned int len; 18828850dc7SDaniel Borkmann unsigned int thlen; 18928850dc7SDaniel Borkmann __be32 flags; 19028850dc7SDaniel Borkmann unsigned int mss = 1; 19128850dc7SDaniel Borkmann unsigned int hlen; 19228850dc7SDaniel Borkmann unsigned int off; 19328850dc7SDaniel Borkmann int flush = 1; 19428850dc7SDaniel Borkmann int i; 19528850dc7SDaniel Borkmann 19628850dc7SDaniel Borkmann off = skb_gro_offset(skb); 19728850dc7SDaniel Borkmann hlen = off + sizeof(*th); 19835ffb665SRichard Gobert th = skb_gro_header(skb, hlen, off); 19928850dc7SDaniel Borkmann if (unlikely(!th)) 20028850dc7SDaniel Borkmann goto out; 20128850dc7SDaniel Borkmann 20228850dc7SDaniel Borkmann thlen = th->doff * 4; 20328850dc7SDaniel Borkmann if (thlen < sizeof(*th)) 20428850dc7SDaniel Borkmann goto out; 20528850dc7SDaniel Borkmann 20628850dc7SDaniel Borkmann hlen = off + thlen; 20793e16ea0SEric Dumazet if (!skb_gro_may_pull(skb, hlen)) { 20828850dc7SDaniel Borkmann th = skb_gro_header_slow(skb, hlen, off); 20928850dc7SDaniel Borkmann if (unlikely(!th)) 21028850dc7SDaniel Borkmann goto out; 21128850dc7SDaniel Borkmann } 21228850dc7SDaniel Borkmann 21328850dc7SDaniel Borkmann skb_gro_pull(skb, thlen); 21428850dc7SDaniel Borkmann 21528850dc7SDaniel Borkmann len = skb_gro_len(skb); 21628850dc7SDaniel Borkmann flags = tcp_flag_word(th); 21728850dc7SDaniel Borkmann 218d4546c25SDavid Miller list_for_each_entry(p, head, list) { 21928850dc7SDaniel Borkmann if (!NAPI_GRO_CB(p)->same_flow) 22028850dc7SDaniel Borkmann continue; 22128850dc7SDaniel Borkmann 22228850dc7SDaniel Borkmann th2 = tcp_hdr(p); 22328850dc7SDaniel Borkmann 22428850dc7SDaniel Borkmann if (*(u32 *)&th->source ^ *(u32 *)&th2->source) { 22528850dc7SDaniel Borkmann NAPI_GRO_CB(p)->same_flow = 0; 22628850dc7SDaniel Borkmann continue; 22728850dc7SDaniel Borkmann } 22828850dc7SDaniel Borkmann 22928850dc7SDaniel Borkmann goto found; 23028850dc7SDaniel Borkmann } 231d4546c25SDavid Miller p = NULL; 23228850dc7SDaniel Borkmann goto out_check_final; 23328850dc7SDaniel Borkmann 23428850dc7SDaniel Borkmann found: 235bf5a755fSJerry Chu /* Include the IP ID check below from the inner most IP hdr */ 2361530545eSAlexander Duyck flush = NAPI_GRO_CB(p)->flush; 23728850dc7SDaniel Borkmann flush |= (__force int)(flags & TCP_FLAG_CWR); 23828850dc7SDaniel Borkmann flush |= (__force int)((flags ^ tcp_flag_word(th2)) & 23928850dc7SDaniel Borkmann ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH)); 24028850dc7SDaniel Borkmann flush |= (__force int)(th->ack_seq ^ th2->ack_seq); 24128850dc7SDaniel Borkmann for (i = sizeof(*th); i < thlen; i += 4) 24228850dc7SDaniel Borkmann flush |= *(u32 *)((u8 *)th + i) ^ 24328850dc7SDaniel Borkmann *(u32 *)((u8 *)th2 + i); 24428850dc7SDaniel Borkmann 2451530545eSAlexander Duyck /* When we receive our second frame we can made a decision on if we 2461530545eSAlexander Duyck * continue this flow as an atomic flow with a fixed ID or if we use 2471530545eSAlexander Duyck * an incrementing ID. 2481530545eSAlexander Duyck */ 2491530545eSAlexander Duyck if (NAPI_GRO_CB(p)->flush_id != 1 || 2501530545eSAlexander Duyck NAPI_GRO_CB(p)->count != 1 || 2511530545eSAlexander Duyck !NAPI_GRO_CB(p)->is_atomic) 2521530545eSAlexander Duyck flush |= NAPI_GRO_CB(p)->flush_id; 2531530545eSAlexander Duyck else 2541530545eSAlexander Duyck NAPI_GRO_CB(p)->is_atomic = false; 2551530545eSAlexander Duyck 256a7eea416SEric Dumazet mss = skb_shinfo(p)->gso_size; 25728850dc7SDaniel Borkmann 2585eddb249SCoco Li /* If skb is a GRO packet, make sure its gso_size matches prior packet mss. 2595eddb249SCoco Li * If it is a single frame, do not aggregate it if its length 2605eddb249SCoco Li * is bigger than our mss. 2615eddb249SCoco Li */ 2625eddb249SCoco Li if (unlikely(skb_is_gso(skb))) 2635eddb249SCoco Li flush |= (mss != skb_shinfo(skb)->gso_size); 2645eddb249SCoco Li else 26528850dc7SDaniel Borkmann flush |= (len - 1) >= mss; 2665eddb249SCoco Li 26728850dc7SDaniel Borkmann flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); 26841ed9c04SBoris Pismenny #ifdef CONFIG_TLS_DEVICE 26941ed9c04SBoris Pismenny flush |= p->decrypted ^ skb->decrypted; 27041ed9c04SBoris Pismenny #endif 27128850dc7SDaniel Borkmann 272d4546c25SDavid Miller if (flush || skb_gro_receive(p, skb)) { 27328850dc7SDaniel Borkmann mss = 1; 27428850dc7SDaniel Borkmann goto out_check_final; 27528850dc7SDaniel Borkmann } 27628850dc7SDaniel Borkmann 27728850dc7SDaniel Borkmann tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); 27828850dc7SDaniel Borkmann 27928850dc7SDaniel Borkmann out_check_final: 2805eddb249SCoco Li /* Force a flush if last segment is smaller than mss. */ 2815eddb249SCoco Li if (unlikely(skb_is_gso(skb))) 2825eddb249SCoco Li flush = len != NAPI_GRO_CB(skb)->count * skb_shinfo(skb)->gso_size; 2835eddb249SCoco Li else 28428850dc7SDaniel Borkmann flush = len < mss; 2855eddb249SCoco Li 28628850dc7SDaniel Borkmann flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH | 28728850dc7SDaniel Borkmann TCP_FLAG_RST | TCP_FLAG_SYN | 28828850dc7SDaniel Borkmann TCP_FLAG_FIN)); 28928850dc7SDaniel Borkmann 29028850dc7SDaniel Borkmann if (p && (!NAPI_GRO_CB(skb)->same_flow || flush)) 291d4546c25SDavid Miller pp = p; 29228850dc7SDaniel Borkmann 29328850dc7SDaniel Borkmann out: 294bf5a755fSJerry Chu NAPI_GRO_CB(skb)->flush |= (flush != 0); 29528850dc7SDaniel Borkmann 29628850dc7SDaniel Borkmann return pp; 29728850dc7SDaniel Borkmann } 29828850dc7SDaniel Borkmann 299b1f2abcfSParav Pandit void tcp_gro_complete(struct sk_buff *skb) 30028850dc7SDaniel Borkmann { 30128850dc7SDaniel Borkmann struct tcphdr *th = tcp_hdr(skb); 3028f78010bSEric Dumazet struct skb_shared_info *shinfo; 3038f78010bSEric Dumazet 3048f78010bSEric Dumazet if (skb->encapsulation) 3058f78010bSEric Dumazet skb->inner_transport_header = skb->transport_header; 30628850dc7SDaniel Borkmann 307299603e8SJerry Chu skb->csum_start = (unsigned char *)th - skb->head; 30828850dc7SDaniel Borkmann skb->csum_offset = offsetof(struct tcphdr, check); 30928850dc7SDaniel Borkmann skb->ip_summed = CHECKSUM_PARTIAL; 31028850dc7SDaniel Borkmann 3118f78010bSEric Dumazet shinfo = skb_shinfo(skb); 3128f78010bSEric Dumazet shinfo->gso_segs = NAPI_GRO_CB(skb)->count; 31328850dc7SDaniel Borkmann 31428850dc7SDaniel Borkmann if (th->cwr) 3158f78010bSEric Dumazet shinfo->gso_type |= SKB_GSO_TCP_ECN; 31628850dc7SDaniel Borkmann } 31728850dc7SDaniel Borkmann EXPORT_SYMBOL(tcp_gro_complete); 31828850dc7SDaniel Borkmann 319028e0a47SPaolo Abeni INDIRECT_CALLABLE_SCOPE 320028e0a47SPaolo Abeni struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb) 32128850dc7SDaniel Borkmann { 322cc5c00bbSHerbert Xu /* Don't bother verifying checksum if we're going to flush anyway. */ 323149d0774STom Herbert if (!NAPI_GRO_CB(skb)->flush && 324149d0774STom Herbert skb_gro_checksum_validate(skb, IPPROTO_TCP, 325149d0774STom Herbert inet_gro_compute_pseudo)) { 32628850dc7SDaniel Borkmann NAPI_GRO_CB(skb)->flush = 1; 32728850dc7SDaniel Borkmann return NULL; 32828850dc7SDaniel Borkmann } 32928850dc7SDaniel Borkmann 33028850dc7SDaniel Borkmann return tcp_gro_receive(head, skb); 33128850dc7SDaniel Borkmann } 33228850dc7SDaniel Borkmann 333028e0a47SPaolo Abeni INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff) 33428850dc7SDaniel Borkmann { 33528850dc7SDaniel Borkmann const struct iphdr *iph = ip_hdr(skb); 33628850dc7SDaniel Borkmann struct tcphdr *th = tcp_hdr(skb); 33728850dc7SDaniel Borkmann 338299603e8SJerry Chu th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr, 339299603e8SJerry Chu iph->daddr, 0); 34028850dc7SDaniel Borkmann 3418f78010bSEric Dumazet skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4 | 3428f78010bSEric Dumazet (NAPI_GRO_CB(skb)->is_atomic * SKB_GSO_TCP_FIXEDID); 3431530545eSAlexander Duyck 344b1f2abcfSParav Pandit tcp_gro_complete(skb); 345b1f2abcfSParav Pandit return 0; 34628850dc7SDaniel Borkmann } 34728850dc7SDaniel Borkmann 348*0139806eSEric Dumazet int __init tcpv4_offload_init(void) 349*0139806eSEric Dumazet { 350*0139806eSEric Dumazet net_hotdata.tcpv4_offload = (struct net_offload) { 35128850dc7SDaniel Borkmann .callbacks = { 352d020f8f7STom Herbert .gso_segment = tcp4_gso_segment, 35328850dc7SDaniel Borkmann .gro_receive = tcp4_gro_receive, 35428850dc7SDaniel Borkmann .gro_complete = tcp4_gro_complete, 35528850dc7SDaniel Borkmann }, 35628850dc7SDaniel Borkmann }; 357*0139806eSEric Dumazet return inet_add_offload(&net_hotdata.tcpv4_offload, IPPROTO_TCP); 35828850dc7SDaniel Borkmann } 359