128850dc7SDaniel Borkmann /* 228850dc7SDaniel Borkmann * IPV4 GSO/GRO offload support 328850dc7SDaniel Borkmann * Linux INET implementation 428850dc7SDaniel Borkmann * 528850dc7SDaniel Borkmann * This program is free software; you can redistribute it and/or 628850dc7SDaniel Borkmann * modify it under the terms of the GNU General Public License 728850dc7SDaniel Borkmann * as published by the Free Software Foundation; either version 828850dc7SDaniel Borkmann * 2 of the License, or (at your option) any later version. 928850dc7SDaniel Borkmann * 1028850dc7SDaniel Borkmann * TCPv4 GSO/GRO support 1128850dc7SDaniel Borkmann */ 1228850dc7SDaniel Borkmann 1328850dc7SDaniel Borkmann #include <linux/skbuff.h> 1428850dc7SDaniel Borkmann #include <net/tcp.h> 1528850dc7SDaniel Borkmann #include <net/protocol.h> 1628850dc7SDaniel Borkmann 1728be6e07SEric Dumazet struct sk_buff *tcp_gso_segment(struct sk_buff *skb, 1828850dc7SDaniel Borkmann netdev_features_t features) 1928850dc7SDaniel Borkmann { 2028850dc7SDaniel Borkmann struct sk_buff *segs = ERR_PTR(-EINVAL); 210d08c42cSEric Dumazet unsigned int sum_truesize = 0; 2228850dc7SDaniel Borkmann struct tcphdr *th; 2328850dc7SDaniel Borkmann unsigned int thlen; 2428850dc7SDaniel Borkmann unsigned int seq; 2528850dc7SDaniel Borkmann __be32 delta; 2628850dc7SDaniel Borkmann unsigned int oldlen; 2728850dc7SDaniel Borkmann unsigned int mss; 2828850dc7SDaniel Borkmann struct sk_buff *gso_skb = skb; 2928850dc7SDaniel Borkmann __sum16 newcheck; 3028850dc7SDaniel Borkmann bool ooo_okay, copy_destructor; 3128850dc7SDaniel Borkmann 3228850dc7SDaniel Borkmann if (!pskb_may_pull(skb, sizeof(*th))) 3328850dc7SDaniel Borkmann goto out; 3428850dc7SDaniel Borkmann 3528850dc7SDaniel Borkmann th = tcp_hdr(skb); 3628850dc7SDaniel Borkmann thlen = th->doff * 4; 3728850dc7SDaniel Borkmann if (thlen < sizeof(*th)) 3828850dc7SDaniel Borkmann goto out; 3928850dc7SDaniel Borkmann 4028850dc7SDaniel Borkmann if (!pskb_may_pull(skb, thlen)) 4128850dc7SDaniel Borkmann goto out; 4228850dc7SDaniel Borkmann 4328850dc7SDaniel Borkmann oldlen = (u16)~skb->len; 4428850dc7SDaniel Borkmann __skb_pull(skb, thlen); 4528850dc7SDaniel Borkmann 4628850dc7SDaniel Borkmann mss = tcp_skb_mss(skb); 4728850dc7SDaniel Borkmann if (unlikely(skb->len <= mss)) 4828850dc7SDaniel Borkmann goto out; 4928850dc7SDaniel Borkmann 5028850dc7SDaniel Borkmann if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { 5128850dc7SDaniel Borkmann /* Packet is from an untrusted source, reset gso_segs. */ 5228850dc7SDaniel Borkmann int type = skb_shinfo(skb)->gso_type; 5328850dc7SDaniel Borkmann 5428850dc7SDaniel Borkmann if (unlikely(type & 5528850dc7SDaniel Borkmann ~(SKB_GSO_TCPV4 | 5628850dc7SDaniel Borkmann SKB_GSO_DODGY | 5728850dc7SDaniel Borkmann SKB_GSO_TCP_ECN | 5828850dc7SDaniel Borkmann SKB_GSO_TCPV6 | 5928850dc7SDaniel Borkmann SKB_GSO_GRE | 60cb32f511SEric Dumazet SKB_GSO_IPIP | 6161c1db7fSEric Dumazet SKB_GSO_SIT | 6228850dc7SDaniel Borkmann SKB_GSO_MPLS | 6328850dc7SDaniel Borkmann SKB_GSO_UDP_TUNNEL | 6428850dc7SDaniel Borkmann 0) || 6528850dc7SDaniel Borkmann !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) 6628850dc7SDaniel Borkmann goto out; 6728850dc7SDaniel Borkmann 6828850dc7SDaniel Borkmann skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); 6928850dc7SDaniel Borkmann 7028850dc7SDaniel Borkmann segs = NULL; 7128850dc7SDaniel Borkmann goto out; 7228850dc7SDaniel Borkmann } 7328850dc7SDaniel Borkmann 7428850dc7SDaniel Borkmann copy_destructor = gso_skb->destructor == tcp_wfree; 7528850dc7SDaniel Borkmann ooo_okay = gso_skb->ooo_okay; 7628850dc7SDaniel Borkmann /* All segments but the first should have ooo_okay cleared */ 7728850dc7SDaniel Borkmann skb->ooo_okay = 0; 7828850dc7SDaniel Borkmann 7928850dc7SDaniel Borkmann segs = skb_segment(skb, features); 8028850dc7SDaniel Borkmann if (IS_ERR(segs)) 8128850dc7SDaniel Borkmann goto out; 8228850dc7SDaniel Borkmann 8328850dc7SDaniel Borkmann /* Only first segment might have ooo_okay set */ 8428850dc7SDaniel Borkmann segs->ooo_okay = ooo_okay; 8528850dc7SDaniel Borkmann 8628850dc7SDaniel Borkmann delta = htonl(oldlen + (thlen + mss)); 8728850dc7SDaniel Borkmann 8828850dc7SDaniel Borkmann skb = segs; 8928850dc7SDaniel Borkmann th = tcp_hdr(skb); 9028850dc7SDaniel Borkmann seq = ntohl(th->seq); 9128850dc7SDaniel Borkmann 9228850dc7SDaniel Borkmann newcheck = ~csum_fold((__force __wsum)((__force u32)th->check + 9328850dc7SDaniel Borkmann (__force u32)delta)); 9428850dc7SDaniel Borkmann 9528850dc7SDaniel Borkmann do { 9628850dc7SDaniel Borkmann th->fin = th->psh = 0; 9728850dc7SDaniel Borkmann th->check = newcheck; 9828850dc7SDaniel Borkmann 9928850dc7SDaniel Borkmann if (skb->ip_summed != CHECKSUM_PARTIAL) 10028850dc7SDaniel Borkmann th->check = 10128850dc7SDaniel Borkmann csum_fold(csum_partial(skb_transport_header(skb), 10228850dc7SDaniel Borkmann thlen, skb->csum)); 10328850dc7SDaniel Borkmann 10428850dc7SDaniel Borkmann seq += mss; 10528850dc7SDaniel Borkmann if (copy_destructor) { 10628850dc7SDaniel Borkmann skb->destructor = gso_skb->destructor; 10728850dc7SDaniel Borkmann skb->sk = gso_skb->sk; 1080d08c42cSEric Dumazet sum_truesize += skb->truesize; 10928850dc7SDaniel Borkmann } 11028850dc7SDaniel Borkmann skb = skb->next; 11128850dc7SDaniel Borkmann th = tcp_hdr(skb); 11228850dc7SDaniel Borkmann 11328850dc7SDaniel Borkmann th->seq = htonl(seq); 11428850dc7SDaniel Borkmann th->cwr = 0; 11528850dc7SDaniel Borkmann } while (skb->next); 11628850dc7SDaniel Borkmann 11728850dc7SDaniel Borkmann /* Following permits TCP Small Queues to work well with GSO : 11828850dc7SDaniel Borkmann * The callback to TCP stack will be called at the time last frag 11928850dc7SDaniel Borkmann * is freed at TX completion, and not right now when gso_skb 12028850dc7SDaniel Borkmann * is freed by GSO engine 12128850dc7SDaniel Borkmann */ 12228850dc7SDaniel Borkmann if (copy_destructor) { 12328850dc7SDaniel Borkmann swap(gso_skb->sk, skb->sk); 12428850dc7SDaniel Borkmann swap(gso_skb->destructor, skb->destructor); 1250d08c42cSEric Dumazet sum_truesize += skb->truesize; 1260d08c42cSEric Dumazet atomic_add(sum_truesize - gso_skb->truesize, 1270d08c42cSEric Dumazet &skb->sk->sk_wmem_alloc); 12828850dc7SDaniel Borkmann } 12928850dc7SDaniel Borkmann 13028850dc7SDaniel Borkmann delta = htonl(oldlen + (skb_tail_pointer(skb) - 13128850dc7SDaniel Borkmann skb_transport_header(skb)) + 13228850dc7SDaniel Borkmann skb->data_len); 13328850dc7SDaniel Borkmann th->check = ~csum_fold((__force __wsum)((__force u32)th->check + 13428850dc7SDaniel Borkmann (__force u32)delta)); 13528850dc7SDaniel Borkmann if (skb->ip_summed != CHECKSUM_PARTIAL) 13628850dc7SDaniel Borkmann th->check = csum_fold(csum_partial(skb_transport_header(skb), 13728850dc7SDaniel Borkmann thlen, skb->csum)); 13828850dc7SDaniel Borkmann out: 13928850dc7SDaniel Borkmann return segs; 14028850dc7SDaniel Borkmann } 14128be6e07SEric Dumazet EXPORT_SYMBOL(tcp_gso_segment); 14228850dc7SDaniel Borkmann 14328850dc7SDaniel Borkmann struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) 14428850dc7SDaniel Borkmann { 14528850dc7SDaniel Borkmann struct sk_buff **pp = NULL; 14628850dc7SDaniel Borkmann struct sk_buff *p; 14728850dc7SDaniel Borkmann struct tcphdr *th; 14828850dc7SDaniel Borkmann struct tcphdr *th2; 14928850dc7SDaniel Borkmann unsigned int len; 15028850dc7SDaniel Borkmann unsigned int thlen; 15128850dc7SDaniel Borkmann __be32 flags; 15228850dc7SDaniel Borkmann unsigned int mss = 1; 15328850dc7SDaniel Borkmann unsigned int hlen; 15428850dc7SDaniel Borkmann unsigned int off; 15528850dc7SDaniel Borkmann int flush = 1; 15628850dc7SDaniel Borkmann int i; 15728850dc7SDaniel Borkmann 15828850dc7SDaniel Borkmann off = skb_gro_offset(skb); 15928850dc7SDaniel Borkmann hlen = off + sizeof(*th); 16028850dc7SDaniel Borkmann th = skb_gro_header_fast(skb, off); 16128850dc7SDaniel Borkmann if (skb_gro_header_hard(skb, hlen)) { 16228850dc7SDaniel Borkmann th = skb_gro_header_slow(skb, hlen, off); 16328850dc7SDaniel Borkmann if (unlikely(!th)) 16428850dc7SDaniel Borkmann goto out; 16528850dc7SDaniel Borkmann } 16628850dc7SDaniel Borkmann 16728850dc7SDaniel Borkmann thlen = th->doff * 4; 16828850dc7SDaniel Borkmann if (thlen < sizeof(*th)) 16928850dc7SDaniel Borkmann goto out; 17028850dc7SDaniel Borkmann 17128850dc7SDaniel Borkmann hlen = off + thlen; 17228850dc7SDaniel Borkmann if (skb_gro_header_hard(skb, hlen)) { 17328850dc7SDaniel Borkmann th = skb_gro_header_slow(skb, hlen, off); 17428850dc7SDaniel Borkmann if (unlikely(!th)) 17528850dc7SDaniel Borkmann goto out; 17628850dc7SDaniel Borkmann } 17728850dc7SDaniel Borkmann 17828850dc7SDaniel Borkmann skb_gro_pull(skb, thlen); 17928850dc7SDaniel Borkmann 18028850dc7SDaniel Borkmann len = skb_gro_len(skb); 18128850dc7SDaniel Borkmann flags = tcp_flag_word(th); 18228850dc7SDaniel Borkmann 18328850dc7SDaniel Borkmann for (; (p = *head); head = &p->next) { 18428850dc7SDaniel Borkmann if (!NAPI_GRO_CB(p)->same_flow) 18528850dc7SDaniel Borkmann continue; 18628850dc7SDaniel Borkmann 18728850dc7SDaniel Borkmann th2 = tcp_hdr(p); 18828850dc7SDaniel Borkmann 18928850dc7SDaniel Borkmann if (*(u32 *)&th->source ^ *(u32 *)&th2->source) { 19028850dc7SDaniel Borkmann NAPI_GRO_CB(p)->same_flow = 0; 19128850dc7SDaniel Borkmann continue; 19228850dc7SDaniel Borkmann } 19328850dc7SDaniel Borkmann 19428850dc7SDaniel Borkmann goto found; 19528850dc7SDaniel Borkmann } 19628850dc7SDaniel Borkmann 19728850dc7SDaniel Borkmann goto out_check_final; 19828850dc7SDaniel Borkmann 19928850dc7SDaniel Borkmann found: 200*bf5a755fSJerry Chu /* Include the IP ID check below from the inner most IP hdr */ 201*bf5a755fSJerry Chu flush = NAPI_GRO_CB(p)->flush | NAPI_GRO_CB(p)->flush_id; 20228850dc7SDaniel Borkmann flush |= (__force int)(flags & TCP_FLAG_CWR); 20328850dc7SDaniel Borkmann flush |= (__force int)((flags ^ tcp_flag_word(th2)) & 20428850dc7SDaniel Borkmann ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH)); 20528850dc7SDaniel Borkmann flush |= (__force int)(th->ack_seq ^ th2->ack_seq); 20628850dc7SDaniel Borkmann for (i = sizeof(*th); i < thlen; i += 4) 20728850dc7SDaniel Borkmann flush |= *(u32 *)((u8 *)th + i) ^ 20828850dc7SDaniel Borkmann *(u32 *)((u8 *)th2 + i); 20928850dc7SDaniel Borkmann 21028850dc7SDaniel Borkmann mss = tcp_skb_mss(p); 21128850dc7SDaniel Borkmann 21228850dc7SDaniel Borkmann flush |= (len - 1) >= mss; 21328850dc7SDaniel Borkmann flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); 21428850dc7SDaniel Borkmann 21528850dc7SDaniel Borkmann if (flush || skb_gro_receive(head, skb)) { 21628850dc7SDaniel Borkmann mss = 1; 21728850dc7SDaniel Borkmann goto out_check_final; 21828850dc7SDaniel Borkmann } 21928850dc7SDaniel Borkmann 22028850dc7SDaniel Borkmann p = *head; 22128850dc7SDaniel Borkmann th2 = tcp_hdr(p); 22228850dc7SDaniel Borkmann tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); 22328850dc7SDaniel Borkmann 22428850dc7SDaniel Borkmann out_check_final: 22528850dc7SDaniel Borkmann flush = len < mss; 22628850dc7SDaniel Borkmann flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH | 22728850dc7SDaniel Borkmann TCP_FLAG_RST | TCP_FLAG_SYN | 22828850dc7SDaniel Borkmann TCP_FLAG_FIN)); 22928850dc7SDaniel Borkmann 23028850dc7SDaniel Borkmann if (p && (!NAPI_GRO_CB(skb)->same_flow || flush)) 23128850dc7SDaniel Borkmann pp = head; 23228850dc7SDaniel Borkmann 23328850dc7SDaniel Borkmann out: 234*bf5a755fSJerry Chu NAPI_GRO_CB(skb)->flush |= (flush != 0); 23528850dc7SDaniel Borkmann 23628850dc7SDaniel Borkmann return pp; 23728850dc7SDaniel Borkmann } 23828850dc7SDaniel Borkmann EXPORT_SYMBOL(tcp_gro_receive); 23928850dc7SDaniel Borkmann 24028850dc7SDaniel Borkmann int tcp_gro_complete(struct sk_buff *skb) 24128850dc7SDaniel Borkmann { 24228850dc7SDaniel Borkmann struct tcphdr *th = tcp_hdr(skb); 24328850dc7SDaniel Borkmann 244299603e8SJerry Chu skb->csum_start = (unsigned char *)th - skb->head; 24528850dc7SDaniel Borkmann skb->csum_offset = offsetof(struct tcphdr, check); 24628850dc7SDaniel Borkmann skb->ip_summed = CHECKSUM_PARTIAL; 24728850dc7SDaniel Borkmann 24828850dc7SDaniel Borkmann skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; 24928850dc7SDaniel Borkmann 25028850dc7SDaniel Borkmann if (th->cwr) 25128850dc7SDaniel Borkmann skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; 25228850dc7SDaniel Borkmann 25328850dc7SDaniel Borkmann return 0; 25428850dc7SDaniel Borkmann } 25528850dc7SDaniel Borkmann EXPORT_SYMBOL(tcp_gro_complete); 25628850dc7SDaniel Borkmann 25728850dc7SDaniel Borkmann static int tcp_v4_gso_send_check(struct sk_buff *skb) 25828850dc7SDaniel Borkmann { 25928850dc7SDaniel Borkmann const struct iphdr *iph; 26028850dc7SDaniel Borkmann struct tcphdr *th; 26128850dc7SDaniel Borkmann 26228850dc7SDaniel Borkmann if (!pskb_may_pull(skb, sizeof(*th))) 26328850dc7SDaniel Borkmann return -EINVAL; 26428850dc7SDaniel Borkmann 26528850dc7SDaniel Borkmann iph = ip_hdr(skb); 26628850dc7SDaniel Borkmann th = tcp_hdr(skb); 26728850dc7SDaniel Borkmann 26828850dc7SDaniel Borkmann th->check = 0; 26928850dc7SDaniel Borkmann skb->ip_summed = CHECKSUM_PARTIAL; 27028850dc7SDaniel Borkmann __tcp_v4_send_check(skb, iph->saddr, iph->daddr); 27128850dc7SDaniel Borkmann return 0; 27228850dc7SDaniel Borkmann } 27328850dc7SDaniel Borkmann 27428850dc7SDaniel Borkmann static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) 27528850dc7SDaniel Borkmann { 276299603e8SJerry Chu /* Use the IP hdr immediately proceeding for this transport */ 27728850dc7SDaniel Borkmann const struct iphdr *iph = skb_gro_network_header(skb); 27828850dc7SDaniel Borkmann __wsum wsum; 27928850dc7SDaniel Borkmann 280cc5c00bbSHerbert Xu /* Don't bother verifying checksum if we're going to flush anyway. */ 281cc5c00bbSHerbert Xu if (NAPI_GRO_CB(skb)->flush) 282cc5c00bbSHerbert Xu goto skip_csum; 283cc5c00bbSHerbert Xu 284*bf5a755fSJerry Chu wsum = NAPI_GRO_CB(skb)->csum; 285b8ee93baSHerbert Xu 28628850dc7SDaniel Borkmann switch (skb->ip_summed) { 287b8ee93baSHerbert Xu case CHECKSUM_NONE: 288b8ee93baSHerbert Xu wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), 289b8ee93baSHerbert Xu 0); 290b8ee93baSHerbert Xu 291b8ee93baSHerbert Xu /* fall through */ 292b8ee93baSHerbert Xu 29328850dc7SDaniel Borkmann case CHECKSUM_COMPLETE: 29428850dc7SDaniel Borkmann if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, 295b8ee93baSHerbert Xu wsum)) { 29628850dc7SDaniel Borkmann skb->ip_summed = CHECKSUM_UNNECESSARY; 29728850dc7SDaniel Borkmann break; 29828850dc7SDaniel Borkmann } 299b8ee93baSHerbert Xu 30028850dc7SDaniel Borkmann NAPI_GRO_CB(skb)->flush = 1; 30128850dc7SDaniel Borkmann return NULL; 30228850dc7SDaniel Borkmann } 30328850dc7SDaniel Borkmann 304cc5c00bbSHerbert Xu skip_csum: 30528850dc7SDaniel Borkmann return tcp_gro_receive(head, skb); 30628850dc7SDaniel Borkmann } 30728850dc7SDaniel Borkmann 308299603e8SJerry Chu static int tcp4_gro_complete(struct sk_buff *skb, int thoff) 30928850dc7SDaniel Borkmann { 31028850dc7SDaniel Borkmann const struct iphdr *iph = ip_hdr(skb); 31128850dc7SDaniel Borkmann struct tcphdr *th = tcp_hdr(skb); 31228850dc7SDaniel Borkmann 313299603e8SJerry Chu th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr, 314299603e8SJerry Chu iph->daddr, 0); 31528850dc7SDaniel Borkmann skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; 31628850dc7SDaniel Borkmann 31728850dc7SDaniel Borkmann return tcp_gro_complete(skb); 31828850dc7SDaniel Borkmann } 31928850dc7SDaniel Borkmann 32028850dc7SDaniel Borkmann static const struct net_offload tcpv4_offload = { 32128850dc7SDaniel Borkmann .callbacks = { 32228850dc7SDaniel Borkmann .gso_send_check = tcp_v4_gso_send_check, 32328be6e07SEric Dumazet .gso_segment = tcp_gso_segment, 32428850dc7SDaniel Borkmann .gro_receive = tcp4_gro_receive, 32528850dc7SDaniel Borkmann .gro_complete = tcp4_gro_complete, 32628850dc7SDaniel Borkmann }, 32728850dc7SDaniel Borkmann }; 32828850dc7SDaniel Borkmann 32928850dc7SDaniel Borkmann int __init tcpv4_offload_init(void) 33028850dc7SDaniel Borkmann { 33128850dc7SDaniel Borkmann return inet_add_offload(&tcpv4_offload, IPPROTO_TCP); 33228850dc7SDaniel Borkmann } 333