128850dc7SDaniel Borkmann /* 228850dc7SDaniel Borkmann * IPV4 GSO/GRO offload support 328850dc7SDaniel Borkmann * Linux INET implementation 428850dc7SDaniel Borkmann * 528850dc7SDaniel Borkmann * This program is free software; you can redistribute it and/or 628850dc7SDaniel Borkmann * modify it under the terms of the GNU General Public License 728850dc7SDaniel Borkmann * as published by the Free Software Foundation; either version 828850dc7SDaniel Borkmann * 2 of the License, or (at your option) any later version. 928850dc7SDaniel Borkmann * 1028850dc7SDaniel Borkmann * TCPv4 GSO/GRO support 1128850dc7SDaniel Borkmann */ 1228850dc7SDaniel Borkmann 1328850dc7SDaniel Borkmann #include <linux/skbuff.h> 1428850dc7SDaniel Borkmann #include <net/tcp.h> 1528850dc7SDaniel Borkmann #include <net/protocol.h> 1628850dc7SDaniel Borkmann 17*4ed2d765SWillem de Bruijn void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, unsigned int seq, 18*4ed2d765SWillem de Bruijn unsigned int mss) 19*4ed2d765SWillem de Bruijn { 20*4ed2d765SWillem de Bruijn while (skb) { 21*4ed2d765SWillem de Bruijn if (ts_seq < (__u64) seq + mss) { 22*4ed2d765SWillem de Bruijn skb_shinfo(skb)->tx_flags = SKBTX_SW_TSTAMP; 23*4ed2d765SWillem de Bruijn skb_shinfo(skb)->tskey = ts_seq; 24*4ed2d765SWillem de Bruijn return; 25*4ed2d765SWillem de Bruijn } 26*4ed2d765SWillem de Bruijn 27*4ed2d765SWillem de Bruijn skb = skb->next; 28*4ed2d765SWillem de Bruijn seq += mss; 29*4ed2d765SWillem de Bruijn } 30*4ed2d765SWillem de Bruijn } 31*4ed2d765SWillem de Bruijn 3228be6e07SEric Dumazet struct sk_buff *tcp_gso_segment(struct sk_buff *skb, 3328850dc7SDaniel Borkmann netdev_features_t features) 3428850dc7SDaniel Borkmann { 3528850dc7SDaniel Borkmann struct sk_buff *segs = ERR_PTR(-EINVAL); 360d08c42cSEric Dumazet unsigned int sum_truesize = 0; 3728850dc7SDaniel Borkmann struct tcphdr *th; 3828850dc7SDaniel Borkmann unsigned int thlen; 3928850dc7SDaniel Borkmann unsigned int seq; 4028850dc7SDaniel Borkmann __be32 delta; 4128850dc7SDaniel Borkmann unsigned int oldlen; 4228850dc7SDaniel Borkmann unsigned int mss; 4328850dc7SDaniel Borkmann struct sk_buff *gso_skb = skb; 4428850dc7SDaniel Borkmann __sum16 newcheck; 4528850dc7SDaniel Borkmann bool ooo_okay, copy_destructor; 4628850dc7SDaniel Borkmann 4728850dc7SDaniel Borkmann if (!pskb_may_pull(skb, sizeof(*th))) 4828850dc7SDaniel Borkmann goto out; 4928850dc7SDaniel Borkmann 5028850dc7SDaniel Borkmann th = tcp_hdr(skb); 5128850dc7SDaniel Borkmann thlen = th->doff * 4; 5228850dc7SDaniel Borkmann if (thlen < sizeof(*th)) 5328850dc7SDaniel Borkmann goto out; 5428850dc7SDaniel Borkmann 5528850dc7SDaniel Borkmann if (!pskb_may_pull(skb, thlen)) 5628850dc7SDaniel Borkmann goto out; 5728850dc7SDaniel Borkmann 5828850dc7SDaniel Borkmann oldlen = (u16)~skb->len; 5928850dc7SDaniel Borkmann __skb_pull(skb, thlen); 6028850dc7SDaniel Borkmann 6128850dc7SDaniel Borkmann mss = tcp_skb_mss(skb); 6228850dc7SDaniel Borkmann if (unlikely(skb->len <= mss)) 6328850dc7SDaniel Borkmann goto out; 6428850dc7SDaniel Borkmann 6528850dc7SDaniel Borkmann if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { 6628850dc7SDaniel Borkmann /* Packet is from an untrusted source, reset gso_segs. */ 6728850dc7SDaniel Borkmann int type = skb_shinfo(skb)->gso_type; 6828850dc7SDaniel Borkmann 6928850dc7SDaniel Borkmann if (unlikely(type & 7028850dc7SDaniel Borkmann ~(SKB_GSO_TCPV4 | 7128850dc7SDaniel Borkmann SKB_GSO_DODGY | 7228850dc7SDaniel Borkmann SKB_GSO_TCP_ECN | 7328850dc7SDaniel Borkmann SKB_GSO_TCPV6 | 7428850dc7SDaniel Borkmann SKB_GSO_GRE | 754749c09cSTom Herbert SKB_GSO_GRE_CSUM | 76cb32f511SEric Dumazet SKB_GSO_IPIP | 7761c1db7fSEric Dumazet SKB_GSO_SIT | 7828850dc7SDaniel Borkmann SKB_GSO_MPLS | 7928850dc7SDaniel Borkmann SKB_GSO_UDP_TUNNEL | 800f4f4ffaSTom Herbert SKB_GSO_UDP_TUNNEL_CSUM | 8128850dc7SDaniel Borkmann 0) || 8228850dc7SDaniel Borkmann !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) 8328850dc7SDaniel Borkmann goto out; 8428850dc7SDaniel Borkmann 8528850dc7SDaniel Borkmann skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); 8628850dc7SDaniel Borkmann 8728850dc7SDaniel Borkmann segs = NULL; 8828850dc7SDaniel Borkmann goto out; 8928850dc7SDaniel Borkmann } 9028850dc7SDaniel Borkmann 9128850dc7SDaniel Borkmann copy_destructor = gso_skb->destructor == tcp_wfree; 9228850dc7SDaniel Borkmann ooo_okay = gso_skb->ooo_okay; 9328850dc7SDaniel Borkmann /* All segments but the first should have ooo_okay cleared */ 9428850dc7SDaniel Borkmann skb->ooo_okay = 0; 9528850dc7SDaniel Borkmann 9628850dc7SDaniel Borkmann segs = skb_segment(skb, features); 9728850dc7SDaniel Borkmann if (IS_ERR(segs)) 9828850dc7SDaniel Borkmann goto out; 9928850dc7SDaniel Borkmann 10028850dc7SDaniel Borkmann /* Only first segment might have ooo_okay set */ 10128850dc7SDaniel Borkmann segs->ooo_okay = ooo_okay; 10228850dc7SDaniel Borkmann 10328850dc7SDaniel Borkmann delta = htonl(oldlen + (thlen + mss)); 10428850dc7SDaniel Borkmann 10528850dc7SDaniel Borkmann skb = segs; 10628850dc7SDaniel Borkmann th = tcp_hdr(skb); 10728850dc7SDaniel Borkmann seq = ntohl(th->seq); 10828850dc7SDaniel Borkmann 109*4ed2d765SWillem de Bruijn if (unlikely(skb_shinfo(gso_skb)->tx_flags & SKBTX_SW_TSTAMP)) 110*4ed2d765SWillem de Bruijn tcp_gso_tstamp(segs, skb_shinfo(gso_skb)->tskey, seq, mss); 111*4ed2d765SWillem de Bruijn 11228850dc7SDaniel Borkmann newcheck = ~csum_fold((__force __wsum)((__force u32)th->check + 11328850dc7SDaniel Borkmann (__force u32)delta)); 11428850dc7SDaniel Borkmann 11528850dc7SDaniel Borkmann do { 11628850dc7SDaniel Borkmann th->fin = th->psh = 0; 11728850dc7SDaniel Borkmann th->check = newcheck; 11828850dc7SDaniel Borkmann 11928850dc7SDaniel Borkmann if (skb->ip_summed != CHECKSUM_PARTIAL) 120e9c3a24bSTom Herbert th->check = gso_make_checksum(skb, ~th->check); 12128850dc7SDaniel Borkmann 12228850dc7SDaniel Borkmann seq += mss; 12328850dc7SDaniel Borkmann if (copy_destructor) { 12428850dc7SDaniel Borkmann skb->destructor = gso_skb->destructor; 12528850dc7SDaniel Borkmann skb->sk = gso_skb->sk; 1260d08c42cSEric Dumazet sum_truesize += skb->truesize; 12728850dc7SDaniel Borkmann } 12828850dc7SDaniel Borkmann skb = skb->next; 12928850dc7SDaniel Borkmann th = tcp_hdr(skb); 13028850dc7SDaniel Borkmann 13128850dc7SDaniel Borkmann th->seq = htonl(seq); 13228850dc7SDaniel Borkmann th->cwr = 0; 13328850dc7SDaniel Borkmann } while (skb->next); 13428850dc7SDaniel Borkmann 13528850dc7SDaniel Borkmann /* Following permits TCP Small Queues to work well with GSO : 13628850dc7SDaniel Borkmann * The callback to TCP stack will be called at the time last frag 13728850dc7SDaniel Borkmann * is freed at TX completion, and not right now when gso_skb 13828850dc7SDaniel Borkmann * is freed by GSO engine 13928850dc7SDaniel Borkmann */ 14028850dc7SDaniel Borkmann if (copy_destructor) { 14128850dc7SDaniel Borkmann swap(gso_skb->sk, skb->sk); 14228850dc7SDaniel Borkmann swap(gso_skb->destructor, skb->destructor); 1430d08c42cSEric Dumazet sum_truesize += skb->truesize; 1440d08c42cSEric Dumazet atomic_add(sum_truesize - gso_skb->truesize, 1450d08c42cSEric Dumazet &skb->sk->sk_wmem_alloc); 14628850dc7SDaniel Borkmann } 14728850dc7SDaniel Borkmann 14828850dc7SDaniel Borkmann delta = htonl(oldlen + (skb_tail_pointer(skb) - 14928850dc7SDaniel Borkmann skb_transport_header(skb)) + 15028850dc7SDaniel Borkmann skb->data_len); 15128850dc7SDaniel Borkmann th->check = ~csum_fold((__force __wsum)((__force u32)th->check + 15228850dc7SDaniel Borkmann (__force u32)delta)); 15328850dc7SDaniel Borkmann if (skb->ip_summed != CHECKSUM_PARTIAL) 154e9c3a24bSTom Herbert th->check = gso_make_checksum(skb, ~th->check); 15528850dc7SDaniel Borkmann out: 15628850dc7SDaniel Borkmann return segs; 15728850dc7SDaniel Borkmann } 15828850dc7SDaniel Borkmann 15928850dc7SDaniel Borkmann struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) 16028850dc7SDaniel Borkmann { 16128850dc7SDaniel Borkmann struct sk_buff **pp = NULL; 16228850dc7SDaniel Borkmann struct sk_buff *p; 16328850dc7SDaniel Borkmann struct tcphdr *th; 16428850dc7SDaniel Borkmann struct tcphdr *th2; 16528850dc7SDaniel Borkmann unsigned int len; 16628850dc7SDaniel Borkmann unsigned int thlen; 16728850dc7SDaniel Borkmann __be32 flags; 16828850dc7SDaniel Borkmann unsigned int mss = 1; 16928850dc7SDaniel Borkmann unsigned int hlen; 17028850dc7SDaniel Borkmann unsigned int off; 17128850dc7SDaniel Borkmann int flush = 1; 17228850dc7SDaniel Borkmann int i; 17328850dc7SDaniel Borkmann 17428850dc7SDaniel Borkmann off = skb_gro_offset(skb); 17528850dc7SDaniel Borkmann hlen = off + sizeof(*th); 17628850dc7SDaniel Borkmann th = skb_gro_header_fast(skb, off); 17728850dc7SDaniel Borkmann if (skb_gro_header_hard(skb, hlen)) { 17828850dc7SDaniel Borkmann th = skb_gro_header_slow(skb, hlen, off); 17928850dc7SDaniel Borkmann if (unlikely(!th)) 18028850dc7SDaniel Borkmann goto out; 18128850dc7SDaniel Borkmann } 18228850dc7SDaniel Borkmann 18328850dc7SDaniel Borkmann thlen = th->doff * 4; 18428850dc7SDaniel Borkmann if (thlen < sizeof(*th)) 18528850dc7SDaniel Borkmann goto out; 18628850dc7SDaniel Borkmann 18728850dc7SDaniel Borkmann hlen = off + thlen; 18828850dc7SDaniel Borkmann if (skb_gro_header_hard(skb, hlen)) { 18928850dc7SDaniel Borkmann th = skb_gro_header_slow(skb, hlen, off); 19028850dc7SDaniel Borkmann if (unlikely(!th)) 19128850dc7SDaniel Borkmann goto out; 19228850dc7SDaniel Borkmann } 19328850dc7SDaniel Borkmann 19428850dc7SDaniel Borkmann skb_gro_pull(skb, thlen); 19528850dc7SDaniel Borkmann 19628850dc7SDaniel Borkmann len = skb_gro_len(skb); 19728850dc7SDaniel Borkmann flags = tcp_flag_word(th); 19828850dc7SDaniel Borkmann 19928850dc7SDaniel Borkmann for (; (p = *head); head = &p->next) { 20028850dc7SDaniel Borkmann if (!NAPI_GRO_CB(p)->same_flow) 20128850dc7SDaniel Borkmann continue; 20228850dc7SDaniel Borkmann 20328850dc7SDaniel Borkmann th2 = tcp_hdr(p); 20428850dc7SDaniel Borkmann 20528850dc7SDaniel Borkmann if (*(u32 *)&th->source ^ *(u32 *)&th2->source) { 20628850dc7SDaniel Borkmann NAPI_GRO_CB(p)->same_flow = 0; 20728850dc7SDaniel Borkmann continue; 20828850dc7SDaniel Borkmann } 20928850dc7SDaniel Borkmann 21028850dc7SDaniel Borkmann goto found; 21128850dc7SDaniel Borkmann } 21228850dc7SDaniel Borkmann 21328850dc7SDaniel Borkmann goto out_check_final; 21428850dc7SDaniel Borkmann 21528850dc7SDaniel Borkmann found: 216bf5a755fSJerry Chu /* Include the IP ID check below from the inner most IP hdr */ 217bf5a755fSJerry Chu flush = NAPI_GRO_CB(p)->flush | NAPI_GRO_CB(p)->flush_id; 21828850dc7SDaniel Borkmann flush |= (__force int)(flags & TCP_FLAG_CWR); 21928850dc7SDaniel Borkmann flush |= (__force int)((flags ^ tcp_flag_word(th2)) & 22028850dc7SDaniel Borkmann ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH)); 22128850dc7SDaniel Borkmann flush |= (__force int)(th->ack_seq ^ th2->ack_seq); 22228850dc7SDaniel Borkmann for (i = sizeof(*th); i < thlen; i += 4) 22328850dc7SDaniel Borkmann flush |= *(u32 *)((u8 *)th + i) ^ 22428850dc7SDaniel Borkmann *(u32 *)((u8 *)th2 + i); 22528850dc7SDaniel Borkmann 22628850dc7SDaniel Borkmann mss = tcp_skb_mss(p); 22728850dc7SDaniel Borkmann 22828850dc7SDaniel Borkmann flush |= (len - 1) >= mss; 22928850dc7SDaniel Borkmann flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); 23028850dc7SDaniel Borkmann 23128850dc7SDaniel Borkmann if (flush || skb_gro_receive(head, skb)) { 23228850dc7SDaniel Borkmann mss = 1; 23328850dc7SDaniel Borkmann goto out_check_final; 23428850dc7SDaniel Borkmann } 23528850dc7SDaniel Borkmann 23628850dc7SDaniel Borkmann p = *head; 23728850dc7SDaniel Borkmann th2 = tcp_hdr(p); 23828850dc7SDaniel Borkmann tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); 23928850dc7SDaniel Borkmann 24028850dc7SDaniel Borkmann out_check_final: 24128850dc7SDaniel Borkmann flush = len < mss; 24228850dc7SDaniel Borkmann flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH | 24328850dc7SDaniel Borkmann TCP_FLAG_RST | TCP_FLAG_SYN | 24428850dc7SDaniel Borkmann TCP_FLAG_FIN)); 24528850dc7SDaniel Borkmann 24628850dc7SDaniel Borkmann if (p && (!NAPI_GRO_CB(skb)->same_flow || flush)) 24728850dc7SDaniel Borkmann pp = head; 24828850dc7SDaniel Borkmann 24928850dc7SDaniel Borkmann out: 250bf5a755fSJerry Chu NAPI_GRO_CB(skb)->flush |= (flush != 0); 25128850dc7SDaniel Borkmann 25228850dc7SDaniel Borkmann return pp; 25328850dc7SDaniel Borkmann } 25428850dc7SDaniel Borkmann 25528850dc7SDaniel Borkmann int tcp_gro_complete(struct sk_buff *skb) 25628850dc7SDaniel Borkmann { 25728850dc7SDaniel Borkmann struct tcphdr *th = tcp_hdr(skb); 25828850dc7SDaniel Borkmann 259299603e8SJerry Chu skb->csum_start = (unsigned char *)th - skb->head; 26028850dc7SDaniel Borkmann skb->csum_offset = offsetof(struct tcphdr, check); 26128850dc7SDaniel Borkmann skb->ip_summed = CHECKSUM_PARTIAL; 26228850dc7SDaniel Borkmann 26328850dc7SDaniel Borkmann skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; 26428850dc7SDaniel Borkmann 26528850dc7SDaniel Borkmann if (th->cwr) 26628850dc7SDaniel Borkmann skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; 26728850dc7SDaniel Borkmann 26828850dc7SDaniel Borkmann return 0; 26928850dc7SDaniel Borkmann } 27028850dc7SDaniel Borkmann EXPORT_SYMBOL(tcp_gro_complete); 27128850dc7SDaniel Borkmann 27228850dc7SDaniel Borkmann static int tcp_v4_gso_send_check(struct sk_buff *skb) 27328850dc7SDaniel Borkmann { 27428850dc7SDaniel Borkmann const struct iphdr *iph; 27528850dc7SDaniel Borkmann struct tcphdr *th; 27628850dc7SDaniel Borkmann 27728850dc7SDaniel Borkmann if (!pskb_may_pull(skb, sizeof(*th))) 27828850dc7SDaniel Borkmann return -EINVAL; 27928850dc7SDaniel Borkmann 28028850dc7SDaniel Borkmann iph = ip_hdr(skb); 28128850dc7SDaniel Borkmann th = tcp_hdr(skb); 28228850dc7SDaniel Borkmann 28328850dc7SDaniel Borkmann th->check = 0; 28428850dc7SDaniel Borkmann skb->ip_summed = CHECKSUM_PARTIAL; 28528850dc7SDaniel Borkmann __tcp_v4_send_check(skb, iph->saddr, iph->daddr); 28628850dc7SDaniel Borkmann return 0; 28728850dc7SDaniel Borkmann } 28828850dc7SDaniel Borkmann 28928850dc7SDaniel Borkmann static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) 29028850dc7SDaniel Borkmann { 291299603e8SJerry Chu /* Use the IP hdr immediately proceeding for this transport */ 29228850dc7SDaniel Borkmann const struct iphdr *iph = skb_gro_network_header(skb); 29328850dc7SDaniel Borkmann __wsum wsum; 29428850dc7SDaniel Borkmann 295cc5c00bbSHerbert Xu /* Don't bother verifying checksum if we're going to flush anyway. */ 296cc5c00bbSHerbert Xu if (NAPI_GRO_CB(skb)->flush) 297cc5c00bbSHerbert Xu goto skip_csum; 298cc5c00bbSHerbert Xu 299bf5a755fSJerry Chu wsum = NAPI_GRO_CB(skb)->csum; 300b8ee93baSHerbert Xu 30128850dc7SDaniel Borkmann switch (skb->ip_summed) { 302b8ee93baSHerbert Xu case CHECKSUM_NONE: 303b8ee93baSHerbert Xu wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), 304b8ee93baSHerbert Xu 0); 305b8ee93baSHerbert Xu 306b8ee93baSHerbert Xu /* fall through */ 307b8ee93baSHerbert Xu 30828850dc7SDaniel Borkmann case CHECKSUM_COMPLETE: 30928850dc7SDaniel Borkmann if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, 310b8ee93baSHerbert Xu wsum)) { 31128850dc7SDaniel Borkmann skb->ip_summed = CHECKSUM_UNNECESSARY; 31228850dc7SDaniel Borkmann break; 31328850dc7SDaniel Borkmann } 314b8ee93baSHerbert Xu 31528850dc7SDaniel Borkmann NAPI_GRO_CB(skb)->flush = 1; 31628850dc7SDaniel Borkmann return NULL; 31728850dc7SDaniel Borkmann } 31828850dc7SDaniel Borkmann 319cc5c00bbSHerbert Xu skip_csum: 32028850dc7SDaniel Borkmann return tcp_gro_receive(head, skb); 32128850dc7SDaniel Borkmann } 32228850dc7SDaniel Borkmann 323299603e8SJerry Chu static int tcp4_gro_complete(struct sk_buff *skb, int thoff) 32428850dc7SDaniel Borkmann { 32528850dc7SDaniel Borkmann const struct iphdr *iph = ip_hdr(skb); 32628850dc7SDaniel Borkmann struct tcphdr *th = tcp_hdr(skb); 32728850dc7SDaniel Borkmann 328299603e8SJerry Chu th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr, 329299603e8SJerry Chu iph->daddr, 0); 330c3caf119SJerry Chu skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; 33128850dc7SDaniel Borkmann 33228850dc7SDaniel Borkmann return tcp_gro_complete(skb); 33328850dc7SDaniel Borkmann } 33428850dc7SDaniel Borkmann 33528850dc7SDaniel Borkmann static const struct net_offload tcpv4_offload = { 33628850dc7SDaniel Borkmann .callbacks = { 33728850dc7SDaniel Borkmann .gso_send_check = tcp_v4_gso_send_check, 33828be6e07SEric Dumazet .gso_segment = tcp_gso_segment, 33928850dc7SDaniel Borkmann .gro_receive = tcp4_gro_receive, 34028850dc7SDaniel Borkmann .gro_complete = tcp4_gro_complete, 34128850dc7SDaniel Borkmann }, 34228850dc7SDaniel Borkmann }; 34328850dc7SDaniel Borkmann 34428850dc7SDaniel Borkmann int __init tcpv4_offload_init(void) 34528850dc7SDaniel Borkmann { 34628850dc7SDaniel Borkmann return inet_add_offload(&tcpv4_offload, IPPROTO_TCP); 34728850dc7SDaniel Borkmann } 348