127f190a3SBjoern A. Zeeb /*- 227f190a3SBjoern A. Zeeb * Copyright (c) 2007, Myricom Inc. 327f190a3SBjoern A. Zeeb * Copyright (c) 2008, Intel Corporation. 4*62b5b6ecSBjoern A. Zeeb * Copyright (c) 2012 The FreeBSD Foundation 527f190a3SBjoern A. Zeeb * All rights reserved. 627f190a3SBjoern A. Zeeb * 7*62b5b6ecSBjoern A. Zeeb * Portions of this software were developed by Bjoern Zeeb 8*62b5b6ecSBjoern A. Zeeb * under sponsorship from the FreeBSD Foundation. 9*62b5b6ecSBjoern A. Zeeb * 1027f190a3SBjoern A. Zeeb * Redistribution and use in source and binary forms, with or without 1127f190a3SBjoern A. Zeeb * modification, are permitted provided that the following conditions 1227f190a3SBjoern A. Zeeb * are met: 1327f190a3SBjoern A. Zeeb * 1. Redistributions of source code must retain the above copyright 1427f190a3SBjoern A. Zeeb * notice, this list of conditions and the following disclaimer. 1527f190a3SBjoern A. Zeeb * 2. Redistributions in binary form must reproduce the above copyright 1627f190a3SBjoern A. Zeeb * notice, this list of conditions and the following disclaimer in the 1727f190a3SBjoern A. Zeeb * documentation and/or other materials provided with the distribution. 1827f190a3SBjoern A. Zeeb * 1927f190a3SBjoern A. Zeeb * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 2027f190a3SBjoern A. Zeeb * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2127f190a3SBjoern A. Zeeb * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2227f190a3SBjoern A. Zeeb * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 2327f190a3SBjoern A. Zeeb * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2427f190a3SBjoern A. Zeeb * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2527f190a3SBjoern A. Zeeb * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2627f190a3SBjoern A. Zeeb * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2727f190a3SBjoern A. Zeeb * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2827f190a3SBjoern A. Zeeb * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2927f190a3SBjoern A. Zeeb * SUCH DAMAGE. 3027f190a3SBjoern A. Zeeb * 3127f190a3SBjoern A. Zeeb * $FreeBSD$ 3227f190a3SBjoern A. Zeeb */ 336c5087a8SJack F Vogel 34*62b5b6ecSBjoern A. Zeeb #include <sys/cdefs.h> 35*62b5b6ecSBjoern A. Zeeb __FBSDID("$FreeBSD$"); 36*62b5b6ecSBjoern A. Zeeb 37*62b5b6ecSBjoern A. Zeeb #include "opt_inet.h" 38*62b5b6ecSBjoern A. Zeeb #include "opt_inet6.h" 39*62b5b6ecSBjoern A. Zeeb 406c5087a8SJack F Vogel #include <sys/param.h> 416c5087a8SJack F Vogel #include <sys/systm.h> 426c5087a8SJack F Vogel #include <sys/mbuf.h> 436c5087a8SJack F Vogel #include <sys/kernel.h> 446c5087a8SJack F Vogel #include <sys/socket.h> 456c5087a8SJack F Vogel 466c5087a8SJack F Vogel #include <net/if.h> 47*62b5b6ecSBjoern A. Zeeb #include <net/if_var.h> 486c5087a8SJack F Vogel #include <net/ethernet.h> 496c5087a8SJack F Vogel 506c5087a8SJack F Vogel #include <netinet/in_systm.h> 516c5087a8SJack F Vogel #include <netinet/in.h> 52*62b5b6ecSBjoern A. Zeeb #include <netinet/ip6.h> 536c5087a8SJack F Vogel #include <netinet/ip.h> 546c5087a8SJack F Vogel #include <netinet/tcp.h> 556c5087a8SJack F Vogel #include <netinet/tcp_lro.h> 566c5087a8SJack F Vogel 576c5087a8SJack F Vogel #include <machine/in_cksum.h> 586c5087a8SJack F Vogel 59*62b5b6ecSBjoern A. Zeeb #ifndef LRO_ENTRIES 60*62b5b6ecSBjoern A. Zeeb #define LRO_ENTRIES 8 /* # of LRO entries per RX queue. */ 61*62b5b6ecSBjoern A. Zeeb #endif 626c5087a8SJack F Vogel 63*62b5b6ecSBjoern A. Zeeb #define TCP_LRO_UPDATE_CSUM 1 64*62b5b6ecSBjoern A. Zeeb #ifndef TCP_LRO_UPDATE_CSUM 65*62b5b6ecSBjoern A. Zeeb #define TCP_LRO_INVALID_CSUM 0x0000 66*62b5b6ecSBjoern A. Zeeb #endif 676c5087a8SJack F Vogel 686c5087a8SJack F Vogel int 69*62b5b6ecSBjoern A. Zeeb tcp_lro_init(struct lro_ctrl *lc) 706c5087a8SJack F Vogel { 71*62b5b6ecSBjoern A. Zeeb struct lro_entry *le; 72*62b5b6ecSBjoern A. Zeeb int error, i; 736c5087a8SJack F Vogel 74*62b5b6ecSBjoern A. Zeeb lc->lro_bad_csum = 0; 75*62b5b6ecSBjoern A. Zeeb lc->lro_queued = 0; 76*62b5b6ecSBjoern A. Zeeb lc->lro_flushed = 0; 77*62b5b6ecSBjoern A. Zeeb lc->lro_cnt = 0; 78*62b5b6ecSBjoern A. Zeeb SLIST_INIT(&lc->lro_free); 79*62b5b6ecSBjoern A. Zeeb SLIST_INIT(&lc->lro_active); 806c5087a8SJack F Vogel 81*62b5b6ecSBjoern A. Zeeb error = 0; 826c5087a8SJack F Vogel for (i = 0; i < LRO_ENTRIES; i++) { 83*62b5b6ecSBjoern A. Zeeb le = (struct lro_entry *)malloc(sizeof(*le), M_DEVBUF, 84*62b5b6ecSBjoern A. Zeeb M_NOWAIT | M_ZERO); 85*62b5b6ecSBjoern A. Zeeb if (le == NULL) { 866c5087a8SJack F Vogel if (i == 0) 876c5087a8SJack F Vogel error = ENOMEM; 886c5087a8SJack F Vogel break; 896c5087a8SJack F Vogel } 90*62b5b6ecSBjoern A. Zeeb lc->lro_cnt = i + 1; 91*62b5b6ecSBjoern A. Zeeb SLIST_INSERT_HEAD(&lc->lro_free, le, next); 926c5087a8SJack F Vogel } 936c5087a8SJack F Vogel 946c5087a8SJack F Vogel return (error); 956c5087a8SJack F Vogel } 966c5087a8SJack F Vogel 976c5087a8SJack F Vogel void 98*62b5b6ecSBjoern A. Zeeb tcp_lro_free(struct lro_ctrl *lc) 996c5087a8SJack F Vogel { 100*62b5b6ecSBjoern A. Zeeb struct lro_entry *le; 1016c5087a8SJack F Vogel 102*62b5b6ecSBjoern A. Zeeb while (!SLIST_EMPTY(&lc->lro_free)) { 103*62b5b6ecSBjoern A. Zeeb le = SLIST_FIRST(&lc->lro_free); 104*62b5b6ecSBjoern A. Zeeb SLIST_REMOVE_HEAD(&lc->lro_free, next); 105*62b5b6ecSBjoern A. Zeeb free(le, M_DEVBUF); 1066c5087a8SJack F Vogel } 1076c5087a8SJack F Vogel } 1086c5087a8SJack F Vogel 109*62b5b6ecSBjoern A. Zeeb #ifdef TCP_LRO_UPDATE_CSUM 110*62b5b6ecSBjoern A. Zeeb static uint16_t 111*62b5b6ecSBjoern A. Zeeb tcp_lro_csum_th(struct tcphdr *th) 112*62b5b6ecSBjoern A. Zeeb { 113*62b5b6ecSBjoern A. Zeeb uint32_t ch; 114*62b5b6ecSBjoern A. Zeeb uint16_t *p, l; 115*62b5b6ecSBjoern A. Zeeb 116*62b5b6ecSBjoern A. Zeeb ch = th->th_sum = 0x0000; 117*62b5b6ecSBjoern A. Zeeb l = th->th_off; 118*62b5b6ecSBjoern A. Zeeb p = (uint16_t *)th; 119*62b5b6ecSBjoern A. Zeeb while (l > 0) { 120*62b5b6ecSBjoern A. Zeeb ch += *p; 121*62b5b6ecSBjoern A. Zeeb p++; 122*62b5b6ecSBjoern A. Zeeb ch += *p; 123*62b5b6ecSBjoern A. Zeeb p++; 124*62b5b6ecSBjoern A. Zeeb l--; 125*62b5b6ecSBjoern A. Zeeb } 126*62b5b6ecSBjoern A. Zeeb while (ch > 0xffff) 127*62b5b6ecSBjoern A. Zeeb ch = (ch >> 16) + (ch & 0xffff); 128*62b5b6ecSBjoern A. Zeeb 129*62b5b6ecSBjoern A. Zeeb return (ch & 0xffff); 130*62b5b6ecSBjoern A. Zeeb } 131*62b5b6ecSBjoern A. Zeeb 132*62b5b6ecSBjoern A. Zeeb static uint16_t 133*62b5b6ecSBjoern A. Zeeb tcp_lro_rx_csum_fixup(struct lro_entry *le, void *l3hdr, struct tcphdr *th, 134*62b5b6ecSBjoern A. Zeeb uint16_t tcp_data_len, uint16_t csum) 135*62b5b6ecSBjoern A. Zeeb { 136*62b5b6ecSBjoern A. Zeeb uint32_t c; 137*62b5b6ecSBjoern A. Zeeb uint16_t cs; 138*62b5b6ecSBjoern A. Zeeb 139*62b5b6ecSBjoern A. Zeeb c = csum; 140*62b5b6ecSBjoern A. Zeeb 141*62b5b6ecSBjoern A. Zeeb /* Remove length from checksum. */ 142*62b5b6ecSBjoern A. Zeeb switch (le->eh_type) { 143*62b5b6ecSBjoern A. Zeeb #ifdef INET6 144*62b5b6ecSBjoern A. Zeeb case ETHERTYPE_IPV6: 145*62b5b6ecSBjoern A. Zeeb { 146*62b5b6ecSBjoern A. Zeeb struct ip6_hdr *ip6; 147*62b5b6ecSBjoern A. Zeeb 148*62b5b6ecSBjoern A. Zeeb ip6 = (struct ip6_hdr *)l3hdr; 149*62b5b6ecSBjoern A. Zeeb if (le->append_cnt == 0) 150*62b5b6ecSBjoern A. Zeeb cs = ip6->ip6_plen; 151*62b5b6ecSBjoern A. Zeeb else { 152*62b5b6ecSBjoern A. Zeeb uint32_t cx; 153*62b5b6ecSBjoern A. Zeeb 154*62b5b6ecSBjoern A. Zeeb cx = ntohs(ip6->ip6_plen); 155*62b5b6ecSBjoern A. Zeeb cs = in6_cksum_pseudo(ip6, cx, ip6->ip6_nxt, 0); 156*62b5b6ecSBjoern A. Zeeb } 157*62b5b6ecSBjoern A. Zeeb break; 158*62b5b6ecSBjoern A. Zeeb } 159*62b5b6ecSBjoern A. Zeeb #endif 160*62b5b6ecSBjoern A. Zeeb #ifdef INET 161*62b5b6ecSBjoern A. Zeeb case ETHERTYPE_IP: 162*62b5b6ecSBjoern A. Zeeb { 163*62b5b6ecSBjoern A. Zeeb struct ip *ip4; 164*62b5b6ecSBjoern A. Zeeb 165*62b5b6ecSBjoern A. Zeeb ip4 = (struct ip *)l3hdr; 166*62b5b6ecSBjoern A. Zeeb if (le->append_cnt == 0) 167*62b5b6ecSBjoern A. Zeeb cs = ip4->ip_len; 168*62b5b6ecSBjoern A. Zeeb else { 169*62b5b6ecSBjoern A. Zeeb cs = in_addword(ntohs(ip4->ip_len) - sizeof(*ip4), 170*62b5b6ecSBjoern A. Zeeb IPPROTO_TCP); 171*62b5b6ecSBjoern A. Zeeb cs = in_pseudo(ip4->ip_src.s_addr, ip4->ip_dst.s_addr, 172*62b5b6ecSBjoern A. Zeeb htons(cs)); 173*62b5b6ecSBjoern A. Zeeb } 174*62b5b6ecSBjoern A. Zeeb break; 175*62b5b6ecSBjoern A. Zeeb } 176*62b5b6ecSBjoern A. Zeeb #endif 177*62b5b6ecSBjoern A. Zeeb default: 178*62b5b6ecSBjoern A. Zeeb cs = 0; /* Keep compiler happy. */ 179*62b5b6ecSBjoern A. Zeeb } 180*62b5b6ecSBjoern A. Zeeb 181*62b5b6ecSBjoern A. Zeeb cs = ~cs; 182*62b5b6ecSBjoern A. Zeeb c += cs; 183*62b5b6ecSBjoern A. Zeeb 184*62b5b6ecSBjoern A. Zeeb /* Remove TCP header csum. */ 185*62b5b6ecSBjoern A. Zeeb cs = ~tcp_lro_csum_th(th); 186*62b5b6ecSBjoern A. Zeeb c += cs; 187*62b5b6ecSBjoern A. Zeeb while (c > 0xffff) 188*62b5b6ecSBjoern A. Zeeb c = (c >> 16) + (c & 0xffff); 189*62b5b6ecSBjoern A. Zeeb 190*62b5b6ecSBjoern A. Zeeb return (c & 0xffff); 191*62b5b6ecSBjoern A. Zeeb } 192*62b5b6ecSBjoern A. Zeeb #endif 193*62b5b6ecSBjoern A. Zeeb 1946c5087a8SJack F Vogel void 195*62b5b6ecSBjoern A. Zeeb tcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *le) 1966c5087a8SJack F Vogel { 1976c5087a8SJack F Vogel 198*62b5b6ecSBjoern A. Zeeb if (le->append_cnt > 0) { 199*62b5b6ecSBjoern A. Zeeb struct tcphdr *th; 200*62b5b6ecSBjoern A. Zeeb uint16_t p_len; 2016c5087a8SJack F Vogel 202*62b5b6ecSBjoern A. Zeeb p_len = htons(le->p_len); 203*62b5b6ecSBjoern A. Zeeb switch (le->eh_type) { 204*62b5b6ecSBjoern A. Zeeb #ifdef INET6 205*62b5b6ecSBjoern A. Zeeb case ETHERTYPE_IPV6: 2066c5087a8SJack F Vogel { 207*62b5b6ecSBjoern A. Zeeb struct ip6_hdr *ip6; 208*62b5b6ecSBjoern A. Zeeb 209*62b5b6ecSBjoern A. Zeeb ip6 = le->le_ip6; 210*62b5b6ecSBjoern A. Zeeb ip6->ip6_plen = p_len; 211*62b5b6ecSBjoern A. Zeeb th = (struct tcphdr *)(ip6 + 1); 212*62b5b6ecSBjoern A. Zeeb le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID | 213*62b5b6ecSBjoern A. Zeeb CSUM_PSEUDO_HDR; 214*62b5b6ecSBjoern A. Zeeb le->p_len += ETHER_HDR_LEN + sizeof(*ip6); 215*62b5b6ecSBjoern A. Zeeb break; 216*62b5b6ecSBjoern A. Zeeb } 217*62b5b6ecSBjoern A. Zeeb #endif 218*62b5b6ecSBjoern A. Zeeb #ifdef INET 219*62b5b6ecSBjoern A. Zeeb case ETHERTYPE_IP: 220*62b5b6ecSBjoern A. Zeeb { 221*62b5b6ecSBjoern A. Zeeb struct ip *ip4; 222*62b5b6ecSBjoern A. Zeeb #ifdef TCP_LRO_UPDATE_CSUM 223*62b5b6ecSBjoern A. Zeeb uint32_t cl; 224*62b5b6ecSBjoern A. Zeeb uint16_t c; 225*62b5b6ecSBjoern A. Zeeb #endif 226*62b5b6ecSBjoern A. Zeeb 227*62b5b6ecSBjoern A. Zeeb ip4 = le->le_ip4; 228*62b5b6ecSBjoern A. Zeeb #ifdef TCP_LRO_UPDATE_CSUM 229*62b5b6ecSBjoern A. Zeeb /* Fix IP header checksum for new length. */ 230*62b5b6ecSBjoern A. Zeeb c = ~ip4->ip_sum; 231*62b5b6ecSBjoern A. Zeeb cl = c; 232*62b5b6ecSBjoern A. Zeeb c = ~ip4->ip_len; 233*62b5b6ecSBjoern A. Zeeb cl += c + p_len; 234*62b5b6ecSBjoern A. Zeeb while (cl > 0xffff) 235*62b5b6ecSBjoern A. Zeeb cl = (cl >> 16) + (cl & 0xffff); 236*62b5b6ecSBjoern A. Zeeb c = cl; 237*62b5b6ecSBjoern A. Zeeb ip4->ip_sum = ~c; 238*62b5b6ecSBjoern A. Zeeb #else 239*62b5b6ecSBjoern A. Zeeb ip4->ip_sum = TCP_LRO_INVALID_CSUM; 240*62b5b6ecSBjoern A. Zeeb #endif 241*62b5b6ecSBjoern A. Zeeb ip4->ip_len = p_len; 242*62b5b6ecSBjoern A. Zeeb th = (struct tcphdr *)(ip4 + 1); 243*62b5b6ecSBjoern A. Zeeb le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID | 244*62b5b6ecSBjoern A. Zeeb CSUM_PSEUDO_HDR | CSUM_IP_CHECKED | CSUM_IP_VALID; 245*62b5b6ecSBjoern A. Zeeb le->p_len += ETHER_HDR_LEN; 246*62b5b6ecSBjoern A. Zeeb break; 247*62b5b6ecSBjoern A. Zeeb } 248*62b5b6ecSBjoern A. Zeeb #endif 249*62b5b6ecSBjoern A. Zeeb default: 250*62b5b6ecSBjoern A. Zeeb th = NULL; /* Keep compiler happy. */ 251*62b5b6ecSBjoern A. Zeeb } 252*62b5b6ecSBjoern A. Zeeb le->m_head->m_pkthdr.csum_data = 0xffff; 253*62b5b6ecSBjoern A. Zeeb le->m_head->m_pkthdr.len = le->p_len; 254*62b5b6ecSBjoern A. Zeeb 255*62b5b6ecSBjoern A. Zeeb /* Incorporate the latest ACK into the TCP header. */ 256*62b5b6ecSBjoern A. Zeeb th->th_ack = le->ack_seq; 257*62b5b6ecSBjoern A. Zeeb th->th_win = le->window; 258*62b5b6ecSBjoern A. Zeeb /* Incorporate latest timestamp into the TCP header. */ 259*62b5b6ecSBjoern A. Zeeb if (le->timestamp != 0) { 2606c5087a8SJack F Vogel uint32_t *ts_ptr; 2616c5087a8SJack F Vogel 262*62b5b6ecSBjoern A. Zeeb ts_ptr = (uint32_t *)(th + 1); 263*62b5b6ecSBjoern A. Zeeb ts_ptr[1] = htonl(le->tsval); 264*62b5b6ecSBjoern A. Zeeb ts_ptr[2] = le->tsecr; 265*62b5b6ecSBjoern A. Zeeb } 266*62b5b6ecSBjoern A. Zeeb #ifdef TCP_LRO_UPDATE_CSUM 267*62b5b6ecSBjoern A. Zeeb /* Update the TCP header checksum. */ 268*62b5b6ecSBjoern A. Zeeb le->ulp_csum += p_len; 269*62b5b6ecSBjoern A. Zeeb le->ulp_csum += tcp_lro_csum_th(th); 270*62b5b6ecSBjoern A. Zeeb while (le->ulp_csum > 0xffff) 271*62b5b6ecSBjoern A. Zeeb le->ulp_csum = (le->ulp_csum >> 16) + 272*62b5b6ecSBjoern A. Zeeb (le->ulp_csum & 0xffff); 273*62b5b6ecSBjoern A. Zeeb th->th_sum = (le->ulp_csum & 0xffff); 274*62b5b6ecSBjoern A. Zeeb th->th_sum = ~th->th_sum; 275*62b5b6ecSBjoern A. Zeeb #else 276*62b5b6ecSBjoern A. Zeeb th->th_sum = TCP_LRO_INVALID_CSUM; 277*62b5b6ecSBjoern A. Zeeb #endif 278*62b5b6ecSBjoern A. Zeeb } 2796c5087a8SJack F Vogel 280*62b5b6ecSBjoern A. Zeeb (*lc->ifp->if_input)(lc->ifp, le->m_head); 281*62b5b6ecSBjoern A. Zeeb lc->lro_queued += le->append_cnt + 1; 282*62b5b6ecSBjoern A. Zeeb lc->lro_flushed++; 283*62b5b6ecSBjoern A. Zeeb bzero(le, sizeof(*le)); 284*62b5b6ecSBjoern A. Zeeb SLIST_INSERT_HEAD(&lc->lro_free, le, next); 285*62b5b6ecSBjoern A. Zeeb } 2866c5087a8SJack F Vogel 287*62b5b6ecSBjoern A. Zeeb #ifdef INET6 288*62b5b6ecSBjoern A. Zeeb static int 289*62b5b6ecSBjoern A. Zeeb tcp_lro_rx_ipv6(struct lro_ctrl *lc, struct mbuf *m, struct ip6_hdr *ip6, 290*62b5b6ecSBjoern A. Zeeb struct tcphdr **th) 291*62b5b6ecSBjoern A. Zeeb { 2926c5087a8SJack F Vogel 293*62b5b6ecSBjoern A. Zeeb /* XXX-BZ we should check the flow-label. */ 2946c5087a8SJack F Vogel 295*62b5b6ecSBjoern A. Zeeb /* XXX-BZ We do not yet support ext. hdrs. */ 296*62b5b6ecSBjoern A. Zeeb if (ip6->ip6_nxt != IPPROTO_TCP) 297*62b5b6ecSBjoern A. Zeeb return (TCP_LRO_NOT_SUPPORTED); 298*62b5b6ecSBjoern A. Zeeb 299*62b5b6ecSBjoern A. Zeeb /* Find the TCP header. */ 300*62b5b6ecSBjoern A. Zeeb *th = (struct tcphdr *)(ip6 + 1); 301*62b5b6ecSBjoern A. Zeeb 302*62b5b6ecSBjoern A. Zeeb return (0); 303*62b5b6ecSBjoern A. Zeeb } 304*62b5b6ecSBjoern A. Zeeb #endif 305*62b5b6ecSBjoern A. Zeeb 306*62b5b6ecSBjoern A. Zeeb #ifdef INET 307*62b5b6ecSBjoern A. Zeeb static int 308*62b5b6ecSBjoern A. Zeeb tcp_lro_rx_ipv4(struct lro_ctrl *lc, struct mbuf *m, struct ip *ip4, 309*62b5b6ecSBjoern A. Zeeb struct tcphdr **th) 310*62b5b6ecSBjoern A. Zeeb { 311*62b5b6ecSBjoern A. Zeeb int csum_flags; 312*62b5b6ecSBjoern A. Zeeb uint16_t csum; 313*62b5b6ecSBjoern A. Zeeb 314*62b5b6ecSBjoern A. Zeeb if (ip4->ip_p != IPPROTO_TCP) 315*62b5b6ecSBjoern A. Zeeb return (TCP_LRO_NOT_SUPPORTED); 316*62b5b6ecSBjoern A. Zeeb 317*62b5b6ecSBjoern A. Zeeb /* Ensure there are no options. */ 318*62b5b6ecSBjoern A. Zeeb if ((ip4->ip_hl << 2) != sizeof (*ip4)) 319*62b5b6ecSBjoern A. Zeeb return (TCP_LRO_CANNOT); 320*62b5b6ecSBjoern A. Zeeb 321*62b5b6ecSBjoern A. Zeeb /* .. and the packet is not fragmented. */ 322*62b5b6ecSBjoern A. Zeeb if (ip4->ip_off & htons(IP_MF|IP_OFFMASK)) 323*62b5b6ecSBjoern A. Zeeb return (TCP_LRO_CANNOT); 324*62b5b6ecSBjoern A. Zeeb 325*62b5b6ecSBjoern A. Zeeb /* Legacy IP has a header checksum that needs to be correct. */ 326*62b5b6ecSBjoern A. Zeeb csum_flags = m->m_pkthdr.csum_flags; 32745709593SKip Macy if (csum_flags & CSUM_IP_CHECKED) { 32845709593SKip Macy if (__predict_false((csum_flags & CSUM_IP_VALID) == 0)) { 329*62b5b6ecSBjoern A. Zeeb lc->lro_bad_csum++; 330*62b5b6ecSBjoern A. Zeeb return (TCP_LRO_CANNOT); 33145709593SKip Macy } 33245709593SKip Macy } else { 333*62b5b6ecSBjoern A. Zeeb csum = in_cksum_hdr(ip4); 334*62b5b6ecSBjoern A. Zeeb if (__predict_false((csum ^ 0xffff) != 0)) { 335*62b5b6ecSBjoern A. Zeeb lc->lro_bad_csum++; 336*62b5b6ecSBjoern A. Zeeb return (TCP_LRO_CANNOT); 3376c5087a8SJack F Vogel } 33845709593SKip Macy } 3396c5087a8SJack F Vogel 340*62b5b6ecSBjoern A. Zeeb /* Find the TCP header (we assured there are no IP options). */ 341*62b5b6ecSBjoern A. Zeeb *th = (struct tcphdr *)(ip4 + 1); 3426c5087a8SJack F Vogel 343*62b5b6ecSBjoern A. Zeeb return (0); 3446c5087a8SJack F Vogel } 345*62b5b6ecSBjoern A. Zeeb #endif 3466c5087a8SJack F Vogel 347*62b5b6ecSBjoern A. Zeeb int 348*62b5b6ecSBjoern A. Zeeb tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum) 349*62b5b6ecSBjoern A. Zeeb { 350*62b5b6ecSBjoern A. Zeeb struct lro_entry *le; 351*62b5b6ecSBjoern A. Zeeb struct ether_header *eh; 352*62b5b6ecSBjoern A. Zeeb #ifdef INET6 353*62b5b6ecSBjoern A. Zeeb struct ip6_hdr *ip6 = NULL; /* Keep compiler happy. */ 354*62b5b6ecSBjoern A. Zeeb #endif 355*62b5b6ecSBjoern A. Zeeb #ifdef INET 356*62b5b6ecSBjoern A. Zeeb struct ip *ip4 = NULL; /* Keep compiler happy. */ 357*62b5b6ecSBjoern A. Zeeb #endif 358*62b5b6ecSBjoern A. Zeeb struct tcphdr *th; 359*62b5b6ecSBjoern A. Zeeb void *l3hdr = NULL; /* Keep compiler happy. */ 360*62b5b6ecSBjoern A. Zeeb uint32_t *ts_ptr; 361*62b5b6ecSBjoern A. Zeeb tcp_seq seq; 362*62b5b6ecSBjoern A. Zeeb int error, ip_len, l; 363*62b5b6ecSBjoern A. Zeeb uint16_t eh_type, tcp_data_len; 3646c5087a8SJack F Vogel 365*62b5b6ecSBjoern A. Zeeb /* We expect a contiguous header [eh, ip, tcp]. */ 366*62b5b6ecSBjoern A. Zeeb 367*62b5b6ecSBjoern A. Zeeb eh = mtod(m, struct ether_header *); 368*62b5b6ecSBjoern A. Zeeb eh_type = ntohs(eh->ether_type); 369*62b5b6ecSBjoern A. Zeeb switch (eh_type) { 370*62b5b6ecSBjoern A. Zeeb #ifdef INET6 371*62b5b6ecSBjoern A. Zeeb case ETHERTYPE_IPV6: 372*62b5b6ecSBjoern A. Zeeb l3hdr = ip6 = (struct ip6_hdr *)(eh + 1); 373*62b5b6ecSBjoern A. Zeeb error = tcp_lro_rx_ipv6(lc, m, ip6, &th); 374*62b5b6ecSBjoern A. Zeeb if (error != 0) 375*62b5b6ecSBjoern A. Zeeb return (error); 376*62b5b6ecSBjoern A. Zeeb tcp_data_len = ntohs(ip6->ip6_plen); 377*62b5b6ecSBjoern A. Zeeb ip_len = sizeof(*ip6) + tcp_data_len; 378*62b5b6ecSBjoern A. Zeeb break; 379*62b5b6ecSBjoern A. Zeeb #endif 380*62b5b6ecSBjoern A. Zeeb #ifdef INET 381*62b5b6ecSBjoern A. Zeeb case ETHERTYPE_IP: 382*62b5b6ecSBjoern A. Zeeb l3hdr = ip4 = (struct ip *)(eh + 1); 383*62b5b6ecSBjoern A. Zeeb error = tcp_lro_rx_ipv4(lc, m, ip4, &th); 384*62b5b6ecSBjoern A. Zeeb if (error != 0) 385*62b5b6ecSBjoern A. Zeeb return (error); 386*62b5b6ecSBjoern A. Zeeb ip_len = ntohs(ip4->ip_len); 387*62b5b6ecSBjoern A. Zeeb tcp_data_len = ip_len - sizeof(*ip4); 388*62b5b6ecSBjoern A. Zeeb break; 389*62b5b6ecSBjoern A. Zeeb #endif 390*62b5b6ecSBjoern A. Zeeb /* XXX-BZ what happens in case of VLAN(s)? */ 391*62b5b6ecSBjoern A. Zeeb default: 392*62b5b6ecSBjoern A. Zeeb return (TCP_LRO_NOT_SUPPORTED); 393*62b5b6ecSBjoern A. Zeeb } 3946c5087a8SJack F Vogel 3956c5087a8SJack F Vogel /* 396*62b5b6ecSBjoern A. Zeeb * If the frame is padded beyond the end of the IP packet, then we must 397*62b5b6ecSBjoern A. Zeeb * trim the extra bytes off. 3986c5087a8SJack F Vogel */ 399*62b5b6ecSBjoern A. Zeeb l = m->m_pkthdr.len - (ETHER_HDR_LEN + ip_len); 400*62b5b6ecSBjoern A. Zeeb if (l != 0) { 401*62b5b6ecSBjoern A. Zeeb if (l < 0) 402*62b5b6ecSBjoern A. Zeeb /* Truncated packet. */ 403*62b5b6ecSBjoern A. Zeeb return (TCP_LRO_CANNOT); 404*62b5b6ecSBjoern A. Zeeb 405*62b5b6ecSBjoern A. Zeeb m_adj(m, -l); 4066c5087a8SJack F Vogel } 4076c5087a8SJack F Vogel 408*62b5b6ecSBjoern A. Zeeb /* 409*62b5b6ecSBjoern A. Zeeb * Check TCP header constraints. 410*62b5b6ecSBjoern A. Zeeb */ 411*62b5b6ecSBjoern A. Zeeb /* Ensure no bits set besides ACK or PSH. */ 412*62b5b6ecSBjoern A. Zeeb if ((th->th_flags & ~(TH_ACK | TH_PUSH)) != 0) 413*62b5b6ecSBjoern A. Zeeb return (TCP_LRO_CANNOT); 414*62b5b6ecSBjoern A. Zeeb 415*62b5b6ecSBjoern A. Zeeb /* XXX-BZ We lose a AKC|PUSH flag concatinating multiple segments. */ 416*62b5b6ecSBjoern A. Zeeb /* XXX-BZ Ideally we'd flush on PUSH? */ 417*62b5b6ecSBjoern A. Zeeb 418*62b5b6ecSBjoern A. Zeeb /* 419*62b5b6ecSBjoern A. Zeeb * Check for timestamps. 420*62b5b6ecSBjoern A. Zeeb * Since the only option we handle are timestamps, we only have to 421*62b5b6ecSBjoern A. Zeeb * handle the simple case of aligned timestamps. 422*62b5b6ecSBjoern A. Zeeb */ 423*62b5b6ecSBjoern A. Zeeb l = (th->th_off << 2); 424*62b5b6ecSBjoern A. Zeeb tcp_data_len -= l; 425*62b5b6ecSBjoern A. Zeeb l -= sizeof(*th); 426*62b5b6ecSBjoern A. Zeeb ts_ptr = (uint32_t *)(th + 1); 427*62b5b6ecSBjoern A. Zeeb if (l != 0 && (__predict_false(l != TCPOLEN_TSTAMP_APPA) || 428*62b5b6ecSBjoern A. Zeeb (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16| 429*62b5b6ecSBjoern A. Zeeb TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))) 430*62b5b6ecSBjoern A. Zeeb return (TCP_LRO_CANNOT); 431*62b5b6ecSBjoern A. Zeeb 432*62b5b6ecSBjoern A. Zeeb /* If the driver did not pass in the checksum, set it now. */ 433*62b5b6ecSBjoern A. Zeeb if (csum == 0x0000) 434*62b5b6ecSBjoern A. Zeeb csum = th->th_sum; 435*62b5b6ecSBjoern A. Zeeb 436*62b5b6ecSBjoern A. Zeeb seq = ntohl(th->th_seq); 437*62b5b6ecSBjoern A. Zeeb 438*62b5b6ecSBjoern A. Zeeb /* Try to find a matching previous segment. */ 439*62b5b6ecSBjoern A. Zeeb SLIST_FOREACH(le, &lc->lro_active, next) { 440*62b5b6ecSBjoern A. Zeeb if (le->eh_type != eh_type) 441*62b5b6ecSBjoern A. Zeeb continue; 442*62b5b6ecSBjoern A. Zeeb if (le->source_port != th->th_sport || 443*62b5b6ecSBjoern A. Zeeb le->dest_port != th->th_dport) 444*62b5b6ecSBjoern A. Zeeb continue; 445*62b5b6ecSBjoern A. Zeeb switch (eh_type) { 446*62b5b6ecSBjoern A. Zeeb #ifdef INET6 447*62b5b6ecSBjoern A. Zeeb case ETHERTYPE_IPV6: 448*62b5b6ecSBjoern A. Zeeb if (bcmp(&le->source_ip6, &ip6->ip6_src, 449*62b5b6ecSBjoern A. Zeeb sizeof(struct in6_addr)) != 0 || 450*62b5b6ecSBjoern A. Zeeb bcmp(&le->dest_ip6, &ip6->ip6_dst, 451*62b5b6ecSBjoern A. Zeeb sizeof(struct in6_addr)) != 0) 452*62b5b6ecSBjoern A. Zeeb continue; 453*62b5b6ecSBjoern A. Zeeb break; 454*62b5b6ecSBjoern A. Zeeb #endif 455*62b5b6ecSBjoern A. Zeeb #ifdef INET 456*62b5b6ecSBjoern A. Zeeb case ETHERTYPE_IP: 457*62b5b6ecSBjoern A. Zeeb if (le->source_ip4 != ip4->ip_src.s_addr || 458*62b5b6ecSBjoern A. Zeeb le->dest_ip4 != ip4->ip_dst.s_addr) 459*62b5b6ecSBjoern A. Zeeb continue; 460*62b5b6ecSBjoern A. Zeeb break; 461*62b5b6ecSBjoern A. Zeeb #endif 4626c5087a8SJack F Vogel } 4636c5087a8SJack F Vogel 464ca712262SColin Percival /* Flush now if appending will result in overflow. */ 465*62b5b6ecSBjoern A. Zeeb if (le->p_len > (65535 - tcp_data_len)) { 466*62b5b6ecSBjoern A. Zeeb SLIST_REMOVE(&lc->lro_active, le, lro_entry, next); 467*62b5b6ecSBjoern A. Zeeb tcp_lro_flush(lc, le); 468ca712262SColin Percival break; 469ca712262SColin Percival } 470ca712262SColin Percival 471*62b5b6ecSBjoern A. Zeeb /* Try to append the new segment. */ 472*62b5b6ecSBjoern A. Zeeb if (__predict_false(seq != le->next_seq || 473*62b5b6ecSBjoern A. Zeeb (tcp_data_len == 0 && le->ack_seq == th->th_ack))) { 474*62b5b6ecSBjoern A. Zeeb /* Out of order packet or duplicate ACK. */ 475*62b5b6ecSBjoern A. Zeeb SLIST_REMOVE(&lc->lro_active, le, lro_entry, next); 476*62b5b6ecSBjoern A. Zeeb tcp_lro_flush(lc, le); 477*62b5b6ecSBjoern A. Zeeb return (TCP_LRO_CANNOT); 4786c5087a8SJack F Vogel } 4796c5087a8SJack F Vogel 480*62b5b6ecSBjoern A. Zeeb if (l != 0) { 4816c5087a8SJack F Vogel uint32_t tsval = ntohl(*(ts_ptr + 1)); 482*62b5b6ecSBjoern A. Zeeb /* Make sure timestamp values are increasing. */ 483*62b5b6ecSBjoern A. Zeeb /* XXX-BZ flip and use TSTMP_GEQ macro for this? */ 484*62b5b6ecSBjoern A. Zeeb if (__predict_false(le->tsval > tsval || 485*62b5b6ecSBjoern A. Zeeb *(ts_ptr + 2) == 0)) 486*62b5b6ecSBjoern A. Zeeb return (TCP_LRO_CANNOT); 487*62b5b6ecSBjoern A. Zeeb le->tsval = tsval; 488*62b5b6ecSBjoern A. Zeeb le->tsecr = *(ts_ptr + 2); 4896c5087a8SJack F Vogel } 4906c5087a8SJack F Vogel 491*62b5b6ecSBjoern A. Zeeb le->next_seq += tcp_data_len; 492*62b5b6ecSBjoern A. Zeeb le->ack_seq = th->th_ack; 493*62b5b6ecSBjoern A. Zeeb le->window = th->th_win; 494*62b5b6ecSBjoern A. Zeeb le->append_cnt++; 495*62b5b6ecSBjoern A. Zeeb 496*62b5b6ecSBjoern A. Zeeb #ifdef TCP_LRO_UPDATE_CSUM 497*62b5b6ecSBjoern A. Zeeb le->ulp_csum += tcp_lro_rx_csum_fixup(le, l3hdr, th, 498*62b5b6ecSBjoern A. Zeeb tcp_data_len, ~csum); 499*62b5b6ecSBjoern A. Zeeb #endif 500*62b5b6ecSBjoern A. Zeeb 5016c5087a8SJack F Vogel if (tcp_data_len == 0) { 502*62b5b6ecSBjoern A. Zeeb m_freem(m); 503*62b5b6ecSBjoern A. Zeeb return (0); 5046c5087a8SJack F Vogel } 505*62b5b6ecSBjoern A. Zeeb 506*62b5b6ecSBjoern A. Zeeb le->p_len += tcp_data_len; 507*62b5b6ecSBjoern A. Zeeb 508*62b5b6ecSBjoern A. Zeeb /* 509*62b5b6ecSBjoern A. Zeeb * Adjust the mbuf so that m_data points to the first byte of 510*62b5b6ecSBjoern A. Zeeb * the ULP payload. Adjust the mbuf to avoid complications and 511*62b5b6ecSBjoern A. Zeeb * append new segment to existing mbuf chain. 5126c5087a8SJack F Vogel */ 513*62b5b6ecSBjoern A. Zeeb m_adj(m, m->m_pkthdr.len - tcp_data_len); 514*62b5b6ecSBjoern A. Zeeb m->m_flags &= ~M_PKTHDR; 5156c5087a8SJack F Vogel 516*62b5b6ecSBjoern A. Zeeb le->m_tail->m_next = m; 517*62b5b6ecSBjoern A. Zeeb le->m_tail = m_last(m); 5186c5087a8SJack F Vogel 519*62b5b6ecSBjoern A. Zeeb /* 520*62b5b6ecSBjoern A. Zeeb * If a possible next full length packet would cause an 521*62b5b6ecSBjoern A. Zeeb * overflow, pro-actively flush now. 5226c5087a8SJack F Vogel */ 523*62b5b6ecSBjoern A. Zeeb if (le->p_len > (65535 - lc->ifp->if_mtu)) { 524*62b5b6ecSBjoern A. Zeeb SLIST_REMOVE(&lc->lro_active, le, lro_entry, next); 525*62b5b6ecSBjoern A. Zeeb tcp_lro_flush(lc, le); 526*62b5b6ecSBjoern A. Zeeb } 5276c5087a8SJack F Vogel 528*62b5b6ecSBjoern A. Zeeb return (0); 5296c5087a8SJack F Vogel } 530*62b5b6ecSBjoern A. Zeeb 531*62b5b6ecSBjoern A. Zeeb /* Try to find an empty slot. */ 532*62b5b6ecSBjoern A. Zeeb if (SLIST_EMPTY(&lc->lro_free)) 533*62b5b6ecSBjoern A. Zeeb return (TCP_LRO_CANNOT); 534*62b5b6ecSBjoern A. Zeeb 535*62b5b6ecSBjoern A. Zeeb /* Start a new segment chain. */ 536*62b5b6ecSBjoern A. Zeeb le = SLIST_FIRST(&lc->lro_free); 537*62b5b6ecSBjoern A. Zeeb SLIST_REMOVE_HEAD(&lc->lro_free, next); 538*62b5b6ecSBjoern A. Zeeb SLIST_INSERT_HEAD(&lc->lro_active, le, next); 539*62b5b6ecSBjoern A. Zeeb 540*62b5b6ecSBjoern A. Zeeb /* Start filling in details. */ 541*62b5b6ecSBjoern A. Zeeb switch (eh_type) { 542*62b5b6ecSBjoern A. Zeeb #ifdef INET6 543*62b5b6ecSBjoern A. Zeeb case ETHERTYPE_IPV6: 544*62b5b6ecSBjoern A. Zeeb le->le_ip6 = ip6; 545*62b5b6ecSBjoern A. Zeeb le->source_ip6 = ip6->ip6_src; 546*62b5b6ecSBjoern A. Zeeb le->dest_ip6 = ip6->ip6_dst; 547*62b5b6ecSBjoern A. Zeeb le->eh_type = eh_type; 548*62b5b6ecSBjoern A. Zeeb le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN - sizeof(*ip6); 549*62b5b6ecSBjoern A. Zeeb break; 550*62b5b6ecSBjoern A. Zeeb #endif 551*62b5b6ecSBjoern A. Zeeb #ifdef INET 552*62b5b6ecSBjoern A. Zeeb case ETHERTYPE_IP: 553*62b5b6ecSBjoern A. Zeeb le->le_ip4 = ip4; 554*62b5b6ecSBjoern A. Zeeb le->source_ip4 = ip4->ip_src.s_addr; 555*62b5b6ecSBjoern A. Zeeb le->dest_ip4 = ip4->ip_dst.s_addr; 556*62b5b6ecSBjoern A. Zeeb le->eh_type = eh_type; 557*62b5b6ecSBjoern A. Zeeb le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN; 558*62b5b6ecSBjoern A. Zeeb break; 559*62b5b6ecSBjoern A. Zeeb #endif 5606c5087a8SJack F Vogel } 561*62b5b6ecSBjoern A. Zeeb le->source_port = th->th_sport; 562*62b5b6ecSBjoern A. Zeeb le->dest_port = th->th_dport; 563*62b5b6ecSBjoern A. Zeeb 564*62b5b6ecSBjoern A. Zeeb le->next_seq = seq + tcp_data_len; 565*62b5b6ecSBjoern A. Zeeb le->ack_seq = th->th_ack; 566*62b5b6ecSBjoern A. Zeeb le->window = th->th_win; 567*62b5b6ecSBjoern A. Zeeb if (l != 0) { 568*62b5b6ecSBjoern A. Zeeb le->timestamp = 1; 569*62b5b6ecSBjoern A. Zeeb le->tsval = ntohl(*(ts_ptr + 1)); 570*62b5b6ecSBjoern A. Zeeb le->tsecr = *(ts_ptr + 2); 571*62b5b6ecSBjoern A. Zeeb } 572*62b5b6ecSBjoern A. Zeeb 573*62b5b6ecSBjoern A. Zeeb #ifdef TCP_LRO_UPDATE_CSUM 574*62b5b6ecSBjoern A. Zeeb /* 575*62b5b6ecSBjoern A. Zeeb * Do not touch the csum of the first packet. However save the 576*62b5b6ecSBjoern A. Zeeb * "adjusted" checksum of just the source and destination addresses, 577*62b5b6ecSBjoern A. Zeeb * the next header and the TCP payload. The length and TCP header 578*62b5b6ecSBjoern A. Zeeb * parts may change, so we remove those from the saved checksum and 579*62b5b6ecSBjoern A. Zeeb * re-add with final values on tcp_lro_flush() if needed. 580*62b5b6ecSBjoern A. Zeeb */ 581*62b5b6ecSBjoern A. Zeeb KASSERT(le->ulp_csum == 0, ("%s: le=%p le->ulp_csum=0x%04x\n", 582*62b5b6ecSBjoern A. Zeeb __func__, le, le->ulp_csum)); 583*62b5b6ecSBjoern A. Zeeb 584*62b5b6ecSBjoern A. Zeeb le->ulp_csum = tcp_lro_rx_csum_fixup(le, l3hdr, th, tcp_data_len, 585*62b5b6ecSBjoern A. Zeeb ~csum); 586*62b5b6ecSBjoern A. Zeeb th->th_sum = csum; /* Restore checksum on first packet. */ 587*62b5b6ecSBjoern A. Zeeb #endif 588*62b5b6ecSBjoern A. Zeeb 589*62b5b6ecSBjoern A. Zeeb le->m_head = m; 590*62b5b6ecSBjoern A. Zeeb le->m_tail = m_last(m); 591*62b5b6ecSBjoern A. Zeeb 592*62b5b6ecSBjoern A. Zeeb return (0); 593*62b5b6ecSBjoern A. Zeeb } 594*62b5b6ecSBjoern A. Zeeb 595*62b5b6ecSBjoern A. Zeeb /* end */ 596