xref: /freebsd/sys/netinet/tcp_lro.c (revision 7ae3d4bf547fbeb9dda13fb8136ad92ae909ae21)
127f190a3SBjoern A. Zeeb /*-
227f190a3SBjoern A. Zeeb  * Copyright (c) 2007, Myricom Inc.
327f190a3SBjoern A. Zeeb  * Copyright (c) 2008, Intel Corporation.
462b5b6ecSBjoern A. Zeeb  * Copyright (c) 2012 The FreeBSD Foundation
5e936121dSHans Petter Selasky  * Copyright (c) 2016 Mellanox Technologies.
627f190a3SBjoern A. Zeeb  * All rights reserved.
727f190a3SBjoern A. Zeeb  *
862b5b6ecSBjoern A. Zeeb  * Portions of this software were developed by Bjoern Zeeb
962b5b6ecSBjoern A. Zeeb  * under sponsorship from the FreeBSD Foundation.
1062b5b6ecSBjoern A. Zeeb  *
1127f190a3SBjoern A. Zeeb  * Redistribution and use in source and binary forms, with or without
1227f190a3SBjoern A. Zeeb  * modification, are permitted provided that the following conditions
1327f190a3SBjoern A. Zeeb  * are met:
1427f190a3SBjoern A. Zeeb  * 1. Redistributions of source code must retain the above copyright
1527f190a3SBjoern A. Zeeb  *    notice, this list of conditions and the following disclaimer.
1627f190a3SBjoern A. Zeeb  * 2. Redistributions in binary form must reproduce the above copyright
1727f190a3SBjoern A. Zeeb  *    notice, this list of conditions and the following disclaimer in the
1827f190a3SBjoern A. Zeeb  *    documentation and/or other materials provided with the distribution.
1927f190a3SBjoern A. Zeeb  *
2027f190a3SBjoern A. Zeeb  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
2127f190a3SBjoern A. Zeeb  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2227f190a3SBjoern A. Zeeb  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2327f190a3SBjoern A. Zeeb  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
2427f190a3SBjoern A. Zeeb  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2527f190a3SBjoern A. Zeeb  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2627f190a3SBjoern A. Zeeb  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2727f190a3SBjoern A. Zeeb  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2827f190a3SBjoern A. Zeeb  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2927f190a3SBjoern A. Zeeb  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3027f190a3SBjoern A. Zeeb  * SUCH DAMAGE.
3127f190a3SBjoern A. Zeeb  */
326c5087a8SJack F Vogel 
3362b5b6ecSBjoern A. Zeeb #include <sys/cdefs.h>
3462b5b6ecSBjoern A. Zeeb __FBSDID("$FreeBSD$");
3562b5b6ecSBjoern A. Zeeb 
3662b5b6ecSBjoern A. Zeeb #include "opt_inet.h"
3762b5b6ecSBjoern A. Zeeb #include "opt_inet6.h"
3862b5b6ecSBjoern A. Zeeb 
396c5087a8SJack F Vogel #include <sys/param.h>
406c5087a8SJack F Vogel #include <sys/systm.h>
416c5087a8SJack F Vogel #include <sys/kernel.h>
428ec07310SGleb Smirnoff #include <sys/malloc.h>
438ec07310SGleb Smirnoff #include <sys/mbuf.h>
446c5087a8SJack F Vogel #include <sys/socket.h>
456c5087a8SJack F Vogel 
466c5087a8SJack F Vogel #include <net/if.h>
4762b5b6ecSBjoern A. Zeeb #include <net/if_var.h>
486c5087a8SJack F Vogel #include <net/ethernet.h>
495fa2656eSBjoern A. Zeeb #include <net/vnet.h>
506c5087a8SJack F Vogel 
516c5087a8SJack F Vogel #include <netinet/in_systm.h>
526c5087a8SJack F Vogel #include <netinet/in.h>
5362b5b6ecSBjoern A. Zeeb #include <netinet/ip6.h>
546c5087a8SJack F Vogel #include <netinet/ip.h>
5531bfc56eSBjoern A. Zeeb #include <netinet/ip_var.h>
566c5087a8SJack F Vogel #include <netinet/tcp.h>
576c5087a8SJack F Vogel #include <netinet/tcp_lro.h>
586c5087a8SJack F Vogel 
5931bfc56eSBjoern A. Zeeb #include <netinet6/ip6_var.h>
6031bfc56eSBjoern A. Zeeb 
616c5087a8SJack F Vogel #include <machine/in_cksum.h>
626c5087a8SJack F Vogel 
/*
 * Malloc type used for the single allocation made by tcp_lro_init_args()
 * and released by tcp_lro_free().
 */
63e936121dSHans Petter Selasky static MALLOC_DEFINE(M_LRO, "LRO", "LRO control structures");
646c5087a8SJack F Vogel 
/*
 * TCP_LRO_UPDATE_CSUM selects the code paths that recompute the IPv4 header
 * and TCP checksums when an aggregated packet is flushed.  It is defined
 * unconditionally here, so the #ifndef branch below is not taken as written;
 * TCP_LRO_INVALID_CSUM only comes into existence if the define above is
 * removed, in which case the checksum fields are filled with that constant
 * instead of being recomputed.
 */
6562b5b6ecSBjoern A. Zeeb #define	TCP_LRO_UPDATE_CSUM	1
6662b5b6ecSBjoern A. Zeeb #ifndef	TCP_LRO_UPDATE_CSUM
6762b5b6ecSBjoern A. Zeeb #define	TCP_LRO_INVALID_CSUM	0x0000
6862b5b6ecSBjoern A. Zeeb #endif
696c5087a8SJack F Vogel 
706c5087a8SJack F Vogel int
7162b5b6ecSBjoern A. Zeeb tcp_lro_init(struct lro_ctrl *lc)
726c5087a8SJack F Vogel {
73e936121dSHans Petter Selasky 	return (tcp_lro_init_args(lc, NULL, TCP_LRO_ENTRIES, 0));
74e936121dSHans Petter Selasky }
75e936121dSHans Petter Selasky 
76e936121dSHans Petter Selasky int
77e936121dSHans Petter Selasky tcp_lro_init_args(struct lro_ctrl *lc, struct ifnet *ifp,
78e936121dSHans Petter Selasky     unsigned lro_entries, unsigned lro_mbufs)
79e936121dSHans Petter Selasky {
8062b5b6ecSBjoern A. Zeeb 	struct lro_entry *le;
81e936121dSHans Petter Selasky 	size_t size;
82e936121dSHans Petter Selasky 	unsigned i;
836c5087a8SJack F Vogel 
8462b5b6ecSBjoern A. Zeeb 	lc->lro_bad_csum = 0;
8562b5b6ecSBjoern A. Zeeb 	lc->lro_queued = 0;
8662b5b6ecSBjoern A. Zeeb 	lc->lro_flushed = 0;
8762b5b6ecSBjoern A. Zeeb 	lc->lro_cnt = 0;
88e936121dSHans Petter Selasky 	lc->lro_mbuf_count = 0;
89e936121dSHans Petter Selasky 	lc->lro_mbuf_max = lro_mbufs;
90e936121dSHans Petter Selasky 	lc->lro_cnt = lro_entries;
91*7ae3d4bfSSepherosa Ziehau 	lc->lro_ackcnt_lim = TCP_LRO_ACKCNT_MAX;
92*7ae3d4bfSSepherosa Ziehau 	lc->lro_length_lim = TCP_LRO_LENGTH_MAX;
93e936121dSHans Petter Selasky 	lc->ifp = ifp;
9462b5b6ecSBjoern A. Zeeb 	SLIST_INIT(&lc->lro_free);
9562b5b6ecSBjoern A. Zeeb 	SLIST_INIT(&lc->lro_active);
966c5087a8SJack F Vogel 
97e936121dSHans Petter Selasky 	/* compute size to allocate */
98e936121dSHans Petter Selasky 	size = (lro_mbufs * sizeof(struct mbuf *)) +
99e936121dSHans Petter Selasky 	    (lro_entries * sizeof(*le));
100e936121dSHans Petter Selasky 	lc->lro_mbuf_data = (struct mbuf **)
101e936121dSHans Petter Selasky 	    malloc(size, M_LRO, M_NOWAIT | M_ZERO);
1026c5087a8SJack F Vogel 
103e936121dSHans Petter Selasky 	/* check for out of memory */
104e936121dSHans Petter Selasky 	if (lc->lro_mbuf_data == NULL) {
105e936121dSHans Petter Selasky 		memset(lc, 0, sizeof(*lc));
106e936121dSHans Petter Selasky 		return (ENOMEM);
107e936121dSHans Petter Selasky 	}
108e936121dSHans Petter Selasky 	/* compute offset for LRO entries */
109e936121dSHans Petter Selasky 	le = (struct lro_entry *)
110e936121dSHans Petter Selasky 	    (lc->lro_mbuf_data + lro_mbufs);
111e936121dSHans Petter Selasky 
112e936121dSHans Petter Selasky 	/* setup linked list */
113e936121dSHans Petter Selasky 	for (i = 0; i != lro_entries; i++)
114e936121dSHans Petter Selasky 		SLIST_INSERT_HEAD(&lc->lro_free, le + i, next);
115e936121dSHans Petter Selasky 
116e936121dSHans Petter Selasky 	return (0);
1176c5087a8SJack F Vogel }
1186c5087a8SJack F Vogel 
1196c5087a8SJack F Vogel void
12062b5b6ecSBjoern A. Zeeb tcp_lro_free(struct lro_ctrl *lc)
1216c5087a8SJack F Vogel {
12262b5b6ecSBjoern A. Zeeb 	struct lro_entry *le;
123e936121dSHans Petter Selasky 	unsigned x;
1246c5087a8SJack F Vogel 
125e936121dSHans Petter Selasky 	/* reset LRO free list */
126e936121dSHans Petter Selasky 	SLIST_INIT(&lc->lro_free);
127e936121dSHans Petter Selasky 
128e936121dSHans Petter Selasky 	/* free active mbufs, if any */
129e936121dSHans Petter Selasky 	while ((le = SLIST_FIRST(&lc->lro_active)) != NULL) {
130e936121dSHans Petter Selasky 		SLIST_REMOVE_HEAD(&lc->lro_active, next);
131e936121dSHans Petter Selasky 		m_freem(le->m_head);
1326c5087a8SJack F Vogel 	}
133e936121dSHans Petter Selasky 
134e936121dSHans Petter Selasky 	/* free mbuf array, if any */
135e936121dSHans Petter Selasky 	for (x = 0; x != lc->lro_mbuf_count; x++)
136e936121dSHans Petter Selasky 		m_freem(lc->lro_mbuf_data[x]);
137e936121dSHans Petter Selasky 	lc->lro_mbuf_count = 0;
138e936121dSHans Petter Selasky 
139e936121dSHans Petter Selasky 	/* free allocated memory, if any */
140e936121dSHans Petter Selasky 	free(lc->lro_mbuf_data, M_LRO);
141e936121dSHans Petter Selasky 	lc->lro_mbuf_data = NULL;
1426c5087a8SJack F Vogel }
1436c5087a8SJack F Vogel 
14462b5b6ecSBjoern A. Zeeb #ifdef TCP_LRO_UPDATE_CSUM
14562b5b6ecSBjoern A. Zeeb static uint16_t
14662b5b6ecSBjoern A. Zeeb tcp_lro_csum_th(struct tcphdr *th)
14762b5b6ecSBjoern A. Zeeb {
14862b5b6ecSBjoern A. Zeeb 	uint32_t ch;
14962b5b6ecSBjoern A. Zeeb 	uint16_t *p, l;
15062b5b6ecSBjoern A. Zeeb 
15162b5b6ecSBjoern A. Zeeb 	ch = th->th_sum = 0x0000;
15262b5b6ecSBjoern A. Zeeb 	l = th->th_off;
15362b5b6ecSBjoern A. Zeeb 	p = (uint16_t *)th;
15462b5b6ecSBjoern A. Zeeb 	while (l > 0) {
15562b5b6ecSBjoern A. Zeeb 		ch += *p;
15662b5b6ecSBjoern A. Zeeb 		p++;
15762b5b6ecSBjoern A. Zeeb 		ch += *p;
15862b5b6ecSBjoern A. Zeeb 		p++;
15962b5b6ecSBjoern A. Zeeb 		l--;
16062b5b6ecSBjoern A. Zeeb 	}
16162b5b6ecSBjoern A. Zeeb 	while (ch > 0xffff)
16262b5b6ecSBjoern A. Zeeb 		ch = (ch >> 16) + (ch & 0xffff);
16362b5b6ecSBjoern A. Zeeb 
16462b5b6ecSBjoern A. Zeeb 	return (ch & 0xffff);
16562b5b6ecSBjoern A. Zeeb }
16662b5b6ecSBjoern A. Zeeb 
16762b5b6ecSBjoern A. Zeeb static uint16_t
16862b5b6ecSBjoern A. Zeeb tcp_lro_rx_csum_fixup(struct lro_entry *le, void *l3hdr, struct tcphdr *th,
16962b5b6ecSBjoern A. Zeeb     uint16_t tcp_data_len, uint16_t csum)
17062b5b6ecSBjoern A. Zeeb {
17162b5b6ecSBjoern A. Zeeb 	uint32_t c;
17262b5b6ecSBjoern A. Zeeb 	uint16_t cs;
17362b5b6ecSBjoern A. Zeeb 
17462b5b6ecSBjoern A. Zeeb 	c = csum;
17562b5b6ecSBjoern A. Zeeb 
17662b5b6ecSBjoern A. Zeeb 	/* Remove length from checksum. */
17762b5b6ecSBjoern A. Zeeb 	switch (le->eh_type) {
17862b5b6ecSBjoern A. Zeeb #ifdef INET6
17962b5b6ecSBjoern A. Zeeb 	case ETHERTYPE_IPV6:
18062b5b6ecSBjoern A. Zeeb 	{
18162b5b6ecSBjoern A. Zeeb 		struct ip6_hdr *ip6;
18262b5b6ecSBjoern A. Zeeb 
18362b5b6ecSBjoern A. Zeeb 		ip6 = (struct ip6_hdr *)l3hdr;
18462b5b6ecSBjoern A. Zeeb 		if (le->append_cnt == 0)
18562b5b6ecSBjoern A. Zeeb 			cs = ip6->ip6_plen;
18662b5b6ecSBjoern A. Zeeb 		else {
18762b5b6ecSBjoern A. Zeeb 			uint32_t cx;
18862b5b6ecSBjoern A. Zeeb 
18962b5b6ecSBjoern A. Zeeb 			cx = ntohs(ip6->ip6_plen);
19062b5b6ecSBjoern A. Zeeb 			cs = in6_cksum_pseudo(ip6, cx, ip6->ip6_nxt, 0);
19162b5b6ecSBjoern A. Zeeb 		}
19262b5b6ecSBjoern A. Zeeb 		break;
19362b5b6ecSBjoern A. Zeeb 	}
19462b5b6ecSBjoern A. Zeeb #endif
19562b5b6ecSBjoern A. Zeeb #ifdef INET
19662b5b6ecSBjoern A. Zeeb 	case ETHERTYPE_IP:
19762b5b6ecSBjoern A. Zeeb 	{
19862b5b6ecSBjoern A. Zeeb 		struct ip *ip4;
19962b5b6ecSBjoern A. Zeeb 
20062b5b6ecSBjoern A. Zeeb 		ip4 = (struct ip *)l3hdr;
20162b5b6ecSBjoern A. Zeeb 		if (le->append_cnt == 0)
20262b5b6ecSBjoern A. Zeeb 			cs = ip4->ip_len;
20362b5b6ecSBjoern A. Zeeb 		else {
20462b5b6ecSBjoern A. Zeeb 			cs = in_addword(ntohs(ip4->ip_len) - sizeof(*ip4),
20562b5b6ecSBjoern A. Zeeb 			    IPPROTO_TCP);
20662b5b6ecSBjoern A. Zeeb 			cs = in_pseudo(ip4->ip_src.s_addr, ip4->ip_dst.s_addr,
20762b5b6ecSBjoern A. Zeeb 			    htons(cs));
20862b5b6ecSBjoern A. Zeeb 		}
20962b5b6ecSBjoern A. Zeeb 		break;
21062b5b6ecSBjoern A. Zeeb 	}
21162b5b6ecSBjoern A. Zeeb #endif
21262b5b6ecSBjoern A. Zeeb 	default:
21362b5b6ecSBjoern A. Zeeb 		cs = 0;		/* Keep compiler happy. */
21462b5b6ecSBjoern A. Zeeb 	}
21562b5b6ecSBjoern A. Zeeb 
21662b5b6ecSBjoern A. Zeeb 	cs = ~cs;
21762b5b6ecSBjoern A. Zeeb 	c += cs;
21862b5b6ecSBjoern A. Zeeb 
21962b5b6ecSBjoern A. Zeeb 	/* Remove TCP header csum. */
22062b5b6ecSBjoern A. Zeeb 	cs = ~tcp_lro_csum_th(th);
22162b5b6ecSBjoern A. Zeeb 	c += cs;
22262b5b6ecSBjoern A. Zeeb 	while (c > 0xffff)
22362b5b6ecSBjoern A. Zeeb 		c = (c >> 16) + (c & 0xffff);
22462b5b6ecSBjoern A. Zeeb 
22562b5b6ecSBjoern A. Zeeb 	return (c & 0xffff);
22662b5b6ecSBjoern A. Zeeb }
22762b5b6ecSBjoern A. Zeeb #endif
22862b5b6ecSBjoern A. Zeeb 
2296c5087a8SJack F Vogel void
2307127e6acSNavdeep Parhar tcp_lro_flush_inactive(struct lro_ctrl *lc, const struct timeval *timeout)
2317127e6acSNavdeep Parhar {
2327127e6acSNavdeep Parhar 	struct lro_entry *le, *le_tmp;
2337127e6acSNavdeep Parhar 	struct timeval tv;
2347127e6acSNavdeep Parhar 
2357127e6acSNavdeep Parhar 	if (SLIST_EMPTY(&lc->lro_active))
2367127e6acSNavdeep Parhar 		return;
2377127e6acSNavdeep Parhar 
2387127e6acSNavdeep Parhar 	getmicrotime(&tv);
2397127e6acSNavdeep Parhar 	timevalsub(&tv, timeout);
2407127e6acSNavdeep Parhar 	SLIST_FOREACH_SAFE(le, &lc->lro_active, next, le_tmp) {
2417127e6acSNavdeep Parhar 		if (timevalcmp(&tv, &le->mtime, >=)) {
2427127e6acSNavdeep Parhar 			SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
2437127e6acSNavdeep Parhar 			tcp_lro_flush(lc, le);
2447127e6acSNavdeep Parhar 		}
2457127e6acSNavdeep Parhar 	}
2467127e6acSNavdeep Parhar }
2477127e6acSNavdeep Parhar 
2487127e6acSNavdeep Parhar void
24962b5b6ecSBjoern A. Zeeb tcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *le)
2506c5087a8SJack F Vogel {
2516c5087a8SJack F Vogel 
25262b5b6ecSBjoern A. Zeeb 	if (le->append_cnt > 0) {
25362b5b6ecSBjoern A. Zeeb 		struct tcphdr *th;
25462b5b6ecSBjoern A. Zeeb 		uint16_t p_len;
2556c5087a8SJack F Vogel 
25662b5b6ecSBjoern A. Zeeb 		p_len = htons(le->p_len);
25762b5b6ecSBjoern A. Zeeb 		switch (le->eh_type) {
25862b5b6ecSBjoern A. Zeeb #ifdef INET6
25962b5b6ecSBjoern A. Zeeb 		case ETHERTYPE_IPV6:
2606c5087a8SJack F Vogel 		{
26162b5b6ecSBjoern A. Zeeb 			struct ip6_hdr *ip6;
26262b5b6ecSBjoern A. Zeeb 
26362b5b6ecSBjoern A. Zeeb 			ip6 = le->le_ip6;
26462b5b6ecSBjoern A. Zeeb 			ip6->ip6_plen = p_len;
26562b5b6ecSBjoern A. Zeeb 			th = (struct tcphdr *)(ip6 + 1);
26662b5b6ecSBjoern A. Zeeb 			le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID |
26762b5b6ecSBjoern A. Zeeb 			    CSUM_PSEUDO_HDR;
26862b5b6ecSBjoern A. Zeeb 			le->p_len += ETHER_HDR_LEN + sizeof(*ip6);
26962b5b6ecSBjoern A. Zeeb 			break;
27062b5b6ecSBjoern A. Zeeb 		}
27162b5b6ecSBjoern A. Zeeb #endif
27262b5b6ecSBjoern A. Zeeb #ifdef INET
27362b5b6ecSBjoern A. Zeeb 		case ETHERTYPE_IP:
27462b5b6ecSBjoern A. Zeeb 		{
27562b5b6ecSBjoern A. Zeeb 			struct ip *ip4;
27662b5b6ecSBjoern A. Zeeb #ifdef TCP_LRO_UPDATE_CSUM
27762b5b6ecSBjoern A. Zeeb 			uint32_t cl;
27862b5b6ecSBjoern A. Zeeb 			uint16_t c;
27962b5b6ecSBjoern A. Zeeb #endif
28062b5b6ecSBjoern A. Zeeb 
28162b5b6ecSBjoern A. Zeeb 			ip4 = le->le_ip4;
28262b5b6ecSBjoern A. Zeeb #ifdef TCP_LRO_UPDATE_CSUM
28362b5b6ecSBjoern A. Zeeb 			/* Fix IP header checksum for new length. */
28462b5b6ecSBjoern A. Zeeb 			c = ~ip4->ip_sum;
28562b5b6ecSBjoern A. Zeeb 			cl = c;
28662b5b6ecSBjoern A. Zeeb 			c = ~ip4->ip_len;
28762b5b6ecSBjoern A. Zeeb 			cl += c + p_len;
28862b5b6ecSBjoern A. Zeeb 			while (cl > 0xffff)
28962b5b6ecSBjoern A. Zeeb 				cl = (cl >> 16) + (cl & 0xffff);
29062b5b6ecSBjoern A. Zeeb 			c = cl;
29162b5b6ecSBjoern A. Zeeb 			ip4->ip_sum = ~c;
29262b5b6ecSBjoern A. Zeeb #else
29362b5b6ecSBjoern A. Zeeb 			ip4->ip_sum = TCP_LRO_INVALID_CSUM;
29462b5b6ecSBjoern A. Zeeb #endif
29562b5b6ecSBjoern A. Zeeb 			ip4->ip_len = p_len;
29662b5b6ecSBjoern A. Zeeb 			th = (struct tcphdr *)(ip4 + 1);
29762b5b6ecSBjoern A. Zeeb 			le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID |
29862b5b6ecSBjoern A. Zeeb 			    CSUM_PSEUDO_HDR | CSUM_IP_CHECKED | CSUM_IP_VALID;
29962b5b6ecSBjoern A. Zeeb 			le->p_len += ETHER_HDR_LEN;
30062b5b6ecSBjoern A. Zeeb 			break;
30162b5b6ecSBjoern A. Zeeb 		}
30262b5b6ecSBjoern A. Zeeb #endif
30362b5b6ecSBjoern A. Zeeb 		default:
30462b5b6ecSBjoern A. Zeeb 			th = NULL;	/* Keep compiler happy. */
30562b5b6ecSBjoern A. Zeeb 		}
30662b5b6ecSBjoern A. Zeeb 		le->m_head->m_pkthdr.csum_data = 0xffff;
30762b5b6ecSBjoern A. Zeeb 		le->m_head->m_pkthdr.len = le->p_len;
30862b5b6ecSBjoern A. Zeeb 
30962b5b6ecSBjoern A. Zeeb 		/* Incorporate the latest ACK into the TCP header. */
31062b5b6ecSBjoern A. Zeeb 		th->th_ack = le->ack_seq;
31162b5b6ecSBjoern A. Zeeb 		th->th_win = le->window;
31262b5b6ecSBjoern A. Zeeb 		/* Incorporate latest timestamp into the TCP header. */
31362b5b6ecSBjoern A. Zeeb 		if (le->timestamp != 0) {
3146c5087a8SJack F Vogel 			uint32_t *ts_ptr;
3156c5087a8SJack F Vogel 
31662b5b6ecSBjoern A. Zeeb 			ts_ptr = (uint32_t *)(th + 1);
31762b5b6ecSBjoern A. Zeeb 			ts_ptr[1] = htonl(le->tsval);
31862b5b6ecSBjoern A. Zeeb 			ts_ptr[2] = le->tsecr;
31962b5b6ecSBjoern A. Zeeb 		}
32062b5b6ecSBjoern A. Zeeb #ifdef TCP_LRO_UPDATE_CSUM
32162b5b6ecSBjoern A. Zeeb 		/* Update the TCP header checksum. */
32262b5b6ecSBjoern A. Zeeb 		le->ulp_csum += p_len;
32362b5b6ecSBjoern A. Zeeb 		le->ulp_csum += tcp_lro_csum_th(th);
32462b5b6ecSBjoern A. Zeeb 		while (le->ulp_csum > 0xffff)
32562b5b6ecSBjoern A. Zeeb 			le->ulp_csum = (le->ulp_csum >> 16) +
32662b5b6ecSBjoern A. Zeeb 			    (le->ulp_csum & 0xffff);
32762b5b6ecSBjoern A. Zeeb 		th->th_sum = (le->ulp_csum & 0xffff);
32862b5b6ecSBjoern A. Zeeb 		th->th_sum = ~th->th_sum;
32962b5b6ecSBjoern A. Zeeb #else
33062b5b6ecSBjoern A. Zeeb 		th->th_sum = TCP_LRO_INVALID_CSUM;
33162b5b6ecSBjoern A. Zeeb #endif
33262b5b6ecSBjoern A. Zeeb 	}
3336c5087a8SJack F Vogel 
33462b5b6ecSBjoern A. Zeeb 	(*lc->ifp->if_input)(lc->ifp, le->m_head);
33562b5b6ecSBjoern A. Zeeb 	lc->lro_queued += le->append_cnt + 1;
33662b5b6ecSBjoern A. Zeeb 	lc->lro_flushed++;
33762b5b6ecSBjoern A. Zeeb 	bzero(le, sizeof(*le));
33862b5b6ecSBjoern A. Zeeb 	SLIST_INSERT_HEAD(&lc->lro_free, le, next);
33962b5b6ecSBjoern A. Zeeb }
3406c5087a8SJack F Vogel 
341e936121dSHans Petter Selasky static int
342e936121dSHans Petter Selasky tcp_lro_mbuf_compare_header(const void *ppa, const void *ppb)
343e936121dSHans Petter Selasky {
344e936121dSHans Petter Selasky 	const struct mbuf *ma = *((const struct mbuf * const *)ppa);
345e936121dSHans Petter Selasky 	const struct mbuf *mb = *((const struct mbuf * const *)ppb);
346e936121dSHans Petter Selasky 	int ret;
347e936121dSHans Petter Selasky 
348e936121dSHans Petter Selasky 	ret = M_HASHTYPE_GET(ma) - M_HASHTYPE_GET(mb);
349e936121dSHans Petter Selasky 	if (ret != 0)
350e936121dSHans Petter Selasky 		goto done;
351e936121dSHans Petter Selasky 
3523e9470b7SHans Petter Selasky 	if (ma->m_pkthdr.flowid > mb->m_pkthdr.flowid)
3533e9470b7SHans Petter Selasky 		return (1);
3543e9470b7SHans Petter Selasky 	else if (ma->m_pkthdr.flowid < mb->m_pkthdr.flowid)
3553e9470b7SHans Petter Selasky 		return (-1);
356e936121dSHans Petter Selasky 
357e936121dSHans Petter Selasky 	ret = TCP_LRO_SEQUENCE(ma) - TCP_LRO_SEQUENCE(mb);
358e936121dSHans Petter Selasky done:
359e936121dSHans Petter Selasky 	return (ret);
360e936121dSHans Petter Selasky }
361e936121dSHans Petter Selasky 
362e936121dSHans Petter Selasky void
363e936121dSHans Petter Selasky tcp_lro_flush_all(struct lro_ctrl *lc)
364e936121dSHans Petter Selasky {
365e936121dSHans Petter Selasky 	struct lro_entry *le;
366e936121dSHans Petter Selasky 	uint32_t hashtype;
367e936121dSHans Petter Selasky 	uint32_t flowid;
368e936121dSHans Petter Selasky 	unsigned x;
369e936121dSHans Petter Selasky 
370e936121dSHans Petter Selasky 	/* check if no mbufs to flush */
371e936121dSHans Petter Selasky 	if (__predict_false(lc->lro_mbuf_count == 0))
372e936121dSHans Petter Selasky 		goto done;
373e936121dSHans Petter Selasky 
374e936121dSHans Petter Selasky 	/* sort all mbufs according to stream */
375e936121dSHans Petter Selasky 	qsort(lc->lro_mbuf_data, lc->lro_mbuf_count, sizeof(struct mbuf *),
376e936121dSHans Petter Selasky 	    &tcp_lro_mbuf_compare_header);
377e936121dSHans Petter Selasky 
378e936121dSHans Petter Selasky 	/* input data into LRO engine, stream by stream */
379e936121dSHans Petter Selasky 	flowid = 0;
380e936121dSHans Petter Selasky 	hashtype = M_HASHTYPE_NONE;
381e936121dSHans Petter Selasky 	for (x = 0; x != lc->lro_mbuf_count; x++) {
382e936121dSHans Petter Selasky 		struct mbuf *mb;
383e936121dSHans Petter Selasky 
384e936121dSHans Petter Selasky 		mb = lc->lro_mbuf_data[x];
385e936121dSHans Petter Selasky 
386e936121dSHans Petter Selasky 		/* check for new stream */
387e936121dSHans Petter Selasky 		if (mb->m_pkthdr.flowid != flowid ||
388e936121dSHans Petter Selasky 		    M_HASHTYPE_GET(mb) != hashtype) {
389e936121dSHans Petter Selasky 			flowid = mb->m_pkthdr.flowid;
390e936121dSHans Petter Selasky 			hashtype = M_HASHTYPE_GET(mb);
391e936121dSHans Petter Selasky 
392e936121dSHans Petter Selasky 			/* flush active streams */
393e936121dSHans Petter Selasky 			while ((le = SLIST_FIRST(&lc->lro_active)) != NULL) {
394e936121dSHans Petter Selasky 				SLIST_REMOVE_HEAD(&lc->lro_active, next);
395e936121dSHans Petter Selasky 				tcp_lro_flush(lc, le);
396e936121dSHans Petter Selasky 			}
397e936121dSHans Petter Selasky 		}
398e936121dSHans Petter Selasky #ifdef TCP_LRO_RESET_SEQUENCE
399e936121dSHans Petter Selasky 		/* reset sequence number */
400e936121dSHans Petter Selasky 		TCP_LRO_SEQUENCE(mb) = 0;
401e936121dSHans Petter Selasky #endif
402e936121dSHans Petter Selasky 		/* add packet to LRO engine */
403e936121dSHans Petter Selasky 		if (tcp_lro_rx(lc, mb, 0) != 0) {
404e936121dSHans Petter Selasky 			/* input packet to network layer */
405e936121dSHans Petter Selasky 			(*lc->ifp->if_input)(lc->ifp, mb);
406e936121dSHans Petter Selasky 			lc->lro_queued++;
407e936121dSHans Petter Selasky 			lc->lro_flushed++;
408e936121dSHans Petter Selasky 		}
409e936121dSHans Petter Selasky 	}
410e936121dSHans Petter Selasky done:
411e936121dSHans Petter Selasky 	/* flush active streams */
412e936121dSHans Petter Selasky 	while ((le = SLIST_FIRST(&lc->lro_active)) != NULL) {
413e936121dSHans Petter Selasky 		SLIST_REMOVE_HEAD(&lc->lro_active, next);
414e936121dSHans Petter Selasky 		tcp_lro_flush(lc, le);
415e936121dSHans Petter Selasky 	}
416e936121dSHans Petter Selasky 	lc->lro_mbuf_count = 0;
417e936121dSHans Petter Selasky }
418e936121dSHans Petter Selasky 
41962b5b6ecSBjoern A. Zeeb #ifdef INET6
42062b5b6ecSBjoern A. Zeeb static int
42162b5b6ecSBjoern A. Zeeb tcp_lro_rx_ipv6(struct lro_ctrl *lc, struct mbuf *m, struct ip6_hdr *ip6,
42262b5b6ecSBjoern A. Zeeb     struct tcphdr **th)
42362b5b6ecSBjoern A. Zeeb {
4246c5087a8SJack F Vogel 
42562b5b6ecSBjoern A. Zeeb 	/* XXX-BZ we should check the flow-label. */
4266c5087a8SJack F Vogel 
42762b5b6ecSBjoern A. Zeeb 	/* XXX-BZ We do not yet support ext. hdrs. */
42862b5b6ecSBjoern A. Zeeb 	if (ip6->ip6_nxt != IPPROTO_TCP)
42962b5b6ecSBjoern A. Zeeb 		return (TCP_LRO_NOT_SUPPORTED);
43062b5b6ecSBjoern A. Zeeb 
43162b5b6ecSBjoern A. Zeeb 	/* Find the TCP header. */
43262b5b6ecSBjoern A. Zeeb 	*th = (struct tcphdr *)(ip6 + 1);
43362b5b6ecSBjoern A. Zeeb 
43462b5b6ecSBjoern A. Zeeb 	return (0);
43562b5b6ecSBjoern A. Zeeb }
43662b5b6ecSBjoern A. Zeeb #endif
43762b5b6ecSBjoern A. Zeeb 
43862b5b6ecSBjoern A. Zeeb #ifdef INET
43962b5b6ecSBjoern A. Zeeb static int
44062b5b6ecSBjoern A. Zeeb tcp_lro_rx_ipv4(struct lro_ctrl *lc, struct mbuf *m, struct ip *ip4,
44162b5b6ecSBjoern A. Zeeb     struct tcphdr **th)
44262b5b6ecSBjoern A. Zeeb {
44362b5b6ecSBjoern A. Zeeb 	int csum_flags;
44462b5b6ecSBjoern A. Zeeb 	uint16_t csum;
44562b5b6ecSBjoern A. Zeeb 
44662b5b6ecSBjoern A. Zeeb 	if (ip4->ip_p != IPPROTO_TCP)
44762b5b6ecSBjoern A. Zeeb 		return (TCP_LRO_NOT_SUPPORTED);
44862b5b6ecSBjoern A. Zeeb 
44962b5b6ecSBjoern A. Zeeb 	/* Ensure there are no options. */
45062b5b6ecSBjoern A. Zeeb 	if ((ip4->ip_hl << 2) != sizeof (*ip4))
45162b5b6ecSBjoern A. Zeeb 		return (TCP_LRO_CANNOT);
45262b5b6ecSBjoern A. Zeeb 
45362b5b6ecSBjoern A. Zeeb 	/* .. and the packet is not fragmented. */
45462b5b6ecSBjoern A. Zeeb 	if (ip4->ip_off & htons(IP_MF|IP_OFFMASK))
45562b5b6ecSBjoern A. Zeeb 		return (TCP_LRO_CANNOT);
45662b5b6ecSBjoern A. Zeeb 
45762b5b6ecSBjoern A. Zeeb 	/* Legacy IP has a header checksum that needs to be correct. */
45862b5b6ecSBjoern A. Zeeb 	csum_flags = m->m_pkthdr.csum_flags;
45945709593SKip Macy 	if (csum_flags & CSUM_IP_CHECKED) {
46045709593SKip Macy 		if (__predict_false((csum_flags & CSUM_IP_VALID) == 0)) {
46162b5b6ecSBjoern A. Zeeb 			lc->lro_bad_csum++;
46262b5b6ecSBjoern A. Zeeb 			return (TCP_LRO_CANNOT);
46345709593SKip Macy 		}
46445709593SKip Macy 	} else {
46562b5b6ecSBjoern A. Zeeb 		csum = in_cksum_hdr(ip4);
466e5ca1ffaSAndrew Gallatin 		if (__predict_false((csum) != 0)) {
46762b5b6ecSBjoern A. Zeeb 			lc->lro_bad_csum++;
46862b5b6ecSBjoern A. Zeeb 			return (TCP_LRO_CANNOT);
4696c5087a8SJack F Vogel 		}
47045709593SKip Macy 	}
4716c5087a8SJack F Vogel 
47262b5b6ecSBjoern A. Zeeb 	/* Find the TCP header (we assured there are no IP options). */
47362b5b6ecSBjoern A. Zeeb 	*th = (struct tcphdr *)(ip4 + 1);
4746c5087a8SJack F Vogel 
47562b5b6ecSBjoern A. Zeeb 	return (0);
4766c5087a8SJack F Vogel }
47762b5b6ecSBjoern A. Zeeb #endif
4786c5087a8SJack F Vogel 
47962b5b6ecSBjoern A. Zeeb int
48062b5b6ecSBjoern A. Zeeb tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
48162b5b6ecSBjoern A. Zeeb {
48262b5b6ecSBjoern A. Zeeb 	struct lro_entry *le;
48362b5b6ecSBjoern A. Zeeb 	struct ether_header *eh;
48462b5b6ecSBjoern A. Zeeb #ifdef INET6
48562b5b6ecSBjoern A. Zeeb 	struct ip6_hdr *ip6 = NULL;	/* Keep compiler happy. */
48662b5b6ecSBjoern A. Zeeb #endif
48762b5b6ecSBjoern A. Zeeb #ifdef INET
48862b5b6ecSBjoern A. Zeeb 	struct ip *ip4 = NULL;		/* Keep compiler happy. */
48962b5b6ecSBjoern A. Zeeb #endif
49062b5b6ecSBjoern A. Zeeb 	struct tcphdr *th;
49162b5b6ecSBjoern A. Zeeb 	void *l3hdr = NULL;		/* Keep compiler happy. */
49262b5b6ecSBjoern A. Zeeb 	uint32_t *ts_ptr;
49362b5b6ecSBjoern A. Zeeb 	tcp_seq seq;
49462b5b6ecSBjoern A. Zeeb 	int error, ip_len, l;
49562b5b6ecSBjoern A. Zeeb 	uint16_t eh_type, tcp_data_len;
4966c5087a8SJack F Vogel 
49762b5b6ecSBjoern A. Zeeb 	/* We expect a contiguous header [eh, ip, tcp]. */
49862b5b6ecSBjoern A. Zeeb 
49962b5b6ecSBjoern A. Zeeb 	eh = mtod(m, struct ether_header *);
50062b5b6ecSBjoern A. Zeeb 	eh_type = ntohs(eh->ether_type);
50162b5b6ecSBjoern A. Zeeb 	switch (eh_type) {
50262b5b6ecSBjoern A. Zeeb #ifdef INET6
50362b5b6ecSBjoern A. Zeeb 	case ETHERTYPE_IPV6:
5045fa2656eSBjoern A. Zeeb 	{
5055fa2656eSBjoern A. Zeeb 		CURVNET_SET(lc->ifp->if_vnet);
50631bfc56eSBjoern A. Zeeb 		if (V_ip6_forwarding != 0) {
50731bfc56eSBjoern A. Zeeb 			/* XXX-BZ stats but changing lro_ctrl is a problem. */
5085fa2656eSBjoern A. Zeeb 			CURVNET_RESTORE();
50931bfc56eSBjoern A. Zeeb 			return (TCP_LRO_CANNOT);
51031bfc56eSBjoern A. Zeeb 		}
5115fa2656eSBjoern A. Zeeb 		CURVNET_RESTORE();
51262b5b6ecSBjoern A. Zeeb 		l3hdr = ip6 = (struct ip6_hdr *)(eh + 1);
51362b5b6ecSBjoern A. Zeeb 		error = tcp_lro_rx_ipv6(lc, m, ip6, &th);
51462b5b6ecSBjoern A. Zeeb 		if (error != 0)
51562b5b6ecSBjoern A. Zeeb 			return (error);
51662b5b6ecSBjoern A. Zeeb 		tcp_data_len = ntohs(ip6->ip6_plen);
51762b5b6ecSBjoern A. Zeeb 		ip_len = sizeof(*ip6) + tcp_data_len;
51862b5b6ecSBjoern A. Zeeb 		break;
5195fa2656eSBjoern A. Zeeb 	}
52062b5b6ecSBjoern A. Zeeb #endif
52162b5b6ecSBjoern A. Zeeb #ifdef INET
52262b5b6ecSBjoern A. Zeeb 	case ETHERTYPE_IP:
5235fa2656eSBjoern A. Zeeb 	{
5245fa2656eSBjoern A. Zeeb 		CURVNET_SET(lc->ifp->if_vnet);
52531bfc56eSBjoern A. Zeeb 		if (V_ipforwarding != 0) {
52631bfc56eSBjoern A. Zeeb 			/* XXX-BZ stats but changing lro_ctrl is a problem. */
5275fa2656eSBjoern A. Zeeb 			CURVNET_RESTORE();
52831bfc56eSBjoern A. Zeeb 			return (TCP_LRO_CANNOT);
52931bfc56eSBjoern A. Zeeb 		}
5305fa2656eSBjoern A. Zeeb 		CURVNET_RESTORE();
53162b5b6ecSBjoern A. Zeeb 		l3hdr = ip4 = (struct ip *)(eh + 1);
53262b5b6ecSBjoern A. Zeeb 		error = tcp_lro_rx_ipv4(lc, m, ip4, &th);
53362b5b6ecSBjoern A. Zeeb 		if (error != 0)
53462b5b6ecSBjoern A. Zeeb 			return (error);
53562b5b6ecSBjoern A. Zeeb 		ip_len = ntohs(ip4->ip_len);
53662b5b6ecSBjoern A. Zeeb 		tcp_data_len = ip_len - sizeof(*ip4);
53762b5b6ecSBjoern A. Zeeb 		break;
5385fa2656eSBjoern A. Zeeb 	}
53962b5b6ecSBjoern A. Zeeb #endif
54062b5b6ecSBjoern A. Zeeb 	/* XXX-BZ what happens in case of VLAN(s)? */
54162b5b6ecSBjoern A. Zeeb 	default:
54262b5b6ecSBjoern A. Zeeb 		return (TCP_LRO_NOT_SUPPORTED);
54362b5b6ecSBjoern A. Zeeb 	}
5446c5087a8SJack F Vogel 
5456c5087a8SJack F Vogel 	/*
54662b5b6ecSBjoern A. Zeeb 	 * If the frame is padded beyond the end of the IP packet, then we must
54762b5b6ecSBjoern A. Zeeb 	 * trim the extra bytes off.
5486c5087a8SJack F Vogel 	 */
54962b5b6ecSBjoern A. Zeeb 	l = m->m_pkthdr.len - (ETHER_HDR_LEN + ip_len);
55062b5b6ecSBjoern A. Zeeb 	if (l != 0) {
55162b5b6ecSBjoern A. Zeeb 		if (l < 0)
55262b5b6ecSBjoern A. Zeeb 			/* Truncated packet. */
55362b5b6ecSBjoern A. Zeeb 			return (TCP_LRO_CANNOT);
55462b5b6ecSBjoern A. Zeeb 
55562b5b6ecSBjoern A. Zeeb 		m_adj(m, -l);
5566c5087a8SJack F Vogel 	}
5576c5087a8SJack F Vogel 
55862b5b6ecSBjoern A. Zeeb 	/*
55962b5b6ecSBjoern A. Zeeb 	 * Check TCP header constraints.
56062b5b6ecSBjoern A. Zeeb 	 */
56162b5b6ecSBjoern A. Zeeb 	/* Ensure no bits set besides ACK or PSH. */
56262b5b6ecSBjoern A. Zeeb 	if ((th->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
56362b5b6ecSBjoern A. Zeeb 		return (TCP_LRO_CANNOT);
56462b5b6ecSBjoern A. Zeeb 
56562b5b6ecSBjoern A. Zeeb 	/* XXX-BZ We lose an ACK|PUSH flag when concatenating multiple segments. */
56662b5b6ecSBjoern A. Zeeb 	/* XXX-BZ Ideally we'd flush on PUSH? */
56762b5b6ecSBjoern A. Zeeb 
56862b5b6ecSBjoern A. Zeeb 	/*
56962b5b6ecSBjoern A. Zeeb 	 * Check for timestamps.
57062b5b6ecSBjoern A. Zeeb 	 * Since the only option we handle are timestamps, we only have to
57162b5b6ecSBjoern A. Zeeb 	 * handle the simple case of aligned timestamps.
57262b5b6ecSBjoern A. Zeeb 	 */
57362b5b6ecSBjoern A. Zeeb 	l = (th->th_off << 2);
57462b5b6ecSBjoern A. Zeeb 	tcp_data_len -= l;
57562b5b6ecSBjoern A. Zeeb 	l -= sizeof(*th);
57662b5b6ecSBjoern A. Zeeb 	ts_ptr = (uint32_t *)(th + 1);
57762b5b6ecSBjoern A. Zeeb 	if (l != 0 && (__predict_false(l != TCPOLEN_TSTAMP_APPA) ||
57862b5b6ecSBjoern A. Zeeb 	    (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|
57962b5b6ecSBjoern A. Zeeb 	    TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP))))
58062b5b6ecSBjoern A. Zeeb 		return (TCP_LRO_CANNOT);
58162b5b6ecSBjoern A. Zeeb 
58262b5b6ecSBjoern A. Zeeb 	/* If the driver did not pass in the checksum, set it now. */
58362b5b6ecSBjoern A. Zeeb 	if (csum == 0x0000)
58462b5b6ecSBjoern A. Zeeb 		csum = th->th_sum;
58562b5b6ecSBjoern A. Zeeb 
58662b5b6ecSBjoern A. Zeeb 	seq = ntohl(th->th_seq);
58762b5b6ecSBjoern A. Zeeb 
58862b5b6ecSBjoern A. Zeeb 	/* Try to find a matching previous segment. */
58962b5b6ecSBjoern A. Zeeb 	SLIST_FOREACH(le, &lc->lro_active, next) {
59062b5b6ecSBjoern A. Zeeb 		if (le->eh_type != eh_type)
59162b5b6ecSBjoern A. Zeeb 			continue;
59262b5b6ecSBjoern A. Zeeb 		if (le->source_port != th->th_sport ||
59362b5b6ecSBjoern A. Zeeb 		    le->dest_port != th->th_dport)
59462b5b6ecSBjoern A. Zeeb 			continue;
59562b5b6ecSBjoern A. Zeeb 		switch (eh_type) {
59662b5b6ecSBjoern A. Zeeb #ifdef INET6
59762b5b6ecSBjoern A. Zeeb 		case ETHERTYPE_IPV6:
59862b5b6ecSBjoern A. Zeeb 			if (bcmp(&le->source_ip6, &ip6->ip6_src,
59962b5b6ecSBjoern A. Zeeb 			    sizeof(struct in6_addr)) != 0 ||
60062b5b6ecSBjoern A. Zeeb 			    bcmp(&le->dest_ip6, &ip6->ip6_dst,
60162b5b6ecSBjoern A. Zeeb 			    sizeof(struct in6_addr)) != 0)
60262b5b6ecSBjoern A. Zeeb 				continue;
60362b5b6ecSBjoern A. Zeeb 			break;
60462b5b6ecSBjoern A. Zeeb #endif
60562b5b6ecSBjoern A. Zeeb #ifdef INET
60662b5b6ecSBjoern A. Zeeb 		case ETHERTYPE_IP:
60762b5b6ecSBjoern A. Zeeb 			if (le->source_ip4 != ip4->ip_src.s_addr ||
60862b5b6ecSBjoern A. Zeeb 			    le->dest_ip4 != ip4->ip_dst.s_addr)
60962b5b6ecSBjoern A. Zeeb 				continue;
61062b5b6ecSBjoern A. Zeeb 			break;
61162b5b6ecSBjoern A. Zeeb #endif
6126c5087a8SJack F Vogel 		}
6136c5087a8SJack F Vogel 
614ca712262SColin Percival 		/* Flush now if appending will result in overflow. */
615*7ae3d4bfSSepherosa Ziehau 		if (le->p_len > (lc->lro_length_lim - tcp_data_len)) {
61662b5b6ecSBjoern A. Zeeb 			SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
61762b5b6ecSBjoern A. Zeeb 			tcp_lro_flush(lc, le);
618ca712262SColin Percival 			break;
619ca712262SColin Percival 		}
620ca712262SColin Percival 
62162b5b6ecSBjoern A. Zeeb 		/* Try to append the new segment. */
62262b5b6ecSBjoern A. Zeeb 		if (__predict_false(seq != le->next_seq ||
62362b5b6ecSBjoern A. Zeeb 		    (tcp_data_len == 0 && le->ack_seq == th->th_ack))) {
62462b5b6ecSBjoern A. Zeeb 			/* Out of order packet or duplicate ACK. */
62562b5b6ecSBjoern A. Zeeb 			SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
62662b5b6ecSBjoern A. Zeeb 			tcp_lro_flush(lc, le);
62762b5b6ecSBjoern A. Zeeb 			return (TCP_LRO_CANNOT);
6286c5087a8SJack F Vogel 		}
6296c5087a8SJack F Vogel 
63062b5b6ecSBjoern A. Zeeb 		if (l != 0) {
6316c5087a8SJack F Vogel 			uint32_t tsval = ntohl(*(ts_ptr + 1));
63262b5b6ecSBjoern A. Zeeb 			/* Make sure timestamp values are increasing. */
63362b5b6ecSBjoern A. Zeeb 			/* XXX-BZ flip and use TSTMP_GEQ macro for this? */
63462b5b6ecSBjoern A. Zeeb 			if (__predict_false(le->tsval > tsval ||
63562b5b6ecSBjoern A. Zeeb 			    *(ts_ptr + 2) == 0))
63662b5b6ecSBjoern A. Zeeb 				return (TCP_LRO_CANNOT);
63762b5b6ecSBjoern A. Zeeb 			le->tsval = tsval;
63862b5b6ecSBjoern A. Zeeb 			le->tsecr = *(ts_ptr + 2);
6396c5087a8SJack F Vogel 		}
6406c5087a8SJack F Vogel 
64162b5b6ecSBjoern A. Zeeb 		le->next_seq += tcp_data_len;
64262b5b6ecSBjoern A. Zeeb 		le->ack_seq = th->th_ack;
64362b5b6ecSBjoern A. Zeeb 		le->window = th->th_win;
64462b5b6ecSBjoern A. Zeeb 		le->append_cnt++;
64562b5b6ecSBjoern A. Zeeb 
64662b5b6ecSBjoern A. Zeeb #ifdef TCP_LRO_UPDATE_CSUM
64762b5b6ecSBjoern A. Zeeb 		le->ulp_csum += tcp_lro_rx_csum_fixup(le, l3hdr, th,
64862b5b6ecSBjoern A. Zeeb 		    tcp_data_len, ~csum);
64962b5b6ecSBjoern A. Zeeb #endif
65062b5b6ecSBjoern A. Zeeb 
6516c5087a8SJack F Vogel 		if (tcp_data_len == 0) {
65262b5b6ecSBjoern A. Zeeb 			m_freem(m);
653*7ae3d4bfSSepherosa Ziehau 			/*
654*7ae3d4bfSSepherosa Ziehau 			 * Flush this LRO entry, if this ACK should not
655*7ae3d4bfSSepherosa Ziehau 			 * be further delayed.
656*7ae3d4bfSSepherosa Ziehau 			 */
657*7ae3d4bfSSepherosa Ziehau 			if (le->append_cnt >= lc->lro_ackcnt_lim) {
658*7ae3d4bfSSepherosa Ziehau 				SLIST_REMOVE(&lc->lro_active, le, lro_entry,
659*7ae3d4bfSSepherosa Ziehau 				    next);
660*7ae3d4bfSSepherosa Ziehau 				tcp_lro_flush(lc, le);
661*7ae3d4bfSSepherosa Ziehau 			}
66262b5b6ecSBjoern A. Zeeb 			return (0);
6636c5087a8SJack F Vogel 		}
66462b5b6ecSBjoern A. Zeeb 
66562b5b6ecSBjoern A. Zeeb 		le->p_len += tcp_data_len;
66662b5b6ecSBjoern A. Zeeb 
66762b5b6ecSBjoern A. Zeeb 		/*
66862b5b6ecSBjoern A. Zeeb 		 * Adjust the mbuf so that m_data points to the first byte of
66962b5b6ecSBjoern A. Zeeb 		 * the ULP payload.  Adjust the mbuf to avoid complications and
67062b5b6ecSBjoern A. Zeeb 		 * append new segment to existing mbuf chain.
6716c5087a8SJack F Vogel 		 */
67262b5b6ecSBjoern A. Zeeb 		m_adj(m, m->m_pkthdr.len - tcp_data_len);
6739523d1bfSNavdeep Parhar 		m_demote_pkthdr(m);
6746c5087a8SJack F Vogel 
67562b5b6ecSBjoern A. Zeeb 		le->m_tail->m_next = m;
67662b5b6ecSBjoern A. Zeeb 		le->m_tail = m_last(m);
6776c5087a8SJack F Vogel 
67862b5b6ecSBjoern A. Zeeb 		/*
67962b5b6ecSBjoern A. Zeeb 		 * If a possible next full length packet would cause an
68062b5b6ecSBjoern A. Zeeb 		 * overflow, pro-actively flush now.
6816c5087a8SJack F Vogel 		 */
682*7ae3d4bfSSepherosa Ziehau 		if (le->p_len > (lc->lro_length_lim - lc->ifp->if_mtu)) {
68362b5b6ecSBjoern A. Zeeb 			SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
68462b5b6ecSBjoern A. Zeeb 			tcp_lro_flush(lc, le);
6857127e6acSNavdeep Parhar 		} else
6867127e6acSNavdeep Parhar 			getmicrotime(&le->mtime);
6876c5087a8SJack F Vogel 
68862b5b6ecSBjoern A. Zeeb 		return (0);
6896c5087a8SJack F Vogel 	}
69062b5b6ecSBjoern A. Zeeb 
69162b5b6ecSBjoern A. Zeeb 	/* Try to find an empty slot. */
69262b5b6ecSBjoern A. Zeeb 	if (SLIST_EMPTY(&lc->lro_free))
69362b5b6ecSBjoern A. Zeeb 		return (TCP_LRO_CANNOT);
69462b5b6ecSBjoern A. Zeeb 
69562b5b6ecSBjoern A. Zeeb 	/* Start a new segment chain. */
69662b5b6ecSBjoern A. Zeeb 	le = SLIST_FIRST(&lc->lro_free);
69762b5b6ecSBjoern A. Zeeb 	SLIST_REMOVE_HEAD(&lc->lro_free, next);
69862b5b6ecSBjoern A. Zeeb 	SLIST_INSERT_HEAD(&lc->lro_active, le, next);
6997127e6acSNavdeep Parhar 	getmicrotime(&le->mtime);
70062b5b6ecSBjoern A. Zeeb 
70162b5b6ecSBjoern A. Zeeb 	/* Start filling in details. */
70262b5b6ecSBjoern A. Zeeb 	switch (eh_type) {
70362b5b6ecSBjoern A. Zeeb #ifdef INET6
70462b5b6ecSBjoern A. Zeeb 	case ETHERTYPE_IPV6:
70562b5b6ecSBjoern A. Zeeb 		le->le_ip6 = ip6;
70662b5b6ecSBjoern A. Zeeb 		le->source_ip6 = ip6->ip6_src;
70762b5b6ecSBjoern A. Zeeb 		le->dest_ip6 = ip6->ip6_dst;
70862b5b6ecSBjoern A. Zeeb 		le->eh_type = eh_type;
70962b5b6ecSBjoern A. Zeeb 		le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN - sizeof(*ip6);
71062b5b6ecSBjoern A. Zeeb 		break;
71162b5b6ecSBjoern A. Zeeb #endif
71262b5b6ecSBjoern A. Zeeb #ifdef INET
71362b5b6ecSBjoern A. Zeeb 	case ETHERTYPE_IP:
71462b5b6ecSBjoern A. Zeeb 		le->le_ip4 = ip4;
71562b5b6ecSBjoern A. Zeeb 		le->source_ip4 = ip4->ip_src.s_addr;
71662b5b6ecSBjoern A. Zeeb 		le->dest_ip4 = ip4->ip_dst.s_addr;
71762b5b6ecSBjoern A. Zeeb 		le->eh_type = eh_type;
71862b5b6ecSBjoern A. Zeeb 		le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN;
71962b5b6ecSBjoern A. Zeeb 		break;
72062b5b6ecSBjoern A. Zeeb #endif
7216c5087a8SJack F Vogel 	}
72262b5b6ecSBjoern A. Zeeb 	le->source_port = th->th_sport;
72362b5b6ecSBjoern A. Zeeb 	le->dest_port = th->th_dport;
72462b5b6ecSBjoern A. Zeeb 
72562b5b6ecSBjoern A. Zeeb 	le->next_seq = seq + tcp_data_len;
72662b5b6ecSBjoern A. Zeeb 	le->ack_seq = th->th_ack;
72762b5b6ecSBjoern A. Zeeb 	le->window = th->th_win;
72862b5b6ecSBjoern A. Zeeb 	if (l != 0) {
72962b5b6ecSBjoern A. Zeeb 		le->timestamp = 1;
73062b5b6ecSBjoern A. Zeeb 		le->tsval = ntohl(*(ts_ptr + 1));
73162b5b6ecSBjoern A. Zeeb 		le->tsecr = *(ts_ptr + 2);
73262b5b6ecSBjoern A. Zeeb 	}
73362b5b6ecSBjoern A. Zeeb 
73462b5b6ecSBjoern A. Zeeb #ifdef TCP_LRO_UPDATE_CSUM
73562b5b6ecSBjoern A. Zeeb 	/*
73662b5b6ecSBjoern A. Zeeb 	 * Do not touch the csum of the first packet.  However save the
73762b5b6ecSBjoern A. Zeeb 	 * "adjusted" checksum of just the source and destination addresses,
73862b5b6ecSBjoern A. Zeeb 	 * the next header and the TCP payload.  The length and TCP header
73962b5b6ecSBjoern A. Zeeb 	 * parts may change, so we remove those from the saved checksum and
74062b5b6ecSBjoern A. Zeeb 	 * re-add with final values on tcp_lro_flush() if needed.
74162b5b6ecSBjoern A. Zeeb 	 */
74262b5b6ecSBjoern A. Zeeb 	KASSERT(le->ulp_csum == 0, ("%s: le=%p le->ulp_csum=0x%04x\n",
74362b5b6ecSBjoern A. Zeeb 	    __func__, le, le->ulp_csum));
74462b5b6ecSBjoern A. Zeeb 
74562b5b6ecSBjoern A. Zeeb 	le->ulp_csum = tcp_lro_rx_csum_fixup(le, l3hdr, th, tcp_data_len,
74662b5b6ecSBjoern A. Zeeb 	    ~csum);
74762b5b6ecSBjoern A. Zeeb 	th->th_sum = csum;	/* Restore checksum on first packet. */
74862b5b6ecSBjoern A. Zeeb #endif
74962b5b6ecSBjoern A. Zeeb 
75062b5b6ecSBjoern A. Zeeb 	le->m_head = m;
75162b5b6ecSBjoern A. Zeeb 	le->m_tail = m_last(m);
75262b5b6ecSBjoern A. Zeeb 
75362b5b6ecSBjoern A. Zeeb 	return (0);
75462b5b6ecSBjoern A. Zeeb }
75562b5b6ecSBjoern A. Zeeb 
756e936121dSHans Petter Selasky void
757e936121dSHans Petter Selasky tcp_lro_queue_mbuf(struct lro_ctrl *lc, struct mbuf *mb)
758e936121dSHans Petter Selasky {
759e936121dSHans Petter Selasky 	/* sanity checks */
760e936121dSHans Petter Selasky 	if (__predict_false(lc->ifp == NULL || lc->lro_mbuf_data == NULL ||
761e936121dSHans Petter Selasky 	    lc->lro_mbuf_max == 0)) {
762e936121dSHans Petter Selasky 		/* packet drop */
763e936121dSHans Petter Selasky 		m_freem(mb);
764e936121dSHans Petter Selasky 		return;
765e936121dSHans Petter Selasky 	}
766e936121dSHans Petter Selasky 
767e936121dSHans Petter Selasky 	/* check if packet is not LRO capable */
768e936121dSHans Petter Selasky 	if (__predict_false(mb->m_pkthdr.csum_flags == 0 ||
769e936121dSHans Petter Selasky 	    (lc->ifp->if_capenable & IFCAP_LRO) == 0)) {
770e936121dSHans Petter Selasky 		lc->lro_flushed++;
771e936121dSHans Petter Selasky 		lc->lro_queued++;
772e936121dSHans Petter Selasky 
773e936121dSHans Petter Selasky 		/* input packet to network layer */
774e936121dSHans Petter Selasky 		(*lc->ifp->if_input) (lc->ifp, mb);
775e936121dSHans Petter Selasky 		return;
776e936121dSHans Petter Selasky 	}
777e936121dSHans Petter Selasky 
778e936121dSHans Petter Selasky 	/* check if array is full */
779e936121dSHans Petter Selasky 	if (__predict_false(lc->lro_mbuf_count == lc->lro_mbuf_max))
780e936121dSHans Petter Selasky 		tcp_lro_flush_all(lc);
781e936121dSHans Petter Selasky 
782e936121dSHans Petter Selasky 	/* store sequence number */
783e936121dSHans Petter Selasky 	TCP_LRO_SEQUENCE(mb) = lc->lro_mbuf_count;
784e936121dSHans Petter Selasky 
785e936121dSHans Petter Selasky 	/* enter mbuf */
786e936121dSHans Petter Selasky 	lc->lro_mbuf_data[lc->lro_mbuf_count++] = mb;
787e936121dSHans Petter Selasky }
788e936121dSHans Petter Selasky 
78962b5b6ecSBjoern A. Zeeb /* end */
790