xref: /freebsd/sys/netinet/tcp_lro.c (revision 6dd38b8716bd9c53029707749f00ced53c5ceb7c)
127f190a3SBjoern A. Zeeb /*-
227f190a3SBjoern A. Zeeb  * Copyright (c) 2007, Myricom Inc.
327f190a3SBjoern A. Zeeb  * Copyright (c) 2008, Intel Corporation.
462b5b6ecSBjoern A. Zeeb  * Copyright (c) 2012 The FreeBSD Foundation
5e936121dSHans Petter Selasky  * Copyright (c) 2016 Mellanox Technologies.
627f190a3SBjoern A. Zeeb  * All rights reserved.
727f190a3SBjoern A. Zeeb  *
862b5b6ecSBjoern A. Zeeb  * Portions of this software were developed by Bjoern Zeeb
962b5b6ecSBjoern A. Zeeb  * under sponsorship from the FreeBSD Foundation.
1062b5b6ecSBjoern A. Zeeb  *
1127f190a3SBjoern A. Zeeb  * Redistribution and use in source and binary forms, with or without
1227f190a3SBjoern A. Zeeb  * modification, are permitted provided that the following conditions
1327f190a3SBjoern A. Zeeb  * are met:
1427f190a3SBjoern A. Zeeb  * 1. Redistributions of source code must retain the above copyright
1527f190a3SBjoern A. Zeeb  *    notice, this list of conditions and the following disclaimer.
1627f190a3SBjoern A. Zeeb  * 2. Redistributions in binary form must reproduce the above copyright
1727f190a3SBjoern A. Zeeb  *    notice, this list of conditions and the following disclaimer in the
1827f190a3SBjoern A. Zeeb  *    documentation and/or other materials provided with the distribution.
1927f190a3SBjoern A. Zeeb  *
2027f190a3SBjoern A. Zeeb  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
2127f190a3SBjoern A. Zeeb  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2227f190a3SBjoern A. Zeeb  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2327f190a3SBjoern A. Zeeb  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
2427f190a3SBjoern A. Zeeb  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2527f190a3SBjoern A. Zeeb  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2627f190a3SBjoern A. Zeeb  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2727f190a3SBjoern A. Zeeb  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2827f190a3SBjoern A. Zeeb  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2927f190a3SBjoern A. Zeeb  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3027f190a3SBjoern A. Zeeb  * SUCH DAMAGE.
3127f190a3SBjoern A. Zeeb  */
326c5087a8SJack F Vogel 
3362b5b6ecSBjoern A. Zeeb #include <sys/cdefs.h>
3462b5b6ecSBjoern A. Zeeb __FBSDID("$FreeBSD$");
3562b5b6ecSBjoern A. Zeeb 
3662b5b6ecSBjoern A. Zeeb #include "opt_inet.h"
3762b5b6ecSBjoern A. Zeeb #include "opt_inet6.h"
3862b5b6ecSBjoern A. Zeeb 
396c5087a8SJack F Vogel #include <sys/param.h>
406c5087a8SJack F Vogel #include <sys/systm.h>
416c5087a8SJack F Vogel #include <sys/kernel.h>
428ec07310SGleb Smirnoff #include <sys/malloc.h>
438ec07310SGleb Smirnoff #include <sys/mbuf.h>
446c5087a8SJack F Vogel #include <sys/socket.h>
456c5087a8SJack F Vogel 
466c5087a8SJack F Vogel #include <net/if.h>
4762b5b6ecSBjoern A. Zeeb #include <net/if_var.h>
486c5087a8SJack F Vogel #include <net/ethernet.h>
495fa2656eSBjoern A. Zeeb #include <net/vnet.h>
506c5087a8SJack F Vogel 
516c5087a8SJack F Vogel #include <netinet/in_systm.h>
526c5087a8SJack F Vogel #include <netinet/in.h>
5362b5b6ecSBjoern A. Zeeb #include <netinet/ip6.h>
546c5087a8SJack F Vogel #include <netinet/ip.h>
5531bfc56eSBjoern A. Zeeb #include <netinet/ip_var.h>
566c5087a8SJack F Vogel #include <netinet/tcp.h>
576c5087a8SJack F Vogel #include <netinet/tcp_lro.h>
586c5087a8SJack F Vogel 
5931bfc56eSBjoern A. Zeeb #include <netinet6/ip6_var.h>
6031bfc56eSBjoern A. Zeeb 
616c5087a8SJack F Vogel #include <machine/in_cksum.h>
626c5087a8SJack F Vogel 
63e936121dSHans Petter Selasky static MALLOC_DEFINE(M_LRO, "LRO", "LRO control structures");
646c5087a8SJack F Vogel 
6562b5b6ecSBjoern A. Zeeb #define	TCP_LRO_UPDATE_CSUM	1
6662b5b6ecSBjoern A. Zeeb #ifndef	TCP_LRO_UPDATE_CSUM
6762b5b6ecSBjoern A. Zeeb #define	TCP_LRO_INVALID_CSUM	0x0000
6862b5b6ecSBjoern A. Zeeb #endif
696c5087a8SJack F Vogel 
70*6dd38b87SSepherosa Ziehau static void	tcp_lro_rx_done(struct lro_ctrl *lc);
71*6dd38b87SSepherosa Ziehau 
726c5087a8SJack F Vogel int
7362b5b6ecSBjoern A. Zeeb tcp_lro_init(struct lro_ctrl *lc)
746c5087a8SJack F Vogel {
75e936121dSHans Petter Selasky 	return (tcp_lro_init_args(lc, NULL, TCP_LRO_ENTRIES, 0));
76e936121dSHans Petter Selasky }
77e936121dSHans Petter Selasky 
78e936121dSHans Petter Selasky int
79e936121dSHans Petter Selasky tcp_lro_init_args(struct lro_ctrl *lc, struct ifnet *ifp,
80e936121dSHans Petter Selasky     unsigned lro_entries, unsigned lro_mbufs)
81e936121dSHans Petter Selasky {
8262b5b6ecSBjoern A. Zeeb 	struct lro_entry *le;
83e936121dSHans Petter Selasky 	size_t size;
84e936121dSHans Petter Selasky 	unsigned i;
856c5087a8SJack F Vogel 
8662b5b6ecSBjoern A. Zeeb 	lc->lro_bad_csum = 0;
8762b5b6ecSBjoern A. Zeeb 	lc->lro_queued = 0;
8862b5b6ecSBjoern A. Zeeb 	lc->lro_flushed = 0;
8962b5b6ecSBjoern A. Zeeb 	lc->lro_cnt = 0;
90e936121dSHans Petter Selasky 	lc->lro_mbuf_count = 0;
91e936121dSHans Petter Selasky 	lc->lro_mbuf_max = lro_mbufs;
92e936121dSHans Petter Selasky 	lc->lro_cnt = lro_entries;
937ae3d4bfSSepherosa Ziehau 	lc->lro_ackcnt_lim = TCP_LRO_ACKCNT_MAX;
947ae3d4bfSSepherosa Ziehau 	lc->lro_length_lim = TCP_LRO_LENGTH_MAX;
95e936121dSHans Petter Selasky 	lc->ifp = ifp;
9662b5b6ecSBjoern A. Zeeb 	SLIST_INIT(&lc->lro_free);
9762b5b6ecSBjoern A. Zeeb 	SLIST_INIT(&lc->lro_active);
986c5087a8SJack F Vogel 
99e936121dSHans Petter Selasky 	/* compute size to allocate */
100e936121dSHans Petter Selasky 	size = (lro_mbufs * sizeof(struct mbuf *)) +
101e936121dSHans Petter Selasky 	    (lro_entries * sizeof(*le));
102e936121dSHans Petter Selasky 	lc->lro_mbuf_data = (struct mbuf **)
103e936121dSHans Petter Selasky 	    malloc(size, M_LRO, M_NOWAIT | M_ZERO);
1046c5087a8SJack F Vogel 
105e936121dSHans Petter Selasky 	/* check for out of memory */
106e936121dSHans Petter Selasky 	if (lc->lro_mbuf_data == NULL) {
107e936121dSHans Petter Selasky 		memset(lc, 0, sizeof(*lc));
108e936121dSHans Petter Selasky 		return (ENOMEM);
109e936121dSHans Petter Selasky 	}
110e936121dSHans Petter Selasky 	/* compute offset for LRO entries */
111e936121dSHans Petter Selasky 	le = (struct lro_entry *)
112e936121dSHans Petter Selasky 	    (lc->lro_mbuf_data + lro_mbufs);
113e936121dSHans Petter Selasky 
114e936121dSHans Petter Selasky 	/* setup linked list */
115e936121dSHans Petter Selasky 	for (i = 0; i != lro_entries; i++)
116e936121dSHans Petter Selasky 		SLIST_INSERT_HEAD(&lc->lro_free, le + i, next);
117e936121dSHans Petter Selasky 
118e936121dSHans Petter Selasky 	return (0);
1196c5087a8SJack F Vogel }
1206c5087a8SJack F Vogel 
1216c5087a8SJack F Vogel void
12262b5b6ecSBjoern A. Zeeb tcp_lro_free(struct lro_ctrl *lc)
1236c5087a8SJack F Vogel {
12462b5b6ecSBjoern A. Zeeb 	struct lro_entry *le;
125e936121dSHans Petter Selasky 	unsigned x;
1266c5087a8SJack F Vogel 
127e936121dSHans Petter Selasky 	/* reset LRO free list */
128e936121dSHans Petter Selasky 	SLIST_INIT(&lc->lro_free);
129e936121dSHans Petter Selasky 
130e936121dSHans Petter Selasky 	/* free active mbufs, if any */
131e936121dSHans Petter Selasky 	while ((le = SLIST_FIRST(&lc->lro_active)) != NULL) {
132e936121dSHans Petter Selasky 		SLIST_REMOVE_HEAD(&lc->lro_active, next);
133e936121dSHans Petter Selasky 		m_freem(le->m_head);
1346c5087a8SJack F Vogel 	}
135e936121dSHans Petter Selasky 
136e936121dSHans Petter Selasky 	/* free mbuf array, if any */
137e936121dSHans Petter Selasky 	for (x = 0; x != lc->lro_mbuf_count; x++)
138e936121dSHans Petter Selasky 		m_freem(lc->lro_mbuf_data[x]);
139e936121dSHans Petter Selasky 	lc->lro_mbuf_count = 0;
140e936121dSHans Petter Selasky 
141e936121dSHans Petter Selasky 	/* free allocated memory, if any */
142e936121dSHans Petter Selasky 	free(lc->lro_mbuf_data, M_LRO);
143e936121dSHans Petter Selasky 	lc->lro_mbuf_data = NULL;
1446c5087a8SJack F Vogel }
1456c5087a8SJack F Vogel 
14662b5b6ecSBjoern A. Zeeb #ifdef TCP_LRO_UPDATE_CSUM
14762b5b6ecSBjoern A. Zeeb static uint16_t
14862b5b6ecSBjoern A. Zeeb tcp_lro_csum_th(struct tcphdr *th)
14962b5b6ecSBjoern A. Zeeb {
15062b5b6ecSBjoern A. Zeeb 	uint32_t ch;
15162b5b6ecSBjoern A. Zeeb 	uint16_t *p, l;
15262b5b6ecSBjoern A. Zeeb 
15362b5b6ecSBjoern A. Zeeb 	ch = th->th_sum = 0x0000;
15462b5b6ecSBjoern A. Zeeb 	l = th->th_off;
15562b5b6ecSBjoern A. Zeeb 	p = (uint16_t *)th;
15662b5b6ecSBjoern A. Zeeb 	while (l > 0) {
15762b5b6ecSBjoern A. Zeeb 		ch += *p;
15862b5b6ecSBjoern A. Zeeb 		p++;
15962b5b6ecSBjoern A. Zeeb 		ch += *p;
16062b5b6ecSBjoern A. Zeeb 		p++;
16162b5b6ecSBjoern A. Zeeb 		l--;
16262b5b6ecSBjoern A. Zeeb 	}
16362b5b6ecSBjoern A. Zeeb 	while (ch > 0xffff)
16462b5b6ecSBjoern A. Zeeb 		ch = (ch >> 16) + (ch & 0xffff);
16562b5b6ecSBjoern A. Zeeb 
16662b5b6ecSBjoern A. Zeeb 	return (ch & 0xffff);
16762b5b6ecSBjoern A. Zeeb }
16862b5b6ecSBjoern A. Zeeb 
16962b5b6ecSBjoern A. Zeeb static uint16_t
17062b5b6ecSBjoern A. Zeeb tcp_lro_rx_csum_fixup(struct lro_entry *le, void *l3hdr, struct tcphdr *th,
17162b5b6ecSBjoern A. Zeeb     uint16_t tcp_data_len, uint16_t csum)
17262b5b6ecSBjoern A. Zeeb {
17362b5b6ecSBjoern A. Zeeb 	uint32_t c;
17462b5b6ecSBjoern A. Zeeb 	uint16_t cs;
17562b5b6ecSBjoern A. Zeeb 
17662b5b6ecSBjoern A. Zeeb 	c = csum;
17762b5b6ecSBjoern A. Zeeb 
17862b5b6ecSBjoern A. Zeeb 	/* Remove length from checksum. */
17962b5b6ecSBjoern A. Zeeb 	switch (le->eh_type) {
18062b5b6ecSBjoern A. Zeeb #ifdef INET6
18162b5b6ecSBjoern A. Zeeb 	case ETHERTYPE_IPV6:
18262b5b6ecSBjoern A. Zeeb 	{
18362b5b6ecSBjoern A. Zeeb 		struct ip6_hdr *ip6;
18462b5b6ecSBjoern A. Zeeb 
18562b5b6ecSBjoern A. Zeeb 		ip6 = (struct ip6_hdr *)l3hdr;
18662b5b6ecSBjoern A. Zeeb 		if (le->append_cnt == 0)
18762b5b6ecSBjoern A. Zeeb 			cs = ip6->ip6_plen;
18862b5b6ecSBjoern A. Zeeb 		else {
18962b5b6ecSBjoern A. Zeeb 			uint32_t cx;
19062b5b6ecSBjoern A. Zeeb 
19162b5b6ecSBjoern A. Zeeb 			cx = ntohs(ip6->ip6_plen);
19262b5b6ecSBjoern A. Zeeb 			cs = in6_cksum_pseudo(ip6, cx, ip6->ip6_nxt, 0);
19362b5b6ecSBjoern A. Zeeb 		}
19462b5b6ecSBjoern A. Zeeb 		break;
19562b5b6ecSBjoern A. Zeeb 	}
19662b5b6ecSBjoern A. Zeeb #endif
19762b5b6ecSBjoern A. Zeeb #ifdef INET
19862b5b6ecSBjoern A. Zeeb 	case ETHERTYPE_IP:
19962b5b6ecSBjoern A. Zeeb 	{
20062b5b6ecSBjoern A. Zeeb 		struct ip *ip4;
20162b5b6ecSBjoern A. Zeeb 
20262b5b6ecSBjoern A. Zeeb 		ip4 = (struct ip *)l3hdr;
20362b5b6ecSBjoern A. Zeeb 		if (le->append_cnt == 0)
20462b5b6ecSBjoern A. Zeeb 			cs = ip4->ip_len;
20562b5b6ecSBjoern A. Zeeb 		else {
20662b5b6ecSBjoern A. Zeeb 			cs = in_addword(ntohs(ip4->ip_len) - sizeof(*ip4),
20762b5b6ecSBjoern A. Zeeb 			    IPPROTO_TCP);
20862b5b6ecSBjoern A. Zeeb 			cs = in_pseudo(ip4->ip_src.s_addr, ip4->ip_dst.s_addr,
20962b5b6ecSBjoern A. Zeeb 			    htons(cs));
21062b5b6ecSBjoern A. Zeeb 		}
21162b5b6ecSBjoern A. Zeeb 		break;
21262b5b6ecSBjoern A. Zeeb 	}
21362b5b6ecSBjoern A. Zeeb #endif
21462b5b6ecSBjoern A. Zeeb 	default:
21562b5b6ecSBjoern A. Zeeb 		cs = 0;		/* Keep compiler happy. */
21662b5b6ecSBjoern A. Zeeb 	}
21762b5b6ecSBjoern A. Zeeb 
21862b5b6ecSBjoern A. Zeeb 	cs = ~cs;
21962b5b6ecSBjoern A. Zeeb 	c += cs;
22062b5b6ecSBjoern A. Zeeb 
22162b5b6ecSBjoern A. Zeeb 	/* Remove TCP header csum. */
22262b5b6ecSBjoern A. Zeeb 	cs = ~tcp_lro_csum_th(th);
22362b5b6ecSBjoern A. Zeeb 	c += cs;
22462b5b6ecSBjoern A. Zeeb 	while (c > 0xffff)
22562b5b6ecSBjoern A. Zeeb 		c = (c >> 16) + (c & 0xffff);
22662b5b6ecSBjoern A. Zeeb 
22762b5b6ecSBjoern A. Zeeb 	return (c & 0xffff);
22862b5b6ecSBjoern A. Zeeb }
22962b5b6ecSBjoern A. Zeeb #endif
23062b5b6ecSBjoern A. Zeeb 
231*6dd38b87SSepherosa Ziehau static void
232*6dd38b87SSepherosa Ziehau tcp_lro_rx_done(struct lro_ctrl *lc)
233*6dd38b87SSepherosa Ziehau {
234*6dd38b87SSepherosa Ziehau 	struct lro_entry *le;
235*6dd38b87SSepherosa Ziehau 
236*6dd38b87SSepherosa Ziehau 	while ((le = SLIST_FIRST(&lc->lro_active)) != NULL) {
237*6dd38b87SSepherosa Ziehau 		SLIST_REMOVE_HEAD(&lc->lro_active, next);
238*6dd38b87SSepherosa Ziehau 		tcp_lro_flush(lc, le);
239*6dd38b87SSepherosa Ziehau 	}
240*6dd38b87SSepherosa Ziehau }
241*6dd38b87SSepherosa Ziehau 
2426c5087a8SJack F Vogel void
2437127e6acSNavdeep Parhar tcp_lro_flush_inactive(struct lro_ctrl *lc, const struct timeval *timeout)
2447127e6acSNavdeep Parhar {
2457127e6acSNavdeep Parhar 	struct lro_entry *le, *le_tmp;
2467127e6acSNavdeep Parhar 	struct timeval tv;
2477127e6acSNavdeep Parhar 
2487127e6acSNavdeep Parhar 	if (SLIST_EMPTY(&lc->lro_active))
2497127e6acSNavdeep Parhar 		return;
2507127e6acSNavdeep Parhar 
2517127e6acSNavdeep Parhar 	getmicrotime(&tv);
2527127e6acSNavdeep Parhar 	timevalsub(&tv, timeout);
2537127e6acSNavdeep Parhar 	SLIST_FOREACH_SAFE(le, &lc->lro_active, next, le_tmp) {
2547127e6acSNavdeep Parhar 		if (timevalcmp(&tv, &le->mtime, >=)) {
2557127e6acSNavdeep Parhar 			SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
2567127e6acSNavdeep Parhar 			tcp_lro_flush(lc, le);
2577127e6acSNavdeep Parhar 		}
2587127e6acSNavdeep Parhar 	}
2597127e6acSNavdeep Parhar }
2607127e6acSNavdeep Parhar 
2617127e6acSNavdeep Parhar void
26262b5b6ecSBjoern A. Zeeb tcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *le)
2636c5087a8SJack F Vogel {
2646c5087a8SJack F Vogel 
26562b5b6ecSBjoern A. Zeeb 	if (le->append_cnt > 0) {
26662b5b6ecSBjoern A. Zeeb 		struct tcphdr *th;
26762b5b6ecSBjoern A. Zeeb 		uint16_t p_len;
2686c5087a8SJack F Vogel 
26962b5b6ecSBjoern A. Zeeb 		p_len = htons(le->p_len);
27062b5b6ecSBjoern A. Zeeb 		switch (le->eh_type) {
27162b5b6ecSBjoern A. Zeeb #ifdef INET6
27262b5b6ecSBjoern A. Zeeb 		case ETHERTYPE_IPV6:
2736c5087a8SJack F Vogel 		{
27462b5b6ecSBjoern A. Zeeb 			struct ip6_hdr *ip6;
27562b5b6ecSBjoern A. Zeeb 
27662b5b6ecSBjoern A. Zeeb 			ip6 = le->le_ip6;
27762b5b6ecSBjoern A. Zeeb 			ip6->ip6_plen = p_len;
27862b5b6ecSBjoern A. Zeeb 			th = (struct tcphdr *)(ip6 + 1);
27962b5b6ecSBjoern A. Zeeb 			le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID |
28062b5b6ecSBjoern A. Zeeb 			    CSUM_PSEUDO_HDR;
28162b5b6ecSBjoern A. Zeeb 			le->p_len += ETHER_HDR_LEN + sizeof(*ip6);
28262b5b6ecSBjoern A. Zeeb 			break;
28362b5b6ecSBjoern A. Zeeb 		}
28462b5b6ecSBjoern A. Zeeb #endif
28562b5b6ecSBjoern A. Zeeb #ifdef INET
28662b5b6ecSBjoern A. Zeeb 		case ETHERTYPE_IP:
28762b5b6ecSBjoern A. Zeeb 		{
28862b5b6ecSBjoern A. Zeeb 			struct ip *ip4;
28962b5b6ecSBjoern A. Zeeb #ifdef TCP_LRO_UPDATE_CSUM
29062b5b6ecSBjoern A. Zeeb 			uint32_t cl;
29162b5b6ecSBjoern A. Zeeb 			uint16_t c;
29262b5b6ecSBjoern A. Zeeb #endif
29362b5b6ecSBjoern A. Zeeb 
29462b5b6ecSBjoern A. Zeeb 			ip4 = le->le_ip4;
29562b5b6ecSBjoern A. Zeeb #ifdef TCP_LRO_UPDATE_CSUM
29662b5b6ecSBjoern A. Zeeb 			/* Fix IP header checksum for new length. */
29762b5b6ecSBjoern A. Zeeb 			c = ~ip4->ip_sum;
29862b5b6ecSBjoern A. Zeeb 			cl = c;
29962b5b6ecSBjoern A. Zeeb 			c = ~ip4->ip_len;
30062b5b6ecSBjoern A. Zeeb 			cl += c + p_len;
30162b5b6ecSBjoern A. Zeeb 			while (cl > 0xffff)
30262b5b6ecSBjoern A. Zeeb 				cl = (cl >> 16) + (cl & 0xffff);
30362b5b6ecSBjoern A. Zeeb 			c = cl;
30462b5b6ecSBjoern A. Zeeb 			ip4->ip_sum = ~c;
30562b5b6ecSBjoern A. Zeeb #else
30662b5b6ecSBjoern A. Zeeb 			ip4->ip_sum = TCP_LRO_INVALID_CSUM;
30762b5b6ecSBjoern A. Zeeb #endif
30862b5b6ecSBjoern A. Zeeb 			ip4->ip_len = p_len;
30962b5b6ecSBjoern A. Zeeb 			th = (struct tcphdr *)(ip4 + 1);
31062b5b6ecSBjoern A. Zeeb 			le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID |
31162b5b6ecSBjoern A. Zeeb 			    CSUM_PSEUDO_HDR | CSUM_IP_CHECKED | CSUM_IP_VALID;
31262b5b6ecSBjoern A. Zeeb 			le->p_len += ETHER_HDR_LEN;
31362b5b6ecSBjoern A. Zeeb 			break;
31462b5b6ecSBjoern A. Zeeb 		}
31562b5b6ecSBjoern A. Zeeb #endif
31662b5b6ecSBjoern A. Zeeb 		default:
31762b5b6ecSBjoern A. Zeeb 			th = NULL;	/* Keep compiler happy. */
31862b5b6ecSBjoern A. Zeeb 		}
31962b5b6ecSBjoern A. Zeeb 		le->m_head->m_pkthdr.csum_data = 0xffff;
32062b5b6ecSBjoern A. Zeeb 		le->m_head->m_pkthdr.len = le->p_len;
32162b5b6ecSBjoern A. Zeeb 
32262b5b6ecSBjoern A. Zeeb 		/* Incorporate the latest ACK into the TCP header. */
32362b5b6ecSBjoern A. Zeeb 		th->th_ack = le->ack_seq;
32462b5b6ecSBjoern A. Zeeb 		th->th_win = le->window;
32562b5b6ecSBjoern A. Zeeb 		/* Incorporate latest timestamp into the TCP header. */
32662b5b6ecSBjoern A. Zeeb 		if (le->timestamp != 0) {
3276c5087a8SJack F Vogel 			uint32_t *ts_ptr;
3286c5087a8SJack F Vogel 
32962b5b6ecSBjoern A. Zeeb 			ts_ptr = (uint32_t *)(th + 1);
33062b5b6ecSBjoern A. Zeeb 			ts_ptr[1] = htonl(le->tsval);
33162b5b6ecSBjoern A. Zeeb 			ts_ptr[2] = le->tsecr;
33262b5b6ecSBjoern A. Zeeb 		}
33362b5b6ecSBjoern A. Zeeb #ifdef TCP_LRO_UPDATE_CSUM
33462b5b6ecSBjoern A. Zeeb 		/* Update the TCP header checksum. */
33562b5b6ecSBjoern A. Zeeb 		le->ulp_csum += p_len;
33662b5b6ecSBjoern A. Zeeb 		le->ulp_csum += tcp_lro_csum_th(th);
33762b5b6ecSBjoern A. Zeeb 		while (le->ulp_csum > 0xffff)
33862b5b6ecSBjoern A. Zeeb 			le->ulp_csum = (le->ulp_csum >> 16) +
33962b5b6ecSBjoern A. Zeeb 			    (le->ulp_csum & 0xffff);
34062b5b6ecSBjoern A. Zeeb 		th->th_sum = (le->ulp_csum & 0xffff);
34162b5b6ecSBjoern A. Zeeb 		th->th_sum = ~th->th_sum;
34262b5b6ecSBjoern A. Zeeb #else
34362b5b6ecSBjoern A. Zeeb 		th->th_sum = TCP_LRO_INVALID_CSUM;
34462b5b6ecSBjoern A. Zeeb #endif
34562b5b6ecSBjoern A. Zeeb 	}
3466c5087a8SJack F Vogel 
34762b5b6ecSBjoern A. Zeeb 	(*lc->ifp->if_input)(lc->ifp, le->m_head);
34862b5b6ecSBjoern A. Zeeb 	lc->lro_queued += le->append_cnt + 1;
34962b5b6ecSBjoern A. Zeeb 	lc->lro_flushed++;
35062b5b6ecSBjoern A. Zeeb 	bzero(le, sizeof(*le));
35162b5b6ecSBjoern A. Zeeb 	SLIST_INSERT_HEAD(&lc->lro_free, le, next);
35262b5b6ecSBjoern A. Zeeb }
3536c5087a8SJack F Vogel 
354e936121dSHans Petter Selasky static int
355e936121dSHans Petter Selasky tcp_lro_mbuf_compare_header(const void *ppa, const void *ppb)
356e936121dSHans Petter Selasky {
357e936121dSHans Petter Selasky 	const struct mbuf *ma = *((const struct mbuf * const *)ppa);
358e936121dSHans Petter Selasky 	const struct mbuf *mb = *((const struct mbuf * const *)ppb);
359e936121dSHans Petter Selasky 	int ret;
360e936121dSHans Petter Selasky 
361e936121dSHans Petter Selasky 	ret = M_HASHTYPE_GET(ma) - M_HASHTYPE_GET(mb);
362e936121dSHans Petter Selasky 	if (ret != 0)
363e936121dSHans Petter Selasky 		goto done;
364e936121dSHans Petter Selasky 
3653e9470b7SHans Petter Selasky 	if (ma->m_pkthdr.flowid > mb->m_pkthdr.flowid)
3663e9470b7SHans Petter Selasky 		return (1);
3673e9470b7SHans Petter Selasky 	else if (ma->m_pkthdr.flowid < mb->m_pkthdr.flowid)
3683e9470b7SHans Petter Selasky 		return (-1);
369e936121dSHans Petter Selasky 
370e936121dSHans Petter Selasky 	ret = TCP_LRO_SEQUENCE(ma) - TCP_LRO_SEQUENCE(mb);
371e936121dSHans Petter Selasky done:
372e936121dSHans Petter Selasky 	return (ret);
373e936121dSHans Petter Selasky }
374e936121dSHans Petter Selasky 
375e936121dSHans Petter Selasky void
376e936121dSHans Petter Selasky tcp_lro_flush_all(struct lro_ctrl *lc)
377e936121dSHans Petter Selasky {
378e936121dSHans Petter Selasky 	uint32_t hashtype;
379e936121dSHans Petter Selasky 	uint32_t flowid;
380e936121dSHans Petter Selasky 	unsigned x;
381e936121dSHans Petter Selasky 
382e936121dSHans Petter Selasky 	/* check if no mbufs to flush */
383*6dd38b87SSepherosa Ziehau 	if (lc->lro_mbuf_count == 0)
384e936121dSHans Petter Selasky 		goto done;
385e936121dSHans Petter Selasky 
386e936121dSHans Petter Selasky 	/* sort all mbufs according to stream */
387e936121dSHans Petter Selasky 	qsort(lc->lro_mbuf_data, lc->lro_mbuf_count, sizeof(struct mbuf *),
388e936121dSHans Petter Selasky 	    &tcp_lro_mbuf_compare_header);
389e936121dSHans Petter Selasky 
390e936121dSHans Petter Selasky 	/* input data into LRO engine, stream by stream */
391e936121dSHans Petter Selasky 	flowid = 0;
392e936121dSHans Petter Selasky 	hashtype = M_HASHTYPE_NONE;
393e936121dSHans Petter Selasky 	for (x = 0; x != lc->lro_mbuf_count; x++) {
394e936121dSHans Petter Selasky 		struct mbuf *mb;
395e936121dSHans Petter Selasky 
396e936121dSHans Petter Selasky 		mb = lc->lro_mbuf_data[x];
397e936121dSHans Petter Selasky 
398e936121dSHans Petter Selasky 		/* check for new stream */
399e936121dSHans Petter Selasky 		if (mb->m_pkthdr.flowid != flowid ||
400e936121dSHans Petter Selasky 		    M_HASHTYPE_GET(mb) != hashtype) {
401e936121dSHans Petter Selasky 			flowid = mb->m_pkthdr.flowid;
402e936121dSHans Petter Selasky 			hashtype = M_HASHTYPE_GET(mb);
403e936121dSHans Petter Selasky 
404e936121dSHans Petter Selasky 			/* flush active streams */
405*6dd38b87SSepherosa Ziehau 			tcp_lro_rx_done(lc);
406e936121dSHans Petter Selasky 		}
407e936121dSHans Petter Selasky #ifdef TCP_LRO_RESET_SEQUENCE
408e936121dSHans Petter Selasky 		/* reset sequence number */
409e936121dSHans Petter Selasky 		TCP_LRO_SEQUENCE(mb) = 0;
410e936121dSHans Petter Selasky #endif
411e936121dSHans Petter Selasky 		/* add packet to LRO engine */
412e936121dSHans Petter Selasky 		if (tcp_lro_rx(lc, mb, 0) != 0) {
413e936121dSHans Petter Selasky 			/* input packet to network layer */
414e936121dSHans Petter Selasky 			(*lc->ifp->if_input)(lc->ifp, mb);
415e936121dSHans Petter Selasky 			lc->lro_queued++;
416e936121dSHans Petter Selasky 			lc->lro_flushed++;
417e936121dSHans Petter Selasky 		}
418e936121dSHans Petter Selasky 	}
419e936121dSHans Petter Selasky done:
420e936121dSHans Petter Selasky 	/* flush active streams */
421*6dd38b87SSepherosa Ziehau 	tcp_lro_rx_done(lc);
422*6dd38b87SSepherosa Ziehau 
423e936121dSHans Petter Selasky 	lc->lro_mbuf_count = 0;
424e936121dSHans Petter Selasky }
425e936121dSHans Petter Selasky 
42662b5b6ecSBjoern A. Zeeb #ifdef INET6
42762b5b6ecSBjoern A. Zeeb static int
42862b5b6ecSBjoern A. Zeeb tcp_lro_rx_ipv6(struct lro_ctrl *lc, struct mbuf *m, struct ip6_hdr *ip6,
42962b5b6ecSBjoern A. Zeeb     struct tcphdr **th)
43062b5b6ecSBjoern A. Zeeb {
4316c5087a8SJack F Vogel 
43262b5b6ecSBjoern A. Zeeb 	/* XXX-BZ we should check the flow-label. */
4336c5087a8SJack F Vogel 
43462b5b6ecSBjoern A. Zeeb 	/* XXX-BZ We do not yet support ext. hdrs. */
43562b5b6ecSBjoern A. Zeeb 	if (ip6->ip6_nxt != IPPROTO_TCP)
43662b5b6ecSBjoern A. Zeeb 		return (TCP_LRO_NOT_SUPPORTED);
43762b5b6ecSBjoern A. Zeeb 
43862b5b6ecSBjoern A. Zeeb 	/* Find the TCP header. */
43962b5b6ecSBjoern A. Zeeb 	*th = (struct tcphdr *)(ip6 + 1);
44062b5b6ecSBjoern A. Zeeb 
44162b5b6ecSBjoern A. Zeeb 	return (0);
44262b5b6ecSBjoern A. Zeeb }
44362b5b6ecSBjoern A. Zeeb #endif
44462b5b6ecSBjoern A. Zeeb 
44562b5b6ecSBjoern A. Zeeb #ifdef INET
44662b5b6ecSBjoern A. Zeeb static int
44762b5b6ecSBjoern A. Zeeb tcp_lro_rx_ipv4(struct lro_ctrl *lc, struct mbuf *m, struct ip *ip4,
44862b5b6ecSBjoern A. Zeeb     struct tcphdr **th)
44962b5b6ecSBjoern A. Zeeb {
45062b5b6ecSBjoern A. Zeeb 	int csum_flags;
45162b5b6ecSBjoern A. Zeeb 	uint16_t csum;
45262b5b6ecSBjoern A. Zeeb 
45362b5b6ecSBjoern A. Zeeb 	if (ip4->ip_p != IPPROTO_TCP)
45462b5b6ecSBjoern A. Zeeb 		return (TCP_LRO_NOT_SUPPORTED);
45562b5b6ecSBjoern A. Zeeb 
45662b5b6ecSBjoern A. Zeeb 	/* Ensure there are no options. */
45762b5b6ecSBjoern A. Zeeb 	if ((ip4->ip_hl << 2) != sizeof (*ip4))
45862b5b6ecSBjoern A. Zeeb 		return (TCP_LRO_CANNOT);
45962b5b6ecSBjoern A. Zeeb 
46062b5b6ecSBjoern A. Zeeb 	/* .. and the packet is not fragmented. */
46162b5b6ecSBjoern A. Zeeb 	if (ip4->ip_off & htons(IP_MF|IP_OFFMASK))
46262b5b6ecSBjoern A. Zeeb 		return (TCP_LRO_CANNOT);
46362b5b6ecSBjoern A. Zeeb 
46462b5b6ecSBjoern A. Zeeb 	/* Legacy IP has a header checksum that needs to be correct. */
46562b5b6ecSBjoern A. Zeeb 	csum_flags = m->m_pkthdr.csum_flags;
46645709593SKip Macy 	if (csum_flags & CSUM_IP_CHECKED) {
46745709593SKip Macy 		if (__predict_false((csum_flags & CSUM_IP_VALID) == 0)) {
46862b5b6ecSBjoern A. Zeeb 			lc->lro_bad_csum++;
46962b5b6ecSBjoern A. Zeeb 			return (TCP_LRO_CANNOT);
47045709593SKip Macy 		}
47145709593SKip Macy 	} else {
47262b5b6ecSBjoern A. Zeeb 		csum = in_cksum_hdr(ip4);
473e5ca1ffaSAndrew Gallatin 		if (__predict_false((csum) != 0)) {
47462b5b6ecSBjoern A. Zeeb 			lc->lro_bad_csum++;
47562b5b6ecSBjoern A. Zeeb 			return (TCP_LRO_CANNOT);
4766c5087a8SJack F Vogel 		}
47745709593SKip Macy 	}
4786c5087a8SJack F Vogel 
47962b5b6ecSBjoern A. Zeeb 	/* Find the TCP header (we assured there are no IP options). */
48062b5b6ecSBjoern A. Zeeb 	*th = (struct tcphdr *)(ip4 + 1);
4816c5087a8SJack F Vogel 
48262b5b6ecSBjoern A. Zeeb 	return (0);
4836c5087a8SJack F Vogel }
48462b5b6ecSBjoern A. Zeeb #endif
4856c5087a8SJack F Vogel 
48662b5b6ecSBjoern A. Zeeb int
48762b5b6ecSBjoern A. Zeeb tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
48862b5b6ecSBjoern A. Zeeb {
48962b5b6ecSBjoern A. Zeeb 	struct lro_entry *le;
49062b5b6ecSBjoern A. Zeeb 	struct ether_header *eh;
49162b5b6ecSBjoern A. Zeeb #ifdef INET6
49262b5b6ecSBjoern A. Zeeb 	struct ip6_hdr *ip6 = NULL;	/* Keep compiler happy. */
49362b5b6ecSBjoern A. Zeeb #endif
49462b5b6ecSBjoern A. Zeeb #ifdef INET
49562b5b6ecSBjoern A. Zeeb 	struct ip *ip4 = NULL;		/* Keep compiler happy. */
49662b5b6ecSBjoern A. Zeeb #endif
49762b5b6ecSBjoern A. Zeeb 	struct tcphdr *th;
49862b5b6ecSBjoern A. Zeeb 	void *l3hdr = NULL;		/* Keep compiler happy. */
49962b5b6ecSBjoern A. Zeeb 	uint32_t *ts_ptr;
50062b5b6ecSBjoern A. Zeeb 	tcp_seq seq;
50162b5b6ecSBjoern A. Zeeb 	int error, ip_len, l;
50262b5b6ecSBjoern A. Zeeb 	uint16_t eh_type, tcp_data_len;
5036c5087a8SJack F Vogel 
50462b5b6ecSBjoern A. Zeeb 	/* We expect a contiguous header [eh, ip, tcp]. */
50562b5b6ecSBjoern A. Zeeb 
50662b5b6ecSBjoern A. Zeeb 	eh = mtod(m, struct ether_header *);
50762b5b6ecSBjoern A. Zeeb 	eh_type = ntohs(eh->ether_type);
50862b5b6ecSBjoern A. Zeeb 	switch (eh_type) {
50962b5b6ecSBjoern A. Zeeb #ifdef INET6
51062b5b6ecSBjoern A. Zeeb 	case ETHERTYPE_IPV6:
5115fa2656eSBjoern A. Zeeb 	{
5125fa2656eSBjoern A. Zeeb 		CURVNET_SET(lc->ifp->if_vnet);
51331bfc56eSBjoern A. Zeeb 		if (V_ip6_forwarding != 0) {
51431bfc56eSBjoern A. Zeeb 			/* XXX-BZ stats but changing lro_ctrl is a problem. */
5155fa2656eSBjoern A. Zeeb 			CURVNET_RESTORE();
51631bfc56eSBjoern A. Zeeb 			return (TCP_LRO_CANNOT);
51731bfc56eSBjoern A. Zeeb 		}
5185fa2656eSBjoern A. Zeeb 		CURVNET_RESTORE();
51962b5b6ecSBjoern A. Zeeb 		l3hdr = ip6 = (struct ip6_hdr *)(eh + 1);
52062b5b6ecSBjoern A. Zeeb 		error = tcp_lro_rx_ipv6(lc, m, ip6, &th);
52162b5b6ecSBjoern A. Zeeb 		if (error != 0)
52262b5b6ecSBjoern A. Zeeb 			return (error);
52362b5b6ecSBjoern A. Zeeb 		tcp_data_len = ntohs(ip6->ip6_plen);
52462b5b6ecSBjoern A. Zeeb 		ip_len = sizeof(*ip6) + tcp_data_len;
52562b5b6ecSBjoern A. Zeeb 		break;
5265fa2656eSBjoern A. Zeeb 	}
52762b5b6ecSBjoern A. Zeeb #endif
52862b5b6ecSBjoern A. Zeeb #ifdef INET
52962b5b6ecSBjoern A. Zeeb 	case ETHERTYPE_IP:
5305fa2656eSBjoern A. Zeeb 	{
5315fa2656eSBjoern A. Zeeb 		CURVNET_SET(lc->ifp->if_vnet);
53231bfc56eSBjoern A. Zeeb 		if (V_ipforwarding != 0) {
53331bfc56eSBjoern A. Zeeb 			/* XXX-BZ stats but changing lro_ctrl is a problem. */
5345fa2656eSBjoern A. Zeeb 			CURVNET_RESTORE();
53531bfc56eSBjoern A. Zeeb 			return (TCP_LRO_CANNOT);
53631bfc56eSBjoern A. Zeeb 		}
5375fa2656eSBjoern A. Zeeb 		CURVNET_RESTORE();
53862b5b6ecSBjoern A. Zeeb 		l3hdr = ip4 = (struct ip *)(eh + 1);
53962b5b6ecSBjoern A. Zeeb 		error = tcp_lro_rx_ipv4(lc, m, ip4, &th);
54062b5b6ecSBjoern A. Zeeb 		if (error != 0)
54162b5b6ecSBjoern A. Zeeb 			return (error);
54262b5b6ecSBjoern A. Zeeb 		ip_len = ntohs(ip4->ip_len);
54362b5b6ecSBjoern A. Zeeb 		tcp_data_len = ip_len - sizeof(*ip4);
54462b5b6ecSBjoern A. Zeeb 		break;
5455fa2656eSBjoern A. Zeeb 	}
54662b5b6ecSBjoern A. Zeeb #endif
54762b5b6ecSBjoern A. Zeeb 	/* XXX-BZ what happens in case of VLAN(s)? */
54862b5b6ecSBjoern A. Zeeb 	default:
54962b5b6ecSBjoern A. Zeeb 		return (TCP_LRO_NOT_SUPPORTED);
55062b5b6ecSBjoern A. Zeeb 	}
5516c5087a8SJack F Vogel 
5526c5087a8SJack F Vogel 	/*
55362b5b6ecSBjoern A. Zeeb 	 * If the frame is padded beyond the end of the IP packet, then we must
55462b5b6ecSBjoern A. Zeeb 	 * trim the extra bytes off.
5556c5087a8SJack F Vogel 	 */
55662b5b6ecSBjoern A. Zeeb 	l = m->m_pkthdr.len - (ETHER_HDR_LEN + ip_len);
55762b5b6ecSBjoern A. Zeeb 	if (l != 0) {
55862b5b6ecSBjoern A. Zeeb 		if (l < 0)
55962b5b6ecSBjoern A. Zeeb 			/* Truncated packet. */
56062b5b6ecSBjoern A. Zeeb 			return (TCP_LRO_CANNOT);
56162b5b6ecSBjoern A. Zeeb 
56262b5b6ecSBjoern A. Zeeb 		m_adj(m, -l);
5636c5087a8SJack F Vogel 	}
5646c5087a8SJack F Vogel 
56562b5b6ecSBjoern A. Zeeb 	/*
56662b5b6ecSBjoern A. Zeeb 	 * Check TCP header constraints.
56762b5b6ecSBjoern A. Zeeb 	 */
56862b5b6ecSBjoern A. Zeeb 	/* Ensure no bits set besides ACK or PSH. */
56962b5b6ecSBjoern A. Zeeb 	if ((th->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
57062b5b6ecSBjoern A. Zeeb 		return (TCP_LRO_CANNOT);
57162b5b6ecSBjoern A. Zeeb 
57262b5b6ecSBjoern A. Zeeb 	/* XXX-BZ We lose a AKC|PUSH flag concatinating multiple segments. */
57362b5b6ecSBjoern A. Zeeb 	/* XXX-BZ Ideally we'd flush on PUSH? */
57462b5b6ecSBjoern A. Zeeb 
57562b5b6ecSBjoern A. Zeeb 	/*
57662b5b6ecSBjoern A. Zeeb 	 * Check for timestamps.
57762b5b6ecSBjoern A. Zeeb 	 * Since the only option we handle are timestamps, we only have to
57862b5b6ecSBjoern A. Zeeb 	 * handle the simple case of aligned timestamps.
57962b5b6ecSBjoern A. Zeeb 	 */
58062b5b6ecSBjoern A. Zeeb 	l = (th->th_off << 2);
58162b5b6ecSBjoern A. Zeeb 	tcp_data_len -= l;
58262b5b6ecSBjoern A. Zeeb 	l -= sizeof(*th);
58362b5b6ecSBjoern A. Zeeb 	ts_ptr = (uint32_t *)(th + 1);
58462b5b6ecSBjoern A. Zeeb 	if (l != 0 && (__predict_false(l != TCPOLEN_TSTAMP_APPA) ||
58562b5b6ecSBjoern A. Zeeb 	    (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|
58662b5b6ecSBjoern A. Zeeb 	    TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP))))
58762b5b6ecSBjoern A. Zeeb 		return (TCP_LRO_CANNOT);
58862b5b6ecSBjoern A. Zeeb 
58962b5b6ecSBjoern A. Zeeb 	/* If the driver did not pass in the checksum, set it now. */
59062b5b6ecSBjoern A. Zeeb 	if (csum == 0x0000)
59162b5b6ecSBjoern A. Zeeb 		csum = th->th_sum;
59262b5b6ecSBjoern A. Zeeb 
59362b5b6ecSBjoern A. Zeeb 	seq = ntohl(th->th_seq);
59462b5b6ecSBjoern A. Zeeb 
59562b5b6ecSBjoern A. Zeeb 	/* Try to find a matching previous segment. */
59662b5b6ecSBjoern A. Zeeb 	SLIST_FOREACH(le, &lc->lro_active, next) {
59762b5b6ecSBjoern A. Zeeb 		if (le->eh_type != eh_type)
59862b5b6ecSBjoern A. Zeeb 			continue;
59962b5b6ecSBjoern A. Zeeb 		if (le->source_port != th->th_sport ||
60062b5b6ecSBjoern A. Zeeb 		    le->dest_port != th->th_dport)
60162b5b6ecSBjoern A. Zeeb 			continue;
60262b5b6ecSBjoern A. Zeeb 		switch (eh_type) {
60362b5b6ecSBjoern A. Zeeb #ifdef INET6
60462b5b6ecSBjoern A. Zeeb 		case ETHERTYPE_IPV6:
60562b5b6ecSBjoern A. Zeeb 			if (bcmp(&le->source_ip6, &ip6->ip6_src,
60662b5b6ecSBjoern A. Zeeb 			    sizeof(struct in6_addr)) != 0 ||
60762b5b6ecSBjoern A. Zeeb 			    bcmp(&le->dest_ip6, &ip6->ip6_dst,
60862b5b6ecSBjoern A. Zeeb 			    sizeof(struct in6_addr)) != 0)
60962b5b6ecSBjoern A. Zeeb 				continue;
61062b5b6ecSBjoern A. Zeeb 			break;
61162b5b6ecSBjoern A. Zeeb #endif
61262b5b6ecSBjoern A. Zeeb #ifdef INET
61362b5b6ecSBjoern A. Zeeb 		case ETHERTYPE_IP:
61462b5b6ecSBjoern A. Zeeb 			if (le->source_ip4 != ip4->ip_src.s_addr ||
61562b5b6ecSBjoern A. Zeeb 			    le->dest_ip4 != ip4->ip_dst.s_addr)
61662b5b6ecSBjoern A. Zeeb 				continue;
61762b5b6ecSBjoern A. Zeeb 			break;
61862b5b6ecSBjoern A. Zeeb #endif
6196c5087a8SJack F Vogel 		}
6206c5087a8SJack F Vogel 
621ca712262SColin Percival 		/* Flush now if appending will result in overflow. */
6227ae3d4bfSSepherosa Ziehau 		if (le->p_len > (lc->lro_length_lim - tcp_data_len)) {
62362b5b6ecSBjoern A. Zeeb 			SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
62462b5b6ecSBjoern A. Zeeb 			tcp_lro_flush(lc, le);
625ca712262SColin Percival 			break;
626ca712262SColin Percival 		}
627ca712262SColin Percival 
62862b5b6ecSBjoern A. Zeeb 		/* Try to append the new segment. */
62962b5b6ecSBjoern A. Zeeb 		if (__predict_false(seq != le->next_seq ||
63062b5b6ecSBjoern A. Zeeb 		    (tcp_data_len == 0 && le->ack_seq == th->th_ack))) {
63162b5b6ecSBjoern A. Zeeb 			/* Out of order packet or duplicate ACK. */
63262b5b6ecSBjoern A. Zeeb 			SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
63362b5b6ecSBjoern A. Zeeb 			tcp_lro_flush(lc, le);
63462b5b6ecSBjoern A. Zeeb 			return (TCP_LRO_CANNOT);
6356c5087a8SJack F Vogel 		}
6366c5087a8SJack F Vogel 
63762b5b6ecSBjoern A. Zeeb 		if (l != 0) {
6386c5087a8SJack F Vogel 			uint32_t tsval = ntohl(*(ts_ptr + 1));
63962b5b6ecSBjoern A. Zeeb 			/* Make sure timestamp values are increasing. */
64062b5b6ecSBjoern A. Zeeb 			/* XXX-BZ flip and use TSTMP_GEQ macro for this? */
64162b5b6ecSBjoern A. Zeeb 			if (__predict_false(le->tsval > tsval ||
64262b5b6ecSBjoern A. Zeeb 			    *(ts_ptr + 2) == 0))
64362b5b6ecSBjoern A. Zeeb 				return (TCP_LRO_CANNOT);
64462b5b6ecSBjoern A. Zeeb 			le->tsval = tsval;
64562b5b6ecSBjoern A. Zeeb 			le->tsecr = *(ts_ptr + 2);
6466c5087a8SJack F Vogel 		}
6476c5087a8SJack F Vogel 
64862b5b6ecSBjoern A. Zeeb 		le->next_seq += tcp_data_len;
64962b5b6ecSBjoern A. Zeeb 		le->ack_seq = th->th_ack;
65062b5b6ecSBjoern A. Zeeb 		le->window = th->th_win;
65162b5b6ecSBjoern A. Zeeb 		le->append_cnt++;
65262b5b6ecSBjoern A. Zeeb 
65362b5b6ecSBjoern A. Zeeb #ifdef TCP_LRO_UPDATE_CSUM
65462b5b6ecSBjoern A. Zeeb 		le->ulp_csum += tcp_lro_rx_csum_fixup(le, l3hdr, th,
65562b5b6ecSBjoern A. Zeeb 		    tcp_data_len, ~csum);
65662b5b6ecSBjoern A. Zeeb #endif
65762b5b6ecSBjoern A. Zeeb 
6586c5087a8SJack F Vogel 		if (tcp_data_len == 0) {
65962b5b6ecSBjoern A. Zeeb 			m_freem(m);
6607ae3d4bfSSepherosa Ziehau 			/*
6617ae3d4bfSSepherosa Ziehau 			 * Flush this LRO entry, if this ACK should not
6627ae3d4bfSSepherosa Ziehau 			 * be further delayed.
6637ae3d4bfSSepherosa Ziehau 			 */
6647ae3d4bfSSepherosa Ziehau 			if (le->append_cnt >= lc->lro_ackcnt_lim) {
6657ae3d4bfSSepherosa Ziehau 				SLIST_REMOVE(&lc->lro_active, le, lro_entry,
6667ae3d4bfSSepherosa Ziehau 				    next);
6677ae3d4bfSSepherosa Ziehau 				tcp_lro_flush(lc, le);
6687ae3d4bfSSepherosa Ziehau 			}
66962b5b6ecSBjoern A. Zeeb 			return (0);
6706c5087a8SJack F Vogel 		}
67162b5b6ecSBjoern A. Zeeb 
67262b5b6ecSBjoern A. Zeeb 		le->p_len += tcp_data_len;
67362b5b6ecSBjoern A. Zeeb 
67462b5b6ecSBjoern A. Zeeb 		/*
67562b5b6ecSBjoern A. Zeeb 		 * Adjust the mbuf so that m_data points to the first byte of
67662b5b6ecSBjoern A. Zeeb 		 * the ULP payload.  Adjust the mbuf to avoid complications and
67762b5b6ecSBjoern A. Zeeb 		 * append new segment to existing mbuf chain.
6786c5087a8SJack F Vogel 		 */
67962b5b6ecSBjoern A. Zeeb 		m_adj(m, m->m_pkthdr.len - tcp_data_len);
6809523d1bfSNavdeep Parhar 		m_demote_pkthdr(m);
6816c5087a8SJack F Vogel 
68262b5b6ecSBjoern A. Zeeb 		le->m_tail->m_next = m;
68362b5b6ecSBjoern A. Zeeb 		le->m_tail = m_last(m);
6846c5087a8SJack F Vogel 
68562b5b6ecSBjoern A. Zeeb 		/*
68662b5b6ecSBjoern A. Zeeb 		 * If a possible next full length packet would cause an
68762b5b6ecSBjoern A. Zeeb 		 * overflow, pro-actively flush now.
6886c5087a8SJack F Vogel 		 */
6897ae3d4bfSSepherosa Ziehau 		if (le->p_len > (lc->lro_length_lim - lc->ifp->if_mtu)) {
69062b5b6ecSBjoern A. Zeeb 			SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
69162b5b6ecSBjoern A. Zeeb 			tcp_lro_flush(lc, le);
6927127e6acSNavdeep Parhar 		} else
6937127e6acSNavdeep Parhar 			getmicrotime(&le->mtime);
6946c5087a8SJack F Vogel 
69562b5b6ecSBjoern A. Zeeb 		return (0);
6966c5087a8SJack F Vogel 	}
69762b5b6ecSBjoern A. Zeeb 
69862b5b6ecSBjoern A. Zeeb 	/* Try to find an empty slot. */
69962b5b6ecSBjoern A. Zeeb 	if (SLIST_EMPTY(&lc->lro_free))
700489f0c3cSSepherosa Ziehau 		return (TCP_LRO_NO_ENTRIES);
70162b5b6ecSBjoern A. Zeeb 
70262b5b6ecSBjoern A. Zeeb 	/* Start a new segment chain. */
70362b5b6ecSBjoern A. Zeeb 	le = SLIST_FIRST(&lc->lro_free);
70462b5b6ecSBjoern A. Zeeb 	SLIST_REMOVE_HEAD(&lc->lro_free, next);
70562b5b6ecSBjoern A. Zeeb 	SLIST_INSERT_HEAD(&lc->lro_active, le, next);
7067127e6acSNavdeep Parhar 	getmicrotime(&le->mtime);
70762b5b6ecSBjoern A. Zeeb 
70862b5b6ecSBjoern A. Zeeb 	/* Start filling in details. */
70962b5b6ecSBjoern A. Zeeb 	switch (eh_type) {
71062b5b6ecSBjoern A. Zeeb #ifdef INET6
71162b5b6ecSBjoern A. Zeeb 	case ETHERTYPE_IPV6:
71262b5b6ecSBjoern A. Zeeb 		le->le_ip6 = ip6;
71362b5b6ecSBjoern A. Zeeb 		le->source_ip6 = ip6->ip6_src;
71462b5b6ecSBjoern A. Zeeb 		le->dest_ip6 = ip6->ip6_dst;
71562b5b6ecSBjoern A. Zeeb 		le->eh_type = eh_type;
71662b5b6ecSBjoern A. Zeeb 		le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN - sizeof(*ip6);
71762b5b6ecSBjoern A. Zeeb 		break;
71862b5b6ecSBjoern A. Zeeb #endif
71962b5b6ecSBjoern A. Zeeb #ifdef INET
72062b5b6ecSBjoern A. Zeeb 	case ETHERTYPE_IP:
72162b5b6ecSBjoern A. Zeeb 		le->le_ip4 = ip4;
72262b5b6ecSBjoern A. Zeeb 		le->source_ip4 = ip4->ip_src.s_addr;
72362b5b6ecSBjoern A. Zeeb 		le->dest_ip4 = ip4->ip_dst.s_addr;
72462b5b6ecSBjoern A. Zeeb 		le->eh_type = eh_type;
72562b5b6ecSBjoern A. Zeeb 		le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN;
72662b5b6ecSBjoern A. Zeeb 		break;
72762b5b6ecSBjoern A. Zeeb #endif
7286c5087a8SJack F Vogel 	}
72962b5b6ecSBjoern A. Zeeb 	le->source_port = th->th_sport;
73062b5b6ecSBjoern A. Zeeb 	le->dest_port = th->th_dport;
73162b5b6ecSBjoern A. Zeeb 
73262b5b6ecSBjoern A. Zeeb 	le->next_seq = seq + tcp_data_len;
73362b5b6ecSBjoern A. Zeeb 	le->ack_seq = th->th_ack;
73462b5b6ecSBjoern A. Zeeb 	le->window = th->th_win;
73562b5b6ecSBjoern A. Zeeb 	if (l != 0) {
73662b5b6ecSBjoern A. Zeeb 		le->timestamp = 1;
73762b5b6ecSBjoern A. Zeeb 		le->tsval = ntohl(*(ts_ptr + 1));
73862b5b6ecSBjoern A. Zeeb 		le->tsecr = *(ts_ptr + 2);
73962b5b6ecSBjoern A. Zeeb 	}
74062b5b6ecSBjoern A. Zeeb 
74162b5b6ecSBjoern A. Zeeb #ifdef TCP_LRO_UPDATE_CSUM
74262b5b6ecSBjoern A. Zeeb 	/*
74362b5b6ecSBjoern A. Zeeb 	 * Do not touch the csum of the first packet.  However save the
74462b5b6ecSBjoern A. Zeeb 	 * "adjusted" checksum of just the source and destination addresses,
74562b5b6ecSBjoern A. Zeeb 	 * the next header and the TCP payload.  The length and TCP header
74662b5b6ecSBjoern A. Zeeb 	 * parts may change, so we remove those from the saved checksum and
74762b5b6ecSBjoern A. Zeeb 	 * re-add with final values on tcp_lro_flush() if needed.
74862b5b6ecSBjoern A. Zeeb 	 */
74962b5b6ecSBjoern A. Zeeb 	KASSERT(le->ulp_csum == 0, ("%s: le=%p le->ulp_csum=0x%04x\n",
75062b5b6ecSBjoern A. Zeeb 	    __func__, le, le->ulp_csum));
75162b5b6ecSBjoern A. Zeeb 
75262b5b6ecSBjoern A. Zeeb 	le->ulp_csum = tcp_lro_rx_csum_fixup(le, l3hdr, th, tcp_data_len,
75362b5b6ecSBjoern A. Zeeb 	    ~csum);
75462b5b6ecSBjoern A. Zeeb 	th->th_sum = csum;	/* Restore checksum on first packet. */
75562b5b6ecSBjoern A. Zeeb #endif
75662b5b6ecSBjoern A. Zeeb 
75762b5b6ecSBjoern A. Zeeb 	le->m_head = m;
75862b5b6ecSBjoern A. Zeeb 	le->m_tail = m_last(m);
75962b5b6ecSBjoern A. Zeeb 
76062b5b6ecSBjoern A. Zeeb 	return (0);
76162b5b6ecSBjoern A. Zeeb }
76262b5b6ecSBjoern A. Zeeb 
763e936121dSHans Petter Selasky void
764e936121dSHans Petter Selasky tcp_lro_queue_mbuf(struct lro_ctrl *lc, struct mbuf *mb)
765e936121dSHans Petter Selasky {
766e936121dSHans Petter Selasky 	/* sanity checks */
767e936121dSHans Petter Selasky 	if (__predict_false(lc->ifp == NULL || lc->lro_mbuf_data == NULL ||
768e936121dSHans Petter Selasky 	    lc->lro_mbuf_max == 0)) {
769e936121dSHans Petter Selasky 		/* packet drop */
770e936121dSHans Petter Selasky 		m_freem(mb);
771e936121dSHans Petter Selasky 		return;
772e936121dSHans Petter Selasky 	}
773e936121dSHans Petter Selasky 
774e936121dSHans Petter Selasky 	/* check if packet is not LRO capable */
775e936121dSHans Petter Selasky 	if (__predict_false(mb->m_pkthdr.csum_flags == 0 ||
776e936121dSHans Petter Selasky 	    (lc->ifp->if_capenable & IFCAP_LRO) == 0)) {
777e936121dSHans Petter Selasky 		lc->lro_flushed++;
778e936121dSHans Petter Selasky 		lc->lro_queued++;
779e936121dSHans Petter Selasky 
780e936121dSHans Petter Selasky 		/* input packet to network layer */
781e936121dSHans Petter Selasky 		(*lc->ifp->if_input) (lc->ifp, mb);
782e936121dSHans Petter Selasky 		return;
783e936121dSHans Petter Selasky 	}
784e936121dSHans Petter Selasky 
785e936121dSHans Petter Selasky 	/* check if array is full */
786e936121dSHans Petter Selasky 	if (__predict_false(lc->lro_mbuf_count == lc->lro_mbuf_max))
787e936121dSHans Petter Selasky 		tcp_lro_flush_all(lc);
788e936121dSHans Petter Selasky 
789e936121dSHans Petter Selasky 	/* store sequence number */
790e936121dSHans Petter Selasky 	TCP_LRO_SEQUENCE(mb) = lc->lro_mbuf_count;
791e936121dSHans Petter Selasky 
792e936121dSHans Petter Selasky 	/* enter mbuf */
793e936121dSHans Petter Selasky 	lc->lro_mbuf_data[lc->lro_mbuf_count++] = mb;
794e936121dSHans Petter Selasky }
795e936121dSHans Petter Selasky 
79662b5b6ecSBjoern A. Zeeb /* end */
797