xref: /freebsd/sys/netinet/tcp_lro.c (revision 51e3c20d36fc949570d1b03fc35d7ee729d24c57)
127f190a3SBjoern A. Zeeb /*-
227f190a3SBjoern A. Zeeb  * Copyright (c) 2007, Myricom Inc.
327f190a3SBjoern A. Zeeb  * Copyright (c) 2008, Intel Corporation.
462b5b6ecSBjoern A. Zeeb  * Copyright (c) 2012 The FreeBSD Foundation
5e936121dSHans Petter Selasky  * Copyright (c) 2016 Mellanox Technologies.
627f190a3SBjoern A. Zeeb  * All rights reserved.
727f190a3SBjoern A. Zeeb  *
862b5b6ecSBjoern A. Zeeb  * Portions of this software were developed by Bjoern Zeeb
962b5b6ecSBjoern A. Zeeb  * under sponsorship from the FreeBSD Foundation.
1062b5b6ecSBjoern A. Zeeb  *
1127f190a3SBjoern A. Zeeb  * Redistribution and use in source and binary forms, with or without
1227f190a3SBjoern A. Zeeb  * modification, are permitted provided that the following conditions
1327f190a3SBjoern A. Zeeb  * are met:
1427f190a3SBjoern A. Zeeb  * 1. Redistributions of source code must retain the above copyright
1527f190a3SBjoern A. Zeeb  *    notice, this list of conditions and the following disclaimer.
1627f190a3SBjoern A. Zeeb  * 2. Redistributions in binary form must reproduce the above copyright
1727f190a3SBjoern A. Zeeb  *    notice, this list of conditions and the following disclaimer in the
1827f190a3SBjoern A. Zeeb  *    documentation and/or other materials provided with the distribution.
1927f190a3SBjoern A. Zeeb  *
2027f190a3SBjoern A. Zeeb  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
2127f190a3SBjoern A. Zeeb  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2227f190a3SBjoern A. Zeeb  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2327f190a3SBjoern A. Zeeb  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
2427f190a3SBjoern A. Zeeb  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2527f190a3SBjoern A. Zeeb  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2627f190a3SBjoern A. Zeeb  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2727f190a3SBjoern A. Zeeb  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2827f190a3SBjoern A. Zeeb  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2927f190a3SBjoern A. Zeeb  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3027f190a3SBjoern A. Zeeb  * SUCH DAMAGE.
3127f190a3SBjoern A. Zeeb  */
326c5087a8SJack F Vogel 
3362b5b6ecSBjoern A. Zeeb #include <sys/cdefs.h>
3462b5b6ecSBjoern A. Zeeb __FBSDID("$FreeBSD$");
3562b5b6ecSBjoern A. Zeeb 
3662b5b6ecSBjoern A. Zeeb #include "opt_inet.h"
3762b5b6ecSBjoern A. Zeeb #include "opt_inet6.h"
3862b5b6ecSBjoern A. Zeeb 
396c5087a8SJack F Vogel #include <sys/param.h>
406c5087a8SJack F Vogel #include <sys/systm.h>
416c5087a8SJack F Vogel #include <sys/kernel.h>
428ec07310SGleb Smirnoff #include <sys/malloc.h>
438ec07310SGleb Smirnoff #include <sys/mbuf.h>
446c5087a8SJack F Vogel #include <sys/socket.h>
456c5087a8SJack F Vogel 
466c5087a8SJack F Vogel #include <net/if.h>
4762b5b6ecSBjoern A. Zeeb #include <net/if_var.h>
486c5087a8SJack F Vogel #include <net/ethernet.h>
495fa2656eSBjoern A. Zeeb #include <net/vnet.h>
506c5087a8SJack F Vogel 
516c5087a8SJack F Vogel #include <netinet/in_systm.h>
526c5087a8SJack F Vogel #include <netinet/in.h>
5362b5b6ecSBjoern A. Zeeb #include <netinet/ip6.h>
546c5087a8SJack F Vogel #include <netinet/ip.h>
5531bfc56eSBjoern A. Zeeb #include <netinet/ip_var.h>
566c5087a8SJack F Vogel #include <netinet/tcp.h>
576c5087a8SJack F Vogel #include <netinet/tcp_lro.h>
586c5087a8SJack F Vogel 
5931bfc56eSBjoern A. Zeeb #include <netinet6/ip6_var.h>
6031bfc56eSBjoern A. Zeeb 
616c5087a8SJack F Vogel #include <machine/in_cksum.h>
626c5087a8SJack F Vogel 
63e936121dSHans Petter Selasky static MALLOC_DEFINE(M_LRO, "LRO", "LRO control structures");
646c5087a8SJack F Vogel 
6562b5b6ecSBjoern A. Zeeb #define	TCP_LRO_UPDATE_CSUM	1
6662b5b6ecSBjoern A. Zeeb #ifndef	TCP_LRO_UPDATE_CSUM
6762b5b6ecSBjoern A. Zeeb #define	TCP_LRO_INVALID_CSUM	0x0000
6862b5b6ecSBjoern A. Zeeb #endif
696c5087a8SJack F Vogel 
706dd38b87SSepherosa Ziehau static void	tcp_lro_rx_done(struct lro_ctrl *lc);
716dd38b87SSepherosa Ziehau 
72*51e3c20dSSepherosa Ziehau static __inline void
73*51e3c20dSSepherosa Ziehau tcp_lro_active_insert(struct lro_ctrl *lc, struct lro_entry *le)
74*51e3c20dSSepherosa Ziehau {
75*51e3c20dSSepherosa Ziehau 
76*51e3c20dSSepherosa Ziehau 	LIST_INSERT_HEAD(&lc->lro_active, le, next);
77*51e3c20dSSepherosa Ziehau }
78*51e3c20dSSepherosa Ziehau 
79*51e3c20dSSepherosa Ziehau static __inline void
80*51e3c20dSSepherosa Ziehau tcp_lro_active_remove(struct lro_entry *le)
81*51e3c20dSSepherosa Ziehau {
82*51e3c20dSSepherosa Ziehau 
83*51e3c20dSSepherosa Ziehau 	LIST_REMOVE(le, next);
84*51e3c20dSSepherosa Ziehau }
85*51e3c20dSSepherosa Ziehau 
866c5087a8SJack F Vogel int
8762b5b6ecSBjoern A. Zeeb tcp_lro_init(struct lro_ctrl *lc)
886c5087a8SJack F Vogel {
89e936121dSHans Petter Selasky 	return (tcp_lro_init_args(lc, NULL, TCP_LRO_ENTRIES, 0));
90e936121dSHans Petter Selasky }
91e936121dSHans Petter Selasky 
92e936121dSHans Petter Selasky int
93e936121dSHans Petter Selasky tcp_lro_init_args(struct lro_ctrl *lc, struct ifnet *ifp,
94e936121dSHans Petter Selasky     unsigned lro_entries, unsigned lro_mbufs)
95e936121dSHans Petter Selasky {
9662b5b6ecSBjoern A. Zeeb 	struct lro_entry *le;
97e936121dSHans Petter Selasky 	size_t size;
98e936121dSHans Petter Selasky 	unsigned i;
996c5087a8SJack F Vogel 
10062b5b6ecSBjoern A. Zeeb 	lc->lro_bad_csum = 0;
10162b5b6ecSBjoern A. Zeeb 	lc->lro_queued = 0;
10262b5b6ecSBjoern A. Zeeb 	lc->lro_flushed = 0;
10362b5b6ecSBjoern A. Zeeb 	lc->lro_cnt = 0;
104e936121dSHans Petter Selasky 	lc->lro_mbuf_count = 0;
105e936121dSHans Petter Selasky 	lc->lro_mbuf_max = lro_mbufs;
106e936121dSHans Petter Selasky 	lc->lro_cnt = lro_entries;
1077ae3d4bfSSepherosa Ziehau 	lc->lro_ackcnt_lim = TCP_LRO_ACKCNT_MAX;
1087ae3d4bfSSepherosa Ziehau 	lc->lro_length_lim = TCP_LRO_LENGTH_MAX;
109e936121dSHans Petter Selasky 	lc->ifp = ifp;
1101ea44822SSepherosa Ziehau 	LIST_INIT(&lc->lro_free);
1111ea44822SSepherosa Ziehau 	LIST_INIT(&lc->lro_active);
1126c5087a8SJack F Vogel 
113e936121dSHans Petter Selasky 	/* compute size to allocate */
114e936121dSHans Petter Selasky 	size = (lro_mbufs * sizeof(struct mbuf *)) +
115e936121dSHans Petter Selasky 	    (lro_entries * sizeof(*le));
116e936121dSHans Petter Selasky 	lc->lro_mbuf_data = (struct mbuf **)
117e936121dSHans Petter Selasky 	    malloc(size, M_LRO, M_NOWAIT | M_ZERO);
1186c5087a8SJack F Vogel 
119e936121dSHans Petter Selasky 	/* check for out of memory */
120e936121dSHans Petter Selasky 	if (lc->lro_mbuf_data == NULL) {
121e936121dSHans Petter Selasky 		memset(lc, 0, sizeof(*lc));
122e936121dSHans Petter Selasky 		return (ENOMEM);
123e936121dSHans Petter Selasky 	}
124e936121dSHans Petter Selasky 	/* compute offset for LRO entries */
125e936121dSHans Petter Selasky 	le = (struct lro_entry *)
126e936121dSHans Petter Selasky 	    (lc->lro_mbuf_data + lro_mbufs);
127e936121dSHans Petter Selasky 
128e936121dSHans Petter Selasky 	/* setup linked list */
129e936121dSHans Petter Selasky 	for (i = 0; i != lro_entries; i++)
1301ea44822SSepherosa Ziehau 		LIST_INSERT_HEAD(&lc->lro_free, le + i, next);
131e936121dSHans Petter Selasky 
132e936121dSHans Petter Selasky 	return (0);
1336c5087a8SJack F Vogel }
1346c5087a8SJack F Vogel 
1356c5087a8SJack F Vogel void
13662b5b6ecSBjoern A. Zeeb tcp_lro_free(struct lro_ctrl *lc)
1376c5087a8SJack F Vogel {
13862b5b6ecSBjoern A. Zeeb 	struct lro_entry *le;
139e936121dSHans Petter Selasky 	unsigned x;
1406c5087a8SJack F Vogel 
141e936121dSHans Petter Selasky 	/* reset LRO free list */
1421ea44822SSepherosa Ziehau 	LIST_INIT(&lc->lro_free);
143e936121dSHans Petter Selasky 
144e936121dSHans Petter Selasky 	/* free active mbufs, if any */
1451ea44822SSepherosa Ziehau 	while ((le = LIST_FIRST(&lc->lro_active)) != NULL) {
146*51e3c20dSSepherosa Ziehau 		tcp_lro_active_remove(le);
147e936121dSHans Petter Selasky 		m_freem(le->m_head);
1486c5087a8SJack F Vogel 	}
149e936121dSHans Petter Selasky 
150e936121dSHans Petter Selasky 	/* free mbuf array, if any */
151e936121dSHans Petter Selasky 	for (x = 0; x != lc->lro_mbuf_count; x++)
152e936121dSHans Petter Selasky 		m_freem(lc->lro_mbuf_data[x]);
153e936121dSHans Petter Selasky 	lc->lro_mbuf_count = 0;
154e936121dSHans Petter Selasky 
155e936121dSHans Petter Selasky 	/* free allocated memory, if any */
156e936121dSHans Petter Selasky 	free(lc->lro_mbuf_data, M_LRO);
157e936121dSHans Petter Selasky 	lc->lro_mbuf_data = NULL;
1586c5087a8SJack F Vogel }
1596c5087a8SJack F Vogel 
16062b5b6ecSBjoern A. Zeeb #ifdef TCP_LRO_UPDATE_CSUM
16162b5b6ecSBjoern A. Zeeb static uint16_t
16262b5b6ecSBjoern A. Zeeb tcp_lro_csum_th(struct tcphdr *th)
16362b5b6ecSBjoern A. Zeeb {
16462b5b6ecSBjoern A. Zeeb 	uint32_t ch;
16562b5b6ecSBjoern A. Zeeb 	uint16_t *p, l;
16662b5b6ecSBjoern A. Zeeb 
16762b5b6ecSBjoern A. Zeeb 	ch = th->th_sum = 0x0000;
16862b5b6ecSBjoern A. Zeeb 	l = th->th_off;
16962b5b6ecSBjoern A. Zeeb 	p = (uint16_t *)th;
17062b5b6ecSBjoern A. Zeeb 	while (l > 0) {
17162b5b6ecSBjoern A. Zeeb 		ch += *p;
17262b5b6ecSBjoern A. Zeeb 		p++;
17362b5b6ecSBjoern A. Zeeb 		ch += *p;
17462b5b6ecSBjoern A. Zeeb 		p++;
17562b5b6ecSBjoern A. Zeeb 		l--;
17662b5b6ecSBjoern A. Zeeb 	}
17762b5b6ecSBjoern A. Zeeb 	while (ch > 0xffff)
17862b5b6ecSBjoern A. Zeeb 		ch = (ch >> 16) + (ch & 0xffff);
17962b5b6ecSBjoern A. Zeeb 
18062b5b6ecSBjoern A. Zeeb 	return (ch & 0xffff);
18162b5b6ecSBjoern A. Zeeb }
18262b5b6ecSBjoern A. Zeeb 
18362b5b6ecSBjoern A. Zeeb static uint16_t
18462b5b6ecSBjoern A. Zeeb tcp_lro_rx_csum_fixup(struct lro_entry *le, void *l3hdr, struct tcphdr *th,
18562b5b6ecSBjoern A. Zeeb     uint16_t tcp_data_len, uint16_t csum)
18662b5b6ecSBjoern A. Zeeb {
18762b5b6ecSBjoern A. Zeeb 	uint32_t c;
18862b5b6ecSBjoern A. Zeeb 	uint16_t cs;
18962b5b6ecSBjoern A. Zeeb 
19062b5b6ecSBjoern A. Zeeb 	c = csum;
19162b5b6ecSBjoern A. Zeeb 
19262b5b6ecSBjoern A. Zeeb 	/* Remove length from checksum. */
19362b5b6ecSBjoern A. Zeeb 	switch (le->eh_type) {
19462b5b6ecSBjoern A. Zeeb #ifdef INET6
19562b5b6ecSBjoern A. Zeeb 	case ETHERTYPE_IPV6:
19662b5b6ecSBjoern A. Zeeb 	{
19762b5b6ecSBjoern A. Zeeb 		struct ip6_hdr *ip6;
19862b5b6ecSBjoern A. Zeeb 
19962b5b6ecSBjoern A. Zeeb 		ip6 = (struct ip6_hdr *)l3hdr;
20062b5b6ecSBjoern A. Zeeb 		if (le->append_cnt == 0)
20162b5b6ecSBjoern A. Zeeb 			cs = ip6->ip6_plen;
20262b5b6ecSBjoern A. Zeeb 		else {
20362b5b6ecSBjoern A. Zeeb 			uint32_t cx;
20462b5b6ecSBjoern A. Zeeb 
20562b5b6ecSBjoern A. Zeeb 			cx = ntohs(ip6->ip6_plen);
20662b5b6ecSBjoern A. Zeeb 			cs = in6_cksum_pseudo(ip6, cx, ip6->ip6_nxt, 0);
20762b5b6ecSBjoern A. Zeeb 		}
20862b5b6ecSBjoern A. Zeeb 		break;
20962b5b6ecSBjoern A. Zeeb 	}
21062b5b6ecSBjoern A. Zeeb #endif
21162b5b6ecSBjoern A. Zeeb #ifdef INET
21262b5b6ecSBjoern A. Zeeb 	case ETHERTYPE_IP:
21362b5b6ecSBjoern A. Zeeb 	{
21462b5b6ecSBjoern A. Zeeb 		struct ip *ip4;
21562b5b6ecSBjoern A. Zeeb 
21662b5b6ecSBjoern A. Zeeb 		ip4 = (struct ip *)l3hdr;
21762b5b6ecSBjoern A. Zeeb 		if (le->append_cnt == 0)
21862b5b6ecSBjoern A. Zeeb 			cs = ip4->ip_len;
21962b5b6ecSBjoern A. Zeeb 		else {
22062b5b6ecSBjoern A. Zeeb 			cs = in_addword(ntohs(ip4->ip_len) - sizeof(*ip4),
22162b5b6ecSBjoern A. Zeeb 			    IPPROTO_TCP);
22262b5b6ecSBjoern A. Zeeb 			cs = in_pseudo(ip4->ip_src.s_addr, ip4->ip_dst.s_addr,
22362b5b6ecSBjoern A. Zeeb 			    htons(cs));
22462b5b6ecSBjoern A. Zeeb 		}
22562b5b6ecSBjoern A. Zeeb 		break;
22662b5b6ecSBjoern A. Zeeb 	}
22762b5b6ecSBjoern A. Zeeb #endif
22862b5b6ecSBjoern A. Zeeb 	default:
22962b5b6ecSBjoern A. Zeeb 		cs = 0;		/* Keep compiler happy. */
23062b5b6ecSBjoern A. Zeeb 	}
23162b5b6ecSBjoern A. Zeeb 
23262b5b6ecSBjoern A. Zeeb 	cs = ~cs;
23362b5b6ecSBjoern A. Zeeb 	c += cs;
23462b5b6ecSBjoern A. Zeeb 
23562b5b6ecSBjoern A. Zeeb 	/* Remove TCP header csum. */
23662b5b6ecSBjoern A. Zeeb 	cs = ~tcp_lro_csum_th(th);
23762b5b6ecSBjoern A. Zeeb 	c += cs;
23862b5b6ecSBjoern A. Zeeb 	while (c > 0xffff)
23962b5b6ecSBjoern A. Zeeb 		c = (c >> 16) + (c & 0xffff);
24062b5b6ecSBjoern A. Zeeb 
24162b5b6ecSBjoern A. Zeeb 	return (c & 0xffff);
24262b5b6ecSBjoern A. Zeeb }
24362b5b6ecSBjoern A. Zeeb #endif
24462b5b6ecSBjoern A. Zeeb 
2456dd38b87SSepherosa Ziehau static void
2466dd38b87SSepherosa Ziehau tcp_lro_rx_done(struct lro_ctrl *lc)
2476dd38b87SSepherosa Ziehau {
2486dd38b87SSepherosa Ziehau 	struct lro_entry *le;
2496dd38b87SSepherosa Ziehau 
2501ea44822SSepherosa Ziehau 	while ((le = LIST_FIRST(&lc->lro_active)) != NULL) {
251*51e3c20dSSepherosa Ziehau 		tcp_lro_active_remove(le);
2526dd38b87SSepherosa Ziehau 		tcp_lro_flush(lc, le);
2536dd38b87SSepherosa Ziehau 	}
2546dd38b87SSepherosa Ziehau }
2556dd38b87SSepherosa Ziehau 
2566c5087a8SJack F Vogel void
2577127e6acSNavdeep Parhar tcp_lro_flush_inactive(struct lro_ctrl *lc, const struct timeval *timeout)
2587127e6acSNavdeep Parhar {
2597127e6acSNavdeep Parhar 	struct lro_entry *le, *le_tmp;
2607127e6acSNavdeep Parhar 	struct timeval tv;
2617127e6acSNavdeep Parhar 
2621ea44822SSepherosa Ziehau 	if (LIST_EMPTY(&lc->lro_active))
2637127e6acSNavdeep Parhar 		return;
2647127e6acSNavdeep Parhar 
2657127e6acSNavdeep Parhar 	getmicrotime(&tv);
2667127e6acSNavdeep Parhar 	timevalsub(&tv, timeout);
2671ea44822SSepherosa Ziehau 	LIST_FOREACH_SAFE(le, &lc->lro_active, next, le_tmp) {
2687127e6acSNavdeep Parhar 		if (timevalcmp(&tv, &le->mtime, >=)) {
269*51e3c20dSSepherosa Ziehau 			tcp_lro_active_remove(le);
2707127e6acSNavdeep Parhar 			tcp_lro_flush(lc, le);
2717127e6acSNavdeep Parhar 		}
2727127e6acSNavdeep Parhar 	}
2737127e6acSNavdeep Parhar }
2747127e6acSNavdeep Parhar 
2757127e6acSNavdeep Parhar void
27662b5b6ecSBjoern A. Zeeb tcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *le)
2776c5087a8SJack F Vogel {
2786c5087a8SJack F Vogel 
27962b5b6ecSBjoern A. Zeeb 	if (le->append_cnt > 0) {
28062b5b6ecSBjoern A. Zeeb 		struct tcphdr *th;
28162b5b6ecSBjoern A. Zeeb 		uint16_t p_len;
2826c5087a8SJack F Vogel 
28362b5b6ecSBjoern A. Zeeb 		p_len = htons(le->p_len);
28462b5b6ecSBjoern A. Zeeb 		switch (le->eh_type) {
28562b5b6ecSBjoern A. Zeeb #ifdef INET6
28662b5b6ecSBjoern A. Zeeb 		case ETHERTYPE_IPV6:
2876c5087a8SJack F Vogel 		{
28862b5b6ecSBjoern A. Zeeb 			struct ip6_hdr *ip6;
28962b5b6ecSBjoern A. Zeeb 
29062b5b6ecSBjoern A. Zeeb 			ip6 = le->le_ip6;
29162b5b6ecSBjoern A. Zeeb 			ip6->ip6_plen = p_len;
29262b5b6ecSBjoern A. Zeeb 			th = (struct tcphdr *)(ip6 + 1);
29362b5b6ecSBjoern A. Zeeb 			le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID |
29462b5b6ecSBjoern A. Zeeb 			    CSUM_PSEUDO_HDR;
29562b5b6ecSBjoern A. Zeeb 			le->p_len += ETHER_HDR_LEN + sizeof(*ip6);
29662b5b6ecSBjoern A. Zeeb 			break;
29762b5b6ecSBjoern A. Zeeb 		}
29862b5b6ecSBjoern A. Zeeb #endif
29962b5b6ecSBjoern A. Zeeb #ifdef INET
30062b5b6ecSBjoern A. Zeeb 		case ETHERTYPE_IP:
30162b5b6ecSBjoern A. Zeeb 		{
30262b5b6ecSBjoern A. Zeeb 			struct ip *ip4;
30362b5b6ecSBjoern A. Zeeb #ifdef TCP_LRO_UPDATE_CSUM
30462b5b6ecSBjoern A. Zeeb 			uint32_t cl;
30562b5b6ecSBjoern A. Zeeb 			uint16_t c;
30662b5b6ecSBjoern A. Zeeb #endif
30762b5b6ecSBjoern A. Zeeb 
30862b5b6ecSBjoern A. Zeeb 			ip4 = le->le_ip4;
30962b5b6ecSBjoern A. Zeeb #ifdef TCP_LRO_UPDATE_CSUM
31062b5b6ecSBjoern A. Zeeb 			/* Fix IP header checksum for new length. */
31162b5b6ecSBjoern A. Zeeb 			c = ~ip4->ip_sum;
31262b5b6ecSBjoern A. Zeeb 			cl = c;
31362b5b6ecSBjoern A. Zeeb 			c = ~ip4->ip_len;
31462b5b6ecSBjoern A. Zeeb 			cl += c + p_len;
31562b5b6ecSBjoern A. Zeeb 			while (cl > 0xffff)
31662b5b6ecSBjoern A. Zeeb 				cl = (cl >> 16) + (cl & 0xffff);
31762b5b6ecSBjoern A. Zeeb 			c = cl;
31862b5b6ecSBjoern A. Zeeb 			ip4->ip_sum = ~c;
31962b5b6ecSBjoern A. Zeeb #else
32062b5b6ecSBjoern A. Zeeb 			ip4->ip_sum = TCP_LRO_INVALID_CSUM;
32162b5b6ecSBjoern A. Zeeb #endif
32262b5b6ecSBjoern A. Zeeb 			ip4->ip_len = p_len;
32362b5b6ecSBjoern A. Zeeb 			th = (struct tcphdr *)(ip4 + 1);
32462b5b6ecSBjoern A. Zeeb 			le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID |
32562b5b6ecSBjoern A. Zeeb 			    CSUM_PSEUDO_HDR | CSUM_IP_CHECKED | CSUM_IP_VALID;
32662b5b6ecSBjoern A. Zeeb 			le->p_len += ETHER_HDR_LEN;
32762b5b6ecSBjoern A. Zeeb 			break;
32862b5b6ecSBjoern A. Zeeb 		}
32962b5b6ecSBjoern A. Zeeb #endif
33062b5b6ecSBjoern A. Zeeb 		default:
33162b5b6ecSBjoern A. Zeeb 			th = NULL;	/* Keep compiler happy. */
33262b5b6ecSBjoern A. Zeeb 		}
33362b5b6ecSBjoern A. Zeeb 		le->m_head->m_pkthdr.csum_data = 0xffff;
33462b5b6ecSBjoern A. Zeeb 		le->m_head->m_pkthdr.len = le->p_len;
33562b5b6ecSBjoern A. Zeeb 
33662b5b6ecSBjoern A. Zeeb 		/* Incorporate the latest ACK into the TCP header. */
33762b5b6ecSBjoern A. Zeeb 		th->th_ack = le->ack_seq;
33862b5b6ecSBjoern A. Zeeb 		th->th_win = le->window;
33962b5b6ecSBjoern A. Zeeb 		/* Incorporate latest timestamp into the TCP header. */
34062b5b6ecSBjoern A. Zeeb 		if (le->timestamp != 0) {
3416c5087a8SJack F Vogel 			uint32_t *ts_ptr;
3426c5087a8SJack F Vogel 
34362b5b6ecSBjoern A. Zeeb 			ts_ptr = (uint32_t *)(th + 1);
34462b5b6ecSBjoern A. Zeeb 			ts_ptr[1] = htonl(le->tsval);
34562b5b6ecSBjoern A. Zeeb 			ts_ptr[2] = le->tsecr;
34662b5b6ecSBjoern A. Zeeb 		}
34762b5b6ecSBjoern A. Zeeb #ifdef TCP_LRO_UPDATE_CSUM
34862b5b6ecSBjoern A. Zeeb 		/* Update the TCP header checksum. */
34962b5b6ecSBjoern A. Zeeb 		le->ulp_csum += p_len;
35062b5b6ecSBjoern A. Zeeb 		le->ulp_csum += tcp_lro_csum_th(th);
35162b5b6ecSBjoern A. Zeeb 		while (le->ulp_csum > 0xffff)
35262b5b6ecSBjoern A. Zeeb 			le->ulp_csum = (le->ulp_csum >> 16) +
35362b5b6ecSBjoern A. Zeeb 			    (le->ulp_csum & 0xffff);
35462b5b6ecSBjoern A. Zeeb 		th->th_sum = (le->ulp_csum & 0xffff);
35562b5b6ecSBjoern A. Zeeb 		th->th_sum = ~th->th_sum;
35662b5b6ecSBjoern A. Zeeb #else
35762b5b6ecSBjoern A. Zeeb 		th->th_sum = TCP_LRO_INVALID_CSUM;
35862b5b6ecSBjoern A. Zeeb #endif
35962b5b6ecSBjoern A. Zeeb 	}
3606c5087a8SJack F Vogel 
36162b5b6ecSBjoern A. Zeeb 	(*lc->ifp->if_input)(lc->ifp, le->m_head);
36262b5b6ecSBjoern A. Zeeb 	lc->lro_queued += le->append_cnt + 1;
36362b5b6ecSBjoern A. Zeeb 	lc->lro_flushed++;
36462b5b6ecSBjoern A. Zeeb 	bzero(le, sizeof(*le));
3651ea44822SSepherosa Ziehau 	LIST_INSERT_HEAD(&lc->lro_free, le, next);
36662b5b6ecSBjoern A. Zeeb }
3676c5087a8SJack F Vogel 
368e936121dSHans Petter Selasky static int
369e936121dSHans Petter Selasky tcp_lro_mbuf_compare_header(const void *ppa, const void *ppb)
370e936121dSHans Petter Selasky {
371e936121dSHans Petter Selasky 	const struct mbuf *ma = *((const struct mbuf * const *)ppa);
372e936121dSHans Petter Selasky 	const struct mbuf *mb = *((const struct mbuf * const *)ppb);
373e936121dSHans Petter Selasky 	int ret;
374e936121dSHans Petter Selasky 
375e936121dSHans Petter Selasky 	ret = M_HASHTYPE_GET(ma) - M_HASHTYPE_GET(mb);
376e936121dSHans Petter Selasky 	if (ret != 0)
377e936121dSHans Petter Selasky 		goto done;
378e936121dSHans Petter Selasky 
3793e9470b7SHans Petter Selasky 	if (ma->m_pkthdr.flowid > mb->m_pkthdr.flowid)
3803e9470b7SHans Petter Selasky 		return (1);
3813e9470b7SHans Petter Selasky 	else if (ma->m_pkthdr.flowid < mb->m_pkthdr.flowid)
3823e9470b7SHans Petter Selasky 		return (-1);
383e936121dSHans Petter Selasky 
384e936121dSHans Petter Selasky 	ret = TCP_LRO_SEQUENCE(ma) - TCP_LRO_SEQUENCE(mb);
385e936121dSHans Petter Selasky done:
386e936121dSHans Petter Selasky 	return (ret);
387e936121dSHans Petter Selasky }
388e936121dSHans Petter Selasky 
389e936121dSHans Petter Selasky void
390e936121dSHans Petter Selasky tcp_lro_flush_all(struct lro_ctrl *lc)
391e936121dSHans Petter Selasky {
392e936121dSHans Petter Selasky 	uint32_t hashtype;
393e936121dSHans Petter Selasky 	uint32_t flowid;
394e936121dSHans Petter Selasky 	unsigned x;
395e936121dSHans Petter Selasky 
396e936121dSHans Petter Selasky 	/* check if no mbufs to flush */
3976dd38b87SSepherosa Ziehau 	if (lc->lro_mbuf_count == 0)
398e936121dSHans Petter Selasky 		goto done;
399e936121dSHans Petter Selasky 
400e936121dSHans Petter Selasky 	/* sort all mbufs according to stream */
401e936121dSHans Petter Selasky 	qsort(lc->lro_mbuf_data, lc->lro_mbuf_count, sizeof(struct mbuf *),
402e936121dSHans Petter Selasky 	    &tcp_lro_mbuf_compare_header);
403e936121dSHans Petter Selasky 
404e936121dSHans Petter Selasky 	/* input data into LRO engine, stream by stream */
405e936121dSHans Petter Selasky 	flowid = 0;
406e936121dSHans Petter Selasky 	hashtype = M_HASHTYPE_NONE;
407e936121dSHans Petter Selasky 	for (x = 0; x != lc->lro_mbuf_count; x++) {
408e936121dSHans Petter Selasky 		struct mbuf *mb;
409e936121dSHans Petter Selasky 
410e936121dSHans Petter Selasky 		mb = lc->lro_mbuf_data[x];
411e936121dSHans Petter Selasky 
412e936121dSHans Petter Selasky 		/* check for new stream */
413e936121dSHans Petter Selasky 		if (mb->m_pkthdr.flowid != flowid ||
414e936121dSHans Petter Selasky 		    M_HASHTYPE_GET(mb) != hashtype) {
415e936121dSHans Petter Selasky 			flowid = mb->m_pkthdr.flowid;
416e936121dSHans Petter Selasky 			hashtype = M_HASHTYPE_GET(mb);
417e936121dSHans Petter Selasky 
418e936121dSHans Petter Selasky 			/* flush active streams */
4196dd38b87SSepherosa Ziehau 			tcp_lro_rx_done(lc);
420e936121dSHans Petter Selasky 		}
421e936121dSHans Petter Selasky #ifdef TCP_LRO_RESET_SEQUENCE
422e936121dSHans Petter Selasky 		/* reset sequence number */
423e936121dSHans Petter Selasky 		TCP_LRO_SEQUENCE(mb) = 0;
424e936121dSHans Petter Selasky #endif
425e936121dSHans Petter Selasky 		/* add packet to LRO engine */
426e936121dSHans Petter Selasky 		if (tcp_lro_rx(lc, mb, 0) != 0) {
427e936121dSHans Petter Selasky 			/* input packet to network layer */
428e936121dSHans Petter Selasky 			(*lc->ifp->if_input)(lc->ifp, mb);
429e936121dSHans Petter Selasky 			lc->lro_queued++;
430e936121dSHans Petter Selasky 			lc->lro_flushed++;
431e936121dSHans Petter Selasky 		}
432e936121dSHans Petter Selasky 	}
433e936121dSHans Petter Selasky done:
434e936121dSHans Petter Selasky 	/* flush active streams */
4356dd38b87SSepherosa Ziehau 	tcp_lro_rx_done(lc);
4366dd38b87SSepherosa Ziehau 
437e936121dSHans Petter Selasky 	lc->lro_mbuf_count = 0;
438e936121dSHans Petter Selasky }
439e936121dSHans Petter Selasky 
44062b5b6ecSBjoern A. Zeeb #ifdef INET6
44162b5b6ecSBjoern A. Zeeb static int
44262b5b6ecSBjoern A. Zeeb tcp_lro_rx_ipv6(struct lro_ctrl *lc, struct mbuf *m, struct ip6_hdr *ip6,
44362b5b6ecSBjoern A. Zeeb     struct tcphdr **th)
44462b5b6ecSBjoern A. Zeeb {
4456c5087a8SJack F Vogel 
44662b5b6ecSBjoern A. Zeeb 	/* XXX-BZ we should check the flow-label. */
4476c5087a8SJack F Vogel 
44862b5b6ecSBjoern A. Zeeb 	/* XXX-BZ We do not yet support ext. hdrs. */
44962b5b6ecSBjoern A. Zeeb 	if (ip6->ip6_nxt != IPPROTO_TCP)
45062b5b6ecSBjoern A. Zeeb 		return (TCP_LRO_NOT_SUPPORTED);
45162b5b6ecSBjoern A. Zeeb 
45262b5b6ecSBjoern A. Zeeb 	/* Find the TCP header. */
45362b5b6ecSBjoern A. Zeeb 	*th = (struct tcphdr *)(ip6 + 1);
45462b5b6ecSBjoern A. Zeeb 
45562b5b6ecSBjoern A. Zeeb 	return (0);
45662b5b6ecSBjoern A. Zeeb }
45762b5b6ecSBjoern A. Zeeb #endif
45862b5b6ecSBjoern A. Zeeb 
45962b5b6ecSBjoern A. Zeeb #ifdef INET
46062b5b6ecSBjoern A. Zeeb static int
46162b5b6ecSBjoern A. Zeeb tcp_lro_rx_ipv4(struct lro_ctrl *lc, struct mbuf *m, struct ip *ip4,
46262b5b6ecSBjoern A. Zeeb     struct tcphdr **th)
46362b5b6ecSBjoern A. Zeeb {
46462b5b6ecSBjoern A. Zeeb 	int csum_flags;
46562b5b6ecSBjoern A. Zeeb 	uint16_t csum;
46662b5b6ecSBjoern A. Zeeb 
46762b5b6ecSBjoern A. Zeeb 	if (ip4->ip_p != IPPROTO_TCP)
46862b5b6ecSBjoern A. Zeeb 		return (TCP_LRO_NOT_SUPPORTED);
46962b5b6ecSBjoern A. Zeeb 
47062b5b6ecSBjoern A. Zeeb 	/* Ensure there are no options. */
47162b5b6ecSBjoern A. Zeeb 	if ((ip4->ip_hl << 2) != sizeof (*ip4))
47262b5b6ecSBjoern A. Zeeb 		return (TCP_LRO_CANNOT);
47362b5b6ecSBjoern A. Zeeb 
47462b5b6ecSBjoern A. Zeeb 	/* .. and the packet is not fragmented. */
47562b5b6ecSBjoern A. Zeeb 	if (ip4->ip_off & htons(IP_MF|IP_OFFMASK))
47662b5b6ecSBjoern A. Zeeb 		return (TCP_LRO_CANNOT);
47762b5b6ecSBjoern A. Zeeb 
47862b5b6ecSBjoern A. Zeeb 	/* Legacy IP has a header checksum that needs to be correct. */
47962b5b6ecSBjoern A. Zeeb 	csum_flags = m->m_pkthdr.csum_flags;
48045709593SKip Macy 	if (csum_flags & CSUM_IP_CHECKED) {
48145709593SKip Macy 		if (__predict_false((csum_flags & CSUM_IP_VALID) == 0)) {
48262b5b6ecSBjoern A. Zeeb 			lc->lro_bad_csum++;
48362b5b6ecSBjoern A. Zeeb 			return (TCP_LRO_CANNOT);
48445709593SKip Macy 		}
48545709593SKip Macy 	} else {
48662b5b6ecSBjoern A. Zeeb 		csum = in_cksum_hdr(ip4);
487e5ca1ffaSAndrew Gallatin 		if (__predict_false((csum) != 0)) {
48862b5b6ecSBjoern A. Zeeb 			lc->lro_bad_csum++;
48962b5b6ecSBjoern A. Zeeb 			return (TCP_LRO_CANNOT);
4906c5087a8SJack F Vogel 		}
49145709593SKip Macy 	}
4926c5087a8SJack F Vogel 
49362b5b6ecSBjoern A. Zeeb 	/* Find the TCP header (we assured there are no IP options). */
49462b5b6ecSBjoern A. Zeeb 	*th = (struct tcphdr *)(ip4 + 1);
4956c5087a8SJack F Vogel 
49662b5b6ecSBjoern A. Zeeb 	return (0);
4976c5087a8SJack F Vogel }
49862b5b6ecSBjoern A. Zeeb #endif
4996c5087a8SJack F Vogel 
50062b5b6ecSBjoern A. Zeeb int
50162b5b6ecSBjoern A. Zeeb tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
50262b5b6ecSBjoern A. Zeeb {
50362b5b6ecSBjoern A. Zeeb 	struct lro_entry *le;
50462b5b6ecSBjoern A. Zeeb 	struct ether_header *eh;
50562b5b6ecSBjoern A. Zeeb #ifdef INET6
50662b5b6ecSBjoern A. Zeeb 	struct ip6_hdr *ip6 = NULL;	/* Keep compiler happy. */
50762b5b6ecSBjoern A. Zeeb #endif
50862b5b6ecSBjoern A. Zeeb #ifdef INET
50962b5b6ecSBjoern A. Zeeb 	struct ip *ip4 = NULL;		/* Keep compiler happy. */
51062b5b6ecSBjoern A. Zeeb #endif
51162b5b6ecSBjoern A. Zeeb 	struct tcphdr *th;
51262b5b6ecSBjoern A. Zeeb 	void *l3hdr = NULL;		/* Keep compiler happy. */
51362b5b6ecSBjoern A. Zeeb 	uint32_t *ts_ptr;
51462b5b6ecSBjoern A. Zeeb 	tcp_seq seq;
51562b5b6ecSBjoern A. Zeeb 	int error, ip_len, l;
51662b5b6ecSBjoern A. Zeeb 	uint16_t eh_type, tcp_data_len;
5176c5087a8SJack F Vogel 
51862b5b6ecSBjoern A. Zeeb 	/* We expect a contiguous header [eh, ip, tcp]. */
51962b5b6ecSBjoern A. Zeeb 
52062b5b6ecSBjoern A. Zeeb 	eh = mtod(m, struct ether_header *);
52162b5b6ecSBjoern A. Zeeb 	eh_type = ntohs(eh->ether_type);
52262b5b6ecSBjoern A. Zeeb 	switch (eh_type) {
52362b5b6ecSBjoern A. Zeeb #ifdef INET6
52462b5b6ecSBjoern A. Zeeb 	case ETHERTYPE_IPV6:
5255fa2656eSBjoern A. Zeeb 	{
5265fa2656eSBjoern A. Zeeb 		CURVNET_SET(lc->ifp->if_vnet);
52731bfc56eSBjoern A. Zeeb 		if (V_ip6_forwarding != 0) {
52831bfc56eSBjoern A. Zeeb 			/* XXX-BZ stats but changing lro_ctrl is a problem. */
5295fa2656eSBjoern A. Zeeb 			CURVNET_RESTORE();
53031bfc56eSBjoern A. Zeeb 			return (TCP_LRO_CANNOT);
53131bfc56eSBjoern A. Zeeb 		}
5325fa2656eSBjoern A. Zeeb 		CURVNET_RESTORE();
53362b5b6ecSBjoern A. Zeeb 		l3hdr = ip6 = (struct ip6_hdr *)(eh + 1);
53462b5b6ecSBjoern A. Zeeb 		error = tcp_lro_rx_ipv6(lc, m, ip6, &th);
53562b5b6ecSBjoern A. Zeeb 		if (error != 0)
53662b5b6ecSBjoern A. Zeeb 			return (error);
53762b5b6ecSBjoern A. Zeeb 		tcp_data_len = ntohs(ip6->ip6_plen);
53862b5b6ecSBjoern A. Zeeb 		ip_len = sizeof(*ip6) + tcp_data_len;
53962b5b6ecSBjoern A. Zeeb 		break;
5405fa2656eSBjoern A. Zeeb 	}
54162b5b6ecSBjoern A. Zeeb #endif
54262b5b6ecSBjoern A. Zeeb #ifdef INET
54362b5b6ecSBjoern A. Zeeb 	case ETHERTYPE_IP:
5445fa2656eSBjoern A. Zeeb 	{
5455fa2656eSBjoern A. Zeeb 		CURVNET_SET(lc->ifp->if_vnet);
54631bfc56eSBjoern A. Zeeb 		if (V_ipforwarding != 0) {
54731bfc56eSBjoern A. Zeeb 			/* XXX-BZ stats but changing lro_ctrl is a problem. */
5485fa2656eSBjoern A. Zeeb 			CURVNET_RESTORE();
54931bfc56eSBjoern A. Zeeb 			return (TCP_LRO_CANNOT);
55031bfc56eSBjoern A. Zeeb 		}
5515fa2656eSBjoern A. Zeeb 		CURVNET_RESTORE();
55262b5b6ecSBjoern A. Zeeb 		l3hdr = ip4 = (struct ip *)(eh + 1);
55362b5b6ecSBjoern A. Zeeb 		error = tcp_lro_rx_ipv4(lc, m, ip4, &th);
55462b5b6ecSBjoern A. Zeeb 		if (error != 0)
55562b5b6ecSBjoern A. Zeeb 			return (error);
55662b5b6ecSBjoern A. Zeeb 		ip_len = ntohs(ip4->ip_len);
55762b5b6ecSBjoern A. Zeeb 		tcp_data_len = ip_len - sizeof(*ip4);
55862b5b6ecSBjoern A. Zeeb 		break;
5595fa2656eSBjoern A. Zeeb 	}
56062b5b6ecSBjoern A. Zeeb #endif
56162b5b6ecSBjoern A. Zeeb 	/* XXX-BZ what happens in case of VLAN(s)? */
56262b5b6ecSBjoern A. Zeeb 	default:
56362b5b6ecSBjoern A. Zeeb 		return (TCP_LRO_NOT_SUPPORTED);
56462b5b6ecSBjoern A. Zeeb 	}
5656c5087a8SJack F Vogel 
5666c5087a8SJack F Vogel 	/*
56762b5b6ecSBjoern A. Zeeb 	 * If the frame is padded beyond the end of the IP packet, then we must
56862b5b6ecSBjoern A. Zeeb 	 * trim the extra bytes off.
5696c5087a8SJack F Vogel 	 */
57062b5b6ecSBjoern A. Zeeb 	l = m->m_pkthdr.len - (ETHER_HDR_LEN + ip_len);
57162b5b6ecSBjoern A. Zeeb 	if (l != 0) {
57262b5b6ecSBjoern A. Zeeb 		if (l < 0)
57362b5b6ecSBjoern A. Zeeb 			/* Truncated packet. */
57462b5b6ecSBjoern A. Zeeb 			return (TCP_LRO_CANNOT);
57562b5b6ecSBjoern A. Zeeb 
57662b5b6ecSBjoern A. Zeeb 		m_adj(m, -l);
5776c5087a8SJack F Vogel 	}
5786c5087a8SJack F Vogel 
57962b5b6ecSBjoern A. Zeeb 	/*
58062b5b6ecSBjoern A. Zeeb 	 * Check TCP header constraints.
58162b5b6ecSBjoern A. Zeeb 	 */
58262b5b6ecSBjoern A. Zeeb 	/* Ensure no bits set besides ACK or PSH. */
58362b5b6ecSBjoern A. Zeeb 	if ((th->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
58462b5b6ecSBjoern A. Zeeb 		return (TCP_LRO_CANNOT);
58562b5b6ecSBjoern A. Zeeb 
5869b436b18SSepherosa Ziehau 	/* XXX-BZ We lose a ACK|PUSH flag concatenating multiple segments. */
58762b5b6ecSBjoern A. Zeeb 	/* XXX-BZ Ideally we'd flush on PUSH? */
58862b5b6ecSBjoern A. Zeeb 
58962b5b6ecSBjoern A. Zeeb 	/*
59062b5b6ecSBjoern A. Zeeb 	 * Check for timestamps.
59162b5b6ecSBjoern A. Zeeb 	 * Since the only option we handle are timestamps, we only have to
59262b5b6ecSBjoern A. Zeeb 	 * handle the simple case of aligned timestamps.
59362b5b6ecSBjoern A. Zeeb 	 */
59462b5b6ecSBjoern A. Zeeb 	l = (th->th_off << 2);
59562b5b6ecSBjoern A. Zeeb 	tcp_data_len -= l;
59662b5b6ecSBjoern A. Zeeb 	l -= sizeof(*th);
59762b5b6ecSBjoern A. Zeeb 	ts_ptr = (uint32_t *)(th + 1);
59862b5b6ecSBjoern A. Zeeb 	if (l != 0 && (__predict_false(l != TCPOLEN_TSTAMP_APPA) ||
59962b5b6ecSBjoern A. Zeeb 	    (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|
60062b5b6ecSBjoern A. Zeeb 	    TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP))))
60162b5b6ecSBjoern A. Zeeb 		return (TCP_LRO_CANNOT);
60262b5b6ecSBjoern A. Zeeb 
60362b5b6ecSBjoern A. Zeeb 	/* If the driver did not pass in the checksum, set it now. */
60462b5b6ecSBjoern A. Zeeb 	if (csum == 0x0000)
60562b5b6ecSBjoern A. Zeeb 		csum = th->th_sum;
60662b5b6ecSBjoern A. Zeeb 
60762b5b6ecSBjoern A. Zeeb 	seq = ntohl(th->th_seq);
60862b5b6ecSBjoern A. Zeeb 
60962b5b6ecSBjoern A. Zeeb 	/* Try to find a matching previous segment. */
6101ea44822SSepherosa Ziehau 	LIST_FOREACH(le, &lc->lro_active, next) {
61162b5b6ecSBjoern A. Zeeb 		if (le->eh_type != eh_type)
61262b5b6ecSBjoern A. Zeeb 			continue;
61362b5b6ecSBjoern A. Zeeb 		if (le->source_port != th->th_sport ||
61462b5b6ecSBjoern A. Zeeb 		    le->dest_port != th->th_dport)
61562b5b6ecSBjoern A. Zeeb 			continue;
61662b5b6ecSBjoern A. Zeeb 		switch (eh_type) {
61762b5b6ecSBjoern A. Zeeb #ifdef INET6
61862b5b6ecSBjoern A. Zeeb 		case ETHERTYPE_IPV6:
61962b5b6ecSBjoern A. Zeeb 			if (bcmp(&le->source_ip6, &ip6->ip6_src,
62062b5b6ecSBjoern A. Zeeb 			    sizeof(struct in6_addr)) != 0 ||
62162b5b6ecSBjoern A. Zeeb 			    bcmp(&le->dest_ip6, &ip6->ip6_dst,
62262b5b6ecSBjoern A. Zeeb 			    sizeof(struct in6_addr)) != 0)
62362b5b6ecSBjoern A. Zeeb 				continue;
62462b5b6ecSBjoern A. Zeeb 			break;
62562b5b6ecSBjoern A. Zeeb #endif
62662b5b6ecSBjoern A. Zeeb #ifdef INET
62762b5b6ecSBjoern A. Zeeb 		case ETHERTYPE_IP:
62862b5b6ecSBjoern A. Zeeb 			if (le->source_ip4 != ip4->ip_src.s_addr ||
62962b5b6ecSBjoern A. Zeeb 			    le->dest_ip4 != ip4->ip_dst.s_addr)
63062b5b6ecSBjoern A. Zeeb 				continue;
63162b5b6ecSBjoern A. Zeeb 			break;
63262b5b6ecSBjoern A. Zeeb #endif
6336c5087a8SJack F Vogel 		}
6346c5087a8SJack F Vogel 
635ca712262SColin Percival 		/* Flush now if appending will result in overflow. */
6367ae3d4bfSSepherosa Ziehau 		if (le->p_len > (lc->lro_length_lim - tcp_data_len)) {
637*51e3c20dSSepherosa Ziehau 			tcp_lro_active_remove(le);
63862b5b6ecSBjoern A. Zeeb 			tcp_lro_flush(lc, le);
639ca712262SColin Percival 			break;
640ca712262SColin Percival 		}
641ca712262SColin Percival 
64262b5b6ecSBjoern A. Zeeb 		/* Try to append the new segment. */
64362b5b6ecSBjoern A. Zeeb 		if (__predict_false(seq != le->next_seq ||
64462b5b6ecSBjoern A. Zeeb 		    (tcp_data_len == 0 && le->ack_seq == th->th_ack))) {
64562b5b6ecSBjoern A. Zeeb 			/* Out of order packet or duplicate ACK. */
646*51e3c20dSSepherosa Ziehau 			tcp_lro_active_remove(le);
64762b5b6ecSBjoern A. Zeeb 			tcp_lro_flush(lc, le);
64862b5b6ecSBjoern A. Zeeb 			return (TCP_LRO_CANNOT);
6496c5087a8SJack F Vogel 		}
6506c5087a8SJack F Vogel 
65162b5b6ecSBjoern A. Zeeb 		if (l != 0) {
6526c5087a8SJack F Vogel 			uint32_t tsval = ntohl(*(ts_ptr + 1));
65362b5b6ecSBjoern A. Zeeb 			/* Make sure timestamp values are increasing. */
65462b5b6ecSBjoern A. Zeeb 			/* XXX-BZ flip and use TSTMP_GEQ macro for this? */
65562b5b6ecSBjoern A. Zeeb 			if (__predict_false(le->tsval > tsval ||
65662b5b6ecSBjoern A. Zeeb 			    *(ts_ptr + 2) == 0))
65762b5b6ecSBjoern A. Zeeb 				return (TCP_LRO_CANNOT);
65862b5b6ecSBjoern A. Zeeb 			le->tsval = tsval;
65962b5b6ecSBjoern A. Zeeb 			le->tsecr = *(ts_ptr + 2);
6606c5087a8SJack F Vogel 		}
6616c5087a8SJack F Vogel 
66262b5b6ecSBjoern A. Zeeb 		le->next_seq += tcp_data_len;
66362b5b6ecSBjoern A. Zeeb 		le->ack_seq = th->th_ack;
66462b5b6ecSBjoern A. Zeeb 		le->window = th->th_win;
66562b5b6ecSBjoern A. Zeeb 		le->append_cnt++;
66662b5b6ecSBjoern A. Zeeb 
66762b5b6ecSBjoern A. Zeeb #ifdef TCP_LRO_UPDATE_CSUM
66862b5b6ecSBjoern A. Zeeb 		le->ulp_csum += tcp_lro_rx_csum_fixup(le, l3hdr, th,
66962b5b6ecSBjoern A. Zeeb 		    tcp_data_len, ~csum);
67062b5b6ecSBjoern A. Zeeb #endif
67162b5b6ecSBjoern A. Zeeb 
6726c5087a8SJack F Vogel 		if (tcp_data_len == 0) {
67362b5b6ecSBjoern A. Zeeb 			m_freem(m);
6747ae3d4bfSSepherosa Ziehau 			/*
6757ae3d4bfSSepherosa Ziehau 			 * Flush this LRO entry, if this ACK should not
6767ae3d4bfSSepherosa Ziehau 			 * be further delayed.
6777ae3d4bfSSepherosa Ziehau 			 */
6787ae3d4bfSSepherosa Ziehau 			if (le->append_cnt >= lc->lro_ackcnt_lim) {
679*51e3c20dSSepherosa Ziehau 				tcp_lro_active_remove(le);
6807ae3d4bfSSepherosa Ziehau 				tcp_lro_flush(lc, le);
6817ae3d4bfSSepherosa Ziehau 			}
68262b5b6ecSBjoern A. Zeeb 			return (0);
6836c5087a8SJack F Vogel 		}
68462b5b6ecSBjoern A. Zeeb 
68562b5b6ecSBjoern A. Zeeb 		le->p_len += tcp_data_len;
68662b5b6ecSBjoern A. Zeeb 
68762b5b6ecSBjoern A. Zeeb 		/*
68862b5b6ecSBjoern A. Zeeb 		 * Adjust the mbuf so that m_data points to the first byte of
68962b5b6ecSBjoern A. Zeeb 		 * the ULP payload.  Adjust the mbuf to avoid complications and
69062b5b6ecSBjoern A. Zeeb 		 * append new segment to existing mbuf chain.
6916c5087a8SJack F Vogel 		 */
69262b5b6ecSBjoern A. Zeeb 		m_adj(m, m->m_pkthdr.len - tcp_data_len);
6939523d1bfSNavdeep Parhar 		m_demote_pkthdr(m);
6946c5087a8SJack F Vogel 
69562b5b6ecSBjoern A. Zeeb 		le->m_tail->m_next = m;
69662b5b6ecSBjoern A. Zeeb 		le->m_tail = m_last(m);
6976c5087a8SJack F Vogel 
69862b5b6ecSBjoern A. Zeeb 		/*
69962b5b6ecSBjoern A. Zeeb 		 * If a possible next full length packet would cause an
70062b5b6ecSBjoern A. Zeeb 		 * overflow, pro-actively flush now.
7016c5087a8SJack F Vogel 		 */
7027ae3d4bfSSepherosa Ziehau 		if (le->p_len > (lc->lro_length_lim - lc->ifp->if_mtu)) {
703*51e3c20dSSepherosa Ziehau 			tcp_lro_active_remove(le);
70462b5b6ecSBjoern A. Zeeb 			tcp_lro_flush(lc, le);
7057127e6acSNavdeep Parhar 		} else
7067127e6acSNavdeep Parhar 			getmicrotime(&le->mtime);
7076c5087a8SJack F Vogel 
70862b5b6ecSBjoern A. Zeeb 		return (0);
7096c5087a8SJack F Vogel 	}
71062b5b6ecSBjoern A. Zeeb 
71162b5b6ecSBjoern A. Zeeb 	/* Try to find an empty slot. */
7121ea44822SSepherosa Ziehau 	if (LIST_EMPTY(&lc->lro_free))
713489f0c3cSSepherosa Ziehau 		return (TCP_LRO_NO_ENTRIES);
71462b5b6ecSBjoern A. Zeeb 
71562b5b6ecSBjoern A. Zeeb 	/* Start a new segment chain. */
7161ea44822SSepherosa Ziehau 	le = LIST_FIRST(&lc->lro_free);
7171ea44822SSepherosa Ziehau 	LIST_REMOVE(le, next);
718*51e3c20dSSepherosa Ziehau 	tcp_lro_active_insert(lc, le);
7197127e6acSNavdeep Parhar 	getmicrotime(&le->mtime);
72062b5b6ecSBjoern A. Zeeb 
72162b5b6ecSBjoern A. Zeeb 	/* Start filling in details. */
72262b5b6ecSBjoern A. Zeeb 	switch (eh_type) {
72362b5b6ecSBjoern A. Zeeb #ifdef INET6
72462b5b6ecSBjoern A. Zeeb 	case ETHERTYPE_IPV6:
72562b5b6ecSBjoern A. Zeeb 		le->le_ip6 = ip6;
72662b5b6ecSBjoern A. Zeeb 		le->source_ip6 = ip6->ip6_src;
72762b5b6ecSBjoern A. Zeeb 		le->dest_ip6 = ip6->ip6_dst;
72862b5b6ecSBjoern A. Zeeb 		le->eh_type = eh_type;
72962b5b6ecSBjoern A. Zeeb 		le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN - sizeof(*ip6);
73062b5b6ecSBjoern A. Zeeb 		break;
73162b5b6ecSBjoern A. Zeeb #endif
73262b5b6ecSBjoern A. Zeeb #ifdef INET
73362b5b6ecSBjoern A. Zeeb 	case ETHERTYPE_IP:
73462b5b6ecSBjoern A. Zeeb 		le->le_ip4 = ip4;
73562b5b6ecSBjoern A. Zeeb 		le->source_ip4 = ip4->ip_src.s_addr;
73662b5b6ecSBjoern A. Zeeb 		le->dest_ip4 = ip4->ip_dst.s_addr;
73762b5b6ecSBjoern A. Zeeb 		le->eh_type = eh_type;
73862b5b6ecSBjoern A. Zeeb 		le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN;
73962b5b6ecSBjoern A. Zeeb 		break;
74062b5b6ecSBjoern A. Zeeb #endif
7416c5087a8SJack F Vogel 	}
74262b5b6ecSBjoern A. Zeeb 	le->source_port = th->th_sport;
74362b5b6ecSBjoern A. Zeeb 	le->dest_port = th->th_dport;
74462b5b6ecSBjoern A. Zeeb 
74562b5b6ecSBjoern A. Zeeb 	le->next_seq = seq + tcp_data_len;
74662b5b6ecSBjoern A. Zeeb 	le->ack_seq = th->th_ack;
74762b5b6ecSBjoern A. Zeeb 	le->window = th->th_win;
74862b5b6ecSBjoern A. Zeeb 	if (l != 0) {
74962b5b6ecSBjoern A. Zeeb 		le->timestamp = 1;
75062b5b6ecSBjoern A. Zeeb 		le->tsval = ntohl(*(ts_ptr + 1));
75162b5b6ecSBjoern A. Zeeb 		le->tsecr = *(ts_ptr + 2);
75262b5b6ecSBjoern A. Zeeb 	}
75362b5b6ecSBjoern A. Zeeb 
75462b5b6ecSBjoern A. Zeeb #ifdef TCP_LRO_UPDATE_CSUM
75562b5b6ecSBjoern A. Zeeb 	/*
75662b5b6ecSBjoern A. Zeeb 	 * Do not touch the csum of the first packet.  However save the
75762b5b6ecSBjoern A. Zeeb 	 * "adjusted" checksum of just the source and destination addresses,
75862b5b6ecSBjoern A. Zeeb 	 * the next header and the TCP payload.  The length and TCP header
75962b5b6ecSBjoern A. Zeeb 	 * parts may change, so we remove those from the saved checksum and
76062b5b6ecSBjoern A. Zeeb 	 * re-add with final values on tcp_lro_flush() if needed.
76162b5b6ecSBjoern A. Zeeb 	 */
76262b5b6ecSBjoern A. Zeeb 	KASSERT(le->ulp_csum == 0, ("%s: le=%p le->ulp_csum=0x%04x\n",
76362b5b6ecSBjoern A. Zeeb 	    __func__, le, le->ulp_csum));
76462b5b6ecSBjoern A. Zeeb 
76562b5b6ecSBjoern A. Zeeb 	le->ulp_csum = tcp_lro_rx_csum_fixup(le, l3hdr, th, tcp_data_len,
76662b5b6ecSBjoern A. Zeeb 	    ~csum);
76762b5b6ecSBjoern A. Zeeb 	th->th_sum = csum;	/* Restore checksum on first packet. */
76862b5b6ecSBjoern A. Zeeb #endif
76962b5b6ecSBjoern A. Zeeb 
77062b5b6ecSBjoern A. Zeeb 	le->m_head = m;
77162b5b6ecSBjoern A. Zeeb 	le->m_tail = m_last(m);
77262b5b6ecSBjoern A. Zeeb 
77362b5b6ecSBjoern A. Zeeb 	return (0);
77462b5b6ecSBjoern A. Zeeb }
77562b5b6ecSBjoern A. Zeeb 
776e936121dSHans Petter Selasky void
777e936121dSHans Petter Selasky tcp_lro_queue_mbuf(struct lro_ctrl *lc, struct mbuf *mb)
778e936121dSHans Petter Selasky {
779e936121dSHans Petter Selasky 	/* sanity checks */
780e936121dSHans Petter Selasky 	if (__predict_false(lc->ifp == NULL || lc->lro_mbuf_data == NULL ||
781e936121dSHans Petter Selasky 	    lc->lro_mbuf_max == 0)) {
782e936121dSHans Petter Selasky 		/* packet drop */
783e936121dSHans Petter Selasky 		m_freem(mb);
784e936121dSHans Petter Selasky 		return;
785e936121dSHans Petter Selasky 	}
786e936121dSHans Petter Selasky 
787e936121dSHans Petter Selasky 	/* check if packet is not LRO capable */
788e936121dSHans Petter Selasky 	if (__predict_false(mb->m_pkthdr.csum_flags == 0 ||
789e936121dSHans Petter Selasky 	    (lc->ifp->if_capenable & IFCAP_LRO) == 0)) {
790e936121dSHans Petter Selasky 		lc->lro_flushed++;
791e936121dSHans Petter Selasky 		lc->lro_queued++;
792e936121dSHans Petter Selasky 
793e936121dSHans Petter Selasky 		/* input packet to network layer */
794e936121dSHans Petter Selasky 		(*lc->ifp->if_input) (lc->ifp, mb);
795e936121dSHans Petter Selasky 		return;
796e936121dSHans Petter Selasky 	}
797e936121dSHans Petter Selasky 
798e936121dSHans Petter Selasky 	/* check if array is full */
799e936121dSHans Petter Selasky 	if (__predict_false(lc->lro_mbuf_count == lc->lro_mbuf_max))
800e936121dSHans Petter Selasky 		tcp_lro_flush_all(lc);
801e936121dSHans Petter Selasky 
802e936121dSHans Petter Selasky 	/* store sequence number */
803e936121dSHans Petter Selasky 	TCP_LRO_SEQUENCE(mb) = lc->lro_mbuf_count;
804e936121dSHans Petter Selasky 
805e936121dSHans Petter Selasky 	/* enter mbuf */
806e936121dSHans Petter Selasky 	lc->lro_mbuf_data[lc->lro_mbuf_count++] = mb;
807e936121dSHans Petter Selasky }
808e936121dSHans Petter Selasky 
80962b5b6ecSBjoern A. Zeeb /* end */
810