xref: /freebsd/sys/netinet/tcp_lro.c (revision f8acc03ef1c9a9d85bc6615beab0ace2ddfdf9fd)
127f190a3SBjoern A. Zeeb /*-
227f190a3SBjoern A. Zeeb  * Copyright (c) 2007, Myricom Inc.
327f190a3SBjoern A. Zeeb  * Copyright (c) 2008, Intel Corporation.
462b5b6ecSBjoern A. Zeeb  * Copyright (c) 2012 The FreeBSD Foundation
5e936121dSHans Petter Selasky  * Copyright (c) 2016 Mellanox Technologies.
627f190a3SBjoern A. Zeeb  * All rights reserved.
727f190a3SBjoern A. Zeeb  *
862b5b6ecSBjoern A. Zeeb  * Portions of this software were developed by Bjoern Zeeb
962b5b6ecSBjoern A. Zeeb  * under sponsorship from the FreeBSD Foundation.
1062b5b6ecSBjoern A. Zeeb  *
1127f190a3SBjoern A. Zeeb  * Redistribution and use in source and binary forms, with or without
1227f190a3SBjoern A. Zeeb  * modification, are permitted provided that the following conditions
1327f190a3SBjoern A. Zeeb  * are met:
1427f190a3SBjoern A. Zeeb  * 1. Redistributions of source code must retain the above copyright
1527f190a3SBjoern A. Zeeb  *    notice, this list of conditions and the following disclaimer.
1627f190a3SBjoern A. Zeeb  * 2. Redistributions in binary form must reproduce the above copyright
1727f190a3SBjoern A. Zeeb  *    notice, this list of conditions and the following disclaimer in the
1827f190a3SBjoern A. Zeeb  *    documentation and/or other materials provided with the distribution.
1927f190a3SBjoern A. Zeeb  *
2027f190a3SBjoern A. Zeeb  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
2127f190a3SBjoern A. Zeeb  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2227f190a3SBjoern A. Zeeb  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2327f190a3SBjoern A. Zeeb  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
2427f190a3SBjoern A. Zeeb  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2527f190a3SBjoern A. Zeeb  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2627f190a3SBjoern A. Zeeb  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2727f190a3SBjoern A. Zeeb  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2827f190a3SBjoern A. Zeeb  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2927f190a3SBjoern A. Zeeb  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3027f190a3SBjoern A. Zeeb  * SUCH DAMAGE.
3127f190a3SBjoern A. Zeeb  */
326c5087a8SJack F Vogel 
3362b5b6ecSBjoern A. Zeeb #include <sys/cdefs.h>
3462b5b6ecSBjoern A. Zeeb __FBSDID("$FreeBSD$");
3562b5b6ecSBjoern A. Zeeb 
3662b5b6ecSBjoern A. Zeeb #include "opt_inet.h"
3762b5b6ecSBjoern A. Zeeb #include "opt_inet6.h"
3862b5b6ecSBjoern A. Zeeb 
396c5087a8SJack F Vogel #include <sys/param.h>
406c5087a8SJack F Vogel #include <sys/systm.h>
416c5087a8SJack F Vogel #include <sys/kernel.h>
428ec07310SGleb Smirnoff #include <sys/malloc.h>
438ec07310SGleb Smirnoff #include <sys/mbuf.h>
446c5087a8SJack F Vogel #include <sys/socket.h>
458452c1b3SSepherosa Ziehau #include <sys/sysctl.h>
466c5087a8SJack F Vogel 
476c5087a8SJack F Vogel #include <net/if.h>
4862b5b6ecSBjoern A. Zeeb #include <net/if_var.h>
496c5087a8SJack F Vogel #include <net/ethernet.h>
505fa2656eSBjoern A. Zeeb #include <net/vnet.h>
516c5087a8SJack F Vogel 
526c5087a8SJack F Vogel #include <netinet/in_systm.h>
536c5087a8SJack F Vogel #include <netinet/in.h>
5462b5b6ecSBjoern A. Zeeb #include <netinet/ip6.h>
556c5087a8SJack F Vogel #include <netinet/ip.h>
5631bfc56eSBjoern A. Zeeb #include <netinet/ip_var.h>
576c5087a8SJack F Vogel #include <netinet/tcp.h>
586c5087a8SJack F Vogel #include <netinet/tcp_lro.h>
598452c1b3SSepherosa Ziehau #include <netinet/tcp_var.h>
606c5087a8SJack F Vogel 
6131bfc56eSBjoern A. Zeeb #include <netinet6/ip6_var.h>
6231bfc56eSBjoern A. Zeeb 
636c5087a8SJack F Vogel #include <machine/in_cksum.h>
646c5087a8SJack F Vogel 
65e936121dSHans Petter Selasky static MALLOC_DEFINE(M_LRO, "LRO", "LRO control structures");
666c5087a8SJack F Vogel 
6762b5b6ecSBjoern A. Zeeb #define	TCP_LRO_UPDATE_CSUM	1
6862b5b6ecSBjoern A. Zeeb #ifndef	TCP_LRO_UPDATE_CSUM
6962b5b6ecSBjoern A. Zeeb #define	TCP_LRO_INVALID_CSUM	0x0000
7062b5b6ecSBjoern A. Zeeb #endif
716c5087a8SJack F Vogel 
726dd38b87SSepherosa Ziehau static void	tcp_lro_rx_done(struct lro_ctrl *lc);
7305cde7efSSepherosa Ziehau static int	tcp_lro_rx2(struct lro_ctrl *lc, struct mbuf *m,
7405cde7efSSepherosa Ziehau 		    uint32_t csum, int use_hash);
756dd38b87SSepherosa Ziehau 
768452c1b3SSepherosa Ziehau SYSCTL_NODE(_net_inet_tcp, OID_AUTO, lro,  CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
778452c1b3SSepherosa Ziehau     "TCP LRO");
788452c1b3SSepherosa Ziehau 
798452c1b3SSepherosa Ziehau static unsigned	tcp_lro_entries = TCP_LRO_ENTRIES;
808452c1b3SSepherosa Ziehau SYSCTL_UINT(_net_inet_tcp_lro, OID_AUTO, entries,
818452c1b3SSepherosa Ziehau     CTLFLAG_RDTUN | CTLFLAG_MPSAFE, &tcp_lro_entries, 0,
828452c1b3SSepherosa Ziehau     "default number of LRO entries");
838452c1b3SSepherosa Ziehau 
8451e3c20dSSepherosa Ziehau static __inline void
8505cde7efSSepherosa Ziehau tcp_lro_active_insert(struct lro_ctrl *lc, struct lro_head *bucket,
8605cde7efSSepherosa Ziehau     struct lro_entry *le)
8751e3c20dSSepherosa Ziehau {
8851e3c20dSSepherosa Ziehau 
8951e3c20dSSepherosa Ziehau 	LIST_INSERT_HEAD(&lc->lro_active, le, next);
9005cde7efSSepherosa Ziehau 	LIST_INSERT_HEAD(bucket, le, hash_next);
9151e3c20dSSepherosa Ziehau }
9251e3c20dSSepherosa Ziehau 
9351e3c20dSSepherosa Ziehau static __inline void
9451e3c20dSSepherosa Ziehau tcp_lro_active_remove(struct lro_entry *le)
9551e3c20dSSepherosa Ziehau {
9651e3c20dSSepherosa Ziehau 
9705cde7efSSepherosa Ziehau 	LIST_REMOVE(le, next);		/* active list */
9805cde7efSSepherosa Ziehau 	LIST_REMOVE(le, hash_next);	/* hash bucket */
9951e3c20dSSepherosa Ziehau }
10051e3c20dSSepherosa Ziehau 
1016c5087a8SJack F Vogel int
10262b5b6ecSBjoern A. Zeeb tcp_lro_init(struct lro_ctrl *lc)
1036c5087a8SJack F Vogel {
1048452c1b3SSepherosa Ziehau 	return (tcp_lro_init_args(lc, NULL, tcp_lro_entries, 0));
105e936121dSHans Petter Selasky }
106e936121dSHans Petter Selasky 
107e936121dSHans Petter Selasky int
108e936121dSHans Petter Selasky tcp_lro_init_args(struct lro_ctrl *lc, struct ifnet *ifp,
109e936121dSHans Petter Selasky     unsigned lro_entries, unsigned lro_mbufs)
110e936121dSHans Petter Selasky {
11162b5b6ecSBjoern A. Zeeb 	struct lro_entry *le;
112e936121dSHans Petter Selasky 	size_t size;
11305cde7efSSepherosa Ziehau 	unsigned i, elements;
1146c5087a8SJack F Vogel 
11562b5b6ecSBjoern A. Zeeb 	lc->lro_bad_csum = 0;
11662b5b6ecSBjoern A. Zeeb 	lc->lro_queued = 0;
11762b5b6ecSBjoern A. Zeeb 	lc->lro_flushed = 0;
118e936121dSHans Petter Selasky 	lc->lro_mbuf_count = 0;
119e936121dSHans Petter Selasky 	lc->lro_mbuf_max = lro_mbufs;
120e936121dSHans Petter Selasky 	lc->lro_cnt = lro_entries;
1217ae3d4bfSSepherosa Ziehau 	lc->lro_ackcnt_lim = TCP_LRO_ACKCNT_MAX;
1227ae3d4bfSSepherosa Ziehau 	lc->lro_length_lim = TCP_LRO_LENGTH_MAX;
123e936121dSHans Petter Selasky 	lc->ifp = ifp;
1241ea44822SSepherosa Ziehau 	LIST_INIT(&lc->lro_free);
1251ea44822SSepherosa Ziehau 	LIST_INIT(&lc->lro_active);
1266c5087a8SJack F Vogel 
12705cde7efSSepherosa Ziehau 	/* create hash table to accelerate entry lookup */
12805cde7efSSepherosa Ziehau 	if (lro_entries > lro_mbufs)
12905cde7efSSepherosa Ziehau 		elements = lro_entries;
13005cde7efSSepherosa Ziehau 	else
13105cde7efSSepherosa Ziehau 		elements = lro_mbufs;
13205cde7efSSepherosa Ziehau 	lc->lro_hash = phashinit_flags(elements, M_LRO, &lc->lro_hashsz,
13305cde7efSSepherosa Ziehau 	    HASH_NOWAIT);
13405cde7efSSepherosa Ziehau 	if (lc->lro_hash == NULL) {
13505cde7efSSepherosa Ziehau 		memset(lc, 0, sizeof(*lc));
13605cde7efSSepherosa Ziehau 		return (ENOMEM);
13705cde7efSSepherosa Ziehau 	}
13805cde7efSSepherosa Ziehau 
139e936121dSHans Petter Selasky 	/* compute size to allocate */
140fc271df3SHans Petter Selasky 	size = (lro_mbufs * sizeof(struct lro_mbuf_sort)) +
141e936121dSHans Petter Selasky 	    (lro_entries * sizeof(*le));
142fc271df3SHans Petter Selasky 	lc->lro_mbuf_data = (struct lro_mbuf_sort *)
143e936121dSHans Petter Selasky 	    malloc(size, M_LRO, M_NOWAIT | M_ZERO);
1446c5087a8SJack F Vogel 
145e936121dSHans Petter Selasky 	/* check for out of memory */
146e936121dSHans Petter Selasky 	if (lc->lro_mbuf_data == NULL) {
147a3927369SNavdeep Parhar 		free(lc->lro_hash, M_LRO);
148e936121dSHans Petter Selasky 		memset(lc, 0, sizeof(*lc));
149e936121dSHans Petter Selasky 		return (ENOMEM);
150e936121dSHans Petter Selasky 	}
151e936121dSHans Petter Selasky 	/* compute offset for LRO entries */
152e936121dSHans Petter Selasky 	le = (struct lro_entry *)
153e936121dSHans Petter Selasky 	    (lc->lro_mbuf_data + lro_mbufs);
154e936121dSHans Petter Selasky 
155e936121dSHans Petter Selasky 	/* setup linked list */
156e936121dSHans Petter Selasky 	for (i = 0; i != lro_entries; i++)
1571ea44822SSepherosa Ziehau 		LIST_INSERT_HEAD(&lc->lro_free, le + i, next);
158e936121dSHans Petter Selasky 
159e936121dSHans Petter Selasky 	return (0);
1606c5087a8SJack F Vogel }
1616c5087a8SJack F Vogel 
1626c5087a8SJack F Vogel void
16362b5b6ecSBjoern A. Zeeb tcp_lro_free(struct lro_ctrl *lc)
1646c5087a8SJack F Vogel {
16562b5b6ecSBjoern A. Zeeb 	struct lro_entry *le;
166e936121dSHans Petter Selasky 	unsigned x;
1676c5087a8SJack F Vogel 
168e936121dSHans Petter Selasky 	/* reset LRO free list */
1691ea44822SSepherosa Ziehau 	LIST_INIT(&lc->lro_free);
170e936121dSHans Petter Selasky 
171e936121dSHans Petter Selasky 	/* free active mbufs, if any */
1721ea44822SSepherosa Ziehau 	while ((le = LIST_FIRST(&lc->lro_active)) != NULL) {
17351e3c20dSSepherosa Ziehau 		tcp_lro_active_remove(le);
174e936121dSHans Petter Selasky 		m_freem(le->m_head);
1756c5087a8SJack F Vogel 	}
176e936121dSHans Petter Selasky 
17705cde7efSSepherosa Ziehau 	/* free hash table */
17805cde7efSSepherosa Ziehau 	free(lc->lro_hash, M_LRO);
17905cde7efSSepherosa Ziehau 	lc->lro_hash = NULL;
18005cde7efSSepherosa Ziehau 	lc->lro_hashsz = 0;
18105cde7efSSepherosa Ziehau 
182e936121dSHans Petter Selasky 	/* free mbuf array, if any */
183e936121dSHans Petter Selasky 	for (x = 0; x != lc->lro_mbuf_count; x++)
184fc271df3SHans Petter Selasky 		m_freem(lc->lro_mbuf_data[x].mb);
185e936121dSHans Petter Selasky 	lc->lro_mbuf_count = 0;
186e936121dSHans Petter Selasky 
187e936121dSHans Petter Selasky 	/* free allocated memory, if any */
188e936121dSHans Petter Selasky 	free(lc->lro_mbuf_data, M_LRO);
189e936121dSHans Petter Selasky 	lc->lro_mbuf_data = NULL;
1906c5087a8SJack F Vogel }
1916c5087a8SJack F Vogel 
19262b5b6ecSBjoern A. Zeeb #ifdef TCP_LRO_UPDATE_CSUM
19362b5b6ecSBjoern A. Zeeb static uint16_t
19462b5b6ecSBjoern A. Zeeb tcp_lro_csum_th(struct tcphdr *th)
19562b5b6ecSBjoern A. Zeeb {
19662b5b6ecSBjoern A. Zeeb 	uint32_t ch;
19762b5b6ecSBjoern A. Zeeb 	uint16_t *p, l;
19862b5b6ecSBjoern A. Zeeb 
19962b5b6ecSBjoern A. Zeeb 	ch = th->th_sum = 0x0000;
20062b5b6ecSBjoern A. Zeeb 	l = th->th_off;
20162b5b6ecSBjoern A. Zeeb 	p = (uint16_t *)th;
20262b5b6ecSBjoern A. Zeeb 	while (l > 0) {
20362b5b6ecSBjoern A. Zeeb 		ch += *p;
20462b5b6ecSBjoern A. Zeeb 		p++;
20562b5b6ecSBjoern A. Zeeb 		ch += *p;
20662b5b6ecSBjoern A. Zeeb 		p++;
20762b5b6ecSBjoern A. Zeeb 		l--;
20862b5b6ecSBjoern A. Zeeb 	}
20962b5b6ecSBjoern A. Zeeb 	while (ch > 0xffff)
21062b5b6ecSBjoern A. Zeeb 		ch = (ch >> 16) + (ch & 0xffff);
21162b5b6ecSBjoern A. Zeeb 
21262b5b6ecSBjoern A. Zeeb 	return (ch & 0xffff);
21362b5b6ecSBjoern A. Zeeb }
21462b5b6ecSBjoern A. Zeeb 
21562b5b6ecSBjoern A. Zeeb static uint16_t
21662b5b6ecSBjoern A. Zeeb tcp_lro_rx_csum_fixup(struct lro_entry *le, void *l3hdr, struct tcphdr *th,
21762b5b6ecSBjoern A. Zeeb     uint16_t tcp_data_len, uint16_t csum)
21862b5b6ecSBjoern A. Zeeb {
21962b5b6ecSBjoern A. Zeeb 	uint32_t c;
22062b5b6ecSBjoern A. Zeeb 	uint16_t cs;
22162b5b6ecSBjoern A. Zeeb 
22262b5b6ecSBjoern A. Zeeb 	c = csum;
22362b5b6ecSBjoern A. Zeeb 
22462b5b6ecSBjoern A. Zeeb 	/* Remove length from checksum. */
22562b5b6ecSBjoern A. Zeeb 	switch (le->eh_type) {
22662b5b6ecSBjoern A. Zeeb #ifdef INET6
22762b5b6ecSBjoern A. Zeeb 	case ETHERTYPE_IPV6:
22862b5b6ecSBjoern A. Zeeb 	{
22962b5b6ecSBjoern A. Zeeb 		struct ip6_hdr *ip6;
23062b5b6ecSBjoern A. Zeeb 
23162b5b6ecSBjoern A. Zeeb 		ip6 = (struct ip6_hdr *)l3hdr;
23262b5b6ecSBjoern A. Zeeb 		if (le->append_cnt == 0)
23362b5b6ecSBjoern A. Zeeb 			cs = ip6->ip6_plen;
23462b5b6ecSBjoern A. Zeeb 		else {
23562b5b6ecSBjoern A. Zeeb 			uint32_t cx;
23662b5b6ecSBjoern A. Zeeb 
23762b5b6ecSBjoern A. Zeeb 			cx = ntohs(ip6->ip6_plen);
23862b5b6ecSBjoern A. Zeeb 			cs = in6_cksum_pseudo(ip6, cx, ip6->ip6_nxt, 0);
23962b5b6ecSBjoern A. Zeeb 		}
24062b5b6ecSBjoern A. Zeeb 		break;
24162b5b6ecSBjoern A. Zeeb 	}
24262b5b6ecSBjoern A. Zeeb #endif
24362b5b6ecSBjoern A. Zeeb #ifdef INET
24462b5b6ecSBjoern A. Zeeb 	case ETHERTYPE_IP:
24562b5b6ecSBjoern A. Zeeb 	{
24662b5b6ecSBjoern A. Zeeb 		struct ip *ip4;
24762b5b6ecSBjoern A. Zeeb 
24862b5b6ecSBjoern A. Zeeb 		ip4 = (struct ip *)l3hdr;
24962b5b6ecSBjoern A. Zeeb 		if (le->append_cnt == 0)
25062b5b6ecSBjoern A. Zeeb 			cs = ip4->ip_len;
25162b5b6ecSBjoern A. Zeeb 		else {
25262b5b6ecSBjoern A. Zeeb 			cs = in_addword(ntohs(ip4->ip_len) - sizeof(*ip4),
25362b5b6ecSBjoern A. Zeeb 			    IPPROTO_TCP);
25462b5b6ecSBjoern A. Zeeb 			cs = in_pseudo(ip4->ip_src.s_addr, ip4->ip_dst.s_addr,
25562b5b6ecSBjoern A. Zeeb 			    htons(cs));
25662b5b6ecSBjoern A. Zeeb 		}
25762b5b6ecSBjoern A. Zeeb 		break;
25862b5b6ecSBjoern A. Zeeb 	}
25962b5b6ecSBjoern A. Zeeb #endif
26062b5b6ecSBjoern A. Zeeb 	default:
26162b5b6ecSBjoern A. Zeeb 		cs = 0;		/* Keep compiler happy. */
26262b5b6ecSBjoern A. Zeeb 	}
26362b5b6ecSBjoern A. Zeeb 
26462b5b6ecSBjoern A. Zeeb 	cs = ~cs;
26562b5b6ecSBjoern A. Zeeb 	c += cs;
26662b5b6ecSBjoern A. Zeeb 
26762b5b6ecSBjoern A. Zeeb 	/* Remove TCP header csum. */
26862b5b6ecSBjoern A. Zeeb 	cs = ~tcp_lro_csum_th(th);
26962b5b6ecSBjoern A. Zeeb 	c += cs;
27062b5b6ecSBjoern A. Zeeb 	while (c > 0xffff)
27162b5b6ecSBjoern A. Zeeb 		c = (c >> 16) + (c & 0xffff);
27262b5b6ecSBjoern A. Zeeb 
27362b5b6ecSBjoern A. Zeeb 	return (c & 0xffff);
27462b5b6ecSBjoern A. Zeeb }
27562b5b6ecSBjoern A. Zeeb #endif
27662b5b6ecSBjoern A. Zeeb 
2776dd38b87SSepherosa Ziehau static void
2786dd38b87SSepherosa Ziehau tcp_lro_rx_done(struct lro_ctrl *lc)
2796dd38b87SSepherosa Ziehau {
2806dd38b87SSepherosa Ziehau 	struct lro_entry *le;
2816dd38b87SSepherosa Ziehau 
2821ea44822SSepherosa Ziehau 	while ((le = LIST_FIRST(&lc->lro_active)) != NULL) {
28351e3c20dSSepherosa Ziehau 		tcp_lro_active_remove(le);
2846dd38b87SSepherosa Ziehau 		tcp_lro_flush(lc, le);
2856dd38b87SSepherosa Ziehau 	}
2866dd38b87SSepherosa Ziehau }
2876dd38b87SSepherosa Ziehau 
2886c5087a8SJack F Vogel void
2897127e6acSNavdeep Parhar tcp_lro_flush_inactive(struct lro_ctrl *lc, const struct timeval *timeout)
2907127e6acSNavdeep Parhar {
2917127e6acSNavdeep Parhar 	struct lro_entry *le, *le_tmp;
2927127e6acSNavdeep Parhar 	struct timeval tv;
2937127e6acSNavdeep Parhar 
2941ea44822SSepherosa Ziehau 	if (LIST_EMPTY(&lc->lro_active))
2957127e6acSNavdeep Parhar 		return;
2967127e6acSNavdeep Parhar 
2977127e6acSNavdeep Parhar 	getmicrotime(&tv);
2987127e6acSNavdeep Parhar 	timevalsub(&tv, timeout);
2991ea44822SSepherosa Ziehau 	LIST_FOREACH_SAFE(le, &lc->lro_active, next, le_tmp) {
3007127e6acSNavdeep Parhar 		if (timevalcmp(&tv, &le->mtime, >=)) {
30151e3c20dSSepherosa Ziehau 			tcp_lro_active_remove(le);
3027127e6acSNavdeep Parhar 			tcp_lro_flush(lc, le);
3037127e6acSNavdeep Parhar 		}
3047127e6acSNavdeep Parhar 	}
3057127e6acSNavdeep Parhar }
3067127e6acSNavdeep Parhar 
3077127e6acSNavdeep Parhar void
30862b5b6ecSBjoern A. Zeeb tcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *le)
3096c5087a8SJack F Vogel {
3106c5087a8SJack F Vogel 
31162b5b6ecSBjoern A. Zeeb 	if (le->append_cnt > 0) {
31262b5b6ecSBjoern A. Zeeb 		struct tcphdr *th;
31362b5b6ecSBjoern A. Zeeb 		uint16_t p_len;
3146c5087a8SJack F Vogel 
31562b5b6ecSBjoern A. Zeeb 		p_len = htons(le->p_len);
31662b5b6ecSBjoern A. Zeeb 		switch (le->eh_type) {
31762b5b6ecSBjoern A. Zeeb #ifdef INET6
31862b5b6ecSBjoern A. Zeeb 		case ETHERTYPE_IPV6:
3196c5087a8SJack F Vogel 		{
32062b5b6ecSBjoern A. Zeeb 			struct ip6_hdr *ip6;
32162b5b6ecSBjoern A. Zeeb 
32262b5b6ecSBjoern A. Zeeb 			ip6 = le->le_ip6;
32362b5b6ecSBjoern A. Zeeb 			ip6->ip6_plen = p_len;
32462b5b6ecSBjoern A. Zeeb 			th = (struct tcphdr *)(ip6 + 1);
32562b5b6ecSBjoern A. Zeeb 			le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID |
32662b5b6ecSBjoern A. Zeeb 			    CSUM_PSEUDO_HDR;
32762b5b6ecSBjoern A. Zeeb 			le->p_len += ETHER_HDR_LEN + sizeof(*ip6);
32862b5b6ecSBjoern A. Zeeb 			break;
32962b5b6ecSBjoern A. Zeeb 		}
33062b5b6ecSBjoern A. Zeeb #endif
33162b5b6ecSBjoern A. Zeeb #ifdef INET
33262b5b6ecSBjoern A. Zeeb 		case ETHERTYPE_IP:
33362b5b6ecSBjoern A. Zeeb 		{
33462b5b6ecSBjoern A. Zeeb 			struct ip *ip4;
33562b5b6ecSBjoern A. Zeeb #ifdef TCP_LRO_UPDATE_CSUM
33662b5b6ecSBjoern A. Zeeb 			uint32_t cl;
33762b5b6ecSBjoern A. Zeeb 			uint16_t c;
33862b5b6ecSBjoern A. Zeeb #endif
33962b5b6ecSBjoern A. Zeeb 
34062b5b6ecSBjoern A. Zeeb 			ip4 = le->le_ip4;
34162b5b6ecSBjoern A. Zeeb #ifdef TCP_LRO_UPDATE_CSUM
34262b5b6ecSBjoern A. Zeeb 			/* Fix IP header checksum for new length. */
34362b5b6ecSBjoern A. Zeeb 			c = ~ip4->ip_sum;
34462b5b6ecSBjoern A. Zeeb 			cl = c;
34562b5b6ecSBjoern A. Zeeb 			c = ~ip4->ip_len;
34662b5b6ecSBjoern A. Zeeb 			cl += c + p_len;
34762b5b6ecSBjoern A. Zeeb 			while (cl > 0xffff)
34862b5b6ecSBjoern A. Zeeb 				cl = (cl >> 16) + (cl & 0xffff);
34962b5b6ecSBjoern A. Zeeb 			c = cl;
35062b5b6ecSBjoern A. Zeeb 			ip4->ip_sum = ~c;
35162b5b6ecSBjoern A. Zeeb #else
35262b5b6ecSBjoern A. Zeeb 			ip4->ip_sum = TCP_LRO_INVALID_CSUM;
35362b5b6ecSBjoern A. Zeeb #endif
35462b5b6ecSBjoern A. Zeeb 			ip4->ip_len = p_len;
35562b5b6ecSBjoern A. Zeeb 			th = (struct tcphdr *)(ip4 + 1);
35662b5b6ecSBjoern A. Zeeb 			le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID |
35762b5b6ecSBjoern A. Zeeb 			    CSUM_PSEUDO_HDR | CSUM_IP_CHECKED | CSUM_IP_VALID;
35862b5b6ecSBjoern A. Zeeb 			le->p_len += ETHER_HDR_LEN;
35962b5b6ecSBjoern A. Zeeb 			break;
36062b5b6ecSBjoern A. Zeeb 		}
36162b5b6ecSBjoern A. Zeeb #endif
36262b5b6ecSBjoern A. Zeeb 		default:
36362b5b6ecSBjoern A. Zeeb 			th = NULL;	/* Keep compiler happy. */
36462b5b6ecSBjoern A. Zeeb 		}
36562b5b6ecSBjoern A. Zeeb 		le->m_head->m_pkthdr.csum_data = 0xffff;
36662b5b6ecSBjoern A. Zeeb 		le->m_head->m_pkthdr.len = le->p_len;
36762b5b6ecSBjoern A. Zeeb 
36862b5b6ecSBjoern A. Zeeb 		/* Incorporate the latest ACK into the TCP header. */
36962b5b6ecSBjoern A. Zeeb 		th->th_ack = le->ack_seq;
37062b5b6ecSBjoern A. Zeeb 		th->th_win = le->window;
37162b5b6ecSBjoern A. Zeeb 		/* Incorporate latest timestamp into the TCP header. */
37262b5b6ecSBjoern A. Zeeb 		if (le->timestamp != 0) {
3736c5087a8SJack F Vogel 			uint32_t *ts_ptr;
3746c5087a8SJack F Vogel 
37562b5b6ecSBjoern A. Zeeb 			ts_ptr = (uint32_t *)(th + 1);
37662b5b6ecSBjoern A. Zeeb 			ts_ptr[1] = htonl(le->tsval);
37762b5b6ecSBjoern A. Zeeb 			ts_ptr[2] = le->tsecr;
37862b5b6ecSBjoern A. Zeeb 		}
37962b5b6ecSBjoern A. Zeeb #ifdef TCP_LRO_UPDATE_CSUM
38062b5b6ecSBjoern A. Zeeb 		/* Update the TCP header checksum. */
38162b5b6ecSBjoern A. Zeeb 		le->ulp_csum += p_len;
38262b5b6ecSBjoern A. Zeeb 		le->ulp_csum += tcp_lro_csum_th(th);
38362b5b6ecSBjoern A. Zeeb 		while (le->ulp_csum > 0xffff)
38462b5b6ecSBjoern A. Zeeb 			le->ulp_csum = (le->ulp_csum >> 16) +
38562b5b6ecSBjoern A. Zeeb 			    (le->ulp_csum & 0xffff);
38662b5b6ecSBjoern A. Zeeb 		th->th_sum = (le->ulp_csum & 0xffff);
38762b5b6ecSBjoern A. Zeeb 		th->th_sum = ~th->th_sum;
38862b5b6ecSBjoern A. Zeeb #else
38962b5b6ecSBjoern A. Zeeb 		th->th_sum = TCP_LRO_INVALID_CSUM;
39062b5b6ecSBjoern A. Zeeb #endif
39162b5b6ecSBjoern A. Zeeb 	}
3926c5087a8SJack F Vogel 
3934b7b743cSLawrence Stewart 	le->m_head->m_pkthdr.lro_nsegs = le->append_cnt + 1;
39462b5b6ecSBjoern A. Zeeb 	(*lc->ifp->if_input)(lc->ifp, le->m_head);
39562b5b6ecSBjoern A. Zeeb 	lc->lro_queued += le->append_cnt + 1;
39662b5b6ecSBjoern A. Zeeb 	lc->lro_flushed++;
39762b5b6ecSBjoern A. Zeeb 	bzero(le, sizeof(*le));
3981ea44822SSepherosa Ziehau 	LIST_INSERT_HEAD(&lc->lro_free, le, next);
39962b5b6ecSBjoern A. Zeeb }
4006c5087a8SJack F Vogel 
/*
 * tcp_lro_msb_64(x): isolate the most significant set bit of a 64-bit
 * value.  With an inline flsll() the result is computed from the bit
 * index; otherwise the top bit is smeared into all lower positions and
 * everything below it is cleared again.  The fallback returns 0 for
 * x == 0.
 */
#ifdef HAVE_INLINE_FLSLL
#define	tcp_lro_msb_64(x) (1ULL << (flsll(x) - 1))
#else
static inline uint64_t
tcp_lro_msb_64(uint64_t x)
{
	unsigned shift;

	/* Shifts of 1, 2, 4, 8, 16 and 32 fill every bit below the MSB. */
	for (shift = 1; shift < 64; shift <<= 1)
		x |= (x >> shift);

	/* x is now a solid run of ones; keep only the topmost one. */
	return (x ^ (x >> 1));
}
#endif
416e936121dSHans Petter Selasky 
417fc271df3SHans Petter Selasky /*
418fc271df3SHans Petter Selasky  * The tcp_lro_sort() routine is comparable to qsort(), except it has
419fc271df3SHans Petter Selasky  * a worst case complexity limit of O(MIN(N,64)*N), where N is the
420fc271df3SHans Petter Selasky  * number of elements to sort and 64 is the number of sequence bits
421fc271df3SHans Petter Selasky  * available. The algorithm is bit-slicing the 64-bit sequence number,
422fc271df3SHans Petter Selasky  * sorting one bit at a time from the most significant bit until the
423ec668905SHans Petter Selasky  * least significant one, skipping the constant bits. This is
424ec668905SHans Petter Selasky  * typically called a radix sort.
425fc271df3SHans Petter Selasky  */
426fc271df3SHans Petter Selasky static void
427fc271df3SHans Petter Selasky tcp_lro_sort(struct lro_mbuf_sort *parray, uint32_t size)
428fc271df3SHans Petter Selasky {
429fc271df3SHans Petter Selasky 	struct lro_mbuf_sort temp;
430fc271df3SHans Petter Selasky 	uint64_t ones;
431fc271df3SHans Petter Selasky 	uint64_t zeros;
432fc271df3SHans Petter Selasky 	uint32_t x;
433fc271df3SHans Petter Selasky 	uint32_t y;
434e936121dSHans Petter Selasky 
435fc271df3SHans Petter Selasky repeat:
436ec668905SHans Petter Selasky 	/* for small arrays insertion sort is faster */
437fc271df3SHans Petter Selasky 	if (size <= 12) {
438ec668905SHans Petter Selasky 		for (x = 1; x < size; x++) {
439fc271df3SHans Petter Selasky 			temp = parray[x];
440ec668905SHans Petter Selasky 			for (y = x; y > 0 && temp.seq < parray[y - 1].seq; y--)
441ec668905SHans Petter Selasky 				parray[y] = parray[y - 1];
442fc271df3SHans Petter Selasky 			parray[y] = temp;
443fc271df3SHans Petter Selasky 		}
444fc271df3SHans Petter Selasky 		return;
445fc271df3SHans Petter Selasky 	}
446e936121dSHans Petter Selasky 
447fc271df3SHans Petter Selasky 	/* compute sequence bits which are constant */
448fc271df3SHans Petter Selasky 	ones = 0;
449fc271df3SHans Petter Selasky 	zeros = 0;
450fc271df3SHans Petter Selasky 	for (x = 0; x != size; x++) {
451fc271df3SHans Petter Selasky 		ones |= parray[x].seq;
452fc271df3SHans Petter Selasky 		zeros |= ~parray[x].seq;
453fc271df3SHans Petter Selasky 	}
454fc271df3SHans Petter Selasky 
455fc271df3SHans Petter Selasky 	/* compute bits which are not constant into "ones" */
456fc271df3SHans Petter Selasky 	ones &= zeros;
457fc271df3SHans Petter Selasky 	if (ones == 0)
458fc271df3SHans Petter Selasky 		return;
459fc271df3SHans Petter Selasky 
460fc271df3SHans Petter Selasky 	/* pick the most significant bit which is not constant */
461fc271df3SHans Petter Selasky 	ones = tcp_lro_msb_64(ones);
462fc271df3SHans Petter Selasky 
463fc271df3SHans Petter Selasky 	/*
464fc271df3SHans Petter Selasky 	 * Move entries having cleared sequence bits to the beginning
465fc271df3SHans Petter Selasky 	 * of the array:
466fc271df3SHans Petter Selasky 	 */
467fc271df3SHans Petter Selasky 	for (x = y = 0; y != size; y++) {
468fc271df3SHans Petter Selasky 		/* skip set bits */
469fc271df3SHans Petter Selasky 		if (parray[y].seq & ones)
470fc271df3SHans Petter Selasky 			continue;
471fc271df3SHans Petter Selasky 		/* swap entries */
472fc271df3SHans Petter Selasky 		temp = parray[x];
473fc271df3SHans Petter Selasky 		parray[x] = parray[y];
474fc271df3SHans Petter Selasky 		parray[y] = temp;
475fc271df3SHans Petter Selasky 		x++;
476fc271df3SHans Petter Selasky 	}
477fc271df3SHans Petter Selasky 
478fc271df3SHans Petter Selasky 	KASSERT(x != 0 && x != size, ("Memory is corrupted\n"));
479fc271df3SHans Petter Selasky 
480fc271df3SHans Petter Selasky 	/* sort zeros */
481fc271df3SHans Petter Selasky 	tcp_lro_sort(parray, x);
482fc271df3SHans Petter Selasky 
483fc271df3SHans Petter Selasky 	/* sort ones */
484fc271df3SHans Petter Selasky 	parray += x;
485fc271df3SHans Petter Selasky 	size -= x;
486fc271df3SHans Petter Selasky 	goto repeat;
487e936121dSHans Petter Selasky }
488e936121dSHans Petter Selasky 
489e936121dSHans Petter Selasky void
490e936121dSHans Petter Selasky tcp_lro_flush_all(struct lro_ctrl *lc)
491e936121dSHans Petter Selasky {
492fc271df3SHans Petter Selasky 	uint64_t seq;
493fc271df3SHans Petter Selasky 	uint64_t nseq;
494e936121dSHans Petter Selasky 	unsigned x;
495e936121dSHans Petter Selasky 
496e936121dSHans Petter Selasky 	/* check if no mbufs to flush */
4976dd38b87SSepherosa Ziehau 	if (lc->lro_mbuf_count == 0)
498e936121dSHans Petter Selasky 		goto done;
499e936121dSHans Petter Selasky 
500e936121dSHans Petter Selasky 	/* sort all mbufs according to stream */
501fc271df3SHans Petter Selasky 	tcp_lro_sort(lc->lro_mbuf_data, lc->lro_mbuf_count);
502e936121dSHans Petter Selasky 
503e936121dSHans Petter Selasky 	/* input data into LRO engine, stream by stream */
504fc271df3SHans Petter Selasky 	seq = 0;
505e936121dSHans Petter Selasky 	for (x = 0; x != lc->lro_mbuf_count; x++) {
506e936121dSHans Petter Selasky 		struct mbuf *mb;
507e936121dSHans Petter Selasky 
508fc271df3SHans Petter Selasky 		/* get mbuf */
509fc271df3SHans Petter Selasky 		mb = lc->lro_mbuf_data[x].mb;
510fc271df3SHans Petter Selasky 
511fc271df3SHans Petter Selasky 		/* get sequence number, masking away the packet index */
512fc271df3SHans Petter Selasky 		nseq = lc->lro_mbuf_data[x].seq & (-1ULL << 24);
513e936121dSHans Petter Selasky 
514e936121dSHans Petter Selasky 		/* check for new stream */
515fc271df3SHans Petter Selasky 		if (seq != nseq) {
516fc271df3SHans Petter Selasky 			seq = nseq;
517e936121dSHans Petter Selasky 
518e936121dSHans Petter Selasky 			/* flush active streams */
5196dd38b87SSepherosa Ziehau 			tcp_lro_rx_done(lc);
520e936121dSHans Petter Selasky 		}
521fc271df3SHans Petter Selasky 
522e936121dSHans Petter Selasky 		/* add packet to LRO engine */
52305cde7efSSepherosa Ziehau 		if (tcp_lro_rx2(lc, mb, 0, 0) != 0) {
524e936121dSHans Petter Selasky 			/* input packet to network layer */
525e936121dSHans Petter Selasky 			(*lc->ifp->if_input)(lc->ifp, mb);
526e936121dSHans Petter Selasky 			lc->lro_queued++;
527e936121dSHans Petter Selasky 			lc->lro_flushed++;
528e936121dSHans Petter Selasky 		}
529e936121dSHans Petter Selasky 	}
530e936121dSHans Petter Selasky done:
531e936121dSHans Petter Selasky 	/* flush active streams */
5326dd38b87SSepherosa Ziehau 	tcp_lro_rx_done(lc);
5336dd38b87SSepherosa Ziehau 
534e936121dSHans Petter Selasky 	lc->lro_mbuf_count = 0;
535e936121dSHans Petter Selasky }
536e936121dSHans Petter Selasky 
53762b5b6ecSBjoern A. Zeeb #ifdef INET6
53862b5b6ecSBjoern A. Zeeb static int
53962b5b6ecSBjoern A. Zeeb tcp_lro_rx_ipv6(struct lro_ctrl *lc, struct mbuf *m, struct ip6_hdr *ip6,
54062b5b6ecSBjoern A. Zeeb     struct tcphdr **th)
54162b5b6ecSBjoern A. Zeeb {
5426c5087a8SJack F Vogel 
54362b5b6ecSBjoern A. Zeeb 	/* XXX-BZ we should check the flow-label. */
5446c5087a8SJack F Vogel 
54562b5b6ecSBjoern A. Zeeb 	/* XXX-BZ We do not yet support ext. hdrs. */
54662b5b6ecSBjoern A. Zeeb 	if (ip6->ip6_nxt != IPPROTO_TCP)
54762b5b6ecSBjoern A. Zeeb 		return (TCP_LRO_NOT_SUPPORTED);
54862b5b6ecSBjoern A. Zeeb 
54962b5b6ecSBjoern A. Zeeb 	/* Find the TCP header. */
55062b5b6ecSBjoern A. Zeeb 	*th = (struct tcphdr *)(ip6 + 1);
55162b5b6ecSBjoern A. Zeeb 
55262b5b6ecSBjoern A. Zeeb 	return (0);
55362b5b6ecSBjoern A. Zeeb }
55462b5b6ecSBjoern A. Zeeb #endif
55562b5b6ecSBjoern A. Zeeb 
55662b5b6ecSBjoern A. Zeeb #ifdef INET
55762b5b6ecSBjoern A. Zeeb static int
55862b5b6ecSBjoern A. Zeeb tcp_lro_rx_ipv4(struct lro_ctrl *lc, struct mbuf *m, struct ip *ip4,
55962b5b6ecSBjoern A. Zeeb     struct tcphdr **th)
56062b5b6ecSBjoern A. Zeeb {
56162b5b6ecSBjoern A. Zeeb 	int csum_flags;
56262b5b6ecSBjoern A. Zeeb 	uint16_t csum;
56362b5b6ecSBjoern A. Zeeb 
56462b5b6ecSBjoern A. Zeeb 	if (ip4->ip_p != IPPROTO_TCP)
56562b5b6ecSBjoern A. Zeeb 		return (TCP_LRO_NOT_SUPPORTED);
56662b5b6ecSBjoern A. Zeeb 
56762b5b6ecSBjoern A. Zeeb 	/* Ensure there are no options. */
56862b5b6ecSBjoern A. Zeeb 	if ((ip4->ip_hl << 2) != sizeof (*ip4))
56962b5b6ecSBjoern A. Zeeb 		return (TCP_LRO_CANNOT);
57062b5b6ecSBjoern A. Zeeb 
57162b5b6ecSBjoern A. Zeeb 	/* .. and the packet is not fragmented. */
57262b5b6ecSBjoern A. Zeeb 	if (ip4->ip_off & htons(IP_MF|IP_OFFMASK))
57362b5b6ecSBjoern A. Zeeb 		return (TCP_LRO_CANNOT);
57462b5b6ecSBjoern A. Zeeb 
57562b5b6ecSBjoern A. Zeeb 	/* Legacy IP has a header checksum that needs to be correct. */
57662b5b6ecSBjoern A. Zeeb 	csum_flags = m->m_pkthdr.csum_flags;
57745709593SKip Macy 	if (csum_flags & CSUM_IP_CHECKED) {
57845709593SKip Macy 		if (__predict_false((csum_flags & CSUM_IP_VALID) == 0)) {
57962b5b6ecSBjoern A. Zeeb 			lc->lro_bad_csum++;
58062b5b6ecSBjoern A. Zeeb 			return (TCP_LRO_CANNOT);
58145709593SKip Macy 		}
58245709593SKip Macy 	} else {
58362b5b6ecSBjoern A. Zeeb 		csum = in_cksum_hdr(ip4);
584e5ca1ffaSAndrew Gallatin 		if (__predict_false((csum) != 0)) {
58562b5b6ecSBjoern A. Zeeb 			lc->lro_bad_csum++;
58662b5b6ecSBjoern A. Zeeb 			return (TCP_LRO_CANNOT);
5876c5087a8SJack F Vogel 		}
58845709593SKip Macy 	}
5896c5087a8SJack F Vogel 
59062b5b6ecSBjoern A. Zeeb 	/* Find the TCP header (we assured there are no IP options). */
59162b5b6ecSBjoern A. Zeeb 	*th = (struct tcphdr *)(ip4 + 1);
5926c5087a8SJack F Vogel 
59362b5b6ecSBjoern A. Zeeb 	return (0);
5946c5087a8SJack F Vogel }
59562b5b6ecSBjoern A. Zeeb #endif
5966c5087a8SJack F Vogel 
/*
 * Core LRO receive path: try to merge the single TCP segment in 'm' into an
 * active LRO entry of 'lc', or start a new entry for it.
 *
 * lc        LRO control block; supplies the free list, the hash buckets of
 *           active entries and the length/ACK-count limits used below.
 * m         Received frame.  Its data is read starting at the Ethernet
 *           header and the [eh, ip, tcp] headers are expected to be
 *           contiguous.
 * csum      Checksum value passed in by the driver; 0 means "not supplied",
 *           in which case th_sum from the TCP header is used instead.
 * use_hash  When zero, bucket 0 is always used.  Otherwise the bucket is
 *           derived from the mbuf flowid if M_HASHTYPE_ISHASH(m) is true,
 *           else from a sum of the addresses and ports.
 *
 * Returns 0 when the mbuf has been taken over: chained onto an existing
 * entry, used to start a new entry, or freed after its ACK/window were
 * folded into an existing entry.  Any non-zero return
 * (TCP_LRO_NOT_SUPPORTED, TCP_LRO_CANNOT, TCP_LRO_NO_ENTRIES, or whatever
 * tcp_lro_rx_ipv4()/tcp_lro_rx_ipv6() returned) leaves 'm' neither queued
 * nor freed by this function, although trailing padding may already have
 * been trimmed from it.
 */
59705cde7efSSepherosa Ziehau static int
59805cde7efSSepherosa Ziehau tcp_lro_rx2(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum, int use_hash)
59962b5b6ecSBjoern A. Zeeb {
60062b5b6ecSBjoern A. Zeeb 	struct lro_entry *le;
60162b5b6ecSBjoern A. Zeeb 	struct ether_header *eh;
60262b5b6ecSBjoern A. Zeeb #ifdef INET6
60362b5b6ecSBjoern A. Zeeb 	struct ip6_hdr *ip6 = NULL;	/* Keep compiler happy. */
60462b5b6ecSBjoern A. Zeeb #endif
60562b5b6ecSBjoern A. Zeeb #ifdef INET
60662b5b6ecSBjoern A. Zeeb 	struct ip *ip4 = NULL;		/* Keep compiler happy. */
60762b5b6ecSBjoern A. Zeeb #endif
60862b5b6ecSBjoern A. Zeeb 	struct tcphdr *th;
60962b5b6ecSBjoern A. Zeeb 	void *l3hdr = NULL;		/* Keep compiler happy. */
61062b5b6ecSBjoern A. Zeeb 	uint32_t *ts_ptr;
61162b5b6ecSBjoern A. Zeeb 	tcp_seq seq;
61262b5b6ecSBjoern A. Zeeb 	int error, ip_len, l;
61362b5b6ecSBjoern A. Zeeb 	uint16_t eh_type, tcp_data_len;
61405cde7efSSepherosa Ziehau 	struct lro_head *bucket;
615b9ec6f0bSSepherosa Ziehau 	int force_flush = 0;
6166c5087a8SJack F Vogel 
61762b5b6ecSBjoern A. Zeeb 	/* We expect a contiguous header [eh, ip, tcp]. */
61862b5b6ecSBjoern A. Zeeb 
61962b5b6ecSBjoern A. Zeeb 	eh = mtod(m, struct ether_header *);
62062b5b6ecSBjoern A. Zeeb 	eh_type = ntohs(eh->ether_type);
62162b5b6ecSBjoern A. Zeeb 	switch (eh_type) {
62262b5b6ecSBjoern A. Zeeb #ifdef INET6
62362b5b6ecSBjoern A. Zeeb 	case ETHERTYPE_IPV6:
6245fa2656eSBjoern A. Zeeb 	{
		/* LRO is refused while IPv6 forwarding is enabled in this vnet. */
6255fa2656eSBjoern A. Zeeb 		CURVNET_SET(lc->ifp->if_vnet);
62631bfc56eSBjoern A. Zeeb 		if (V_ip6_forwarding != 0) {
62731bfc56eSBjoern A. Zeeb 			/* XXX-BZ stats but changing lro_ctrl is a problem. */
6285fa2656eSBjoern A. Zeeb 			CURVNET_RESTORE();
62931bfc56eSBjoern A. Zeeb 			return (TCP_LRO_CANNOT);
63031bfc56eSBjoern A. Zeeb 		}
6315fa2656eSBjoern A. Zeeb 		CURVNET_RESTORE();
63262b5b6ecSBjoern A. Zeeb 		l3hdr = ip6 = (struct ip6_hdr *)(eh + 1);
63362b5b6ecSBjoern A. Zeeb 		error = tcp_lro_rx_ipv6(lc, m, ip6, &th);
63462b5b6ecSBjoern A. Zeeb 		if (error != 0)
63562b5b6ecSBjoern A. Zeeb 			return (error);
63662b5b6ecSBjoern A. Zeeb 		tcp_data_len = ntohs(ip6->ip6_plen);
63762b5b6ecSBjoern A. Zeeb 		ip_len = sizeof(*ip6) + tcp_data_len;
63862b5b6ecSBjoern A. Zeeb 		break;
6395fa2656eSBjoern A. Zeeb 	}
64062b5b6ecSBjoern A. Zeeb #endif
64162b5b6ecSBjoern A. Zeeb #ifdef INET
64262b5b6ecSBjoern A. Zeeb 	case ETHERTYPE_IP:
6435fa2656eSBjoern A. Zeeb 	{
		/* LRO is refused while IPv4 forwarding is enabled in this vnet. */
6445fa2656eSBjoern A. Zeeb 		CURVNET_SET(lc->ifp->if_vnet);
64531bfc56eSBjoern A. Zeeb 		if (V_ipforwarding != 0) {
64631bfc56eSBjoern A. Zeeb 			/* XXX-BZ stats but changing lro_ctrl is a problem. */
6475fa2656eSBjoern A. Zeeb 			CURVNET_RESTORE();
64831bfc56eSBjoern A. Zeeb 			return (TCP_LRO_CANNOT);
64931bfc56eSBjoern A. Zeeb 		}
6505fa2656eSBjoern A. Zeeb 		CURVNET_RESTORE();
65162b5b6ecSBjoern A. Zeeb 		l3hdr = ip4 = (struct ip *)(eh + 1);
65262b5b6ecSBjoern A. Zeeb 		error = tcp_lro_rx_ipv4(lc, m, ip4, &th);
65362b5b6ecSBjoern A. Zeeb 		if (error != 0)
65462b5b6ecSBjoern A. Zeeb 			return (error);
65562b5b6ecSBjoern A. Zeeb 		ip_len = ntohs(ip4->ip_len);
65662b5b6ecSBjoern A. Zeeb 		tcp_data_len = ip_len - sizeof(*ip4);
65762b5b6ecSBjoern A. Zeeb 		break;
6585fa2656eSBjoern A. Zeeb 	}
65962b5b6ecSBjoern A. Zeeb #endif
66062b5b6ecSBjoern A. Zeeb 	/* XXX-BZ what happens in case of VLAN(s)? */
66162b5b6ecSBjoern A. Zeeb 	default:
66262b5b6ecSBjoern A. Zeeb 		return (TCP_LRO_NOT_SUPPORTED);
66362b5b6ecSBjoern A. Zeeb 	}
6646c5087a8SJack F Vogel 
	/*
	 * At this point ip_len is the full IP packet length and tcp_data_len
	 * is that length minus the fixed IP header, i.e. it still includes the
	 * TCP header; the header is subtracted further down.
	 */

6656c5087a8SJack F Vogel 	/*
66662b5b6ecSBjoern A. Zeeb 	 * If the frame is padded beyond the end of the IP packet, then we must
66762b5b6ecSBjoern A. Zeeb 	 * trim the extra bytes off.
6686c5087a8SJack F Vogel 	 */
66962b5b6ecSBjoern A. Zeeb 	l = m->m_pkthdr.len - (ETHER_HDR_LEN + ip_len);
67062b5b6ecSBjoern A. Zeeb 	if (l != 0) {
67162b5b6ecSBjoern A. Zeeb 		if (l < 0)
67262b5b6ecSBjoern A. Zeeb 			/* Truncated packet. */
67362b5b6ecSBjoern A. Zeeb 			return (TCP_LRO_CANNOT);
67462b5b6ecSBjoern A. Zeeb 
		/* l > 0 here; a negative count makes m_adj() trim the tail. */
67562b5b6ecSBjoern A. Zeeb 		m_adj(m, -l);
6766c5087a8SJack F Vogel 	}
6776c5087a8SJack F Vogel 
67862b5b6ecSBjoern A. Zeeb 	/*
67962b5b6ecSBjoern A. Zeeb 	 * Check TCP header constraints.
68062b5b6ecSBjoern A. Zeeb 	 */
68162b5b6ecSBjoern A. Zeeb 	/* Ensure no bits set besides ACK or PSH. */
682b9ec6f0bSSepherosa Ziehau 	if ((th->th_flags & ~(TH_ACK | TH_PUSH)) != 0) {
683b9ec6f0bSSepherosa Ziehau 		if (th->th_flags & TH_SYN)
68462b5b6ecSBjoern A. Zeeb 			return (TCP_LRO_CANNOT);
685b9ec6f0bSSepherosa Ziehau 		/*
686b9ec6f0bSSepherosa Ziehau 		 * Make sure that previously seen segments/ACKs are delivered
687b9ec6f0bSSepherosa Ziehau 		 * before this segment, e.g. FIN.
688b9ec6f0bSSepherosa Ziehau 		 */
689b9ec6f0bSSepherosa Ziehau 		force_flush = 1;
690b9ec6f0bSSepherosa Ziehau 	}
69162b5b6ecSBjoern A. Zeeb 
6929b436b18SSepherosa Ziehau 	/* XXX-BZ We lose a ACK|PUSH flag concatenating multiple segments. */
69362b5b6ecSBjoern A. Zeeb 	/* XXX-BZ Ideally we'd flush on PUSH? */
69462b5b6ecSBjoern A. Zeeb 
69562b5b6ecSBjoern A. Zeeb 	/*
69662b5b6ecSBjoern A. Zeeb 	 * Check for timestamps.
69762b5b6ecSBjoern A. Zeeb 	 * Since the only option we handle are timestamps, we only have to
69862b5b6ecSBjoern A. Zeeb 	 * handle the simple case of aligned timestamps.
69962b5b6ecSBjoern A. Zeeb 	 */
	/*
	 * l starts as the TCP header length in bytes (th_off << 2).  After the
	 * two subtractions tcp_data_len is the payload length and l is the
	 * length of the TCP options alone.
	 */
70062b5b6ecSBjoern A. Zeeb 	l = (th->th_off << 2);
70162b5b6ecSBjoern A. Zeeb 	tcp_data_len -= l;
70262b5b6ecSBjoern A. Zeeb 	l -= sizeof(*th);
70362b5b6ecSBjoern A. Zeeb 	ts_ptr = (uint32_t *)(th + 1);
70462b5b6ecSBjoern A. Zeeb 	if (l != 0 && (__predict_false(l != TCPOLEN_TSTAMP_APPA) ||
70562b5b6ecSBjoern A. Zeeb 	    (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|
706b9ec6f0bSSepherosa Ziehau 	    TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))) {
707b9ec6f0bSSepherosa Ziehau 		/*
708b9ec6f0bSSepherosa Ziehau 		 * Make sure that previously seen segments/ACKs are delivered
709b9ec6f0bSSepherosa Ziehau 		 * before this segment.
710b9ec6f0bSSepherosa Ziehau 		 */
711b9ec6f0bSSepherosa Ziehau 		force_flush = 1;
712b9ec6f0bSSepherosa Ziehau 	}
71362b5b6ecSBjoern A. Zeeb 
71462b5b6ecSBjoern A. Zeeb 	/* If the driver did not pass in the checksum, set it now. */
71562b5b6ecSBjoern A. Zeeb 	if (csum == 0x0000)
71662b5b6ecSBjoern A. Zeeb 		csum = th->th_sum;
71762b5b6ecSBjoern A. Zeeb 
71862b5b6ecSBjoern A. Zeeb 	seq = ntohl(th->th_seq);
71962b5b6ecSBjoern A. Zeeb 
	/*
	 * Pick the bucket of active entries to search: bucket 0 when hashing
	 * is off, one indexed by the flowid when M_HASHTYPE_ISHASH(m) is true,
	 * else one indexed by a sum of the addresses and ports computed here.
	 */
72005cde7efSSepherosa Ziehau 	if (!use_hash) {
72105cde7efSSepherosa Ziehau 		bucket = &lc->lro_hash[0];
72205cde7efSSepherosa Ziehau 	} else if (M_HASHTYPE_ISHASH(m)) {
72305cde7efSSepherosa Ziehau 		bucket = &lc->lro_hash[m->m_pkthdr.flowid % lc->lro_hashsz];
72405cde7efSSepherosa Ziehau 	} else {
72505cde7efSSepherosa Ziehau 		uint32_t hash;
72605cde7efSSepherosa Ziehau 
72705cde7efSSepherosa Ziehau 		switch (eh_type) {
72805cde7efSSepherosa Ziehau #ifdef INET
72905cde7efSSepherosa Ziehau 		case ETHERTYPE_IP:
73005cde7efSSepherosa Ziehau 			hash = ip4->ip_src.s_addr + ip4->ip_dst.s_addr;
73105cde7efSSepherosa Ziehau 			break;
73205cde7efSSepherosa Ziehau #endif
73305cde7efSSepherosa Ziehau #ifdef INET6
73405cde7efSSepherosa Ziehau 		case ETHERTYPE_IPV6:
73505cde7efSSepherosa Ziehau 			hash = ip6->ip6_src.s6_addr32[0] +
73605cde7efSSepherosa Ziehau 			    ip6->ip6_dst.s6_addr32[0];
73705cde7efSSepherosa Ziehau 			hash += ip6->ip6_src.s6_addr32[1] +
73805cde7efSSepherosa Ziehau 			    ip6->ip6_dst.s6_addr32[1];
73905cde7efSSepherosa Ziehau 			hash += ip6->ip6_src.s6_addr32[2] +
74005cde7efSSepherosa Ziehau 			    ip6->ip6_dst.s6_addr32[2];
74105cde7efSSepherosa Ziehau 			hash += ip6->ip6_src.s6_addr32[3] +
74205cde7efSSepherosa Ziehau 			    ip6->ip6_dst.s6_addr32[3];
74305cde7efSSepherosa Ziehau 			break;
74405cde7efSSepherosa Ziehau #endif
74505cde7efSSepherosa Ziehau 		default:
74605cde7efSSepherosa Ziehau 			hash = 0;
74705cde7efSSepherosa Ziehau 			break;
74805cde7efSSepherosa Ziehau 		}
74905cde7efSSepherosa Ziehau 		hash += th->th_sport + th->th_dport;
75005cde7efSSepherosa Ziehau 		bucket = &lc->lro_hash[hash % lc->lro_hashsz];
75105cde7efSSepherosa Ziehau 	}
75205cde7efSSepherosa Ziehau 
75362b5b6ecSBjoern A. Zeeb 	/* Try to find a matching previous segment. */
75405cde7efSSepherosa Ziehau 	LIST_FOREACH(le, bucket, hash_next) {
		/* Match on ethertype, then ports, then the address pair. */
75562b5b6ecSBjoern A. Zeeb 		if (le->eh_type != eh_type)
75662b5b6ecSBjoern A. Zeeb 			continue;
75762b5b6ecSBjoern A. Zeeb 		if (le->source_port != th->th_sport ||
75862b5b6ecSBjoern A. Zeeb 		    le->dest_port != th->th_dport)
75962b5b6ecSBjoern A. Zeeb 			continue;
76062b5b6ecSBjoern A. Zeeb 		switch (eh_type) {
76162b5b6ecSBjoern A. Zeeb #ifdef INET6
76262b5b6ecSBjoern A. Zeeb 		case ETHERTYPE_IPV6:
76362b5b6ecSBjoern A. Zeeb 			if (bcmp(&le->source_ip6, &ip6->ip6_src,
76462b5b6ecSBjoern A. Zeeb 			    sizeof(struct in6_addr)) != 0 ||
76562b5b6ecSBjoern A. Zeeb 			    bcmp(&le->dest_ip6, &ip6->ip6_dst,
76662b5b6ecSBjoern A. Zeeb 			    sizeof(struct in6_addr)) != 0)
76762b5b6ecSBjoern A. Zeeb 				continue;
76862b5b6ecSBjoern A. Zeeb 			break;
76962b5b6ecSBjoern A. Zeeb #endif
77062b5b6ecSBjoern A. Zeeb #ifdef INET
77162b5b6ecSBjoern A. Zeeb 		case ETHERTYPE_IP:
77262b5b6ecSBjoern A. Zeeb 			if (le->source_ip4 != ip4->ip_src.s_addr ||
77362b5b6ecSBjoern A. Zeeb 			    le->dest_ip4 != ip4->ip_dst.s_addr)
77462b5b6ecSBjoern A. Zeeb 				continue;
77562b5b6ecSBjoern A. Zeeb 			break;
77662b5b6ecSBjoern A. Zeeb #endif
7776c5087a8SJack F Vogel 		}
7786c5087a8SJack F Vogel 
779b9ec6f0bSSepherosa Ziehau 		if (force_flush) {
780b9ec6f0bSSepherosa Ziehau 			/*
			 * Unexpected flags (e.g. FIN) or TCP options were seen
			 * above: flush the matching entry first and report that
			 * this segment itself cannot be aggregated.
			 */
781b9ec6f0bSSepherosa Ziehau 			tcp_lro_active_remove(le);
782b9ec6f0bSSepherosa Ziehau 			tcp_lro_flush(lc, le);
783b9ec6f0bSSepherosa Ziehau 			return (TCP_LRO_CANNOT);
784b9ec6f0bSSepherosa Ziehau 		}
785b9ec6f0bSSepherosa Ziehau 
786ca712262SColin Percival 		/* Flush now if appending will result in overflow. */
7877ae3d4bfSSepherosa Ziehau 		if (le->p_len > (lc->lro_length_lim - tcp_data_len)) {
78851e3c20dSSepherosa Ziehau 			tcp_lro_active_remove(le);
78962b5b6ecSBjoern A. Zeeb 			tcp_lro_flush(lc, le);
			/* Leave the loop; the new-chain code below takes this segment. */
790ca712262SColin Percival 			break;
791ca712262SColin Percival 		}
792ca712262SColin Percival 
79362b5b6ecSBjoern A. Zeeb 		/* Try to append the new segment. */
79462b5b6ecSBjoern A. Zeeb 		if (__predict_false(seq != le->next_seq ||
79562b5b6ecSBjoern A. Zeeb 		    (tcp_data_len == 0 && le->ack_seq == th->th_ack))) {
79662b5b6ecSBjoern A. Zeeb 			/* Out of order packet or duplicate ACK. */
79751e3c20dSSepherosa Ziehau 			tcp_lro_active_remove(le);
79862b5b6ecSBjoern A. Zeeb 			tcp_lro_flush(lc, le);
79962b5b6ecSBjoern A. Zeeb 			return (TCP_LRO_CANNOT);
8006c5087a8SJack F Vogel 		}
8016c5087a8SJack F Vogel 
		/*
		 * force_flush is 0 here, so a non-zero l means the options are
		 * exactly the aligned timestamp layout checked earlier.
		 */
80262b5b6ecSBjoern A. Zeeb 		if (l != 0) {
8036c5087a8SJack F Vogel 			uint32_t tsval = ntohl(*(ts_ptr + 1));
80462b5b6ecSBjoern A. Zeeb 			/* Make sure timestamp values are increasing. */
80562b5b6ecSBjoern A. Zeeb 			/* XXX-BZ flip and use TSTMP_GEQ macro for this? */
			/* NOTE: on this rejection path the entry is left queued, not flushed. */
80662b5b6ecSBjoern A. Zeeb 			if (__predict_false(le->tsval > tsval ||
80762b5b6ecSBjoern A. Zeeb 			    *(ts_ptr + 2) == 0))
80862b5b6ecSBjoern A. Zeeb 				return (TCP_LRO_CANNOT);
80962b5b6ecSBjoern A. Zeeb 			le->tsval = tsval;
81062b5b6ecSBjoern A. Zeeb 			le->tsecr = *(ts_ptr + 2);
8116c5087a8SJack F Vogel 		}
8126c5087a8SJack F Vogel 
		/* Fold this segment's sequence/ACK/window state into the entry. */
81362b5b6ecSBjoern A. Zeeb 		le->next_seq += tcp_data_len;
81462b5b6ecSBjoern A. Zeeb 		le->ack_seq = th->th_ack;
81562b5b6ecSBjoern A. Zeeb 		le->window = th->th_win;
81662b5b6ecSBjoern A. Zeeb 		le->append_cnt++;
81762b5b6ecSBjoern A. Zeeb 
81862b5b6ecSBjoern A. Zeeb #ifdef TCP_LRO_UPDATE_CSUM
81962b5b6ecSBjoern A. Zeeb 		le->ulp_csum += tcp_lro_rx_csum_fixup(le, l3hdr, th,
82062b5b6ecSBjoern A. Zeeb 		    tcp_data_len, ~csum);
82162b5b6ecSBjoern A. Zeeb #endif
82262b5b6ecSBjoern A. Zeeb 
		/*
		 * No payload: the ACK number and window were recorded above, so
		 * the mbuf itself is no longer needed.
		 */
8236c5087a8SJack F Vogel 		if (tcp_data_len == 0) {
82462b5b6ecSBjoern A. Zeeb 			m_freem(m);
8257ae3d4bfSSepherosa Ziehau 			/*
8267ae3d4bfSSepherosa Ziehau 			 * Flush this LRO entry, if this ACK should not
8277ae3d4bfSSepherosa Ziehau 			 * be further delayed.
8287ae3d4bfSSepherosa Ziehau 			 */
8297ae3d4bfSSepherosa Ziehau 			if (le->append_cnt >= lc->lro_ackcnt_lim) {
83051e3c20dSSepherosa Ziehau 				tcp_lro_active_remove(le);
8317ae3d4bfSSepherosa Ziehau 				tcp_lro_flush(lc, le);
8327ae3d4bfSSepherosa Ziehau 			}
83362b5b6ecSBjoern A. Zeeb 			return (0);
8346c5087a8SJack F Vogel 		}
83562b5b6ecSBjoern A. Zeeb 
83662b5b6ecSBjoern A. Zeeb 		le->p_len += tcp_data_len;
83762b5b6ecSBjoern A. Zeeb 
83862b5b6ecSBjoern A. Zeeb 		/*
83962b5b6ecSBjoern A. Zeeb 		 * Adjust the mbuf so that m_data points to the first byte of
84062b5b6ecSBjoern A. Zeeb 		 * the ULP payload.  Adjust the mbuf to avoid complications and
84162b5b6ecSBjoern A. Zeeb 		 * append new segment to existing mbuf chain.
8426c5087a8SJack F Vogel 		 */
84362b5b6ecSBjoern A. Zeeb 		m_adj(m, m->m_pkthdr.len - tcp_data_len);
8449523d1bfSNavdeep Parhar 		m_demote_pkthdr(m);
8456c5087a8SJack F Vogel 
84662b5b6ecSBjoern A. Zeeb 		le->m_tail->m_next = m;
84762b5b6ecSBjoern A. Zeeb 		le->m_tail = m_last(m);
8486c5087a8SJack F Vogel 
84962b5b6ecSBjoern A. Zeeb 		/*
85062b5b6ecSBjoern A. Zeeb 		 * If a possible next full length packet would cause an
85162b5b6ecSBjoern A. Zeeb 		 * overflow, pro-actively flush now.
8526c5087a8SJack F Vogel 		 */
8537ae3d4bfSSepherosa Ziehau 		if (le->p_len > (lc->lro_length_lim - lc->ifp->if_mtu)) {
85451e3c20dSSepherosa Ziehau 			tcp_lro_active_remove(le);
85562b5b6ecSBjoern A. Zeeb 			tcp_lro_flush(lc, le);
8567127e6acSNavdeep Parhar 		} else
			/* Entry stays active; record the time of this append. */
8577127e6acSNavdeep Parhar 			getmicrotime(&le->mtime);
8586c5087a8SJack F Vogel 
85962b5b6ecSBjoern A. Zeeb 		return (0);
8606c5087a8SJack F Vogel 	}
86162b5b6ecSBjoern A. Zeeb 
862b9ec6f0bSSepherosa Ziehau 	if (force_flush) {
863b9ec6f0bSSepherosa Ziehau 		/*
864b9ec6f0bSSepherosa Ziehau 		 * Nothing to flush, but this segment can not be further
865b9ec6f0bSSepherosa Ziehau 		 * aggregated/delayed.
866b9ec6f0bSSepherosa Ziehau 		 */
867b9ec6f0bSSepherosa Ziehau 		return (TCP_LRO_CANNOT);
868b9ec6f0bSSepherosa Ziehau 	}
869b9ec6f0bSSepherosa Ziehau 
87062b5b6ecSBjoern A. Zeeb 	/* Try to find an empty slot. */
8711ea44822SSepherosa Ziehau 	if (LIST_EMPTY(&lc->lro_free))
872489f0c3cSSepherosa Ziehau 		return (TCP_LRO_NO_ENTRIES);
87362b5b6ecSBjoern A. Zeeb 
87462b5b6ecSBjoern A. Zeeb 	/* Start a new segment chain. */
8751ea44822SSepherosa Ziehau 	le = LIST_FIRST(&lc->lro_free);
8761ea44822SSepherosa Ziehau 	LIST_REMOVE(le, next);
87705cde7efSSepherosa Ziehau 	tcp_lro_active_insert(lc, bucket, le);
8787127e6acSNavdeep Parhar 	getmicrotime(&le->mtime);
87962b5b6ecSBjoern A. Zeeb 
88062b5b6ecSBjoern A. Zeeb 	/* Start filling in details. */
88162b5b6ecSBjoern A. Zeeb 	switch (eh_type) {
88262b5b6ecSBjoern A. Zeeb #ifdef INET6
88362b5b6ecSBjoern A. Zeeb 	case ETHERTYPE_IPV6:
88462b5b6ecSBjoern A. Zeeb 		le->le_ip6 = ip6;
88562b5b6ecSBjoern A. Zeeb 		le->source_ip6 = ip6->ip6_src;
88662b5b6ecSBjoern A. Zeeb 		le->dest_ip6 = ip6->ip6_dst;
88762b5b6ecSBjoern A. Zeeb 		le->eh_type = eh_type;
		/* IPv6: p_len excludes the Ethernet and IPv6 headers. */
88862b5b6ecSBjoern A. Zeeb 		le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN - sizeof(*ip6);
88962b5b6ecSBjoern A. Zeeb 		break;
89062b5b6ecSBjoern A. Zeeb #endif
89162b5b6ecSBjoern A. Zeeb #ifdef INET
89262b5b6ecSBjoern A. Zeeb 	case ETHERTYPE_IP:
89362b5b6ecSBjoern A. Zeeb 		le->le_ip4 = ip4;
89462b5b6ecSBjoern A. Zeeb 		le->source_ip4 = ip4->ip_src.s_addr;
89562b5b6ecSBjoern A. Zeeb 		le->dest_ip4 = ip4->ip_dst.s_addr;
89662b5b6ecSBjoern A. Zeeb 		le->eh_type = eh_type;
		/* IPv4: p_len excludes only the Ethernet header (IP header counted). */
89762b5b6ecSBjoern A. Zeeb 		le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN;
89862b5b6ecSBjoern A. Zeeb 		break;
89962b5b6ecSBjoern A. Zeeb #endif
9006c5087a8SJack F Vogel 	}
90162b5b6ecSBjoern A. Zeeb 	le->source_port = th->th_sport;
90262b5b6ecSBjoern A. Zeeb 	le->dest_port = th->th_dport;
90362b5b6ecSBjoern A. Zeeb 
90462b5b6ecSBjoern A. Zeeb 	le->next_seq = seq + tcp_data_len;
90562b5b6ecSBjoern A. Zeeb 	le->ack_seq = th->th_ack;
90662b5b6ecSBjoern A. Zeeb 	le->window = th->th_win;
90762b5b6ecSBjoern A. Zeeb 	if (l != 0) {
		/* tsval is stored after ntohl(); tsecr is stored as read, unconverted. */
90862b5b6ecSBjoern A. Zeeb 		le->timestamp = 1;
90962b5b6ecSBjoern A. Zeeb 		le->tsval = ntohl(*(ts_ptr + 1));
91062b5b6ecSBjoern A. Zeeb 		le->tsecr = *(ts_ptr + 2);
91162b5b6ecSBjoern A. Zeeb 	}
91262b5b6ecSBjoern A. Zeeb 
91362b5b6ecSBjoern A. Zeeb #ifdef TCP_LRO_UPDATE_CSUM
91462b5b6ecSBjoern A. Zeeb 	/*
91562b5b6ecSBjoern A. Zeeb 	 * Do not touch the csum of the first packet.  However save the
91662b5b6ecSBjoern A. Zeeb 	 * "adjusted" checksum of just the source and destination addresses,
91762b5b6ecSBjoern A. Zeeb 	 * the next header and the TCP payload.  The length and TCP header
91862b5b6ecSBjoern A. Zeeb 	 * parts may change, so we remove those from the saved checksum and
91962b5b6ecSBjoern A. Zeeb 	 * re-add with final values on tcp_lro_flush() if needed.
92062b5b6ecSBjoern A. Zeeb 	 */
92162b5b6ecSBjoern A. Zeeb 	KASSERT(le->ulp_csum == 0, ("%s: le=%p le->ulp_csum=0x%04x\n",
92262b5b6ecSBjoern A. Zeeb 	    __func__, le, le->ulp_csum));
92362b5b6ecSBjoern A. Zeeb 
92462b5b6ecSBjoern A. Zeeb 	le->ulp_csum = tcp_lro_rx_csum_fixup(le, l3hdr, th, tcp_data_len,
92562b5b6ecSBjoern A. Zeeb 	    ~csum);
92662b5b6ecSBjoern A. Zeeb 	th->th_sum = csum;	/* Restore checksum on first packet. */
92762b5b6ecSBjoern A. Zeeb #endif
92862b5b6ecSBjoern A. Zeeb 
	/* The first mbuf keeps its headers and becomes the head of the chain. */
92962b5b6ecSBjoern A. Zeeb 	le->m_head = m;
93062b5b6ecSBjoern A. Zeeb 	le->m_tail = m_last(m);
93162b5b6ecSBjoern A. Zeeb 
93262b5b6ecSBjoern A. Zeeb 	return (0);
93362b5b6ecSBjoern A. Zeeb }
93462b5b6ecSBjoern A. Zeeb 
/*
 * Non-static LRO receive entry point.  Hands the mbuf to tcp_lro_rx2() with
 * hash-bucket selection switched on and returns that function's result
 * unchanged.
 */
int
tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
{
	const int use_hash = 1;

	return (tcp_lro_rx2(lc, m, csum, use_hash));
}
94105cde7efSSepherosa Ziehau 
942e936121dSHans Petter Selasky void
943e936121dSHans Petter Selasky tcp_lro_queue_mbuf(struct lro_ctrl *lc, struct mbuf *mb)
944e936121dSHans Petter Selasky {
945e936121dSHans Petter Selasky 	/* sanity checks */
946e936121dSHans Petter Selasky 	if (__predict_false(lc->ifp == NULL || lc->lro_mbuf_data == NULL ||
947e936121dSHans Petter Selasky 	    lc->lro_mbuf_max == 0)) {
948e936121dSHans Petter Selasky 		/* packet drop */
949e936121dSHans Petter Selasky 		m_freem(mb);
950e936121dSHans Petter Selasky 		return;
951e936121dSHans Petter Selasky 	}
952e936121dSHans Petter Selasky 
953e936121dSHans Petter Selasky 	/* check if packet is not LRO capable */
954e936121dSHans Petter Selasky 	if (__predict_false(mb->m_pkthdr.csum_flags == 0 ||
955e936121dSHans Petter Selasky 	    (lc->ifp->if_capenable & IFCAP_LRO) == 0)) {
956e936121dSHans Petter Selasky 
957e936121dSHans Petter Selasky 		/* input packet to network layer */
958e936121dSHans Petter Selasky 		(*lc->ifp->if_input) (lc->ifp, mb);
959e936121dSHans Petter Selasky 		return;
960e936121dSHans Petter Selasky 	}
961e936121dSHans Petter Selasky 
962fc271df3SHans Petter Selasky 	/* create sequence number */
963fc271df3SHans Petter Selasky 	lc->lro_mbuf_data[lc->lro_mbuf_count].seq =
964fc271df3SHans Petter Selasky 	    (((uint64_t)M_HASHTYPE_GET(mb)) << 56) |
965fc271df3SHans Petter Selasky 	    (((uint64_t)mb->m_pkthdr.flowid) << 24) |
966fc271df3SHans Petter Selasky 	    ((uint64_t)lc->lro_mbuf_count);
967e936121dSHans Petter Selasky 
968e936121dSHans Petter Selasky 	/* enter mbuf */
969*f8acc03eSNavdeep Parhar 	lc->lro_mbuf_data[lc->lro_mbuf_count].mb = mb;
970*f8acc03eSNavdeep Parhar 
971*f8acc03eSNavdeep Parhar 	/* flush if array is full */
972*f8acc03eSNavdeep Parhar 	if (__predict_false(++lc->lro_mbuf_count == lc->lro_mbuf_max))
973*f8acc03eSNavdeep Parhar 		tcp_lro_flush_all(lc);
974e936121dSHans Petter Selasky }
975e936121dSHans Petter Selasky 
97662b5b6ecSBjoern A. Zeeb /* end */
977