xref: /freebsd/sys/netinet/tcp_lro.c (revision a321cc5dc908a14d42e57e2468923937f18c21fc)
1 /*-
2  * Copyright (c) 2007, Myricom Inc.
3  * Copyright (c) 2008, Intel Corporation.
4  * Copyright (c) 2012 The FreeBSD Foundation
5  * Copyright (c) 2016 Mellanox Technologies.
6  * All rights reserved.
7  *
8  * Portions of this software were developed by Bjoern Zeeb
9  * under sponsorship from the FreeBSD Foundation.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35 
36 #include "opt_inet.h"
37 #include "opt_inet6.h"
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/kernel.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/socket.h>
45 
46 #include <net/if.h>
47 #include <net/if_var.h>
48 #include <net/ethernet.h>
49 #include <net/vnet.h>
50 
51 #include <netinet/in_systm.h>
52 #include <netinet/in.h>
53 #include <netinet/ip6.h>
54 #include <netinet/ip.h>
55 #include <netinet/ip_var.h>
56 #include <netinet/tcp.h>
57 #include <netinet/tcp_lro.h>
58 
59 #include <netinet6/ip6_var.h>
60 
61 #include <machine/in_cksum.h>
62 
/* Malloc type used for the mbuf pointer array and the LRO entry pool. */
static MALLOC_DEFINE(M_LRO, "LRO", "LRO control structures");

/*
 * When TCP_LRO_UPDATE_CSUM is defined the IP header and TCP checksums of a
 * merged packet are recomputed when the entry is flushed.  When it is not
 * defined the checksum fields are overwritten with TCP_LRO_INVALID_CSUM.
 */
#define	TCP_LRO_UPDATE_CSUM	1
#ifndef	TCP_LRO_UPDATE_CSUM
#define	TCP_LRO_INVALID_CSUM	0x0000
#endif

/* Flushes every entry on the active list; defined later in this file. */
static void	tcp_lro_rx_done(struct lro_ctrl *lc);
71 
72 int
73 tcp_lro_init(struct lro_ctrl *lc)
74 {
75 	return (tcp_lro_init_args(lc, NULL, TCP_LRO_ENTRIES, 0));
76 }
77 
78 int
79 tcp_lro_init_args(struct lro_ctrl *lc, struct ifnet *ifp,
80     unsigned lro_entries, unsigned lro_mbufs)
81 {
82 	struct lro_entry *le;
83 	size_t size;
84 	unsigned i;
85 
86 	lc->lro_bad_csum = 0;
87 	lc->lro_queued = 0;
88 	lc->lro_flushed = 0;
89 	lc->lro_cnt = 0;
90 	lc->lro_mbuf_count = 0;
91 	lc->lro_mbuf_max = lro_mbufs;
92 	lc->lro_cnt = lro_entries;
93 	lc->lro_ackcnt_lim = TCP_LRO_ACKCNT_MAX;
94 	lc->lro_length_lim = TCP_LRO_LENGTH_MAX;
95 	lc->ifp = ifp;
96 	LIST_INIT(&lc->lro_free);
97 	LIST_INIT(&lc->lro_active);
98 
99 	/* compute size to allocate */
100 	size = (lro_mbufs * sizeof(struct mbuf *)) +
101 	    (lro_entries * sizeof(*le));
102 	lc->lro_mbuf_data = (struct mbuf **)
103 	    malloc(size, M_LRO, M_NOWAIT | M_ZERO);
104 
105 	/* check for out of memory */
106 	if (lc->lro_mbuf_data == NULL) {
107 		memset(lc, 0, sizeof(*lc));
108 		return (ENOMEM);
109 	}
110 	/* compute offset for LRO entries */
111 	le = (struct lro_entry *)
112 	    (lc->lro_mbuf_data + lro_mbufs);
113 
114 	/* setup linked list */
115 	for (i = 0; i != lro_entries; i++)
116 		LIST_INSERT_HEAD(&lc->lro_free, le + i, next);
117 
118 	return (0);
119 }
120 
121 void
122 tcp_lro_free(struct lro_ctrl *lc)
123 {
124 	struct lro_entry *le;
125 	unsigned x;
126 
127 	/* reset LRO free list */
128 	LIST_INIT(&lc->lro_free);
129 
130 	/* free active mbufs, if any */
131 	while ((le = LIST_FIRST(&lc->lro_active)) != NULL) {
132 		LIST_REMOVE(le, next);
133 		m_freem(le->m_head);
134 	}
135 
136 	/* free mbuf array, if any */
137 	for (x = 0; x != lc->lro_mbuf_count; x++)
138 		m_freem(lc->lro_mbuf_data[x]);
139 	lc->lro_mbuf_count = 0;
140 
141 	/* free allocated memory, if any */
142 	free(lc->lro_mbuf_data, M_LRO);
143 	lc->lro_mbuf_data = NULL;
144 }
145 
146 #ifdef TCP_LRO_UPDATE_CSUM
147 static uint16_t
148 tcp_lro_csum_th(struct tcphdr *th)
149 {
150 	uint32_t ch;
151 	uint16_t *p, l;
152 
153 	ch = th->th_sum = 0x0000;
154 	l = th->th_off;
155 	p = (uint16_t *)th;
156 	while (l > 0) {
157 		ch += *p;
158 		p++;
159 		ch += *p;
160 		p++;
161 		l--;
162 	}
163 	while (ch > 0xffff)
164 		ch = (ch >> 16) + (ch & 0xffff);
165 
166 	return (ch & 0xffff);
167 }
168 
169 static uint16_t
170 tcp_lro_rx_csum_fixup(struct lro_entry *le, void *l3hdr, struct tcphdr *th,
171     uint16_t tcp_data_len, uint16_t csum)
172 {
173 	uint32_t c;
174 	uint16_t cs;
175 
176 	c = csum;
177 
178 	/* Remove length from checksum. */
179 	switch (le->eh_type) {
180 #ifdef INET6
181 	case ETHERTYPE_IPV6:
182 	{
183 		struct ip6_hdr *ip6;
184 
185 		ip6 = (struct ip6_hdr *)l3hdr;
186 		if (le->append_cnt == 0)
187 			cs = ip6->ip6_plen;
188 		else {
189 			uint32_t cx;
190 
191 			cx = ntohs(ip6->ip6_plen);
192 			cs = in6_cksum_pseudo(ip6, cx, ip6->ip6_nxt, 0);
193 		}
194 		break;
195 	}
196 #endif
197 #ifdef INET
198 	case ETHERTYPE_IP:
199 	{
200 		struct ip *ip4;
201 
202 		ip4 = (struct ip *)l3hdr;
203 		if (le->append_cnt == 0)
204 			cs = ip4->ip_len;
205 		else {
206 			cs = in_addword(ntohs(ip4->ip_len) - sizeof(*ip4),
207 			    IPPROTO_TCP);
208 			cs = in_pseudo(ip4->ip_src.s_addr, ip4->ip_dst.s_addr,
209 			    htons(cs));
210 		}
211 		break;
212 	}
213 #endif
214 	default:
215 		cs = 0;		/* Keep compiler happy. */
216 	}
217 
218 	cs = ~cs;
219 	c += cs;
220 
221 	/* Remove TCP header csum. */
222 	cs = ~tcp_lro_csum_th(th);
223 	c += cs;
224 	while (c > 0xffff)
225 		c = (c >> 16) + (c & 0xffff);
226 
227 	return (c & 0xffff);
228 }
229 #endif
230 
231 static void
232 tcp_lro_rx_done(struct lro_ctrl *lc)
233 {
234 	struct lro_entry *le;
235 
236 	while ((le = LIST_FIRST(&lc->lro_active)) != NULL) {
237 		LIST_REMOVE(le, next);
238 		tcp_lro_flush(lc, le);
239 	}
240 }
241 
242 void
243 tcp_lro_flush_inactive(struct lro_ctrl *lc, const struct timeval *timeout)
244 {
245 	struct lro_entry *le, *le_tmp;
246 	struct timeval tv;
247 
248 	if (LIST_EMPTY(&lc->lro_active))
249 		return;
250 
251 	getmicrotime(&tv);
252 	timevalsub(&tv, timeout);
253 	LIST_FOREACH_SAFE(le, &lc->lro_active, next, le_tmp) {
254 		if (timevalcmp(&tv, &le->mtime, >=)) {
255 			LIST_REMOVE(le, next);
256 			tcp_lro_flush(lc, le);
257 		}
258 	}
259 }
260 
261 void
262 tcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *le)
263 {
264 
265 	if (le->append_cnt > 0) {
266 		struct tcphdr *th;
267 		uint16_t p_len;
268 
269 		p_len = htons(le->p_len);
270 		switch (le->eh_type) {
271 #ifdef INET6
272 		case ETHERTYPE_IPV6:
273 		{
274 			struct ip6_hdr *ip6;
275 
276 			ip6 = le->le_ip6;
277 			ip6->ip6_plen = p_len;
278 			th = (struct tcphdr *)(ip6 + 1);
279 			le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID |
280 			    CSUM_PSEUDO_HDR;
281 			le->p_len += ETHER_HDR_LEN + sizeof(*ip6);
282 			break;
283 		}
284 #endif
285 #ifdef INET
286 		case ETHERTYPE_IP:
287 		{
288 			struct ip *ip4;
289 #ifdef TCP_LRO_UPDATE_CSUM
290 			uint32_t cl;
291 			uint16_t c;
292 #endif
293 
294 			ip4 = le->le_ip4;
295 #ifdef TCP_LRO_UPDATE_CSUM
296 			/* Fix IP header checksum for new length. */
297 			c = ~ip4->ip_sum;
298 			cl = c;
299 			c = ~ip4->ip_len;
300 			cl += c + p_len;
301 			while (cl > 0xffff)
302 				cl = (cl >> 16) + (cl & 0xffff);
303 			c = cl;
304 			ip4->ip_sum = ~c;
305 #else
306 			ip4->ip_sum = TCP_LRO_INVALID_CSUM;
307 #endif
308 			ip4->ip_len = p_len;
309 			th = (struct tcphdr *)(ip4 + 1);
310 			le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID |
311 			    CSUM_PSEUDO_HDR | CSUM_IP_CHECKED | CSUM_IP_VALID;
312 			le->p_len += ETHER_HDR_LEN;
313 			break;
314 		}
315 #endif
316 		default:
317 			th = NULL;	/* Keep compiler happy. */
318 		}
319 		le->m_head->m_pkthdr.csum_data = 0xffff;
320 		le->m_head->m_pkthdr.len = le->p_len;
321 
322 		/* Incorporate the latest ACK into the TCP header. */
323 		th->th_ack = le->ack_seq;
324 		th->th_win = le->window;
325 		/* Incorporate latest timestamp into the TCP header. */
326 		if (le->timestamp != 0) {
327 			uint32_t *ts_ptr;
328 
329 			ts_ptr = (uint32_t *)(th + 1);
330 			ts_ptr[1] = htonl(le->tsval);
331 			ts_ptr[2] = le->tsecr;
332 		}
333 #ifdef TCP_LRO_UPDATE_CSUM
334 		/* Update the TCP header checksum. */
335 		le->ulp_csum += p_len;
336 		le->ulp_csum += tcp_lro_csum_th(th);
337 		while (le->ulp_csum > 0xffff)
338 			le->ulp_csum = (le->ulp_csum >> 16) +
339 			    (le->ulp_csum & 0xffff);
340 		th->th_sum = (le->ulp_csum & 0xffff);
341 		th->th_sum = ~th->th_sum;
342 #else
343 		th->th_sum = TCP_LRO_INVALID_CSUM;
344 #endif
345 	}
346 
347 	(*lc->ifp->if_input)(lc->ifp, le->m_head);
348 	lc->lro_queued += le->append_cnt + 1;
349 	lc->lro_flushed++;
350 	bzero(le, sizeof(*le));
351 	LIST_INSERT_HEAD(&lc->lro_free, le, next);
352 }
353 
354 static int
355 tcp_lro_mbuf_compare_header(const void *ppa, const void *ppb)
356 {
357 	const struct mbuf *ma = *((const struct mbuf * const *)ppa);
358 	const struct mbuf *mb = *((const struct mbuf * const *)ppb);
359 	int ret;
360 
361 	ret = M_HASHTYPE_GET(ma) - M_HASHTYPE_GET(mb);
362 	if (ret != 0)
363 		goto done;
364 
365 	if (ma->m_pkthdr.flowid > mb->m_pkthdr.flowid)
366 		return (1);
367 	else if (ma->m_pkthdr.flowid < mb->m_pkthdr.flowid)
368 		return (-1);
369 
370 	ret = TCP_LRO_SEQUENCE(ma) - TCP_LRO_SEQUENCE(mb);
371 done:
372 	return (ret);
373 }
374 
375 void
376 tcp_lro_flush_all(struct lro_ctrl *lc)
377 {
378 	uint32_t hashtype;
379 	uint32_t flowid;
380 	unsigned x;
381 
382 	/* check if no mbufs to flush */
383 	if (lc->lro_mbuf_count == 0)
384 		goto done;
385 
386 	/* sort all mbufs according to stream */
387 	qsort(lc->lro_mbuf_data, lc->lro_mbuf_count, sizeof(struct mbuf *),
388 	    &tcp_lro_mbuf_compare_header);
389 
390 	/* input data into LRO engine, stream by stream */
391 	flowid = 0;
392 	hashtype = M_HASHTYPE_NONE;
393 	for (x = 0; x != lc->lro_mbuf_count; x++) {
394 		struct mbuf *mb;
395 
396 		mb = lc->lro_mbuf_data[x];
397 
398 		/* check for new stream */
399 		if (mb->m_pkthdr.flowid != flowid ||
400 		    M_HASHTYPE_GET(mb) != hashtype) {
401 			flowid = mb->m_pkthdr.flowid;
402 			hashtype = M_HASHTYPE_GET(mb);
403 
404 			/* flush active streams */
405 			tcp_lro_rx_done(lc);
406 		}
407 #ifdef TCP_LRO_RESET_SEQUENCE
408 		/* reset sequence number */
409 		TCP_LRO_SEQUENCE(mb) = 0;
410 #endif
411 		/* add packet to LRO engine */
412 		if (tcp_lro_rx(lc, mb, 0) != 0) {
413 			/* input packet to network layer */
414 			(*lc->ifp->if_input)(lc->ifp, mb);
415 			lc->lro_queued++;
416 			lc->lro_flushed++;
417 		}
418 	}
419 done:
420 	/* flush active streams */
421 	tcp_lro_rx_done(lc);
422 
423 	lc->lro_mbuf_count = 0;
424 }
425 
426 #ifdef INET6
427 static int
428 tcp_lro_rx_ipv6(struct lro_ctrl *lc, struct mbuf *m, struct ip6_hdr *ip6,
429     struct tcphdr **th)
430 {
431 
432 	/* XXX-BZ we should check the flow-label. */
433 
434 	/* XXX-BZ We do not yet support ext. hdrs. */
435 	if (ip6->ip6_nxt != IPPROTO_TCP)
436 		return (TCP_LRO_NOT_SUPPORTED);
437 
438 	/* Find the TCP header. */
439 	*th = (struct tcphdr *)(ip6 + 1);
440 
441 	return (0);
442 }
443 #endif
444 
445 #ifdef INET
446 static int
447 tcp_lro_rx_ipv4(struct lro_ctrl *lc, struct mbuf *m, struct ip *ip4,
448     struct tcphdr **th)
449 {
450 	int csum_flags;
451 	uint16_t csum;
452 
453 	if (ip4->ip_p != IPPROTO_TCP)
454 		return (TCP_LRO_NOT_SUPPORTED);
455 
456 	/* Ensure there are no options. */
457 	if ((ip4->ip_hl << 2) != sizeof (*ip4))
458 		return (TCP_LRO_CANNOT);
459 
460 	/* .. and the packet is not fragmented. */
461 	if (ip4->ip_off & htons(IP_MF|IP_OFFMASK))
462 		return (TCP_LRO_CANNOT);
463 
464 	/* Legacy IP has a header checksum that needs to be correct. */
465 	csum_flags = m->m_pkthdr.csum_flags;
466 	if (csum_flags & CSUM_IP_CHECKED) {
467 		if (__predict_false((csum_flags & CSUM_IP_VALID) == 0)) {
468 			lc->lro_bad_csum++;
469 			return (TCP_LRO_CANNOT);
470 		}
471 	} else {
472 		csum = in_cksum_hdr(ip4);
473 		if (__predict_false((csum) != 0)) {
474 			lc->lro_bad_csum++;
475 			return (TCP_LRO_CANNOT);
476 		}
477 	}
478 
479 	/* Find the TCP header (we assured there are no IP options). */
480 	*th = (struct tcphdr *)(ip4 + 1);
481 
482 	return (0);
483 }
484 #endif
485 
486 int
487 tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
488 {
489 	struct lro_entry *le;
490 	struct ether_header *eh;
491 #ifdef INET6
492 	struct ip6_hdr *ip6 = NULL;	/* Keep compiler happy. */
493 #endif
494 #ifdef INET
495 	struct ip *ip4 = NULL;		/* Keep compiler happy. */
496 #endif
497 	struct tcphdr *th;
498 	void *l3hdr = NULL;		/* Keep compiler happy. */
499 	uint32_t *ts_ptr;
500 	tcp_seq seq;
501 	int error, ip_len, l;
502 	uint16_t eh_type, tcp_data_len;
503 
504 	/* We expect a contiguous header [eh, ip, tcp]. */
505 
506 	eh = mtod(m, struct ether_header *);
507 	eh_type = ntohs(eh->ether_type);
508 	switch (eh_type) {
509 #ifdef INET6
510 	case ETHERTYPE_IPV6:
511 	{
512 		CURVNET_SET(lc->ifp->if_vnet);
513 		if (V_ip6_forwarding != 0) {
514 			/* XXX-BZ stats but changing lro_ctrl is a problem. */
515 			CURVNET_RESTORE();
516 			return (TCP_LRO_CANNOT);
517 		}
518 		CURVNET_RESTORE();
519 		l3hdr = ip6 = (struct ip6_hdr *)(eh + 1);
520 		error = tcp_lro_rx_ipv6(lc, m, ip6, &th);
521 		if (error != 0)
522 			return (error);
523 		tcp_data_len = ntohs(ip6->ip6_plen);
524 		ip_len = sizeof(*ip6) + tcp_data_len;
525 		break;
526 	}
527 #endif
528 #ifdef INET
529 	case ETHERTYPE_IP:
530 	{
531 		CURVNET_SET(lc->ifp->if_vnet);
532 		if (V_ipforwarding != 0) {
533 			/* XXX-BZ stats but changing lro_ctrl is a problem. */
534 			CURVNET_RESTORE();
535 			return (TCP_LRO_CANNOT);
536 		}
537 		CURVNET_RESTORE();
538 		l3hdr = ip4 = (struct ip *)(eh + 1);
539 		error = tcp_lro_rx_ipv4(lc, m, ip4, &th);
540 		if (error != 0)
541 			return (error);
542 		ip_len = ntohs(ip4->ip_len);
543 		tcp_data_len = ip_len - sizeof(*ip4);
544 		break;
545 	}
546 #endif
547 	/* XXX-BZ what happens in case of VLAN(s)? */
548 	default:
549 		return (TCP_LRO_NOT_SUPPORTED);
550 	}
551 
552 	/*
553 	 * If the frame is padded beyond the end of the IP packet, then we must
554 	 * trim the extra bytes off.
555 	 */
556 	l = m->m_pkthdr.len - (ETHER_HDR_LEN + ip_len);
557 	if (l != 0) {
558 		if (l < 0)
559 			/* Truncated packet. */
560 			return (TCP_LRO_CANNOT);
561 
562 		m_adj(m, -l);
563 	}
564 
565 	/*
566 	 * Check TCP header constraints.
567 	 */
568 	/* Ensure no bits set besides ACK or PSH. */
569 	if ((th->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
570 		return (TCP_LRO_CANNOT);
571 
572 	/* XXX-BZ We lose a AKC|PUSH flag concatinating multiple segments. */
573 	/* XXX-BZ Ideally we'd flush on PUSH? */
574 
575 	/*
576 	 * Check for timestamps.
577 	 * Since the only option we handle are timestamps, we only have to
578 	 * handle the simple case of aligned timestamps.
579 	 */
580 	l = (th->th_off << 2);
581 	tcp_data_len -= l;
582 	l -= sizeof(*th);
583 	ts_ptr = (uint32_t *)(th + 1);
584 	if (l != 0 && (__predict_false(l != TCPOLEN_TSTAMP_APPA) ||
585 	    (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|
586 	    TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP))))
587 		return (TCP_LRO_CANNOT);
588 
589 	/* If the driver did not pass in the checksum, set it now. */
590 	if (csum == 0x0000)
591 		csum = th->th_sum;
592 
593 	seq = ntohl(th->th_seq);
594 
595 	/* Try to find a matching previous segment. */
596 	LIST_FOREACH(le, &lc->lro_active, next) {
597 		if (le->eh_type != eh_type)
598 			continue;
599 		if (le->source_port != th->th_sport ||
600 		    le->dest_port != th->th_dport)
601 			continue;
602 		switch (eh_type) {
603 #ifdef INET6
604 		case ETHERTYPE_IPV6:
605 			if (bcmp(&le->source_ip6, &ip6->ip6_src,
606 			    sizeof(struct in6_addr)) != 0 ||
607 			    bcmp(&le->dest_ip6, &ip6->ip6_dst,
608 			    sizeof(struct in6_addr)) != 0)
609 				continue;
610 			break;
611 #endif
612 #ifdef INET
613 		case ETHERTYPE_IP:
614 			if (le->source_ip4 != ip4->ip_src.s_addr ||
615 			    le->dest_ip4 != ip4->ip_dst.s_addr)
616 				continue;
617 			break;
618 #endif
619 		}
620 
621 		/* Flush now if appending will result in overflow. */
622 		if (le->p_len > (lc->lro_length_lim - tcp_data_len)) {
623 			LIST_REMOVE(le, next);
624 			tcp_lro_flush(lc, le);
625 			break;
626 		}
627 
628 		/* Try to append the new segment. */
629 		if (__predict_false(seq != le->next_seq ||
630 		    (tcp_data_len == 0 && le->ack_seq == th->th_ack))) {
631 			/* Out of order packet or duplicate ACK. */
632 			LIST_REMOVE(le, next);
633 			tcp_lro_flush(lc, le);
634 			return (TCP_LRO_CANNOT);
635 		}
636 
637 		if (l != 0) {
638 			uint32_t tsval = ntohl(*(ts_ptr + 1));
639 			/* Make sure timestamp values are increasing. */
640 			/* XXX-BZ flip and use TSTMP_GEQ macro for this? */
641 			if (__predict_false(le->tsval > tsval ||
642 			    *(ts_ptr + 2) == 0))
643 				return (TCP_LRO_CANNOT);
644 			le->tsval = tsval;
645 			le->tsecr = *(ts_ptr + 2);
646 		}
647 
648 		le->next_seq += tcp_data_len;
649 		le->ack_seq = th->th_ack;
650 		le->window = th->th_win;
651 		le->append_cnt++;
652 
653 #ifdef TCP_LRO_UPDATE_CSUM
654 		le->ulp_csum += tcp_lro_rx_csum_fixup(le, l3hdr, th,
655 		    tcp_data_len, ~csum);
656 #endif
657 
658 		if (tcp_data_len == 0) {
659 			m_freem(m);
660 			/*
661 			 * Flush this LRO entry, if this ACK should not
662 			 * be further delayed.
663 			 */
664 			if (le->append_cnt >= lc->lro_ackcnt_lim) {
665 				LIST_REMOVE(le, next);
666 				tcp_lro_flush(lc, le);
667 			}
668 			return (0);
669 		}
670 
671 		le->p_len += tcp_data_len;
672 
673 		/*
674 		 * Adjust the mbuf so that m_data points to the first byte of
675 		 * the ULP payload.  Adjust the mbuf to avoid complications and
676 		 * append new segment to existing mbuf chain.
677 		 */
678 		m_adj(m, m->m_pkthdr.len - tcp_data_len);
679 		m_demote_pkthdr(m);
680 
681 		le->m_tail->m_next = m;
682 		le->m_tail = m_last(m);
683 
684 		/*
685 		 * If a possible next full length packet would cause an
686 		 * overflow, pro-actively flush now.
687 		 */
688 		if (le->p_len > (lc->lro_length_lim - lc->ifp->if_mtu)) {
689 			LIST_REMOVE(le, next);
690 			tcp_lro_flush(lc, le);
691 		} else
692 			getmicrotime(&le->mtime);
693 
694 		return (0);
695 	}
696 
697 	/* Try to find an empty slot. */
698 	if (LIST_EMPTY(&lc->lro_free))
699 		return (TCP_LRO_NO_ENTRIES);
700 
701 	/* Start a new segment chain. */
702 	le = LIST_FIRST(&lc->lro_free);
703 	LIST_REMOVE(le, next);
704 	LIST_INSERT_HEAD(&lc->lro_active, le, next);
705 	getmicrotime(&le->mtime);
706 
707 	/* Start filling in details. */
708 	switch (eh_type) {
709 #ifdef INET6
710 	case ETHERTYPE_IPV6:
711 		le->le_ip6 = ip6;
712 		le->source_ip6 = ip6->ip6_src;
713 		le->dest_ip6 = ip6->ip6_dst;
714 		le->eh_type = eh_type;
715 		le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN - sizeof(*ip6);
716 		break;
717 #endif
718 #ifdef INET
719 	case ETHERTYPE_IP:
720 		le->le_ip4 = ip4;
721 		le->source_ip4 = ip4->ip_src.s_addr;
722 		le->dest_ip4 = ip4->ip_dst.s_addr;
723 		le->eh_type = eh_type;
724 		le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN;
725 		break;
726 #endif
727 	}
728 	le->source_port = th->th_sport;
729 	le->dest_port = th->th_dport;
730 
731 	le->next_seq = seq + tcp_data_len;
732 	le->ack_seq = th->th_ack;
733 	le->window = th->th_win;
734 	if (l != 0) {
735 		le->timestamp = 1;
736 		le->tsval = ntohl(*(ts_ptr + 1));
737 		le->tsecr = *(ts_ptr + 2);
738 	}
739 
740 #ifdef TCP_LRO_UPDATE_CSUM
741 	/*
742 	 * Do not touch the csum of the first packet.  However save the
743 	 * "adjusted" checksum of just the source and destination addresses,
744 	 * the next header and the TCP payload.  The length and TCP header
745 	 * parts may change, so we remove those from the saved checksum and
746 	 * re-add with final values on tcp_lro_flush() if needed.
747 	 */
748 	KASSERT(le->ulp_csum == 0, ("%s: le=%p le->ulp_csum=0x%04x\n",
749 	    __func__, le, le->ulp_csum));
750 
751 	le->ulp_csum = tcp_lro_rx_csum_fixup(le, l3hdr, th, tcp_data_len,
752 	    ~csum);
753 	th->th_sum = csum;	/* Restore checksum on first packet. */
754 #endif
755 
756 	le->m_head = m;
757 	le->m_tail = m_last(m);
758 
759 	return (0);
760 }
761 
762 void
763 tcp_lro_queue_mbuf(struct lro_ctrl *lc, struct mbuf *mb)
764 {
765 	/* sanity checks */
766 	if (__predict_false(lc->ifp == NULL || lc->lro_mbuf_data == NULL ||
767 	    lc->lro_mbuf_max == 0)) {
768 		/* packet drop */
769 		m_freem(mb);
770 		return;
771 	}
772 
773 	/* check if packet is not LRO capable */
774 	if (__predict_false(mb->m_pkthdr.csum_flags == 0 ||
775 	    (lc->ifp->if_capenable & IFCAP_LRO) == 0)) {
776 		lc->lro_flushed++;
777 		lc->lro_queued++;
778 
779 		/* input packet to network layer */
780 		(*lc->ifp->if_input) (lc->ifp, mb);
781 		return;
782 	}
783 
784 	/* check if array is full */
785 	if (__predict_false(lc->lro_mbuf_count == lc->lro_mbuf_max))
786 		tcp_lro_flush_all(lc);
787 
788 	/* store sequence number */
789 	TCP_LRO_SEQUENCE(mb) = lc->lro_mbuf_count;
790 
791 	/* enter mbuf */
792 	lc->lro_mbuf_data[lc->lro_mbuf_count++] = mb;
793 }
794 
795 /* end */
796