xref: /freebsd/sys/netinet/tcp_lro.h (revision b45daaea95abd8bda52caaacf120f9197caab3e7)
/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2006, Myricom Inc.
 * Copyright (c) 2008, Intel Corporation.
 * Copyright (c) 2016-2021 Mellanox Technologies.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
326c5087a8SJack F Vogel 
336c5087a8SJack F Vogel #ifndef _TCP_LRO_H_
346c5087a8SJack F Vogel #define _TCP_LRO_H_
356c5087a8SJack F Vogel 
367127e6acSNavdeep Parhar #include <sys/time.h>
377127e6acSNavdeep Parhar 
/*
 * Default number of LRO entries per RX queue.  A different value can be
 * selected by defining TCP_LRO_ENTRIES before this point is reached.
 */
#ifndef TCP_LRO_ENTRIES
#define	TCP_LRO_ENTRIES	8
#endif
42e936121dSHans Petter Selasky 
/*
 * Flag bits for an ACK-compression entry.
 *
 * The bottom 8 bits hold the TCP header flags (th_flags).  LRO itself
 * adds only the two TSTMP_* flags, which indicate whether either kind of
 * timestamp has been filled in, and HAS_TSTMP, which indicates that the
 * TCP timestamp option is valid.
 *
 * The other 5 flag bits are for processing by a stack.
 */
#define	TSTMP_LRO	0x0100
#define	TSTMP_HDWR	0x0200
#define	HAS_TSTMP	0x0400
5969a34e8dSRandall Stewart 
6069a34e8dSRandall Stewart struct inpcb;
6169a34e8dSRandall Stewart 
629ca874cfSHans Petter Selasky union lro_address {
639ca874cfSHans Petter Selasky 	u_long raw[1];
649ca874cfSHans Petter Selasky 	struct {
659ca874cfSHans Petter Selasky 		uint16_t lro_type;	/* internal */
669ca874cfSHans Petter Selasky #define	LRO_TYPE_NONE     0
679ca874cfSHans Petter Selasky #define	LRO_TYPE_IPV4_TCP 1
689ca874cfSHans Petter Selasky #define	LRO_TYPE_IPV6_TCP 2
699ca874cfSHans Petter Selasky #define	LRO_TYPE_IPV4_UDP 3
709ca874cfSHans Petter Selasky #define	LRO_TYPE_IPV6_UDP 4
719ca874cfSHans Petter Selasky 		uint16_t vlan_id;	/* VLAN identifier */
729ca874cfSHans Petter Selasky 		uint16_t s_port;	/* source TCP/UDP port */
739ca874cfSHans Petter Selasky 		uint16_t d_port;	/* destination TCP/UDP port */
749ca874cfSHans Petter Selasky 		uint32_t vxlan_vni;	/* VXLAN virtual network identifier */
759ca874cfSHans Petter Selasky 		union {
769ca874cfSHans Petter Selasky #ifdef INET
779ca874cfSHans Petter Selasky 			struct in_addr v4;
789ca874cfSHans Petter Selasky #endif
799ca874cfSHans Petter Selasky #ifdef INET6
809ca874cfSHans Petter Selasky 			struct in6_addr v6;
819ca874cfSHans Petter Selasky #endif
829ca874cfSHans Petter Selasky 		} s_addr;	/* source IPv4/IPv6 address */
839ca874cfSHans Petter Selasky 		union {
849ca874cfSHans Petter Selasky #ifdef INET
859ca874cfSHans Petter Selasky 			struct in_addr v4;
869ca874cfSHans Petter Selasky #endif
879ca874cfSHans Petter Selasky #ifdef INET6
889ca874cfSHans Petter Selasky 			struct in6_addr v6;
899ca874cfSHans Petter Selasky #endif
909ca874cfSHans Petter Selasky 		} d_addr;	/* destination IPv4/IPv6 address */
919ca874cfSHans Petter Selasky 	};
929ca874cfSHans Petter Selasky } __aligned(sizeof(u_long));
939ca874cfSHans Petter Selasky 
/* Number of unsigned longs that make up one union lro_address. */
#define	LRO_RAW_ADDRESS_MAX \
    (sizeof(union lro_address) / sizeof(u_long))
969ca874cfSHans Petter Selasky 
979ca874cfSHans Petter Selasky /* Optimize address comparison by comparing one unsigned long at a time: */
989ca874cfSHans Petter Selasky 
999ca874cfSHans Petter Selasky static inline bool
1009ca874cfSHans Petter Selasky lro_address_compare(const union lro_address *pa, const union lro_address *pb)
1019ca874cfSHans Petter Selasky {
1029ca874cfSHans Petter Selasky 	if (pa->lro_type == LRO_TYPE_NONE && pb->lro_type == LRO_TYPE_NONE) {
1039ca874cfSHans Petter Selasky 		return (true);
1049ca874cfSHans Petter Selasky 	} else for (unsigned i = 0; i < LRO_RAW_ADDRESS_MAX; i++) {
1059ca874cfSHans Petter Selasky 		if (pa->raw[i] != pb->raw[i])
1069ca874cfSHans Petter Selasky 			return (false);
1079ca874cfSHans Petter Selasky 	}
1089ca874cfSHans Petter Selasky 	return (true);
1099ca874cfSHans Petter Selasky }
1109ca874cfSHans Petter Selasky 
1119ca874cfSHans Petter Selasky struct lro_parser {
1129ca874cfSHans Petter Selasky 	union lro_address data;
1139ca874cfSHans Petter Selasky 	union {
1149ca874cfSHans Petter Selasky 		uint8_t *l3;
1159ca874cfSHans Petter Selasky 		struct ip *ip4;
1169ca874cfSHans Petter Selasky 		struct ip6_hdr *ip6;
1179ca874cfSHans Petter Selasky 	};
1189ca874cfSHans Petter Selasky 	union {
1199ca874cfSHans Petter Selasky 		uint8_t *l4;
1209ca874cfSHans Petter Selasky 		struct tcphdr *tcp;
1219ca874cfSHans Petter Selasky 		struct udphdr *udp;
1229ca874cfSHans Petter Selasky 	};
1239ca874cfSHans Petter Selasky 	uint16_t total_hdr_len;
1249ca874cfSHans Petter Selasky };
1259ca874cfSHans Petter Selasky 
1269ca874cfSHans Petter Selasky /* This structure is zeroed frequently, try to keep it small. */
1271ea44822SSepherosa Ziehau struct lro_entry {
1281ea44822SSepherosa Ziehau 	LIST_ENTRY(lro_entry)	next;
12905cde7efSSepherosa Ziehau 	LIST_ENTRY(lro_entry)	hash_next;
1306c5087a8SJack F Vogel 	struct mbuf		*m_head;
1316c5087a8SJack F Vogel 	struct mbuf		*m_tail;
132e57b2d0eSRandall Stewart 	struct mbuf		*m_last_mbuf;
1339ca874cfSHans Petter Selasky 	struct lro_parser	outer;
1349ca874cfSHans Petter Selasky 	struct lro_parser	inner;
13562b5b6ecSBjoern A. Zeeb 	uint32_t		next_seq;	/* tcp_seq */
13662b5b6ecSBjoern A. Zeeb 	uint32_t		ack_seq;	/* tcp_seq */
13762b5b6ecSBjoern A. Zeeb 	uint32_t		tsval;
13862b5b6ecSBjoern A. Zeeb 	uint32_t		tsecr;
1399ca874cfSHans Petter Selasky 	uint16_t		compressed;
1409ca874cfSHans Petter Selasky 	uint16_t		uncompressed;
14162b5b6ecSBjoern A. Zeeb 	uint16_t		window;
14262b5b6ecSBjoern A. Zeeb 	uint16_t		timestamp;	/* flag, not a TCP hdr field. */
143*b45daaeaSRandall Stewart 	struct bintime		alloc_time;	/* time when entry was allocated */
1446c5087a8SJack F Vogel };
1456c5087a8SJack F Vogel 
1469ca874cfSHans Petter Selasky LIST_HEAD(lro_head, lro_entry);
14762b5b6ecSBjoern A. Zeeb 
/*
 * Pairing of an mbuf pointer with a 64-bit value.  struct lro_ctrl points
 * at these through its lro_mbuf_data member.
 */
struct lro_mbuf_sort {
	uint64_t seq;		/* NOTE(review): presumably the sort key; confirm against the implementation */
	struct mbuf *mb;	/* the mbuf this record refers to */
};
152fc271df3SHans Petter Selasky 
15362b5b6ecSBjoern A. Zeeb /* NB: This is part of driver structs. */
1546c5087a8SJack F Vogel struct lro_ctrl {
1556c5087a8SJack F Vogel 	struct ifnet	*ifp;
156fc271df3SHans Petter Selasky 	struct lro_mbuf_sort *lro_mbuf_data;
157*b45daaeaSRandall Stewart 	struct bintime	lro_last_queue_time;	/* last time data was queued */
158e936121dSHans Petter Selasky 	uint64_t	lro_queued;
159e936121dSHans Petter Selasky 	uint64_t	lro_flushed;
160e936121dSHans Petter Selasky 	uint64_t	lro_bad_csum;
161e936121dSHans Petter Selasky 	unsigned	lro_cnt;
162e936121dSHans Petter Selasky 	unsigned	lro_mbuf_count;
163e936121dSHans Petter Selasky 	unsigned	lro_mbuf_max;
1647ae3d4bfSSepherosa Ziehau 	unsigned short	lro_ackcnt_lim;		/* max # of aggregated ACKs */
1657ae3d4bfSSepherosa Ziehau 	unsigned 	lro_length_lim;		/* max len of aggregated data */
1666c5087a8SJack F Vogel 
16705cde7efSSepherosa Ziehau 	u_long		lro_hashsz;
16805cde7efSSepherosa Ziehau 	struct lro_head	*lro_hash;
1696c5087a8SJack F Vogel 	struct lro_head	lro_active;
1706c5087a8SJack F Vogel 	struct lro_head	lro_free;
1716c5087a8SJack F Vogel };
1726c5087a8SJack F Vogel 
/*
 * One compressed ACK.  The members add up to exactly 32 bytes; the
 * trailing pad array rounds the structure up to that size.
 */
struct tcp_ackent {
	uint64_t timestamp;	/* hardware or software timestamp, valid if TSTMP_LRO or TSTMP_HDWR set */
	uint32_t seq;		/* th_seq value */
	uint32_t ack;		/* th_ack value */
	uint32_t ts_value;	/* TCP timestamp option value, valid if HAS_TSTMP is set */
	uint32_t ts_echo;	/* TCP timestamp option echo, valid if HAS_TSTMP is set */
	uint16_t win;		/* TCP window */
	uint16_t flags;		/* th_flags plus flags saying if TS is present and the type of timestamp */
	uint8_t  codepoint;	/* IP level codepoint including ECN bits */
	uint8_t  ack_val_set;	/* Classification of ack used by the stack */
	uint8_t  pad[2];	/* To 32 byte boundary */
};
18569a34e8dSRandall Stewart 
/* We use two M_PROTO flags on the mbuf. */
#define	M_ACKCMP	M_PROTO4   /* Indicates LRO is sending in an ACK-compression mbuf */
#define	M_LRO_EHDRSTRP	M_PROTO6   /* Indicates that LRO has stripped the Ethernet header */

#define	TCP_LRO_LENGTH_MAX	(65535 - 255)	/* safe value with room for outer headers */
#define	TCP_LRO_ACKCNT_MAX	65535		/* unlimited */
1927ae3d4bfSSepherosa Ziehau 
/*
 * LRO entry points.  Only the prototypes are given here; the exact
 * meaning of each argument and return value is defined by the
 * implementation.
 * NOTE(review): the TCP_LRO_* codes defined in this header are presumably
 * among the values returned by tcp_lro_rx() -- confirm.
 */

/* Set up and tear down a struct lro_ctrl. */
int tcp_lro_init(struct lro_ctrl *);
int tcp_lro_init_args(struct lro_ctrl *, struct ifnet *, unsigned, unsigned);
void tcp_lro_free(struct lro_ctrl *);

/* Flushing. */
void tcp_lro_flush_inactive(struct lro_ctrl *, const struct timeval *);
void tcp_lro_flush(struct lro_ctrl *, struct lro_entry *);
void tcp_lro_flush_all(struct lro_ctrl *);

/* Handing mbufs to LRO. */
int tcp_lro_rx(struct lro_ctrl *, struct mbuf *, uint32_t);
void tcp_lro_queue_mbuf(struct lro_ctrl *, struct mbuf *);

/* mbuf queue registration and deregistration. */
void tcp_lro_reg_mbufq(void);
void tcp_lro_dereg_mbufq(void);
2036c5087a8SJack F Vogel 
/*
 * LRO status codes.  The negative values are parenthesized so that they
 * expand safely inside any expression.
 */
#define	TCP_LRO_NO_ENTRIES	(-2)
#define	TCP_LRO_CANNOT		(-1)
#define	TCP_LRO_NOT_SUPPORTED	1
2076c5087a8SJack F Vogel 
2086c5087a8SJack F Vogel #endif /* _TCP_LRO_H_ */
209