xref: /freebsd/sys/netinet/tcp_lro.h (revision 500f4659d7c8947082dba040a1d58e7d228f8d44)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2006, Myricom Inc.
5  * Copyright (c) 2008, Intel Corporation.
6  * Copyright (c) 2016 Mellanox Technologies.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  *
30  * $FreeBSD$
31  */
32 
33 #ifndef _TCP_LRO_H_
34 #define _TCP_LRO_H_
35 
36 #include <sys/time.h>
37 
#ifndef TCP_LRO_ENTRIES
/*
 * Default number of LRO entries per RX queue.  The #ifndef lets a build
 * supply its own value by defining TCP_LRO_ENTRIES before this point.
 */
#define	TCP_LRO_ENTRIES	8
#endif
42 
/*
 * Flag bits carried in the 16-bit flags word of an ACK entry used for
 * ACK compression:
 *
 *   bits 0-7    hold the TCP header's th_flags.
 *   bits 8-10   are the only ones LRO itself sets (defined below).
 *   bits 11-15  (the remaining 5 bits) are for processing by a stack.
 *
 * The two TSTMP_* flags say which kind of timestamp has been filled in;
 * HAS_TSTMP says that the TCP timestamp option values are valid.
 */
#define TSTMP_LRO		0x0100	/* timestamp filled (presumably by LRO in software) */
#define TSTMP_HDWR		0x0200	/* timestamp filled (presumably by hardware) */
#define HAS_TSTMP		0x0400	/* TCP timestamp option is valid */
59 
/*
 * Flags in LRO entry (presumably the `flags' member of struct lro_entry;
 * confirm against the users of these bits).
 */
#define CAN_USE_ACKCMP		0x0001
#define HAS_COMP_ENTRIES	0x0002
63 
/* Only a pointer to the inpcb is stored below, so a forward declaration suffices. */
struct inpcb;

/*
 * State kept for one LRO entry.
 *
 * The IP-version dependent parts (header pointer, source and destination
 * address) are unions; which arm is meaningful is presumably selected by
 * eh_type -- confirm against the implementation.
 *
 * This header does not pull in the definitions of LIST_ENTRY, in_addr_t or
 * struct in6_addr; the includer is expected to have them in scope already.
 */
struct lro_entry {
	LIST_ENTRY(lro_entry)	next;		/* list linkage (presumably active/free list) */
	LIST_ENTRY(lro_entry)	hash_next;	/* list linkage (presumably hash bucket) */
	struct mbuf		*m_head;
	struct mbuf		*m_tail;
	struct mbuf		*m_last_mbuf;
	struct mbuf		*m_prev_last;
	struct inpcb		*inp;
	union {
		struct ip	*ip4;		/* IPv4 header */
		struct ip6_hdr	*ip6;		/* IPv6 header */
	} leip;
	union {
		in_addr_t	s_ip4;		/* IPv4 source address */
		struct in6_addr	s_ip6;		/* IPv6 source address */
	} lesource;
	union {
		in_addr_t	d_ip4;		/* IPv4 destination address */
		struct in6_addr	d_ip6;		/* IPv6 destination address */
	} ledest;
	uint16_t		source_port;
	uint16_t		dest_port;
	uint16_t		eh_type;	/* EthernetHeader type. */
	uint16_t		append_cnt;
	uint32_t		p_len;		/* IP header payload length. */
	uint32_t		ulp_csum;	/* TCP, etc. checksum. */
	uint32_t		next_seq;	/* tcp_seq */
	uint32_t		ack_seq;	/* tcp_seq */
	uint32_t		tsval;
	uint32_t		tsecr;
	uint32_t		tcp_tot_p_len;	/* TCP payload length of chain */
	uint16_t		window;
	uint16_t		timestamp;	/* flag, not a TCP hdr field. */
	uint16_t		need_wakeup;
	/*
	 * mbuf_cnt tracks the number of mbufs added to the m_next list.
	 * Each mbuf counted can have data and, of course, it will have an
	 * ack as well (by definition any inbound TCP segment will have an
	 * ack value).  This count tells us how many ACKs are present for
	 * our ack-count threshold.  If we exceed that or the data threshold
	 * we will wake up the endpoint.
	 */
	uint16_t		mbuf_cnt;	/* Count of mbufs collected */
	uint16_t		mbuf_appended;
	uint16_t		cmp_ack_cnt;
	uint16_t		flags;
	uint16_t		strip_cnt;
	struct timeval		mtime;
};
/* Declares struct lro_head: the head of a list of struct lro_entry. */
LIST_HEAD(lro_head, lro_entry);
116 
/*
 * Shorthand for the union members of struct lro_entry, so that code can
 * write le->source_ip4 instead of le->lesource.s_ip4.
 *
 * NOTE: these are plain object-like macros.  After this header has been
 * included, every occurrence of these identifiers is rewritten, so they
 * must not be reused as variable, member or parameter names elsewhere.
 */
#define	le_ip4			leip.ip4
#define	le_ip6			leip.ip6
#define	source_ip4		lesource.s_ip4
#define	dest_ip4		ledest.d_ip4
#define	source_ip6		lesource.s_ip6
#define	dest_ip6		ledest.d_ip6
123 
/*
 * Pairs an mbuf with a 64-bit value.  struct lro_ctrl points at these
 * through its lro_mbuf_data member.
 * NOTE(review): the name suggests seq is the key the mbufs are sorted by;
 * confirm how it is built against the implementation.
 */
struct lro_mbuf_sort {
	uint64_t seq;
	struct mbuf *mb;
};
128 
129 /* NB: This is part of driver structs. */
130 struct lro_ctrl {
131 	struct ifnet	*ifp;
132 	struct lro_mbuf_sort *lro_mbuf_data;
133 	struct timeval lro_last_flush;
134 	uint64_t	lro_queued;
135 	uint64_t	lro_flushed;
136 	uint64_t	lro_bad_csum;
137 	unsigned	lro_cnt;
138 	unsigned	lro_mbuf_count;
139 	unsigned	lro_mbuf_max;
140 	unsigned short	lro_ackcnt_lim;		/* max # of aggregated ACKs */
141 	unsigned 	lro_length_lim;		/* max len of aggregated data */
142 
143 	u_long		lro_hashsz;
144 	struct lro_head	*lro_hash;
145 	struct lro_head	lro_active;
146 	struct lro_head	lro_free;
147 };
148 
/*
 * One ACK entry as used for ACK compression.  The explicit trailing pad
 * brings the structure to exactly 32 bytes.
 */
struct tcp_ackent {
	uint64_t timestamp;	/* hardware or software timestamp, valid if TSTMP_LRO or TSTMP_HDWR set */
	uint32_t seq;		/* th_seq value */
	uint32_t ack;		/* th_ack value */
	uint32_t ts_value;	/* If ts option value, valid if HAS_TSTMP is set */
	uint32_t ts_echo;	/* If ts option echo, valid if HAS_TSTMP is set */
	uint16_t win;		/* TCP window */
	uint16_t flags;		/* Flags to say if TS is present and type of timestamp and th_flags */
	uint8_t  codepoint;	/* IP level codepoint including ECN bits */
	uint8_t  ack_val_set;	/* Classification of ack used by the stack */
	uint8_t  pad[2];	/* To 32 byte boundary */
};
161 
/*
 * We use two of the M_PROTO mbuf flags.  M_PROTO4 and M_PROTO6 are not
 * defined here; they must come from the mbuf header of the includer.
 */
#define M_ACKCMP	M_PROTO4   /* Indicates LRO is sending in an ACK-compression mbuf */
#define M_LRO_EHDRSTRP	M_PROTO6   /* Indicates that LRO has stripped the ethernet header */
165 
/*
 * Upper bounds for the aggregation limits (presumably the values meant for
 * lro_ctrl.lro_length_lim and lro_ctrl.lro_ackcnt_lim -- confirm).
 */
#define	TCP_LRO_LENGTH_MAX	65535
#define	TCP_LRO_ACKCNT_MAX	65535		/* unlimited */
168 
/*
 * LRO entry points.  Only the declarations live in this header; the
 * implementations are elsewhere.  Parameter names are intentionally
 * omitted, matching the original declarations.
 * NOTE(review): the int-returning functions presumably report status using
 * 0 and/or the TCP_LRO_* codes defined in this header -- confirm against
 * the implementation before relying on specific values.
 */
int tcp_lro_init(struct lro_ctrl *);
int tcp_lro_init_args(struct lro_ctrl *, struct ifnet *, unsigned, unsigned);
void tcp_lro_free(struct lro_ctrl *);
void tcp_lro_flush_inactive(struct lro_ctrl *, const struct timeval *);
void tcp_lro_flush(struct lro_ctrl *, struct lro_entry *);
void tcp_lro_flush_all(struct lro_ctrl *);
int tcp_lro_rx(struct lro_ctrl *, struct mbuf *, uint32_t);
void tcp_lro_queue_mbuf(struct lro_ctrl *, struct mbuf *);
void tcp_lro_reg_mbufq(void);
void tcp_lro_dereg_mbufq(void);
179 
/*
 * LRO status codes (presumably returned by tcp_lro_rx() -- confirm).
 * The negative values are parenthesized so that they expand safely inside
 * any surrounding expression.
 */
#define	TCP_LRO_NO_ENTRIES	(-2)
#define	TCP_LRO_CANNOT		(-1)
#define	TCP_LRO_NOT_SUPPORTED	1
183 
184 #endif /* _TCP_LRO_H_ */
185