xref: /freebsd/sys/netinet/tcp.h (revision f0d5b1bdf075c68ddb1dcfbc5a0eda0214510b5b)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1982, 1986, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #ifndef _NETINET_TCP_H_
33 #define _NETINET_TCP_H_
34 
35 #include <sys/types.h>
36 
37 #if __BSD_VISIBLE
38 
39 typedef	u_int32_t tcp_seq;
40 
41 #define tcp6_seq	tcp_seq	/* for KAME src sync over BSD*'s */
42 #define tcp6hdr		tcphdr	/* for KAME src sync over BSD*'s */
43 
44 /*
45  * TCP header.
46  * Per RFC 793, September, 1981.
47  */
48 struct tcphdr {
49 	u_short	th_sport;		/* source port */
50 	u_short	th_dport;		/* destination port */
51 	tcp_seq	th_seq;			/* sequence number */
52 	tcp_seq	th_ack;			/* acknowledgement number */
53 #if BYTE_ORDER == LITTLE_ENDIAN
54 	u_char	th_x2:4,		/* upper 4 (reserved) flags */
55 		th_off:4;		/* data offset */
56 #endif
57 #if BYTE_ORDER == BIG_ENDIAN
58 	u_char	th_off:4,		/* data offset */
59 		th_x2:4;		/* upper 4 (reserved) flags */
60 #endif
61 	u_char	th_flags;
62 #define	TH_FIN	0x01
63 #define	TH_SYN	0x02
64 #define	TH_RST	0x04
65 #define	TH_PUSH	0x08
66 #define	TH_ACK	0x10
67 #define	TH_URG	0x20
68 #define	TH_ECE	0x40
69 #define	TH_CWR	0x80
70 #define	TH_AE	0x100			/* maps into th_x2 */
71 #define	TH_RES3	0x200
72 #define	TH_RES2	0x400
73 #define	TH_RES1	0x800
74 #define	TH_FLAGS	(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG|TH_ECE|TH_CWR)
75 #define	PRINT_TH_FLAGS	"\20\1FIN\2SYN\3RST\4PUSH\5ACK\6URG\7ECE\10CWR\11AE"
76 
77 	u_short	th_win;			/* window */
78 	u_short	th_sum;			/* checksum */
79 	u_short	th_urp;			/* urgent pointer */
80 };
81 
82 static __inline uint16_t
83 __tcp_get_flags(const struct tcphdr *th)
84 {
85 	return (((uint16_t)th->th_x2 << 8) | th->th_flags);
86 }
87 
88 static __inline void
89 __tcp_set_flags(struct tcphdr *th, uint16_t flags)
90 {
91 	th->th_x2 = (flags >> 8) & 0x0f;
92 	th->th_flags = flags & 0xff;
93 }
94 
95 #ifdef _KERNEL
96 #define tcp_get_flags(th) __tcp_get_flags(th)
97 #define tcp_set_flags(th, flags) __tcp_set_flags(th, flags)
98 #endif
99 
/*
 * Round len up to the next multiple of 4 (a value that is already a
 * multiple of 4 is returned unchanged).  The argument is evaluated twice.
 */
#define	PADTCPOLEN(len)		((((len) / 4) + (((len) % 4) != 0)) * 4)
101 
102 #define	TCPOPT_EOL		0
103 #define	   TCPOLEN_EOL			1
104 #define	TCPOPT_PAD		0		/* padding after EOL */
105 #define	   TCPOLEN_PAD			1
106 #define	TCPOPT_NOP		1
107 #define	   TCPOLEN_NOP			1
108 #define	TCPOPT_MAXSEG		2
109 #define    TCPOLEN_MAXSEG		4
110 #define TCPOPT_WINDOW		3
111 #define    TCPOLEN_WINDOW		3
112 #define TCPOPT_SACK_PERMITTED	4
113 #define    TCPOLEN_SACK_PERMITTED	2
114 #define TCPOPT_SACK		5
115 #define	   TCPOLEN_SACKHDR		2
116 #define    TCPOLEN_SACK			8	/* 2*sizeof(tcp_seq) */
117 #define TCPOPT_TIMESTAMP	8
118 #define    TCPOLEN_TIMESTAMP		10
119 #define    TCPOLEN_TSTAMP_APPA		(TCPOLEN_TIMESTAMP+2) /* appendix A */
120 #define	TCPOPT_SIGNATURE	19		/* Keyed MD5: RFC 2385 */
121 #define	   TCPOLEN_SIGNATURE		18
122 #define	TCPOPT_FAST_OPEN	34
123 #define	   TCPOLEN_FAST_OPEN_EMPTY	2
124 
125 #define	MAX_TCPOPTLEN		40	/* Absolute maximum TCP options len */
126 
127 /* Miscellaneous constants */
128 #define	MAX_SACK_BLKS	6	/* Max # SACK blocks stored at receiver side */
129 #define	TCP_MAX_SACK	4	/* MAX # SACKs sent in any segment */
130 
131 /*
132  * The default maximum segment size (MSS) to be used for new TCP connections
133  * when path MTU discovery is not enabled.
134  *
135  * RFC879 derives the default MSS from the largest datagram size hosts are
136  * minimally required to handle directly or through IP reassembly minus the
137  * size of the IP and TCP header.  With IPv6 the minimum MTU is specified
138  * in RFC2460.
139  *
140  * For IPv4 the MSS is 576 - sizeof(struct tcpiphdr)
141  * For IPv6 the MSS is IPV6_MMTU - sizeof(struct ip6_hdr) - sizeof(struct tcphdr)
142  *
143  * We use explicit numerical definition here to avoid header pollution.
144  */
145 #define	TCP_MSS		536
146 #define	TCP6_MSS	1220
147 
148 /*
149  * Limit the lowest MSS we accept for path MTU discovery and the TCP SYN MSS
150  * option.  Allowing low values of MSS can consume significant resources and
151  * be used to mount a resource exhaustion attack.
152  * Connections requesting lower MSS values will be rounded up to this value
153  * and the IP_DF flag will be cleared to allow fragmentation along the path.
154  *
155  * See tcp_subr.c tcp_minmss SYSCTL declaration for more comments.  Setting
156  * it to "0" disables the minmss check.
157  *
158  * The default value is fine for TCP across the Internet's smallest official
159  * link MTU (256 bytes for AX.25 packet radio).  However, a connection is very
160  * unlikely to come across such low MTU interfaces these days (anno domini 2003).
161  */
162 #define	TCP_MINMSS 216
163 
164 #define	TCP_MAXWIN	65535	/* largest value for (unscaled) window */
165 #define	TTCP_CLIENT_SND_WND	4096	/* dflt send window for T/TCP client */
166 
167 #define TCP_MAX_WINSHIFT	14	/* maximum window shift */
168 
169 #define TCP_MAXBURST		4	/* maximum segments in a burst */
170 
171 #define TCP_MAXHLEN	(0xf<<2)	/* max length of header in bytes */
172 #define TCP_MAXOLEN	(TCP_MAXHLEN - sizeof(struct tcphdr))
173 					/* max space left for options */
174 
175 #define TCP_FASTOPEN_MIN_COOKIE_LEN	4	/* Per RFC7413 */
176 #define TCP_FASTOPEN_MAX_COOKIE_LEN	16	/* Per RFC7413 */
177 #define TCP_FASTOPEN_PSK_LEN		16	/* Same as TCP_FASTOPEN_KEY_LEN */
178 #endif /* __BSD_VISIBLE */
179 
180 /*
181  * User-settable options (used with setsockopt).  These are discrete
182  * values and are not masked together.  Some values appear to be
183  * bitmasks for historical reasons.
184  */
185 #define	TCP_NODELAY		1	/* don't delay send to coalesce packets */
186 #if __BSD_VISIBLE
187 #define	TCP_MAXSEG		2	/* set maximum segment size */
188 #define	TCP_NOPUSH		4	/* don't push last block of write */
189 #define	TCP_NOOPT		8	/* don't use TCP options */
190 #define	TCP_MD5SIG		16	/* use MD5 digests (RFC2385) */
191 #define	TCP_INFO		32	/* retrieve tcp_info structure */
192 #define	TCP_STATS		33	/* retrieve stats blob structure */
193 #define	TCP_LOG			34	/* configure event logging for connection */
194 #define	TCP_LOGBUF		35	/* retrieve event log for connection */
195 #define	TCP_LOGID		36	/* configure log ID to correlate connections */
196 #define	TCP_LOGDUMP		37	/* dump connection log events to device */
197 #define	TCP_LOGDUMPID		38	/* dump events from connections with same ID to
198 					   device */
199 #define	TCP_TXTLS_ENABLE	39	/* TLS framing and encryption for transmit */
200 #define	TCP_TXTLS_MODE		40	/* Transmit TLS mode */
201 #define	TCP_RXTLS_ENABLE	41	/* TLS framing and encryption for receive */
202 #define	TCP_RXTLS_MODE		42	/* Receive TLS mode */
203 #define	TCP_IWND_NB		43	/* Override initial window (units: bytes) */
204 #define	TCP_IWND_NSEG		44	/* Override initial window (units: MSS segs) */
205 #ifdef _KERNEL
206 #define	TCP_USE_DDP		45	/* Use direct data placement for so_rcvbuf */
207 #endif
208 #define	TCP_LOGID_CNT		46	/* get number of connections with the same ID */
209 #define	TCP_LOG_TAG		47	/* configure tag for grouping logs */
210 #define	TCP_USER_LOG		48	/* userspace log event */
211 #define	TCP_CONGESTION		64	/* get/set congestion control algorithm */
212 #define	TCP_CCALGOOPT		65	/* get/set cc algorithm specific options */
213 #define	TCP_MAXUNACKTIME	68	/* maximum time without making progress (sec) */
214 					/* unused; was TCP_MAXPEAKRATE */
215 #define	TCP_IDLE_REDUCE		70	/* Reduce cwnd on idle input */
216 #define	TCP_REMOTE_UDP_ENCAPS_PORT 71	/* Enable TCP over UDP tunneling via the specified port */
217 #define	TCP_DELACK		72	/* socket option for delayed ack */
218 #define	TCP_FIN_IS_RST		73	/* A FIN from the peer is treated as a RST */
219 #define	TCP_LOG_LIMIT		74	/* Limit to number of records in tcp-log */
220 #define	TCP_SHARED_CWND_ALLOWED	75	/* Use of a shared cwnd is allowed */
221 #define	TCP_PROC_ACCOUNTING	76	/* Do accounting on tcp cpu usage and counts */
222 #define	TCP_USE_CMP_ACKS	77	/* The transport can handle the Compressed mbuf acks */
223 #define	TCP_PERF_INFO		78	/* retrieve accounting counters */
224 #define	TCP_KEEPINIT		128	/* N, time to establish connection */
225 #define	TCP_KEEPIDLE		256	/* L,N,X start keepalives after this period */
226 #define	TCP_KEEPINTVL		512	/* L,N interval between keepalives */
227 #define	TCP_KEEPCNT		1024	/* L,N number of keepalives before close */
228 #define	TCP_FASTOPEN		1025	/* enable TFO / was created via TFO */
229 #define	TCP_PCAP_OUT		2048	/* number of output packets to keep */
230 #define	TCP_PCAP_IN		4096	/* number of input packets to keep */
231 #define	TCP_FUNCTION_BLK	8192	/* Set the tcp function pointers to the specified stack */
232 #define	TCP_FUNCTION_ALIAS	8193	/* Get the current tcp function pointer name alias */
233 /* Options for Rack and BBR */
234 #define	TCP_REUSPORT_LB_NUMA	1026	/* set listen socket numa domain */
235 #define	TCP_RACK_MBUF_QUEUE	1050	/* Do we allow mbuf queuing if supported */
236 					/* unused; was TCP_RACK_PROP */
237 #define	TCP_RACK_TLP_REDUCE 	1052	/* RACK TLP cwnd reduction (bool) */
238 					/* unused; was TCP_RACK_PACE_REDUCE */
239 #define	TCP_RACK_PACE_MAX_SEG	1054	/* Max TSO size we will send  */
240 #define	TCP_RACK_PACE_ALWAYS	1055	/* Use the always pace method */
241 					/* unused; was TCP_RACK_PROP_RATE */
242 #define	TCP_RACK_PRR_SENDALOT	1057	/* Allow PRR to send more than one seg */
243 #define	TCP_RACK_MIN_TO		1058	/* Minimum time between rack t-o's in ms */
244 					/* unused; was TCP_RACK_EARLY_RECOV */
245 #define	TCP_RACK_EARLY_SEG	1060	/* If early recovery max segments */
246 #define	TCP_RACK_REORD_THRESH	1061	/* RACK reorder threshold (shift amount) */
247 #define	TCP_RACK_REORD_FADE	1062	/* Does reordering fade after ms time */
248 #define	TCP_RACK_TLP_THRESH	1063	/* RACK TLP threshold i.e. srtt+(srtt/N) */
249 #define	TCP_RACK_PKT_DELAY	1064	/* RACK added ms i.e. rack-rtt + reord + N */
250 					/* unused; was TCP_RACK_TLP_INC_VAR */
251 					/* unused */
252 #define	TCP_BBR_IWINTSO		1067	/* Initial TSO window for BBRs first sends */
253 					/* unused; was TCP_BBR_RECFORCE */
254 #define	TCP_BBR_STARTUP_PG	1069	/* Startup pacing gain */
255 #define	TCP_BBR_DRAIN_PG	1070	/* Drain pacing gain */
256 					/* unused; was TCP_BBR_RWND_IS_APP */
257 #define	TCP_BBR_PROBE_RTT_INT	1072	/* How long in useconds between probe-rtt */
258 					/* unused; was TCP_BBR_ONE_RETRAN */
259 #define	TCP_BBR_STARTUP_LOSS_EXIT 1074	/* Do we exit a loss during startup if not 20% incr */
260 					/* unused; was TCP_BBR_USE_LOWGAIN */
261 #define	TCP_BBR_TSLIMITS	1076	/* Do we use experimental Timestamp limiting for our algo */
262 #define	TCP_BBR_PACE_OH		1077	/* Reused in 4.2 for pacing overhead setting */
263 					/* unused; was TCP_BBR_HOLD_TARGET */
264 #define	TCP_BBR_USEDEL_RATE	1079	/* Enable use of delivery rate for loss recovery */
265 #define	TCP_BBR_MIN_RTO		1080	/* Min RTO in milliseconds */
266 #define	TCP_BBR_MAX_RTO		1081	/* Max RTO in milliseconds */
267 					/* unused; was TCP_BBR_REC_OVER_HPTS */
268 #define	TCP_BBR_ALGORITHM	1083	/* What measurement algo does BBR use netflix=0, google=1 */
269 					/* unused; was TCP_BBR_DRAIN_INC_EXTRA */
270 					/* unused; was TCP_BBR_STARTUP_EXIT_EPOCH */
271 #define	TCP_BBR_PACE_PER_SEC	1086
272 #define	TCP_BBR_PACE_DEL_TAR	1087
273 #define	TCP_BBR_PACE_SEG_MAX	1088
274 #define	TCP_BBR_PACE_SEG_MIN	1089
275 #define	TCP_BBR_PACE_CROSS	1090
276 					/* unused */
277 					/* unused; was TCP_RACK_IDLE_REDUCE_HIGH */
278 					/* unused; was TCP_RACK_MIN_PACE */
279 					/* unused; was TCP_RACK_GP_INCREASE */
280 #define	TCP_RACK_TLP_USE	1095
281 #define	TCP_BBR_TMR_PACE_OH	1096	/* Recycled in 4.2 */
282 					/* unused; was TCP_BBR_EXTRA_GAIN */
283 #define	TCP_RACK_DO_DETECTION	1097	/* Recycle of extra gain for rack, attack detection */
284 #define	TCP_BBR_RACK_RTT_USE	1098	/* what RTT should we use 0, 1, or 2? */
285 #define	TCP_BBR_RETRAN_WTSO	1099
286 #define	TCP_DATA_AFTER_CLOSE	1100
287 #define	TCP_BBR_PROBE_RTT_GAIN	1101
288 #define	TCP_BBR_PROBE_RTT_LEN	1102
289 #define	TCP_BBR_SEND_IWND_IN_TSO 1103	/* Do we burst out whole iwin size chunks at start? */
290 #define	TCP_BBR_USE_RACK_RR	1104	/* Do we use the rack rapid recovery for pacing rxt's */
291 #define	TCP_BBR_USE_RACK_CHEAT 	TCP_BBR_USE_RACK_RR /* Compat. */
292 #define	TCP_BBR_HDWR_PACE	1105	/* Enable/disable hardware pacing */
293 #define	TCP_BBR_UTTER_MAX_TSO	1106	/* Do we enforce an utter max TSO size */
294 #define	TCP_BBR_EXTRA_STATE	1107	/* Special exit-persist catch up */
295 #define	TCP_BBR_FLOOR_MIN_TSO	1108	/* The min tso size */
296 #define	TCP_BBR_MIN_TOPACEOUT	1109	/* Do we suspend pacing until */
297 #define	TCP_BBR_TSTMP_RAISES	1110	/* Can a timestamp measurement raise the b/w */
298 #define	TCP_BBR_POLICER_DETECT	1111	/* Turn on/off google mode policer detection */
299 #define	TCP_BBR_RACK_INIT_RATE	1112	/* Set an initial pacing rate for when we have no b/w in kbits per sec */
300 #define	TCP_RACK_RR_CONF	1113	/* Rack rapid recovery configuration control*/
301 #define	TCP_RACK_GP_INCREASE_CA	1114	/* GP increase for Congestion Avoidance */
302 #define	TCP_RACK_GP_INCREASE_SS	1115	/* GP increase for Slow Start */
303 #define	TCP_RACK_GP_INCREASE_REC 1116	/* GP increase for Recovery */
304 #define	TCP_RACK_FORCE_MSEG	1117	/* Override to use the user set max-seg value */
305 #define	TCP_RACK_PACE_RATE_CA	1118	/* Pacing rate for Congestion Avoidance */
306 #define	TCP_RACK_PACE_RATE_SS	1119	/* Pacing rate for Slow Start */
307 #define	TCP_RACK_PACE_RATE_REC	1120	/* Pacing rate for Recovery */
308 #define	TCP_NO_PRR		1122	/* If pacing, don't use prr  */
309 #define	TCP_RACK_NONRXT_CFG_RATE 1123	/* In recovery does a non-rxt use the cfg rate */
310 #define	TCP_SHARED_CWND_ENABLE	1124	/* Use a shared cwnd if allowed */
311 #define	TCP_TIMELY_DYN_ADJ	1125	/* Do we attempt dynamic multiplier adjustment with timely. */
312 #define	TCP_RACK_NO_PUSH_AT_MAX	1126	/* For timely do not push if we are over max rtt */
313 #define	TCP_RACK_PACE_TO_FILL	1127	/* If we are not in recovery, always pace to fill the cwnd in 1 RTT */
314 #define	TCP_SHARED_CWND_TIME_LIMIT 1128	/* we should limit to low time values the scwnd life */
315 #define	TCP_RACK_PROFILE	1129	/* Select a profile that sets multiple options */
316 #define	TCP_HDWR_RATE_CAP	1130	/* Allow hardware rates to cap pacing rate */
317 #define	TCP_PACING_RATE_CAP	1131	/* Highest rate allowed in pacing in bytes per second (uint64_t) */
318 #define	TCP_HDWR_UP_ONLY	1132	/* Allow the pacing rate to climb but not descend (with the exception of fill-cw) */
319 #define	TCP_RACK_ABC_VAL	1133	/* Set a local ABC value different than the system default */
320 #define	TCP_REC_ABC_VAL		1134	/* Do we use the ABC value for recovery or the override one from sysctl  */
321 #define	TCP_RACK_MEASURE_CNT	1135	/* How many measurements are required in GP pacing */
322 #define	TCP_DEFER_OPTIONS	1136	/* Defer options until the proper number of measurements occur, does not defer TCP_RACK_MEASURE_CNT */
323 					/* unused; was TCP_FAST_RSM_HACK */
324 #define	TCP_RACK_PACING_BETA	1138	/* Changing the beta for pacing */
325 #define	TCP_RACK_PACING_BETA_ECN 1139	/* Changing the beta for ecn with pacing */
326 #define	TCP_RACK_TIMER_SLOP	1140	/* Set or get the timer slop used */
327 #define	TCP_RACK_DSACK_OPT	1141	/* How do we setup rack timer DSACK options bit 1/2 */
328 #define	TCP_RACK_ENABLE_HYSTART	1142	/* Do we allow hystart in the CC modules */
329 #define	TCP_RACK_SET_RXT_OPTIONS 1143	/* Set the bits in the retransmit options */
330 #define	TCP_RACK_HI_BETA	1144	/* Turn on/off high beta */
331 #define	TCP_RACK_SPLIT_LIMIT	1145	/* Set a split limit for split allocations */
332 #define	TCP_RACK_PACING_DIVISOR	1146	/* Pacing divisor given to rate-limit code for burst sizing */
333 #define	TCP_RACK_PACE_MIN_SEG	1147	/* Pacing min seg size rack will use */
334 #define	TCP_RACK_DGP_IN_REC	1148	/* Do we use full DGP in recovery? */
335 					/* unused; was TCP_POLICER_DETECT */
336 #define	TCP_HYBRID_PACING	1150	/* Hybrid pacing enablement */
337 #define	TCP_PACING_DND		1151	/* When pacing with rr_config=3 can sacks disturb us */
338 #define	TCP_SS_EEXIT		1152	/* Do we do early exit from slow start if no b/w growth */
339 #define	TCP_DGP_UPPER_BOUNDS	1153	/* SS and CA upper bound in percentage */
340 #define	TCP_NO_TIMELY		1154	/* Disable/enable Timely */
341 #define	TCP_HONOR_HPTS_MIN	1155	/* Do we honor hpts min to */
342 #define	TCP_REC_IS_DYN		1156	/* Do we allow timely to change recovery multiplier? */
343 #define	TCP_SIDECHAN_DIS	1157	/* Disable/enable the side-channel */
344 #define	TCP_FILLCW_RATE_CAP	1158	/* Set a cap for DGP's fillcw */
345 					/* unused; was TCP_POLICER_MSS */
346 #define	TCP_STACK_SPEC_INFO	1160	/* Get stack specific information (if present) */
347 #define	RACK_CSPR_IS_FCC	1161
348 #define	TCP_GP_USE_LTBW		1162	/* how we use lt_bw 0=not, 1=min, 2=max */
349 
350 
351 /* Start of reserved space for third-party user-settable options. */
352 #define	TCP_VENDOR	SO_VENDOR
353 
354 #define	TCP_CA_NAME_MAX	16	/* max congestion control name length */
355 
356 #define	TCPI_OPT_TIMESTAMPS	0x01
357 #define	TCPI_OPT_SACK		0x02
358 #define	TCPI_OPT_WSCALE		0x04
359 #define	TCPI_OPT_ECN		0x08
360 #define	TCPI_OPT_TOE		0x10
361 #define	TCPI_OPT_TFO		0x20
362 #define	TCPI_OPT_ACE		0x40
363 
364 /* Maximum length of log ID. */
365 #define TCP_LOG_ID_LEN	64
366 
/* Sizes of the TCP accounting counter arrays. */
#define TCP_NUM_PROC_COUNTERS 11
#define TCP_NUM_CNT_COUNTERS 13

/*
 * Accounting counters and their timebase.  The array sizes must match
 * the counter array sizes in tcpcb.
 *
 * NOTE(review): tcp_proc_time is dimensioned by TCP_NUM_CNT_COUNTERS,
 * not TCP_NUM_PROC_COUNTERS; confirm against tcpcb before changing, as
 * the size is part of the structure layout.
 */
struct tcp_perf_info {
	uint64_t	tcp_cnt_counters[TCP_NUM_CNT_COUNTERS];
	uint64_t	tcp_proc_time[TCP_NUM_CNT_COUNTERS];
	uint64_t	timebase;	/* timebase for tcp_proc_time */
	uint8_t		tb_is_stable;	/* timebase is stable/invariant */
};
378 
379 /*
380  * The TCP_INFO socket option comes from the Linux 2.6 TCP API, and permits
381  * the caller to query certain information about the state of a TCP
382  * connection.  We provide an overlapping set of fields with the Linux
383  * implementation, but since this is a fixed size structure, room has been
384  * left for growth.  In order to maximize potential future compatibility with
385  * the Linux API, the same variable names and order have been adopted, and
386  * padding left to make room for omitted fields in case they are added later.
387  *
388  * XXX: This is currently an unstable ABI/API, in that it is expected to
389  * change.
390  */
struct tcp_info {
	/*
	 * Members whose names start with "__" carry no description here;
	 * presumably they are the placeholders kept for omitted fields.
	 */
	uint8_t		tcpi_state;		/* TCP FSM state. */
	uint8_t		__tcpi_ca_state;
	uint8_t		__tcpi_retransmits;
	uint8_t		__tcpi_probes;
	uint8_t		__tcpi_backoff;
	uint8_t		tcpi_options;		/* Options enabled on conn. */
	uint8_t		tcpi_snd_wscale:4;	/* RFC1323 send shift value. */
	uint8_t		tcpi_rcv_wscale:4;	/* RFC1323 recv shift value. */

	uint32_t	tcpi_rto;		/* Retransmission timeout (usec). */
	uint32_t	__tcpi_ato;
	uint32_t	tcpi_snd_mss;		/* Max segment size for send. */
	uint32_t	tcpi_rcv_mss;		/* Max segment size for receive. */

	uint32_t	__tcpi_unacked;
	uint32_t	__tcpi_sacked;
	uint32_t	__tcpi_lost;
	uint32_t	__tcpi_retrans;
	uint32_t	__tcpi_fackets;

	/* Times; measurements in usecs. */
	uint32_t	__tcpi_last_data_sent;
	uint32_t	__tcpi_last_ack_sent;	/* Also unimpl. on Linux? */
	uint32_t	tcpi_last_data_recv;	/* Time since last recv data. */
	uint32_t	__tcpi_last_ack_recv;

	/* Metrics; variable units. */
	uint32_t	__tcpi_pmtu;
	uint32_t	__tcpi_rcv_ssthresh;
	uint32_t	tcpi_rtt;		/* Smoothed RTT in usecs. */
	uint32_t	tcpi_rttvar;		/* RTT variance in usecs. */
	uint32_t	tcpi_snd_ssthresh;	/* Slow start threshold. */
	uint32_t	tcpi_snd_cwnd;		/* Send congestion window. */
	uint32_t	__tcpi_advmss;
	uint32_t	__tcpi_reordering;

	uint32_t	__tcpi_rcv_rtt;
	uint32_t	tcpi_rcv_space;		/* Advertised recv window. */

	/* FreeBSD extensions to tcp_info. */
	uint32_t	tcpi_snd_wnd;		/* Advertised send window. */
	uint32_t	tcpi_snd_bwnd;		/* No longer used. */
	uint32_t	tcpi_snd_nxt;		/* Next egress seqno */
	uint32_t	tcpi_rcv_nxt;		/* Next ingress seqno */
	uint32_t	tcpi_toe_tid;		/* HWTID for TOE endpoints */
	uint32_t	tcpi_snd_rexmitpack;	/* Retransmitted packets */
	uint32_t	tcpi_rcv_ooopack;	/* Out-of-order packets */
	uint32_t	tcpi_snd_zerowin;	/* Zero-sized windows sent */

	/* Accurate ECN counters. */
	uint32_t	tcpi_delivered_ce;
	uint32_t	tcpi_received_ce;	/* # of CE marks received */
	uint32_t	__tcpi_delivered_e1_bytes;
	uint32_t	__tcpi_delivered_e0_bytes;
	uint32_t	__tcpi_delivered_ce_bytes;
	uint32_t	__tcpi_received_e1_bytes;
	uint32_t	__tcpi_received_e0_bytes;
	uint32_t	__tcpi_received_ce_bytes;

	/* Tail loss probe totals. */
	uint32_t	tcpi_total_tlp;		/* tail loss probes sent */
	uint64_t	tcpi_total_tlp_bytes;	/* tail loss probe bytes sent */

	uint32_t	tcpi_snd_una;		/* Unacked seqno sent */
	uint32_t	tcpi_snd_max;		/* Highest seqno sent */
	uint32_t	tcpi_rcv_numsacks;	/* Distinct SACK blks present */
	uint32_t	tcpi_rcv_adv;		/* Peer advertised window */
	uint32_t	tcpi_dupacks;		/* Consecutive dup ACKs recvd */

	uint32_t	tcpi_rttmin;		/* Min observed RTT */

	/* Spare words so the structure can grow without breaking the ABI. */
	uint32_t	__tcpi_pad[14];		/* Padding. */
};
464 
465 /*
466  * If this structure is provided when setting the TCP_FASTOPEN socket
467  * option, and the enable member is non-zero, a subsequent connect will use
468  * pre-shared key (PSK) mode using the provided key.
469  */
470 struct tcp_fastopen {
471 	int enable;
472 	uint8_t psk[TCP_FASTOPEN_PSK_LEN];
473 };
474 
/* Size of the name buffers in the structures below. */
#define TCP_FUNCTION_NAME_LEN_MAX 32

/*
 * Per-stack information record.
 * NOTE(review): presumably the payload of TCP_STACK_SPEC_INFO -- confirm.
 */
struct stack_specific_info {
	char		stack_name[TCP_FUNCTION_NAME_LEN_MAX];
	uint64_t	policer_last_bw;	/* Only valid if detection enabled and policer detected */
	uint64_t	bytes_transmitted;
	uint64_t	bytes_retransmitted;
	/* Status bits; these four fields share one 32-bit unit. */
	uint32_t	policer_detection_enabled : 1;
	uint32_t	policer_detected : 1;	/* transport thinks a policer is on path */
	uint32_t	highly_buffered : 1;	/* transport considers the path highly buffered */
	uint32_t	spare : 29;
	uint32_t	policer_bucket_size;	/* Only valid if detection enabled and policer detected */
	uint32_t	current_round;
	uint32_t	_rack_i_pad[18];
};
490 
491 struct tcp_function_set {
492 	char function_set_name[TCP_FUNCTION_NAME_LEN_MAX];
493 	uint32_t pcbcnt;
494 };
495 
496 /* TLS modes for TCP_TXTLS_MODE */
497 #define	TCP_TLS_MODE_NONE	0
498 #define	TCP_TLS_MODE_SW		1
499 #define	TCP_TLS_MODE_IFNET	2
500 #define	TCP_TLS_MODE_TOE	3
501 
502 /*
503  * TCP Control message types
504  */
505 #define	TLS_SET_RECORD_TYPE	1
506 #define	TLS_GET_RECORD		2
507 
508 /*
509  * TCP log user opaque
510  */
/* Request record embedded in tcp_log_userdata and tcp_hybrid_req. */
struct tcp_snd_req {
	uint64_t	timestamp;
	uint64_t	start;
	uint64_t	end;
	uint32_t	flags;		/* presumably TCP_LOG_HTTPD_RANGE_* -- confirm */
	uint32_t	playout_ms;
};
518 
519 union tcp_log_userdata {
520 	struct tcp_snd_req tcp_req;
521 };
522 
523 struct tcp_log_user {
524 	uint32_t type;
525 	uint32_t subtype;
526 	union tcp_log_userdata data;
527 };
528 
529 /* user types, i.e. apps */
530 #define TCP_LOG_USER_HTTPD	1
531 
532 /* user subtypes */
533 #define TCP_LOG_HTTPD_TS	1	/* client timestamp */
534 #define TCP_LOG_HTTPD_TS_REQ	2	/* client timestamp and request info */
535 
536 /* HTTPD REQ flags */
537 #define TCP_LOG_HTTPD_RANGE_START	0x0001
538 #define TCP_LOG_HTTPD_RANGE_END		0x0002
539 
540 /* Flags for hybrid pacing */
541 #define TCP_HYBRID_PACING_CU		0x0001		/* Enable catch-up mode */
542 #define TCP_HYBRID_PACING_DTL		0x0002		/* Enable Detailed logging */
543 #define TCP_HYBRID_PACING_CSPR		0x0004		/* A client suggested rate is present  */
544 #define TCP_HYBRID_PACING_H_MS		0x0008		/* A client hint for maxseg is present  */
545 #define TCP_HYBRID_PACING_ENABLE	0x0010		/* We are enabling hybrid pacing else disable */
546 #define TCP_HYBRID_PACING_S_MSS		0x0020		/* Client wants us to set the mss overriding gp est in CU */
547 #define TCP_HAS_PLAYOUT_MS		0x0040		/* The client included the chunk playout milliseconds: deprecate */
548 /* the below are internal only flags */
549 #define TCP_HYBRID_PACING_USER_MASK	0x0FFF		/* Non-internal flags mask */
550 #define TCP_HYBRID_PACING_SETMSS	0x1000		/* Internal flag that tells us we set the mss on this entry */
551 #define TCP_HYBRID_PACING_WASSET	0x2000		/* We init to this to know if a hybrid command was issued */
552 #define TCP_HYBRID_PACING_SENDTIME	0x4000		/* Duplicate tm to last, use sendtime for catch up mode */
553 
554 struct tcp_hybrid_req {
555 	struct tcp_snd_req req;
556 	uint64_t cspr;
557 	uint32_t hint_maxseg;
558 	uint32_t hybrid_flags;
559 };
560 
561 /*
562  * TCP specific variables of interest for tp->t_stats stats(9) accounting.
563  */
564 #define	VOI_TCP_TXPB		0 /* Transmit payload bytes */
565 #define	VOI_TCP_RETXPB		1 /* Retransmit payload bytes */
566 #define	VOI_TCP_FRWIN		2 /* Foreign receive window */
567 #define	VOI_TCP_LCWIN		3 /* Local congestion window */
568 #define	VOI_TCP_RTT		4 /* Round trip time */
569 #define	VOI_TCP_CSIG		5 /* Congestion signal */
570 #define	VOI_TCP_GPUT		6 /* Goodput */
571 #define	VOI_TCP_CALCFRWINDIFF	7 /* Congestion avoidance LCWIN - FRWIN */
572 #define	VOI_TCP_GPUT_ND		8 /* Goodput normalised delta */
573 #define	VOI_TCP_ACKLEN		9 /* Average ACKed bytes per ACK */
574 #define VOI_TCP_PATHRTT		10 /* The path RTT based on ACK arrival */
575 
576 #define TCP_REUSPORT_LB_NUMA_NODOM	(-2) /* remove numa binding */
577 #define TCP_REUSPORT_LB_NUMA_CURDOM	(-1) /* bind to current domain */
578 
579 #endif /* __BSD_VISIBLE */
580 #endif /* !_NETINET_TCP_H_ */
581