xref: /freebsd/sys/netinet/tcp_var.h (revision 46023d54c7c2c00d273b7499421315f216ff4989)
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1982, 1986, 1993, 1994, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
31df8bae1dSRodney W. Grimes 
32707f139eSPaul Richards #ifndef _NETINET_TCP_VAR_H_
33707f139eSPaul Richards #define _NETINET_TCP_VAR_H_
34be2ac88cSJonathan Lemon 
35f76fcf6dSJeffrey Hsu #include <netinet/tcp.h>
3657a78e3bSGleb Smirnoff #include <netinet/tcp_fsm.h>
37be2ac88cSJonathan Lemon 
38eddfbb76SRobert Watson #ifdef _KERNEL
39eddfbb76SRobert Watson #include <net/vnet.h>
4086a996e6SHiren Panchasara #include <sys/mbuf.h>
4128d0a740SAndrew Gallatin #include <sys/ktls.h>
42cc65eb4eSGleb Smirnoff #endif
43f6dfe47aSMarko Zec 
/*
 * Number of bytes that make up the "end information array"
 * (t_end_info_bytes[] in struct tcpcb).
 */
#define TCP_END_BYTE_INFO		8

/*
 * Types of ending byte info.  Zero denotes an empty slot;
 * TCP_EI_STATUS_MAX_VALUE is the highest status code currently defined
 * and has to be raised whenever a new code is appended.
 */
#define TCP_EI_EMPTY_SLOT		0
#define TCP_EI_STATUS_CLIENT_FIN	0x1
#define TCP_EI_STATUS_CLIENT_RST	0x2
#define TCP_EI_STATUS_SERVER_FIN	0x3
#define TCP_EI_STATUS_SERVER_RST	0x4
#define TCP_EI_STATUS_RETRAN		0x5
#define TCP_EI_STATUS_PROGRESS		0x6
#define TCP_EI_STATUS_PERSIST_MAX	0x7
#define TCP_EI_STATUS_KEEP_MAX		0x8
#define TCP_EI_STATUS_DATA_A_CLOSE	0x9
#define TCP_EI_STATUS_RST_IN_FRONT	0xa
#define TCP_EI_STATUS_2MSL		0xb
#define TCP_EI_STATUS_MAX_VALUE		0xb
59e570d231SRandall Stewart 
/*
 * Request-tracking log event codes.
 * NOTE(review): the consumers of these codes are outside this header
 * section; the names suggest they label stages in the life of a tracked
 * request -- confirm against the logging code.
 */
#define TCP_TRK_REQ_LOG_NEW		0x01
#define TCP_TRK_REQ_LOG_COMPLETE	0x02
#define TCP_TRK_REQ_LOG_FREED		0x03
#define TCP_TRK_REQ_LOG_ALLOCFAIL	0x04
#define TCP_TRK_REQ_LOG_MOREYET		0x05
#define TCP_TRK_REQ_LOG_FORCEFREE	0x06
#define TCP_TRK_REQ_LOG_STALE		0x07
#define TCP_TRK_REQ_LOG_SEARCH		0x08
6873ee5756SRandall Stewart 
/************************************************/
/*
 * Status bits we track to assure no duplicates.
 * The bit names here are not used by the code;
 * they exist for human representation.  The bit
 * for a TCP_EI_STATUS_* value is 1 shifted left
 * by (value - 1), for values 1 through
 * TCP_EI_STATUS_MAX_VALUE.
 */
/************************************************/
#define TCP_EI_BITS_CLIENT_FIN	0x001
#define TCP_EI_BITS_CLIENT_RST	0x002
#define TCP_EI_BITS_SERVER_FIN	0x004
#define TCP_EI_BITS_SERVER_RST	0x008
#define TCP_EI_BITS_RETRAN	0x010
#define TCP_EI_BITS_PROGRESS	0x020
#define TCP_EI_BITS_PRESIST_MAX	0x040	/* (sic) name kept for compatibility */
#define TCP_EI_BITS_KEEP_MAX	0x080
#define TCP_EI_BITS_DATA_A_CLO	0x100
#define TCP_EI_BITS_RST_IN_FR	0x200	/* a front state reset */
#define TCP_EI_BITS_2MS_TIMER	0x400	/* 2 MSL timer expired */
87e570d231SRandall Stewart 
88cc65eb4eSGleb Smirnoff #if defined(_KERNEL) || defined(_WANT_TCPCB)
891d14e88eSMark Johnston #include <sys/_callout.h>
901d14e88eSMark Johnston #include <sys/osd.h>
911d14e88eSMark Johnston 
92e68b3792SGleb Smirnoff #include <netinet/cc/cc.h>
93e68b3792SGleb Smirnoff 
944741bfcbSPatrick Kelsey /* TCP segment queue entry */
954741bfcbSPatrick Kelsey struct tseg_qent {
96c28440dbSRandall Stewart 	TAILQ_ENTRY(tseg_qent) tqe_q;
974741bfcbSPatrick Kelsey 	struct	mbuf   *tqe_m;		/* mbuf contains packet */
98c28440dbSRandall Stewart 	struct  mbuf   *tqe_last;	/* last mbuf in chain */
99c28440dbSRandall Stewart 	tcp_seq tqe_start;		/* TCP Sequence number start */
100c28440dbSRandall Stewart 	int	tqe_len;		/* TCP segment data length */
1011ebf4607SRichard Scheffenegger 	uint32_t tqe_flags;		/* The flags from tcp_get_flags() */
102c28440dbSRandall Stewart 	uint32_t tqe_mbuf_cnt;		/* Count of mbuf overhead */
1034741bfcbSPatrick Kelsey };
104c28440dbSRandall Stewart TAILQ_HEAD(tsegqe_head, tseg_qent);
1054741bfcbSPatrick Kelsey 
1066d90faf3SPaul Saab struct sackblk {
1076d90faf3SPaul Saab 	tcp_seq start;		/* start seq no. of sack block */
1086d90faf3SPaul Saab 	tcp_seq end;		/* end seq no. */
1096d90faf3SPaul Saab };
1106d90faf3SPaul Saab 
1116d90faf3SPaul Saab struct sackhole {
1126d90faf3SPaul Saab 	tcp_seq start;		/* start seq no. of hole */
1136d90faf3SPaul Saab 	tcp_seq end;		/* end seq no. */
1146d90faf3SPaul Saab 	tcp_seq rxmit;		/* next seq. no in hole to be retransmitted */
115a6235da6SPaul Saab 	TAILQ_ENTRY(sackhole) scblink;	/* scoreboard linkage */
1166d90faf3SPaul Saab };
11737332f04SBruce M Simpson 
1180077b016SPaul Saab struct sackhint {
1190077b016SPaul Saab 	struct sackhole	*nexthole;
120f359d6ebSRichard Scheffenegger 	int32_t		sack_bytes_rexmit;
121bee9ab2bSLawrence Stewart 	tcp_seq		last_sack_ack;	/* Most recent/largest sacked ack */
122962ebef8SLawrence Stewart 
123f359d6ebSRichard Scheffenegger 	int32_t		delivered_data; /* Newly acked data from last SACK */
124f359d6ebSRichard Scheffenegger 
125f359d6ebSRichard Scheffenegger 	int32_t		sacked_bytes;	/* Total sacked bytes reported by the
12612eeb81fSHiren Panchasara 					 * receiver via sack option
12712eeb81fSHiren Panchasara 					 */
1280e1d7c25SRichard Scheffenegger 	uint32_t	recover_fs;	/* Flight Size at the start of Loss recovery */
1290e1d7c25SRichard Scheffenegger 	uint32_t	prr_delivered;	/* Total bytes delivered using PRR */
130e5313869SRichard Scheffenegger 	uint32_t	prr_out;	/* Bytes sent during IN_RECOVERY */
13122dc8609SRichard Scheffenegger 	int32_t		hole_bytes;	/* current number of bytes in scoreboard holes */
13222dc8609SRichard Scheffenegger 	int32_t		lost_bytes;	/* number of rfc6675 IsLost() bytes */
1330077b016SPaul Saab };
1340077b016SPaul Saab 
/*
 * True when the t_segq list of the control block pointed to by tp holds
 * no entries.
 */
#define SEGQ_EMPTY(tp) TAILQ_EMPTY(&(tp)->t_segq)

/* Head type for a singly-linked tail queue of struct tcp_log_mem entries. */
STAILQ_HEAD(tcp_log_stailq, tcp_log_mem);
1382529f56eSJonathan T. Looney 
/* State flags for one tracked sendfile request. */
#define TCP_TRK_TRACK_FLG_EMPTY	0x00	/* Available */
#define TCP_TRK_TRACK_FLG_USED	0x01	/* In use */
#define TCP_TRK_TRACK_FLG_OPEN	0x02	/* End is not valid (open range request) */
#define TCP_TRK_TRACK_FLG_SEQV	0x04	/* We had a sendfile that touched it */
#define TCP_TRK_TRACK_FLG_COMP	0x08	/* Sendfile has placed the last bits (range req only) */
#define TCP_TRK_TRACK_FLG_FSND	0x10	/* First send has been done into the seq space */
#define TCP_TRK_TRACK_FLG_LSND	0x20	/* We were able to set the Last Sent */
#define MAX_TCP_TRK_REQ		5	/* Max we will have at once */
14773ee5756SRandall Stewart 
14857a3a161SRandall Stewart struct tcp_sendfile_track {
14973ee5756SRandall Stewart 	uint64_t timestamp;	/* User sent timestamp */
15073ee5756SRandall Stewart 	uint64_t start;		/* Start of sendfile offset */
15173ee5756SRandall Stewart 	uint64_t end;		/* End if not open-range req */
15273ee5756SRandall Stewart 	uint64_t localtime;	/* Time we actually got the req */
15373ee5756SRandall Stewart 	uint64_t deadline;	/* If in CU mode, deadline to delivery */
15473ee5756SRandall Stewart 	uint64_t first_send;	/* Time of first send in the range */
15573ee5756SRandall Stewart 	uint64_t cspr;		/* Client suggested pace rate */
15673ee5756SRandall Stewart 	uint64_t sent_at_fs;	/* What was t_sndbytes as we begun sending */
15773ee5756SRandall Stewart 	uint64_t rxt_at_fs;	/* What was t_snd_rxt_bytes as we begun sending */
158e18b97bdSRandall Stewart 	uint64_t sent_at_ls;	/* Sent value at the last send */
159e18b97bdSRandall Stewart 	uint64_t rxt_at_ls;	/* Retransmit value at the last send */
16073ee5756SRandall Stewart 	tcp_seq start_seq;	/* First TCP Seq assigned */
16173ee5756SRandall Stewart 	tcp_seq end_seq;	/* If range req last seq */
16273ee5756SRandall Stewart 	uint32_t flags;		/* Type of request open etc */
16373ee5756SRandall Stewart 	uint32_t sbcc_at_s;	/* When we allocate what is the sb_cc */
16473ee5756SRandall Stewart 	uint32_t hint_maxseg;	/* Client hinted maxseg */
165e18b97bdSRandall Stewart 	uint32_t playout_ms;	/* Client playout ms */
16673ee5756SRandall Stewart 	uint32_t hybrid_flags;	/* Hybrid flags on this request */
16773ee5756SRandall Stewart };
16873ee5756SRandall Stewart 
16973ee5756SRandall Stewart 
/*
 * Change Query responses: for a stack switch we create a structure
 * that allows query response from the new stack to the old, if
 * supported.
 *
 * There are three queries currently defined.
 *  - sendmap
 *  - timers
 *  - rack_times
 *
 * For the sendmap query the caller fills in the
 * req and the req_param as the first seq (usually
 * snd_una). When the response comes back indicating
 * that there was data (return value 1), then the caller
 * can build a sendmap entry based on the range and the
 * times. The next query would then be done at the
 * newly created sendmap_end. Repeated until sendmap_end == snd_max.
 *
 * Flags in sendmap_flags are defined below as well.
 *
 * For timers the standard PACE_TMR_XXXX flags are returned indicating
 * a pacing timer (possibly) and one other timer. If pacing timer then
 * the expiration timeout time in microseconds is in timer_pacing_to.
 * And the value used with whatever timer (if a flag is set) is in
 * timer_timer_exp. If no timers are running a 0 is returned and of
 * course no flags are set in timer_hpts_flags.
 *
 * The rack_times are a misc collection of information that
 * the old stack might possibly fill in. Of course it's possible
 * that an old stack may not have a piece of information. If so
 * then setting that value to zero is advised. Setting any
 * timestamp passed should only place a zero in it when it
 * is unfilled. This may mean that a time is off by a micro-second
 * but this is ok in the grand scheme of things.
 *
 * When switching stacks it is desirable to get as much information
 * from the old stack to the new stack as possible. Though not always
 * will the stack be compatible in the types of information. The
 * init() function needs to take care when it begins changing
 * things such as inp_flags2 and the timer units to position these
 * changes at a point where it is unlikely they will fail after
 * making such changes. A stack optionally can have an "undo"
 * function.
 *
 * To transfer information to the old stack from the new in
 * respect to LRO and the inp_flags2, the new stack should set
 * the inp_flags2 to what it supports. The old stack in its
 * fini() function should call the tcp_handle_orphaned_packets()
 * to clean up any packets. Note that a new stack should attempt
 * ...
 * NOTE(review): the sentence above is cut off in this file; the
 * intended guidance cannot be recovered from here.
 */
22073ee5756SRandall Stewart 
/* Query types (placed in the req field of struct tcp_query_resp) */
#define TCP_QUERY_SENDMAP	1
#define TCP_QUERY_TIMERS_UP	2
#define TCP_QUERY_RACK_TIMES	3

/* Flags returned in sendmap_flags */
#define SNDMAP_ACKED		0x000001	/* The remote endpoint acked this */
#define SNDMAP_OVERMAX		0x000008	/* We have more retransmits than we can fit */
#define SNDMAP_SACK_PASSED	0x000010	/* A sack was done above this block */
#define SNDMAP_HAS_FIN		0x000040	/* segment is sent with fin */
#define SNDMAP_TLP		0x000080	/* segment sent as tail-loss-probe */
#define SNDMAP_HAS_SYN		0x000800	/* SYN is on this guy */
#define SNDMAP_HAD_PUSH		0x008000	/* Push was sent on original send */
/* Union of every SNDMAP_* flag defined above. */
#define SNDMAP_MASK  (SNDMAP_ACKED|SNDMAP_OVERMAX|SNDMAP_SACK_PASSED|SNDMAP_HAS_FIN\
		      |SNDMAP_TLP|SNDMAP_HAS_SYN|SNDMAP_HAD_PUSH)
/* Number of slots in the sendmap_time[] array of struct tcp_query_resp. */
#define SNDMAP_NRTX 3
23773ee5756SRandall Stewart 
23873ee5756SRandall Stewart struct tcp_query_resp {
23973ee5756SRandall Stewart 	int req;
24073ee5756SRandall Stewart 	uint32_t req_param;
24173ee5756SRandall Stewart 	union {
24273ee5756SRandall Stewart 		struct {
24373ee5756SRandall Stewart 			tcp_seq sendmap_start;
24473ee5756SRandall Stewart 			tcp_seq sendmap_end;
24573ee5756SRandall Stewart 			int sendmap_send_cnt;
24673ee5756SRandall Stewart 			uint64_t sendmap_time[SNDMAP_NRTX];
24773ee5756SRandall Stewart 			uint64_t sendmap_ack_arrival;
24873ee5756SRandall Stewart 			int sendmap_flags;
24973ee5756SRandall Stewart 			uint32_t sendmap_r_rtr_bytes;
25073ee5756SRandall Stewart 			/* If FAS is available if not 0 */
25173ee5756SRandall Stewart 			uint32_t sendmap_fas;
25273ee5756SRandall Stewart 			uint8_t sendmap_dupacks;
25373ee5756SRandall Stewart 		};
25473ee5756SRandall Stewart 		struct {
25573ee5756SRandall Stewart 			uint32_t timer_hpts_flags;
25673ee5756SRandall Stewart 			uint32_t timer_pacing_to;
25773ee5756SRandall Stewart 			uint32_t timer_timer_exp;
25873ee5756SRandall Stewart 		};
25973ee5756SRandall Stewart 		struct {
26073ee5756SRandall Stewart 			/* Timestamps and rtt's */
26173ee5756SRandall Stewart 			uint32_t rack_reorder_ts;	/* Last uscts that reordering was seen */
26273ee5756SRandall Stewart 			uint32_t rack_num_dsacks;	/* Num of dsacks seen */
26373ee5756SRandall Stewart 			uint32_t rack_rxt_last_time; 	/* Last time a RXT/TLP or rack tmr  went off */
26473ee5756SRandall Stewart 			uint32_t rack_min_rtt;		/* never 0 smallest rtt seen */
26573ee5756SRandall Stewart 			uint32_t rack_rtt;		/* Last rtt used by rack */
26673ee5756SRandall Stewart 			uint32_t rack_tmit_time;	/* The time the rtt seg was tmited */
26773ee5756SRandall Stewart 			uint32_t rack_time_went_idle;	/* If in persist the time we went idle */
26873ee5756SRandall Stewart 			/* Prr data  */
26973ee5756SRandall Stewart 			uint32_t rack_sacked;
27073ee5756SRandall Stewart 			uint32_t rack_holes_rxt;
27173ee5756SRandall Stewart 			uint32_t rack_prr_delivered;
27273ee5756SRandall Stewart 			uint32_t rack_prr_recovery_fs;
27373ee5756SRandall Stewart 			uint32_t rack_prr_out;
27473ee5756SRandall Stewart 			uint32_t rack_prr_sndcnt;
27573ee5756SRandall Stewart 			/* TLP data */
27673ee5756SRandall Stewart 			uint16_t rack_tlp_cnt_out;	/* How many tlp's have been sent */
27773ee5756SRandall Stewart 			/* Various bits */
27873ee5756SRandall Stewart 			uint8_t  rack_tlp_out;		/* Is a TLP outstanding */
27973ee5756SRandall Stewart 			uint8_t  rack_srtt_measured;	/* The previous stack has measured srtt */
28073ee5756SRandall Stewart 			uint8_t  rack_in_persist;	/* Is the old stack in persists? */
28173ee5756SRandall Stewart 			uint8_t	 rack_wanted_output;	/* Did the prevous stack have a want output set */
28273ee5756SRandall Stewart 		};
28373ee5756SRandall Stewart 	};
28473ee5756SRandall Stewart };
28573ee5756SRandall Stewart 
/* Timer unit selectors (see t_tmr_granularity in struct tcpcb). */
#define TCP_TMR_GRANULARITY_TICKS	1	/* TCP timers are in ticks (msec if hz=1000) */
#define TCP_TMR_GRANULARITY_USEC	2	/* TCP timers are in microseconds */
28873ee5756SRandall Stewart 
/*
 * TCP timer identifiers.  TT_N is not a timer: it is the number of
 * timers and dimensions the t_timers[] and t_precisions[] arrays in
 * struct tcpcb, so it has to stay last.
 */
typedef enum {
	TT_REXMT = 0,
	TT_PERSIST,
	TT_KEEP,
	TT_2MSL,
	TT_DELACK,
	TT_N,
} tt_which;
297446ccdd0SGleb Smirnoff 
/*
 * Stage of timer handling being described.
 * NOTE(review): no user of this type is visible in this part of the
 * header -- confirm where the values are consumed.
 */
typedef enum {
	TT_PROCESSING = 0,
	TT_PROCESSED,
	TT_STARTING,
	TT_STOPPING,
} tt_what;
30476578d60SMichael Tuexen 
30555bceb1eSRandall Stewart /*
306e68b3792SGleb Smirnoff  * Tcp control block, one per tcp connection.
307df8bae1dSRodney W. Grimes  */
308df8bae1dSRodney W. Grimes struct tcpcb {
3099aff05bbSJohn Baldwin 	struct inpcb t_inpcb;		/* embedded protocol independent cb */
310e68b3792SGleb Smirnoff #define	t_start_zero	t_fb
311e68b3792SGleb Smirnoff #define	t_zero_size	(sizeof(struct tcpcb) - \
312e68b3792SGleb Smirnoff 			    offsetof(struct tcpcb, t_start_zero))
313803a2305SRandall Stewart 	struct tcp_function_block *t_fb;/* TCP function call block */
314803a2305SRandall Stewart 	void	*t_fb_ptr;		/* Pointer to t_fb specific data */
315e68b3792SGleb Smirnoff 
316446ccdd0SGleb Smirnoff 	struct callout t_callout;
317446ccdd0SGleb Smirnoff 	sbintime_t t_timers[TT_N];
318446ccdd0SGleb Smirnoff 	sbintime_t t_precisions[TT_N];
319e68b3792SGleb Smirnoff 
320c2a69e84SGleb Smirnoff 	/* HPTS. Used by BBR and Rack stacks. See tcp_hpts.c for more info. */
321c2a69e84SGleb Smirnoff 	TAILQ_ENTRY(tcpcb)	t_hpts;		/* linkage to HPTS ring */
322c2a69e84SGleb Smirnoff 	STAILQ_HEAD(, mbuf)	t_inqueue;	/* HPTS input packets queue */
323c2a69e84SGleb Smirnoff 	uint32_t t_hpts_request;	/* Current hpts request, zero if
324c2a69e84SGleb Smirnoff 					 * fits in the pacing window. */
325c2a69e84SGleb Smirnoff 	uint32_t t_hpts_slot;		/* HPTS wheel slot this tcb is. */
326c2a69e84SGleb Smirnoff 	uint32_t t_hpts_drop_reas;	/* Reason we are dropping the pcb. */
327c2a69e84SGleb Smirnoff 	uint32_t t_hpts_gencnt;
328c2a69e84SGleb Smirnoff 	uint16_t t_hpts_cpu;		/* CPU chosen by hpts_cpuid(). */
329c2a69e84SGleb Smirnoff 	uint16_t t_lro_cpu;		/* CPU derived from LRO. */
330c2a69e84SGleb Smirnoff #define	HPTS_CPU_NONE	((uint16_t)-1)
331c2a69e84SGleb Smirnoff 	enum {
332c2a69e84SGleb Smirnoff 		IHPTS_NONE = 0,
333c2a69e84SGleb Smirnoff 		IHPTS_ONQUEUE,
334c2a69e84SGleb Smirnoff 		IHPTS_MOVING,
335c2a69e84SGleb Smirnoff 	} t_in_hpts;			/* Is it linked into HPTS? */
336c2a69e84SGleb Smirnoff 
337803a2305SRandall Stewart 	uint32_t t_maxseg:24,		/* maximum segment size */
33869c7c811SRandall Stewart 		_t_logstate:8;		/* State of "black box" logging */
33989e560f4SRandall Stewart 	uint32_t t_port:16,		/* Tunneling (over udp) port */
34089e560f4SRandall Stewart 		t_state:4,		/* state of this connection */
34189e560f4SRandall Stewart 		t_idle_reduce : 1,
34289e560f4SRandall Stewart 		t_delayed_ack: 7,	/* Delayed ack variable */
3433b0b41e6SRandall Stewart 		t_fin_is_rst: 1,	/* Are fin's treated as resets */
344a9a08eceSRandall Stewart 		t_log_state_set: 1,
345a9a08eceSRandall Stewart 		bits_spare : 2;
3466bb9a8e7SGarrett Wollman 	u_int	t_flags;
34703041aaaSHiren Panchasara 	tcp_seq	snd_una;		/* sent but unacknowledged */
348df8bae1dSRodney W. Grimes 	tcp_seq	snd_max;		/* highest sequence number sent;
349df8bae1dSRodney W. Grimes 					 * used to recognize retransmits
350df8bae1dSRodney W. Grimes 					 */
351c3229e05SDavid Greenman 	tcp_seq snd_nxt;		/* send next */
352c3229e05SDavid Greenman 	tcp_seq snd_up;			/* send urgent pointer */
353803a2305SRandall Stewart 	uint32_t snd_wnd;		/* send window */
354803a2305SRandall Stewart 	uint32_t snd_cwnd;		/* congestion-controlled window */
355c3738466SGleb Smirnoff 	uint32_t ts_offset;		/* our timestamp offset */
356c3738466SGleb Smirnoff 	uint32_t rfbuf_ts;		/* recv buffer autoscaling timestamp */
357803a2305SRandall Stewart 	int	rcv_numsacks;		/* # distinct sack blks present */
358c3738466SGleb Smirnoff 	u_int	t_tsomax;		/* TSO total burst length limit */
359803a2305SRandall Stewart 	u_int	t_tsomaxsegcount;	/* TSO maximum segment count */
360803a2305SRandall Stewart 	u_int	t_tsomaxsegsize;	/* TSO maximum segment size in bytes */
361c3229e05SDavid Greenman 	tcp_seq	rcv_nxt;		/* receive next */
362c3229e05SDavid Greenman 	tcp_seq	rcv_adv;		/* advertised window */
3633ac12506SJonathan T. Looney 	uint32_t rcv_wnd;		/* receive window */
364803a2305SRandall Stewart 	u_int	t_flags2;		/* More tcpcb flags storage */
365803a2305SRandall Stewart 	int	t_srtt;			/* smoothed round-trip time */
366803a2305SRandall Stewart 	int	t_rttvar;		/* variance in round-trip time */
367c3738466SGleb Smirnoff 	uint32_t ts_recent;		/* timestamp echo data */
368803a2305SRandall Stewart 	u_char	snd_scale;		/* window scaling for send window */
369803a2305SRandall Stewart 	u_char	rcv_scale;		/* window scaling for recv window */
370803a2305SRandall Stewart 	u_char	snd_limited;		/* segments limited transmitted */
371803a2305SRandall Stewart 	u_char	request_r_scale;	/* pending window scaling */
372803a2305SRandall Stewart 	tcp_seq	last_ack_sent;
373803a2305SRandall Stewart 	u_int	t_rcvtime;		/* inactivity time */
374c3229e05SDavid Greenman 	tcp_seq	rcv_up;			/* receive urgent pointer */
375803a2305SRandall Stewart 	int	t_segqlen;		/* segment reassembly queue length */
376c3738466SGleb Smirnoff 	uint32_t t_segqmbuflen;		/* total reassembly queue byte length */
377803a2305SRandall Stewart 	struct	tsegqe_head t_segq;	/* segment reassembly queue */
3783ac12506SJonathan T. Looney 	uint32_t snd_ssthresh;		/* snd_cwnd size threshold for
379df8bae1dSRodney W. Grimes 					 * for slow start exponential to
380df8bae1dSRodney W. Grimes 					 * linear switch
381df8bae1dSRodney W. Grimes 					 */
382803a2305SRandall Stewart 	tcp_seq	snd_wl1;		/* window update seg seq number */
383803a2305SRandall Stewart 	tcp_seq	snd_wl2;		/* window update seg ack number */
384803a2305SRandall Stewart 
385803a2305SRandall Stewart 	tcp_seq	irs;			/* initial receive sequence number */
386803a2305SRandall Stewart 	tcp_seq	iss;			/* initial send sequence number */
3879dc7d8a2SRichard Scheffenegger 	u_int	t_acktime;		/* RACK and BBR incoming new data was acked */
3889dc7d8a2SRichard Scheffenegger 	u_int	t_sndtime;		/* time last data was sent */
389803a2305SRandall Stewart 	u_int	ts_recent_age;		/* when last updated */
390cb942153SJeffrey Hsu 	tcp_seq	snd_recover;		/* for use in NewReno Fast Recovery */
391df8bae1dSRodney W. Grimes 	char	t_oobflags;		/* have some */
392df8bae1dSRodney W. Grimes 	char	t_iobc;			/* input character */
393c0e4090eSAndrew Gallatin 	uint8_t t_nic_ktls_xmit:1,	/* active nic ktls xmit sessions */
394c0e4090eSAndrew Gallatin 		t_nic_ktls_xmit_dis:1,	/* disabled nic xmit ktls? */
395c0e4090eSAndrew Gallatin 		t_nic_ktls_spare:6;	/* spare nic ktls */
396803a2305SRandall Stewart 	int	t_rxtcur;		/* current retransmit value (ticks) */
397bf6d304aSAndre Oppermann 
398803a2305SRandall Stewart 	int	t_rxtshift;		/* log(2) of rexmt exp. backoff */
399803a2305SRandall Stewart 	u_int	t_rtttime;		/* RTT measurement start time */
400803a2305SRandall Stewart 
401803a2305SRandall Stewart 	tcp_seq	t_rtseq;		/* sequence number being timed */
402803a2305SRandall Stewart 	u_int	t_starttime;		/* time connection was established */
403c3738466SGleb Smirnoff 	u_int	t_fbyte_in;		/* ticks time first byte queued in */
404c3738466SGleb Smirnoff 	u_int	t_fbyte_out;		/* ticks time first byte queued out */
405803a2305SRandall Stewart 
406803a2305SRandall Stewart 	u_int	t_pmtud_saved_maxseg;	/* pre-blackhole MSS */
407b89af8e1SMichael Tuexen 	int	t_blackhole_enter;	/* when to enter blackhole detection */
408b89af8e1SMichael Tuexen 	int	t_blackhole_exit;	/* when to exit blackhole detection */
409803a2305SRandall Stewart 	u_int	t_rttmin;		/* minimum rtt allowed */
410803a2305SRandall Stewart 
411803a2305SRandall Stewart 	int	t_softerror;		/* possible error not yet reported */
412803a2305SRandall Stewart 	uint32_t max_sndwnd;		/* largest window peer has offered */
4133ac12506SJonathan T. Looney 	uint32_t snd_cwnd_prev;		/* cwnd prior to retransmit */
4143ac12506SJonathan T. Looney 	uint32_t snd_ssthresh_prev;	/* ssthresh prior to retransmit */
4159d11646dSJeffrey Hsu 	tcp_seq	snd_recover_prev;	/* snd_recover prior to retransmit */
416f5d34df5SGeorge V. Neville-Neil 	int	t_sndzerowin;		/* zero-window updates sent */
4176d90faf3SPaul Saab 	int	snd_numholes;		/* number of holes seen by sender */
418803a2305SRandall Stewart 	u_int	t_badrxtwin;		/* window for retransmit recovery */
4199d17a7a6SPaul Saab 	TAILQ_HEAD(sackhole_head, sackhole) snd_holes;
4209d17a7a6SPaul Saab 					/* SACK scoreboard (sorted) */
421808f11b7SPaul Saab 	tcp_seq	snd_fack;		/* last seq number(+1) sack'd by rcv'r*/
422803a2305SRandall Stewart 	struct sackblk sackblks[MAX_SACK_BLKS]; /* seq nos. of sack blocks */
4230077b016SPaul Saab 	struct sackhint	sackhint;	/* SACK scoreboard hint */
424eaf80179SAndre Oppermann 	int	t_rttlow;		/* smallest observerved RTT */
4256741ecf5SAndre Oppermann 	int	rfbuf_cnt;		/* recv buffer autoscaling byte count */
42609fe6320SNavdeep Parhar 	struct toedev	*tod;		/* toedev handling this connection */
427f5d34df5SGeorge V. Neville-Neil 	int	t_sndrexmitpack;	/* retransmit packets sent */
428f5d34df5SGeorge V. Neville-Neil 	int	t_rcvoopack;		/* out-of-order packets received */
429620721dbSKip Macy 	void	*t_toe;			/* TOE pcb pointer */
430e68b3792SGleb Smirnoff 	struct cc_algo	*t_cc;		/* congestion control algorithm */
431e68b3792SGleb Smirnoff 	struct cc_var	t_ccv;		/* congestion control specific vars */
432803a2305SRandall Stewart 	int	t_bytes_acked;		/* # bytes acked during current RTT */
43389e560f4SRandall Stewart 	u_int	t_maxunacktime;
4349077f387SGleb Smirnoff 	u_int	t_keepinit;		/* time to establish connection */
4359077f387SGleb Smirnoff 	u_int	t_keepidle;		/* time before keepalive probes begin */
4369077f387SGleb Smirnoff 	u_int	t_keepintvl;		/* interval between keepalives */
4379077f387SGleb Smirnoff 	u_int	t_keepcnt;		/* number of keepalives before close */
438803a2305SRandall Stewart 	int	t_dupacks;		/* consecutive dup acks recd */
4392529f56eSJonathan T. Looney 	int	t_lognum;		/* Number of log entries */
440a9a08eceSRandall Stewart 	int	t_loglimit;		/* Maximum number of log entries */
4410c7f3ae8SGleb Smirnoff 	uint32_t t_rcep;		/* Number of received CE marked pkts */
4420c7f3ae8SGleb Smirnoff 	uint32_t t_scep;		/* Synced number of delivered CE pkts */
443ce398115SJohn Baldwin 	int64_t	t_pacing_rate;		/* bytes / sec, -1 => unlimited */
444803a2305SRandall Stewart 	struct tcp_log_stailq t_logs;	/* Log buffer */
4452529f56eSJonathan T. Looney 	struct tcp_log_id_node *t_lin;
4462529f56eSJonathan T. Looney 	struct tcp_log_id_bucket *t_lib;
4472529f56eSJonathan T. Looney 	const char *t_output_caller;	/* Function that called tcp_output */
448adc56f5aSEdward Tomasz Napierala 	struct statsblob *t_stats;	/* Per-connection stats */
44969c7c811SRandall Stewart 	/* Should these be a pointer to the arrays or an array? */
450803a2305SRandall Stewart 	uint32_t t_logsn;		/* Log "serial number" */
451adc56f5aSEdward Tomasz Napierala 	uint32_t gput_ts;		/* Time goodput measurement started */
452adc56f5aSEdward Tomasz Napierala 	tcp_seq gput_seq;		/* Outbound measurement seq */
453adc56f5aSEdward Tomasz Napierala 	tcp_seq gput_ack;		/* Inbound measurement ack */
454adc56f5aSEdward Tomasz Napierala 	int32_t t_stats_gput_prev;	/* XXXLAS: Prev gput measurement */
4555d8fd932SRandall Stewart 	uint32_t t_sndtlppack;		/* tail loss probe packets sent */
4565d8fd932SRandall Stewart 	uint64_t t_sndtlpbyte;		/* total tail loss probe bytes sent */
45767e89281SRandall Stewart 	uint64_t t_sndbytes;		/* total bytes sent */
45867e89281SRandall Stewart 	uint64_t t_snd_rxt_bytes;	/* total bytes retransmitted */
459c3738466SGleb Smirnoff 	uint32_t t_dsack_bytes;		/* dsack bytes received */
460c3738466SGleb Smirnoff 	uint32_t t_dsack_tlp_bytes;	/* dsack bytes received for TLPs sent */
461c3738466SGleb Smirnoff 	uint32_t t_dsack_pack;		/* dsack packets we have eceived */
46273ee5756SRandall Stewart 	uint8_t t_tmr_granularity;	/* Granularity of all timers srtt etc */
46318b83b62SRichard Scheffenegger 	uint8_t t_rttupdated;		/* number of times rtt sampled */
464c3738466SGleb Smirnoff 	/* TCP Fast Open */
465c3738466SGleb Smirnoff 	uint8_t t_tfo_client_cookie_len; /* TFO client cookie length */
466e570d231SRandall Stewart 	uint32_t t_end_info_status;	/* Status flag of end info */
46740299c55SMichael Tuexen 	sbintime_t t_challenge_ack_end;	/* End of the challenge ack epoch */
46840299c55SMichael Tuexen 	uint32_t t_challenge_ack_cnt;	/* Number of challenge ACKs sent in
46940299c55SMichael Tuexen 					 * current epoch
47040299c55SMichael Tuexen 					 */
47140299c55SMichael Tuexen 
472c3738466SGleb Smirnoff 	unsigned int *t_tfo_pending;	/* TFO server pending counter */
473c560df6fSPatrick Kelsey 	union {
474c560df6fSPatrick Kelsey 		uint8_t client[TCP_FASTOPEN_MAX_COOKIE_LEN];
475c560df6fSPatrick Kelsey 		uint64_t server;
476c560df6fSPatrick Kelsey 	} t_tfo_cookie;			/* TCP Fast Open cookie to send */
477e570d231SRandall Stewart 	union {
478e570d231SRandall Stewart 		uint8_t t_end_info_bytes[TCP_END_BYTE_INFO];
479e570d231SRandall Stewart 		uint64_t t_end_info;
480e570d231SRandall Stewart 	};
481e68b3792SGleb Smirnoff 	struct osd	t_osd;		/* storage for Khelp module data */
48269c7c811SRandall Stewart 	uint8_t _t_logpoint;	/* Used when a BB log points is enabled */
483cf32543fSMichael Tuexen 	/*
484cf32543fSMichael Tuexen 	 * Keep all #ifdef'ed components at the end of the structure!
485cf32543fSMichael Tuexen 	 * This is important to minimize problems when compiling modules
486cf32543fSMichael Tuexen 	 * using this structure from within the modules' directory.
487cf32543fSMichael Tuexen 	 */
48873ee5756SRandall Stewart #ifdef TCP_REQUEST_TRK
48973ee5756SRandall Stewart 	/* Response tracking addons. */
49057a3a161SRandall Stewart 	uint8_t t_tcpreq_req;	/* Request count */
49157a3a161SRandall Stewart 	uint8_t t_tcpreq_open;	/* Number of open range requests */
49257a3a161SRandall Stewart 	uint8_t t_tcpreq_closed;	/* Number of closed range requests */
493e4a873bfSMichael Tuexen 	uint32_t tcp_hybrid_start;	/* Num of times we started hybrid pacing */
494e4a873bfSMichael Tuexen 	uint32_t tcp_hybrid_stop;	/* Num of times we stopped hybrid pacing */
495e4a873bfSMichael Tuexen 	uint32_t tcp_hybrid_error;	/* Num of times we failed to start hybrid pacing */
49657a3a161SRandall Stewart 	struct tcp_sendfile_track t_tcpreq_info[MAX_TCP_TRK_REQ];
49773ee5756SRandall Stewart #endif
498e4a873bfSMichael Tuexen #ifdef TCP_ACCOUNTING
499e4a873bfSMichael Tuexen 	uint64_t tcp_cnt_counters[TCP_NUM_CNT_COUNTERS];
500e4a873bfSMichael Tuexen 	uint64_t tcp_proc_time[TCP_NUM_CNT_COUNTERS];
501e4a873bfSMichael Tuexen #endif
502df8bae1dSRodney W. Grimes };
503cc65eb4eSGleb Smirnoff #endif	/* _KERNEL || _WANT_TCPCB */
504cc65eb4eSGleb Smirnoff 
505cc65eb4eSGleb Smirnoff #ifdef _KERNEL
506cc65eb4eSGleb Smirnoff struct tcptemp {
507cc65eb4eSGleb Smirnoff 	u_char	tt_ipgen[40]; /* the size must be of max ip header, now IPv6 */
508cc65eb4eSGleb Smirnoff 	struct	tcphdr tt_t;
509cc65eb4eSGleb Smirnoff };
510cc65eb4eSGleb Smirnoff 
/*
 * Status reported by a SACK scoreboard update.
 */
typedef enum {
	SACK_NOCHANGE = 0,
	SACK_CHANGE = 1,
	SACK_NEWLOSS = 2
} sackstatus_t;
51749a6fbe3SRichard Scheffenegger 
/* Limits and default for the TCP/UDP tunneling port */
#define TCP_TUNNELING_PORT_MIN		0
#define TCP_TUNNELING_PORT_MAX		65535
#define TCP_TUNNELING_PORT_DEFAULT	0

/*
 * Limits and default for the TCP/UDP tunneling overhead.  The minimum is
 * the size of a UDP header; the default equals the minimum.
 */
#define TCP_TUNNELING_OVERHEAD_MIN	(sizeof(struct udphdr))
#define TCP_TUNNELING_OVERHEAD_MAX	1024
#define TCP_TUNNELING_OVERHEAD_DEFAULT	TCP_TUNNELING_OVERHEAD_MIN

/* Minimum map entries limit value, if set */
#define TCP_MIN_MAP_ENTRIES_LIMIT	128
5301cf55767SRandall Stewart 
/*
 * Flags for tcp functions (bits kept in tfb_flags).
 */
#define	TCP_FUNC_BEING_REMOVED	0x01	/* Can no longer be referenced */
#define	TCP_FUNC_OUTPUT_CANDROP	0x02	/* tfb_tcp_output may ask tcp_drop */
#define	TCP_FUNC_DEFAULT_OK	0x04	/* Can be used as default */
535cc65eb4eSGleb Smirnoff 
53673ee5756SRandall Stewart /**
53786c9325dSMichael Tuexen  * tfb_tcp_handoff_ok is a mandatory function allowing
53886c9325dSMichael Tuexen  * to query a stack, if it can take over a tcpcb.
53986c9325dSMichael Tuexen  * You return 0 to say you can take over and run your stack,
54086c9325dSMichael Tuexen  * you return non-zero (an error number) to say no you can't.
54173ee5756SRandall Stewart  *
54273ee5756SRandall Stewart  * tfb_tcp_fb_init is used to allow the new stack to
54373ee5756SRandall Stewart  * setup its control block. Among the things it must
54473ee5756SRandall Stewart  * do is:
54573ee5756SRandall Stewart  * a) Make sure that the inp_flags2 is setup correctly
54673ee5756SRandall Stewart  *    for LRO. There are two flags that the previous
54773ee5756SRandall Stewart  *    stack may have set INP_MBUF_ACKCMP and
54873ee5756SRandall Stewart  *    INP_SUPPORTS_MBUFQ. If the new stack does not
54973ee5756SRandall Stewart  *    support these it *should* clear the flags.
55073ee5756SRandall Stewart  * b) Make sure that the timers are in the proper
55173ee5756SRandall Stewart  *    granularity that the stack wants. The stack
55273ee5756SRandall Stewart  *    should check the t_tmr_granularity field. Currently
55373ee5756SRandall Stewart  *    there are two values that it may hold
55473ee5756SRandall Stewart  *    TCP_TMR_GRANULARITY_TICKS and TCP_TMR_GRANULARITY_USEC.
55573ee5756SRandall Stewart  *    Use the functions tcp_timer_convert(tp, granularity);
55673ee5756SRandall Stewart  *    to move the timers to the correct format for your stack.
55773ee5756SRandall Stewart  *
55873ee5756SRandall Stewart  * The new stack may also optionally query the tfb_chg_query
55973ee5756SRandall Stewart  * function if the old stack has one. The new stack may ask
56073ee5756SRandall Stewart  * for one of three entries and can also state to the old
56173ee5756SRandall Stewart  * stack its support for the INP_MBUF_ACKCMP and
56273ee5756SRandall Stewart  * INP_SUPPORTS_MBUFQ. This is important since if there are
56373ee5756SRandall Stewart  * queued ack's without that statement the old stack will
56473ee5756SRandall Stewart  * be forced to discard the queued acks. The requests that
56573ee5756SRandall Stewart  * can be made for information by the new stacks are:
56673ee5756SRandall Stewart  *
 * Note also that tfb_tcp_fb_init(), when called, can
 * determine if a query is needed by looking at the
 * value passed in the ptr. The ptr is designed to be
 * filled in with any allocated memory, but the
 * condition (ptr == &tp->t_fb_ptr) will be
 * true if this is not a stack switch but the initial
 * setup of a tcb (which means no query would be needed).
 * If, however, the value is not t_fb_ptr, then the caller
 * is in the middle of a stack switch and is the new stack.
 * A query would be appropriate (if the new stack supports
 * the query mechanism).
57873ee5756SRandall Stewart  *
57973ee5756SRandall Stewart  * TCP_QUERY_SENDMAP - Query of outstanding data.
58073ee5756SRandall Stewart  * TCP_QUERY_TIMERS_UP	- Query about running timers.
58173ee5756SRandall Stewart  * TCP_SUPPORTED_LRO - Declaration in req_param of
58273ee5756SRandall Stewart  *                     the inp_flags2 supported by
58373ee5756SRandall Stewart  *                     the new stack.
58473ee5756SRandall Stewart  * TCP_QUERY_RACK_TIMES	- Enquire about various timestamps
58573ee5756SRandall Stewart  *                        and states the old stack may be in.
58673ee5756SRandall Stewart  *
 * tfb_tcp_fb_fini is changed to add a flag to tell
 * the old stack if the tcb is being destroyed or
 * not. A one in the flag means the TCB is being
 * destroyed, a zero indicates it's transitioning to
 * another stack (via socket option). The
 * tfb_tcp_fb_fini() function itself should not change timers
 * or inp_flags2 (the tfb_tcp_fb_init() must do that). However
 * if the old stack supports the LRO mbuf queuing, and the new
 * stack does not communicate via chg messages that it too does,
 * it must assume it does not and free any queued mbufs.
59773ee5756SRandall Stewart  *
598cc65eb4eSGleb Smirnoff  */
599cc65eb4eSGleb Smirnoff struct tcp_function_block {
600cc65eb4eSGleb Smirnoff 	char tfb_tcp_block_name[TCP_FUNCTION_NAME_LEN_MAX];
601cc65eb4eSGleb Smirnoff 	int	(*tfb_tcp_output)(struct tcpcb *);
60235bc0bccSGleb Smirnoff 	void	(*tfb_tcp_do_segment)(struct tcpcb *, struct mbuf *,
60335bc0bccSGleb Smirnoff 		    struct tcphdr *, int, int, uint8_t);
60435bc0bccSGleb Smirnoff 	int      (*tfb_do_segment_nounlock)(struct tcpcb *, struct mbuf *,
60535bc0bccSGleb Smirnoff 		    struct tcphdr *, int, int, uint8_t, int, struct timeval *);
60635bc0bccSGleb Smirnoff 	int     (*tfb_do_queued_segments)(struct tcpcb *, int);
60766fbc19fSGleb Smirnoff 	int     (*tfb_tcp_ctloutput)(struct tcpcb *, struct sockopt *);
608cc65eb4eSGleb Smirnoff 	/* Optional memory allocation/free routine */
60973ee5756SRandall Stewart 	int	(*tfb_tcp_fb_init)(struct tcpcb *, void **);
610cc65eb4eSGleb Smirnoff 	void	(*tfb_tcp_fb_fini)(struct tcpcb *, int);
611cc65eb4eSGleb Smirnoff 	/* Optional timers, must define all if you define one */
612cc65eb4eSGleb Smirnoff 	int	(*tfb_tcp_timer_stop_all)(struct tcpcb *);
613cc65eb4eSGleb Smirnoff 	void	(*tfb_tcp_rexmit_tmr)(struct tcpcb *);
614cc65eb4eSGleb Smirnoff 	int	(*tfb_tcp_handoff_ok)(struct tcpcb *);
61573ee5756SRandall Stewart 	void	(*tfb_tcp_mtu_chg)(struct tcpcb *tp);
616d3b6c96bSRandall Stewart 	int	(*tfb_pru_options)(struct tcpcb *, int);
6179e4d9e4cSRandall Stewart 	void	(*tfb_hwtls_change)(struct tcpcb *, int);
61873ee5756SRandall Stewart 	int	(*tfb_chg_query)(struct tcpcb *, struct tcp_query_resp *);
61973ee5756SRandall Stewart 	void	(*tfb_switch_failed)(struct tcpcb *);
62073ee5756SRandall Stewart 	bool	(*tfb_early_wake_check)(struct tcpcb *);
621e5049a17SRandall Stewart 	int     (*tfb_compute_pipe)(struct tcpcb *tp);
622e18b97bdSRandall Stewart 	int     (*tfb_stack_info)(struct tcpcb *tp, struct stack_specific_info *);
623e18b97bdSRandall Stewart 	void	(*tfb_inherit)(struct tcpcb *tp, struct inpcb *h_inp);
624cc65eb4eSGleb Smirnoff 	volatile uint32_t tfb_refcnt;
625cc65eb4eSGleb Smirnoff 	uint32_t  tfb_flags;
6262529f56eSJonathan T. Looney 	uint8_t	tfb_id;
627cc65eb4eSGleb Smirnoff };
628cc65eb4eSGleb Smirnoff 
/*
 * Upper bound on how many names a single TCP function block can be
 * registered with.
 */
#define	TCP_FUNCTION_NAME_NUM_MAX	8
631859f0f0dSMichael Tuexen 
632cc65eb4eSGleb Smirnoff struct tcp_function {
633cc65eb4eSGleb Smirnoff 	TAILQ_ENTRY(tcp_function)	tf_next;
634dc6a41b9SJonathan T. Looney 	char				tf_name[TCP_FUNCTION_NAME_LEN_MAX];
635cc65eb4eSGleb Smirnoff 	struct tcp_function_block	*tf_fb;
636cc65eb4eSGleb Smirnoff };
637cc65eb4eSGleb Smirnoff 
638cc65eb4eSGleb Smirnoff TAILQ_HEAD(tcp_funchead, tcp_function);
6395b08b46aSGleb Smirnoff 
/* Declared here because the inline tcp_output*() wrappers call it. */
struct tcpcb	*tcp_drop(struct tcpcb *, int);
641f64dc2abSGleb Smirnoff 
642f64dc2abSGleb Smirnoff #ifdef _NETINET_IN_PCB_H_
/*
 * Conversions between a tcpcb, the inpcb stored in its t_inpcb member and
 * the socket pointer kept in that inpcb.
 */
#define	intotcpcb(inp)	__containerof((inp), struct tcpcb, t_inpcb)
#define	sototcpcb(so)	intotcpcb(sotoinpcb(so))
#define	tptoinpcb(tp)	(&(tp)->t_inpcb)
#define	tptosocket(tp)	((tp)->t_inpcb.inp_socket)
6479eb0e832SGleb Smirnoff 
648f64dc2abSGleb Smirnoff /*
649f64dc2abSGleb Smirnoff  * tcp_output()
650f64dc2abSGleb Smirnoff  * Handles tcp_drop request from advanced stacks and reports that inpcb is
651f64dc2abSGleb Smirnoff  * gone with negative return code.
652f64dc2abSGleb Smirnoff  * Drop in replacement for the default stack.
653f64dc2abSGleb Smirnoff  */
6545b08b46aSGleb Smirnoff static inline int
tcp_output(struct tcpcb * tp)6555b08b46aSGleb Smirnoff tcp_output(struct tcpcb *tp)
6565b08b46aSGleb Smirnoff {
6579eb0e832SGleb Smirnoff 	struct inpcb *inp = tptoinpcb(tp);
658f64dc2abSGleb Smirnoff 	int rv;
6595b08b46aSGleb Smirnoff 
6609eb0e832SGleb Smirnoff 	INP_WLOCK_ASSERT(inp);
661f64dc2abSGleb Smirnoff 
662f64dc2abSGleb Smirnoff 	rv = tp->t_fb->tfb_tcp_output(tp);
663f64dc2abSGleb Smirnoff 	if (rv < 0) {
664f64dc2abSGleb Smirnoff 		KASSERT(tp->t_fb->tfb_flags & TCP_FUNC_OUTPUT_CANDROP,
665f64dc2abSGleb Smirnoff 		    ("TCP stack %s requested tcp_drop(%p)",
666f64dc2abSGleb Smirnoff 		    tp->t_fb->tfb_tcp_block_name, tp));
667f64dc2abSGleb Smirnoff 		tp = tcp_drop(tp, -rv);
668f64dc2abSGleb Smirnoff 		if (tp)
6699eb0e832SGleb Smirnoff 			INP_WUNLOCK(inp);
6705b08b46aSGleb Smirnoff 	}
671f64dc2abSGleb Smirnoff 
672f64dc2abSGleb Smirnoff 	return (rv);
673f64dc2abSGleb Smirnoff }
674f64dc2abSGleb Smirnoff 
675f64dc2abSGleb Smirnoff /*
676f64dc2abSGleb Smirnoff  * tcp_output_unlock()
677f64dc2abSGleb Smirnoff  * Always returns unlocked, handles drop request from advanced stacks.
678f64dc2abSGleb Smirnoff  * Always returns positive error code.
679f64dc2abSGleb Smirnoff  */
680f64dc2abSGleb Smirnoff static inline int
tcp_output_unlock(struct tcpcb * tp)681f64dc2abSGleb Smirnoff tcp_output_unlock(struct tcpcb *tp)
682f64dc2abSGleb Smirnoff {
6839eb0e832SGleb Smirnoff 	struct inpcb *inp = tptoinpcb(tp);
684f64dc2abSGleb Smirnoff 	int rv;
685f64dc2abSGleb Smirnoff 
6869eb0e832SGleb Smirnoff 	INP_WLOCK_ASSERT(inp);
687f64dc2abSGleb Smirnoff 
688f64dc2abSGleb Smirnoff 	rv = tp->t_fb->tfb_tcp_output(tp);
689f64dc2abSGleb Smirnoff 	if (rv < 0) {
690f64dc2abSGleb Smirnoff 		KASSERT(tp->t_fb->tfb_flags & TCP_FUNC_OUTPUT_CANDROP,
691f64dc2abSGleb Smirnoff 		    ("TCP stack %s requested tcp_drop(%p)",
692f64dc2abSGleb Smirnoff 		    tp->t_fb->tfb_tcp_block_name, tp));
693f64dc2abSGleb Smirnoff 		rv = -rv;
694f64dc2abSGleb Smirnoff 		tp = tcp_drop(tp, rv);
695f64dc2abSGleb Smirnoff 		if (tp)
6969eb0e832SGleb Smirnoff 			INP_WUNLOCK(inp);
697f64dc2abSGleb Smirnoff 	} else
6989eb0e832SGleb Smirnoff 		INP_WUNLOCK(inp);
699f64dc2abSGleb Smirnoff 
700f64dc2abSGleb Smirnoff 	return (rv);
701f64dc2abSGleb Smirnoff }
702f64dc2abSGleb Smirnoff 
703f64dc2abSGleb Smirnoff /*
704f64dc2abSGleb Smirnoff  * tcp_output_nodrop()
705f64dc2abSGleb Smirnoff  * Always returns locked.  It is caller's responsibility to run tcp_drop()!
706f64dc2abSGleb Smirnoff  * Useful in syscall implementations, when we want to perform some logging
707f64dc2abSGleb Smirnoff  * and/or tracing with tcpcb before calling tcp_drop().  To be used with
708f64dc2abSGleb Smirnoff  * tcp_unlock_or_drop() later.
709f64dc2abSGleb Smirnoff  *
710f64dc2abSGleb Smirnoff  * XXXGL: maybe don't allow stacks to return a drop request at certain
711f64dc2abSGleb Smirnoff  * TCP states? Why would it do in connect(2)? In recv(2)?
712f64dc2abSGleb Smirnoff  */
713f64dc2abSGleb Smirnoff static inline int
tcp_output_nodrop(struct tcpcb * tp)714f64dc2abSGleb Smirnoff tcp_output_nodrop(struct tcpcb *tp)
715f64dc2abSGleb Smirnoff {
716f64dc2abSGleb Smirnoff 	int rv;
717f64dc2abSGleb Smirnoff 
7189eb0e832SGleb Smirnoff 	INP_WLOCK_ASSERT(tptoinpcb(tp));
719f64dc2abSGleb Smirnoff 
720f64dc2abSGleb Smirnoff 	rv = tp->t_fb->tfb_tcp_output(tp);
721f64dc2abSGleb Smirnoff 	KASSERT(rv >= 0 || tp->t_fb->tfb_flags & TCP_FUNC_OUTPUT_CANDROP,
722f64dc2abSGleb Smirnoff 	    ("TCP stack %s requested tcp_drop(%p)",
723f64dc2abSGleb Smirnoff 	    tp->t_fb->tfb_tcp_block_name, tp));
724f64dc2abSGleb Smirnoff 	return (rv);
725f64dc2abSGleb Smirnoff }
726f64dc2abSGleb Smirnoff 
727f64dc2abSGleb Smirnoff /*
728f64dc2abSGleb Smirnoff  * tcp_unlock_or_drop()
729f64dc2abSGleb Smirnoff  * Handle return code from tfb_tcp_output() after we have logged/traced,
730f64dc2abSGleb Smirnoff  * to be used with tcp_output_nodrop().
731f64dc2abSGleb Smirnoff  */
732f64dc2abSGleb Smirnoff static inline int
tcp_unlock_or_drop(struct tcpcb * tp,int tcp_output_retval)733f64dc2abSGleb Smirnoff tcp_unlock_or_drop(struct tcpcb *tp, int tcp_output_retval)
734f64dc2abSGleb Smirnoff {
7359eb0e832SGleb Smirnoff 	struct inpcb *inp = tptoinpcb(tp);
736f64dc2abSGleb Smirnoff 
7379eb0e832SGleb Smirnoff 	INP_WLOCK_ASSERT(inp);
738f64dc2abSGleb Smirnoff 
739f64dc2abSGleb Smirnoff         if (tcp_output_retval < 0) {
740f64dc2abSGleb Smirnoff                 tcp_output_retval = -tcp_output_retval;
741f64dc2abSGleb Smirnoff                 if (tcp_drop(tp, tcp_output_retval) != NULL)
7429eb0e832SGleb Smirnoff                         INP_WUNLOCK(inp);
743f64dc2abSGleb Smirnoff         } else
7449eb0e832SGleb Smirnoff 		INP_WUNLOCK(inp);
745f64dc2abSGleb Smirnoff 
746f64dc2abSGleb Smirnoff 	return (tcp_output_retval);
747f64dc2abSGleb Smirnoff }
748f64dc2abSGleb Smirnoff #endif	/* _NETINET_IN_PCB_H_ */
74937bf391dSRichard Scheffenegger 
75037bf391dSRichard Scheffenegger static int inline
tcp_packets_this_ack(struct tcpcb * tp,tcp_seq ack)75137bf391dSRichard Scheffenegger tcp_packets_this_ack(struct tcpcb *tp, tcp_seq ack)
75237bf391dSRichard Scheffenegger {
75337bf391dSRichard Scheffenegger 	return ((ack - tp->snd_una) / tp->t_maxseg +
75437bf391dSRichard Scheffenegger 		((((ack - tp->snd_una) % tp->t_maxseg) != 0) ? 1 : 0));
75537bf391dSRichard Scheffenegger }
756cc65eb4eSGleb Smirnoff #endif	/* _KERNEL */
757df8bae1dSRodney W. Grimes 
/*
 * Flags and utility macros for the t_flags field.
 * Each flag occupies one bit; all 32 bits are in use.
 */
#define	TF_ACKNOW	0x00000001	/* ack peer immediately */
#define	TF_DELACK	0x00000002	/* ack, but try to delay it */
#define	TF_NODELAY	0x00000004	/* don't delay packets to coalesce */
#define	TF_NOOPT	0x00000008	/* don't use tcp options */
#define	TF_SENTFIN	0x00000010	/* have sent FIN */
#define	TF_REQ_SCALE	0x00000020	/* have/will request window scaling */
#define	TF_RCVD_SCALE	0x00000040	/* other side has requested scaling */
#define	TF_REQ_TSTMP	0x00000080	/* have/will request timestamps */
#define	TF_RCVD_TSTMP	0x00000100	/* a timestamp was received in SYN */
#define	TF_SACK_PERMIT	0x00000200	/* other side said I could SACK */
#define	TF_NEEDSYN	0x00000400	/* send SYN (implicit state) */
#define	TF_NEEDFIN	0x00000800	/* send FIN (implicit state) */
#define	TF_NOPUSH	0x00001000	/* don't push */
/*
 * TF_PREVVALID: saved values for bad rxmit valid.
 * Note: accessing and restoring from these may only be done in the 1st
 * RTO recovery round (t_rxtshift == 1).
 */
#define	TF_PREVVALID	0x00002000
#define	TF_WAKESOR	0x00004000	/* wake up receive socket */
#define	TF_GPUTINPROG	0x00008000	/* Goodput measurement in progress */
#define	TF_MORETOCOME	0x00010000	/* More data to be appended to sock */
#define	TF_SONOTCONN	0x00020000	/* needs soisconnected() on ESTAB */
#define	TF_LASTIDLE	0x00040000	/* connection was previously idle */
#define	TF_RXWIN0SENT	0x00080000	/* sent a receiver win 0 in response */
#define	TF_FASTRECOVERY	0x00100000	/* in NewReno Fast Recovery */
#define	TF_WASFRECOVERY	0x00200000	/* was in NewReno Fast Recovery */
#define	TF_SIGNATURE	0x00400000	/* require MD5 digests (RFC2385) */
#define	TF_FORCEDATA	0x00800000	/* force out a byte */
#define	TF_TSO		0x01000000	/* TSO enabled on this connection */
#define	TF_TOE		0x02000000	/* this connection is offloaded */
#define	TF_CLOSED	0x04000000	/* close(2) called on socket */
#define	TF_SENTSYN	0x08000000	/* At least one syn has been sent */
#define	TF_LRD		0x10000000	/* Lost Retransmission Detection */
#define	TF_CONGRECOVERY	0x20000000	/* congestion recovery mode */
#define	TF_WASCRECOVERY	0x40000000	/* was in congestion recovery */
#define	TF_FASTOPEN	0x80000000	/* TCP Fast Open indication */
797c3ce7a79SRobert Watson 
/*
 * Helpers to test, set and clear the recovery bits.  The t_flags argument
 * must be a modifiable lvalue for the ENTER_ and EXIT_ forms, which update
 * it in place.  The IN_ forms evaluate to the masked bits (non-zero when
 * set), not to a normalized 0/1 value.  Arguments and expansions are fully
 * parenthesized so that the macros can be used with arbitrary expressions.
 */
#define	IN_FASTRECOVERY(t_flags)	((t_flags) & TF_FASTRECOVERY)
#define	ENTER_FASTRECOVERY(t_flags)	((t_flags) |= TF_FASTRECOVERY)
#define	EXIT_FASTRECOVERY(t_flags)	((t_flags) &= ~TF_FASTRECOVERY)

#define	IN_CONGRECOVERY(t_flags)	((t_flags) & TF_CONGRECOVERY)
#define	ENTER_CONGRECOVERY(t_flags)	((t_flags) |= TF_CONGRECOVERY)
#define	EXIT_CONGRECOVERY(t_flags)	((t_flags) &= ~TF_CONGRECOVERY)

/* The same three operations, applied to both recovery bits at once. */
#define	IN_RECOVERY(t_flags)					\
	((t_flags) & (TF_CONGRECOVERY | TF_FASTRECOVERY))
#define	ENTER_RECOVERY(t_flags)					\
	((t_flags) |= (TF_CONGRECOVERY | TF_FASTRECOVERY))
#define	EXIT_RECOVERY(t_flags)					\
	((t_flags) &= ~(TF_CONGRECOVERY | TF_FASTRECOVERY))

/* Difference between the ACK field of th and tp's snd_una. */
#define	BYTES_THIS_ACK(tp, th)	((th)->th_ack - (tp)->snd_una)
8119d11646dSJeffrey Hsu 
/*
 * Flags for the t_oobflags field.
 */
#define	TCPOOB_HAVEDATA		0x01
#define	TCPOOB_HADDATA		0x02
817c3ce7a79SRobert Watson 
/*
 * Flags for the extended TCP flags field, t_flags2.
 * Each flag occupies one bit.
 */
#define	TF2_PLPMTU_BLACKHOLE	0x00000001 /* Possible PLPMTUD Black Hole. */
#define	TF2_PLPMTU_PMTUD	0x00000002 /* Allowed to attempt PLPMTUD. */
#define	TF2_PLPMTU_MAXSEGSNT	0x00000004 /* Last seg sent was full seg. */
#define	TF2_LOG_AUTO		0x00000008 /* Session is auto-logging. */
#define	TF2_DROP_AF_DATA	0x00000010 /* Drop after all data ack'd */
#define	TF2_ECN_PERMIT		0x00000020 /* connection ECN-ready */
#define	TF2_ECN_SND_CWR		0x00000040 /* ECN CWR in queue */
#define	TF2_ECN_SND_ECE		0x00000080 /* ECN ECE in queue */
#define	TF2_ACE_PERMIT		0x00000100 /* Accurate ECN mode */
#define	TF2_HPTS_CPU_SET	0x00000200 /* t_hpts_cpu is not random */
#define	TF2_FBYTES_COMPLETE	0x00000400 /* We have first bytes in and out */
#define	TF2_ECN_USE_ECT1	0x00000800 /* Use ECT(1) marking on session */
#define	TF2_TCP_ACCOUNTING	0x00001000 /* Do TCP accounting */
#define	TF2_HPTS_CALLS		0x00002000 /* tcp_output() called via HPTS */
#define	TF2_MBUF_L_ACKS		0x00004000 /* large mbufs for ack compression */
#define	TF2_MBUF_ACKCMP		0x00008000 /* mbuf ack compression ok */
#define	TF2_SUPPORTS_MBUFQ	0x00010000 /* Supports the mbuf queue method */
#define	TF2_MBUF_QUEUE_READY	0x00020000 /* Inputs can be queued */
#define	TF2_DONT_SACK_QUEUE	0x00040000 /* Don't wake on sack */
#define	TF2_CANNOT_DO_ECN	0x00080000 /* The stack does not do ECN */
#define	TF2_PROC_SACK_PROHIBIT	0x00100000 /* Due to small MSS size do not
					    * process sack's */
#define	TF2_IPSEC_TSO		0x00200000 /* IPSEC + TSO supported */
#define	TF2_NO_ISS_CHECK	0x00400000 /* Don't check SEG.ACK against ISS */
844dc9daa04SRichard Scheffenegger 
/*
 * Bits for tcpopt.to_flags, recording which options are present.
 * The binary order of the to_flags is relevant for packing of the
 * options in tcp_addoptions.
 */
#define	TOF_MSS		0x0001		/* maximum segment size */
#define	TOF_SCALE	0x0002		/* window scaling */
#define	TOF_SACKPERM	0x0004		/* SACK permitted */
#define	TOF_TS		0x0010		/* timestamp */
#define	TOF_SIGNATURE	0x0040		/* TCP-MD5 signature option (RFC2385) */
#define	TOF_SACK	0x0080		/* Peer sent SACK option */
#define	TOF_FASTOPEN	0x0100		/* TCP Fast Open (TFO) cookie */
#define	TOF_MAXOPT	0x0200

/*
 * Structure to hold TCP options that are only used during segment
 * processing (in tcp_input), but not held in the tcpcb.
 * It's basically used to reduce the number of parameters
 * to tcp_dooptions and tcp_addoptions.
 */
struct tcpopt {
	uint32_t	to_flags;	/* which options are present (TOF_*) */
	uint32_t	to_tsval;	/* new timestamp */
	uint32_t	to_tsecr;	/* reflected timestamp */
	unsigned char	*to_sacks;	/* pointer to the first SACK blocks */
	unsigned char	*to_signature;	/* pointer to the TCP-MD5 signature */
	uint8_t		*to_tfo_cookie; /* pointer to the TFO cookie */
	uint16_t	to_mss;		/* maximum segment size */
	uint8_t		to_wscale;	/* window scaling */
	uint8_t		to_nsacks;	/* number of SACK blocks */
	uint8_t		to_tfo_len;	/* TFO cookie length */
	uint32_t	to_spare;	/* UTO */
};
874be2ac88cSJonathan Lemon 
/*
 * Flags for tcp_dooptions.
 */
#define	TO_SYN		0x01		/* parse SYN-only options */
879f72167f4SAndre Oppermann 
/*
 * Path metrics in compact form.
 * Must stay in sync with hc_metrics.
 */
struct hc_metrics_lite {
	uint32_t	hc_mtu;		/* MTU for this path */
	uint32_t	hc_ssthresh;	/* outbound gateway buffer limit */
	uint32_t	hc_rtt;		/* estimated round trip time */
	uint32_t	hc_rttvar;	/* estimated rtt variance */
	uint32_t	hc_cwnd;	/* congestion window */
	uint32_t	hc_sendpipe;	/* outbound delay-bandwidth product */
	uint32_t	hc_recvpipe;	/* inbound delay-bandwidth product */
};
88997d8d152SAndre Oppermann 
#ifndef _NETINET_IN_PCB_H_
/* Forward declaration, only emitted when _NETINET_IN_PCB_H_ is not defined. */
struct in_conninfo;
#endif /* !_NETINET_IN_PCB_H_ */
8938411d000SAndre Oppermann 
/*
 * The smoothed round-trip time and estimated variance
 * are stored as fixed point numbers scaled by the values below.
 * With these scales, srtt has 5 bits to the right of the binary point
 * and rttvar has 4 bits to the right of the binary point.
 * The averages are smoothed as
 * (smoothed = (1 - ALPHA)sample + ALPHA * smoothed), with an "ALPHA" of
 * 0.875 for srtt and an ALPHA of 0.75 for rttvar.
 * NOTE(review): these ALPHA values correspond to gains of 1/8 and 1/4,
 * not to 1/scale for the scales below; check the update code in
 * tcp_input.c (see TCP_DELTA_SHIFT) to confirm how they are applied.
 */
/* srtt: 5 fractional bits. */
#define	TCP_RTT_SHIFT		5	/* shift for srtt */
#define	TCP_RTT_SCALE		32	/* multiplier for srtt */
/* rttvar: 4 fractional bits. */
#define	TCP_RTTVAR_SHIFT	4	/* shift for rttvar */
#define	TCP_RTTVAR_SCALE	16	/* multiplier for rttvar */
#define	TCP_DELTA_SHIFT		2	/* see tcp_input.c */
908df8bae1dSRodney W. Grimes 
909df8bae1dSRodney W. Grimes /*
910df8bae1dSRodney W. Grimes  * The initial retransmission should happen at rtt + 4 * rttvar.
911df8bae1dSRodney W. Grimes  * Because of the way we do the smoothing, srtt and rttvar
912df8bae1dSRodney W. Grimes  * will each average +1/2 tick of bias.  When we compute
913df8bae1dSRodney W. Grimes  * the retransmit timer, we want 1/2 tick of rounding and
914df8bae1dSRodney W. Grimes  * 1 extra tick because of +-1/2 tick uncertainty in the
915df8bae1dSRodney W. Grimes  * firing of the timer.  The bias will give us exactly the
916df8bae1dSRodney W. Grimes  * 1.5 tick we need.  But, because the bias is
917df8bae1dSRodney W. Grimes  * statistical, we have to test that we don't drop below
918df8bae1dSRodney W. Grimes  * the minimum feasible timer (which is 2 ticks).
919233e8c18SGarrett Wollman  * This version of the macro adapted from a paper by Lawrence
920233e8c18SGarrett Wollman  * Brakmo and Larry Peterson which outlines a problem caused
921233e8c18SGarrett Wollman  * by insufficient precision in the original implementation,
922233e8c18SGarrett Wollman  * which results in inappropriately large RTO values for very
923233e8c18SGarrett Wollman  * fast networks.
924df8bae1dSRodney W. Grimes  */
#define	TCP_REXMTVAL(tp)						\
	max((tp)->t_rttmin,						\
	    (((tp)->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT)) +	\
	    (tp)->t_rttvar) >> TCP_DELTA_SHIFT)
928df8bae1dSRodney W. Grimes 
/*
 * Global (per-VNET) TCP statistics.  The below structure represents what we
 * export to the userland, but in the kernel we have an array of counter_u64_t
 * with as many elements as there are members in the structure.  The counters
 * shall be increased by TCPSTAT_INC() or KMOD_TCPSTAT_INC().  Adding a new
 * counter also requires adding corresponding SDT probes into in_kdtrace.h and
 * into in_kdtrace.c.
 *
 * Every member is a uint64_t, so a member's index in the counter array is
 * its byte offset divided by sizeof(uint64_t).
 */
struct	tcpstat {
	uint64_t tcps_connattempt;	/* connections initiated */
	uint64_t tcps_accepts;		/* connections accepted */
	uint64_t tcps_connects;		/* connections established */
	uint64_t tcps_drops;		/* connections dropped */
	uint64_t tcps_conndrops;	/* embryonic connections dropped */
	uint64_t tcps_minmssdrops;	/* average minmss too low drops */
	uint64_t tcps_closed;		/* conn. closed (includes drops) */
	uint64_t tcps_segstimed;	/* segs where we tried to get rtt */
	uint64_t tcps_rttupdated;	/* times we succeeded */
	uint64_t tcps_delack;		/* delayed acks sent */
	uint64_t tcps_timeoutdrop;	/* conn. dropped in rxmt timeout */
	uint64_t tcps_rexmttimeo;	/* retransmit timeouts */
	uint64_t tcps_persisttimeo;	/* persist timeouts */
	uint64_t tcps_keeptimeo;	/* keepalive timeouts */
	uint64_t tcps_keepprobe;	/* keepalive probes sent */
	uint64_t tcps_keepdrops;	/* connections dropped in keepalive */
	uint64_t tcps_progdrops;	/* drops due to no progress */

	uint64_t tcps_sndtotal;		/* total packets sent */
	uint64_t tcps_sndpack;		/* data packets sent */
	uint64_t tcps_sndbyte;		/* data bytes sent */
	uint64_t tcps_sndrexmitpack;	/* data packets retransmitted */
	uint64_t tcps_sndrexmitbyte;	/* data bytes retransmitted */
	uint64_t tcps_sndrexmitbad;	/* unnecessary packet retransmissions */
	uint64_t tcps_sndacks;		/* ack-only packets sent */
	uint64_t tcps_sndprobe;		/* window probes sent */
	uint64_t tcps_sndurg;		/* packets sent with URG only */
	uint64_t tcps_sndwinup;		/* window update-only packets sent */
	uint64_t tcps_sndctrl;		/* control (SYN|FIN|RST) packets sent */

	uint64_t tcps_rcvtotal;		/* total packets received */
	uint64_t tcps_rcvpack;		/* packets received in sequence */
	uint64_t tcps_rcvbyte;		/* bytes received in sequence */
	uint64_t tcps_rcvbadsum;	/* packets received with cksum errs */
	uint64_t tcps_rcvbadoff;	/* packets received with bad offset */
	uint64_t tcps_rcvreassfull;	/* packets dropped for no reass space */
	uint64_t tcps_rcvshort;		/* packets received too short */
	uint64_t tcps_rcvduppack;	/* duplicate-only packets received */
	uint64_t tcps_rcvdupbyte;	/* duplicate-only bytes received */
	uint64_t tcps_rcvpartduppack;	/* packets with some duplicate data */
	uint64_t tcps_rcvpartdupbyte;	/* dup. bytes in part-dup. packets */
	uint64_t tcps_rcvoopack;	/* out-of-order packets received */
	uint64_t tcps_rcvoobyte;	/* out-of-order bytes received */
	uint64_t tcps_rcvpackafterwin;	/* packets with data after window */
	uint64_t tcps_rcvbyteafterwin;	/* bytes rcvd after window */
	uint64_t tcps_rcvafterclose;	/* packets rcvd after "close" */
	uint64_t tcps_rcvwinprobe;	/* rcvd window probe packets */
	uint64_t tcps_rcvdupack;	/* rcvd duplicate acks */
	uint64_t tcps_rcvacktoomuch;	/* rcvd acks for unsent data */
	uint64_t tcps_rcvackpack;	/* rcvd ack packets */
	uint64_t tcps_rcvackbyte;	/* bytes acked by rcvd acks */
	uint64_t tcps_rcvwinupd;	/* rcvd window update packets */
	uint64_t tcps_pawsdrop;		/* segments dropped due to PAWS */
	uint64_t tcps_predack;		/* times hdr predict ok for acks */
	uint64_t tcps_preddat;		/* times hdr predict ok for data pkts */
	uint64_t tcps_pcbcachemiss;	/* presumably PCB cache misses; confirm */
	uint64_t tcps_cachedrtt;	/* times cached RTT in route updated */
	uint64_t tcps_cachedrttvar;	/* times cached rttvar updated */
	uint64_t tcps_cachedssthresh;	/* times cached ssthresh updated */
	uint64_t tcps_usedrtt;		/* times RTT initialized from route */
	uint64_t tcps_usedrttvar;	/* times RTTVAR initialized from rt */
	uint64_t tcps_usedssthresh;	/* times ssthresh initialized from rt */
	uint64_t tcps_persistdrop;	/* timeout in persist state */
	uint64_t tcps_badsyn;		/* bogus SYN, e.g. premature ACK */
	uint64_t tcps_mturesent;	/* resends due to MTU discovery */
	uint64_t tcps_listendrop;	/* listen queue overflows */
	uint64_t tcps_badrst;		/* ignored RSTs in the window */

	uint64_t tcps_sc_added;		/* entry added to syncache */
	uint64_t tcps_sc_retransmitted;	/* syncache entry was retransmitted */
	uint64_t tcps_sc_dupsyn;	/* duplicate SYN packet */
	uint64_t tcps_sc_dropped;	/* could not reply to packet */
	uint64_t tcps_sc_completed;	/* successful extraction of entry */
	uint64_t tcps_sc_bucketoverflow;/* syncache per-bucket limit hit */
	uint64_t tcps_sc_cacheoverflow;	/* syncache cache limit hit */
	uint64_t tcps_sc_reset;		/* RST removed entry from syncache */
	uint64_t tcps_sc_stale;		/* timed out or listen socket gone */
	uint64_t tcps_sc_aborted;	/* syncache entry aborted */
	uint64_t tcps_sc_badack;	/* removed due to bad ACK */
	uint64_t tcps_sc_unreach;	/* ICMP unreachable received */
	uint64_t tcps_sc_zonefail;	/* zalloc() failed */
	uint64_t tcps_sc_sendcookie;	/* SYN cookie sent */
	uint64_t tcps_sc_recvcookie;	/* SYN cookie received */
	uint64_t tcps_sc_spurcookie;	/* SYN cookie spurious, rejected */
	uint64_t tcps_sc_failcookie;	/* SYN cookie failed, rejected */

	uint64_t tcps_hc_added;		/* entry added to hostcache */
	uint64_t tcps_hc_bucketoverflow;/* hostcache per bucket limit hit */

	uint64_t tcps_finwait2_drops;	/* FIN_WAIT_2 drops after time limit */

	/* SACK related stats */
	uint64_t tcps_sack_recovery_episode; /* SACK recovery episodes */
	uint64_t tcps_sack_rexmits;	    /* SACK rexmit segments   */
	uint64_t tcps_sack_rexmits_tso;	    /* SACK rexmit TSO chunks */
	uint64_t tcps_sack_rexmit_bytes;    /* SACK rexmit bytes      */
	uint64_t tcps_sack_rcv_blocks;	    /* SACK blocks (options) received */
	uint64_t tcps_sack_send_blocks;	    /* SACK blocks (options) sent     */
	uint64_t tcps_sack_lostrexmt;	    /* SACK lost retransmission recovered */
	uint64_t tcps_sack_sboverflow;	    /* times scoreboard overflowed */

	/* ECN related stats (receive side) */
	uint64_t tcps_ecn_rcvce;	/* ECN Congestion Experienced */
	uint64_t tcps_ecn_rcvect0;	/* ECN Capable Transport, ECT0 */
	uint64_t tcps_ecn_rcvect1;	/* ECN Capable Transport, ECT1 */
	uint64_t tcps_ecn_shs;		/* ECN successful handshakes */
	uint64_t tcps_ecn_rcwnd;	/* # times ECN reduced the cwnd */

	/* TCP_SIGNATURE related stats */
	uint64_t tcps_sig_rcvgoodsig;	/* Total matching signature received */
	uint64_t tcps_sig_rcvbadsig;	/* Total bad signature received */
	uint64_t tcps_sig_err_buildsig;	/* Failed to make signature */
	uint64_t tcps_sig_err_sigopt;	/* No signature expected by socket */
	uint64_t tcps_sig_err_nosigopt;	/* No signature provided by segment */

	/* Path MTU Discovery Black Hole Detection related stats */
	uint64_t tcps_pmtud_blackhole_activated;	 /* Black Hole Count */
	uint64_t tcps_pmtud_blackhole_activated_min_mss; /* BH at min MSS Count */
	uint64_t tcps_pmtud_blackhole_failed;		 /* Black Hole Failure Count */

	uint64_t tcps_tunneled_pkts;	/* UDP-encapsulated packets received */
	uint64_t tcps_tunneled_errs;	/* UDP-encapsulated packets with errors */

	/* DSACK related stats */
	uint64_t tcps_dsack_count;	/* Number of ACKs arriving with DSACKs */
	uint64_t tcps_dsack_bytes;	/* Number of bytes DSACK'ed no TLP */
	uint64_t tcps_dsack_tlp_bytes;	/* Number of bytes DSACK'ed due to TLPs */

	/* TCPS_TIME_WAIT usage stats */
	uint64_t tcps_tw_recycles;	/* Times time-wait was recycled. */
	uint64_t tcps_tw_resets;	/* Times time-wait sent a reset. */
	uint64_t tcps_tw_responds;	/* Times time-wait sent a valid ack. */

	/* Accurate ECN Handshake stats */
	uint64_t tcps_ace_nect;		/* ACE SYN packet with Non-ECT */
	uint64_t tcps_ace_ect1;		/* ACE SYN packet with ECT1 */
	uint64_t tcps_ace_ect0;		/* ACE SYN packet with ECT0 */
	uint64_t tcps_ace_ce;		/* ACE SYN packet with CE */

	/* ECN related stats (send side) */
	uint64_t tcps_ecn_sndect0;	/* ECN Capable Transport, ECT0 */
	uint64_t tcps_ecn_sndect1;	/* ECN Capable Transport, ECT1 */

	/*
	 * BBR and RACK implement TLPs; these values count TLP bytes in
	 * two categories: bytes that were retransmitted and bytes that
	 * were newly transmitted.  Both types can serve as TLPs, but they
	 * are accounted differently.
	 */
	uint64_t tcps_tlpresends;	/* number of tlp resends */
	uint64_t tcps_tlpresend_bytes;	/* number of bytes resent by tlp */

	/* SEG.ACK validation failures */
	uint64_t tcps_rcvghostack;	/* received ACK for data never sent */
	uint64_t tcps_rcvacktooold;	/* received ACK for data too long ago */

	uint64_t _pad[1];		/* 1 TBD placeholder for STABLE */
};

#define	tcps_rcvmemdrop	tcps_rcvreassfull	/* compat */
1099b1a41566SGleb Smirnoff 
1100de231a06SRobert Watson #ifdef _KERNEL
11015923c293SGleb Smirnoff #include <sys/counter.h>
110260d8dbbeSKristof Provost #include <netinet/in_kdtrace.h>
11035923c293SGleb Smirnoff 
11045da0521fSAndrey V. Elsukov VNET_PCPUSTAT_DECLARE(struct tcpstat, tcpstat);	/* tcp statistics */
1105315e3e38SRobert Watson /*
1106315e3e38SRobert Watson  * In-kernel consumers can use these accessor macros directly to update
1107315e3e38SRobert Watson  * stats.
1108315e3e38SRobert Watson  */
11095da0521fSAndrey V. Elsukov #define TCPSTAT_ADD(name, val)                                           \
111060d8dbbeSKristof Provost 	do {                                                             \
111160d8dbbeSKristof Provost 		MIB_SDT_PROBE1(tcp, count, name, (val));                 \
111260d8dbbeSKristof Provost 		VNET_PCPUSTAT_ADD(struct tcpstat, tcpstat, name, (val)); \
111360d8dbbeSKristof Provost 	} while (0)
111478b50714SRobert Watson #define	TCPSTAT_INC(name)	TCPSTAT_ADD(name, 1)
1115315e3e38SRobert Watson 
/*
 * Kernel module consumers must use this accessor macro.
 *
 * `name' is a member of struct tcpstat; it is converted into the index
 * passed to kmod_tcpstat_add() by dividing its byte offset by
 * sizeof(uint64_t).  `val' appears twice in the expansion and may be
 * evaluated more than once: avoid arguments with side effects.
 */
void	kmod_tcpstat_add(int statnum, int val);
#define	KMOD_TCPSTAT_ADD(name, val)					\
	do {								\
		MIB_SDT_PROBE1(tcp, count, name, (val));		\
		kmod_tcpstat_add(offsetof(struct tcpstat, name) /	\
		    sizeof(uint64_t), (val));				\
	} while (0)
#define	KMOD_TCPSTAT_INC(name)	KMOD_TCPSTAT_ADD(name, 1)
112839bc9de5SLawrence Stewart 
112939bc9de5SLawrence Stewart /*
1130bf840a17SGleb Smirnoff  * Running TCP connection count by state.
1131bf840a17SGleb Smirnoff  */
1132bf840a17SGleb Smirnoff VNET_DECLARE(counter_u64_t, tcps_states[TCP_NSTATES]);
1133f59d975eSGleb Smirnoff #define	V_tcps_states	VNET(tcps_states)
1134f59d975eSGleb Smirnoff #define	TCPSTATES_INC(state)	counter_u64_add(V_tcps_states[state], 1)
1135f59d975eSGleb Smirnoff #define	TCPSTATES_DEC(state)	counter_u64_add(V_tcps_states[state], -1)
1136bf840a17SGleb Smirnoff 
/*
 * TCP specific helper hook point identifiers.  HHOOK_TCP_LAST must track
 * the highest identifier.
 */
#define	HHOOK_TCP_EST_IN		0
#define	HHOOK_TCP_EST_OUT		1
#define	HHOOK_TCP_LAST			HHOOK_TCP_EST_OUT
114339bc9de5SLawrence Stewart 
114439bc9de5SLawrence Stewart struct tcp_hhook_data {
114539bc9de5SLawrence Stewart 	struct tcpcb	*tp;
114639bc9de5SLawrence Stewart 	struct tcphdr	*th;
114739bc9de5SLawrence Stewart 	struct tcpopt	*to;
11483ac12506SJonathan T. Looney 	uint32_t	len;
114939bc9de5SLawrence Stewart 	int		tso;
115039bc9de5SLawrence Stewart 	tcp_seq		curack;
115139bc9de5SLawrence Stewart };
115289e560f4SRandall Stewart #ifdef TCP_HHOOK
115389e560f4SRandall Stewart void hhook_run_tcp_est_out(struct tcpcb *tp,
115489e560f4SRandall Stewart 	struct tcphdr *th, struct tcpopt *to,
115589e560f4SRandall Stewart 	uint32_t len, int tso);
115689e560f4SRandall Stewart #endif
1157de231a06SRobert Watson #endif
115878b50714SRobert Watson 
/*
 * TCB structure exported to user-land via sysctl(3).
 *
 * Fields prefixed with "xt_" are unique to the export structure, and fields
 * with "t_" or other prefixes match corresponding fields of 'struct tcpcb'.
 *
 * Legend:
 * (s) - used by userland utilities in src
 * (p) - used by utilities in ports
 * (3) - is known to be used by third party software not in ports
 * (n) - no known usage
 *
 * Evil hack: declare only if in_pcb.h and sys/socketvar.h have been
 * included.  Not all of our clients do.
 */
#if defined(_NETINET_IN_PCB_H_) && defined(_SYS_SOCKETVAR_H_)
struct xtcpcb {
	ksize_t		xt_len;		/* length of this structure */
	struct xinpcb	xt_inp;
	char		xt_stack[TCP_FUNCTION_NAME_LEN_MAX];	/* (s) */
	char		xt_logid[TCP_LOG_ID_LEN];	/* (s) */
	char		xt_cc[TCP_CA_NAME_MAX];	/* (s) */
	int64_t		spare64[6];
	int32_t		t_state;		/* (s,p) */
	uint32_t	t_flags;		/* (s,p) */
	int32_t		t_sndzerowin;		/* (s) */
	int32_t		t_sndrexmitpack;	/* (s) */
	int32_t		t_rcvoopack;		/* (s) */
	int32_t		t_rcvtime;		/* (s) */
	int32_t		tt_rexmt;		/* (s) */
	int32_t		tt_persist;		/* (s) */
	int32_t		tt_keep;		/* (s) */
	int32_t		tt_2msl;		/* (s) */
	int32_t		tt_delack;		/* (s) */
	int32_t		t_logstate;		/* (3) */
	uint32_t	t_snd_cwnd;		/* (s) */
	uint32_t	t_snd_ssthresh;		/* (s) */
	uint32_t	t_maxseg;		/* (s) */
	uint32_t	t_rcv_wnd;		/* (s) */
	uint32_t	t_snd_wnd;		/* (s) */
	uint32_t	xt_ecn;			/* (s) */
	uint32_t	t_dsack_bytes;		/* (n) */
	uint32_t	t_dsack_tlp_bytes;	/* (n) */
	uint32_t	t_dsack_pack;		/* (n) */
	uint16_t	xt_encaps_port;		/* (s) */
	int16_t		spare16;
	int32_t		spare32[22];
} __aligned(8);

#ifdef _KERNEL
void	tcp_inptoxtp(const struct inpcb *, struct xtcpcb *);
#endif /* _KERNEL */
#endif /* _NETINET_IN_PCB_H_ && _SYS_SOCKETVAR_H_ */
121298271db4SGarrett Wollman 
121398271db4SGarrett Wollman /*
1214f8979519SJonathan T. Looney  * TCP function information (name-to-id mapping, aliases, and refcnt)
1215f8979519SJonathan T. Looney  * exported to user-land via sysctl(3).
12162529f56eSJonathan T. Looney  */
1217f8979519SJonathan T. Looney struct tcp_function_info {
1218f8979519SJonathan T. Looney 	uint32_t	tfi_refcnt;
12192529f56eSJonathan T. Looney 	uint8_t		tfi_id;
12202529f56eSJonathan T. Looney 	char		tfi_name[TCP_FUNCTION_NAME_LEN_MAX];
1221f8979519SJonathan T. Looney 	char		tfi_alias[TCP_FUNCTION_NAME_LEN_MAX];
12222529f56eSJonathan T. Looney };
12232529f56eSJonathan T. Looney 
/*
 * Identifiers for TCP sysctl nodes.  The value 2 is not assigned in this
 * list.
 */
#define	TCPCTL_DO_RFC1323	1	/* use RFC-1323 extensions */
#define	TCPCTL_MSSDFLT		3	/* MSS default */
#define	TCPCTL_STATS		4	/* statistics */
#define	TCPCTL_RTTDFLT		5	/* default RTT estimate */
#define	TCPCTL_KEEPIDLE		6	/* keepalive idle timer */
#define	TCPCTL_KEEPINTVL	7	/* interval to send keepalives */
#define	TCPCTL_SENDSPACE	8	/* send buffer space */
#define	TCPCTL_RECVSPACE	9	/* receive buffer space */
#define	TCPCTL_KEEPINIT		10	/* timeout for establishing syn */
#define	TCPCTL_PCBLIST		11	/* list of all outstanding PCBs */
#define	TCPCTL_DELACKTIME	12	/* time before sending delayed ACK */
#define	TCPCTL_V6MSSDFLT	13	/* MSS default for IPv6 */
#define	TCPCTL_SACK		14	/* Selective Acknowledgement,rfc 2018 */
#define	TCPCTL_DROP		15	/* drop tcp connection */
#define	TCPCTL_STATES		16	/* connection counts by TCP state */
#define	TCPCTL_KTLSLIST		17	/* conns. with active ktls session */
#define	TCPCTL_KTLSLIST_WKEYS	18	/* KTLSLIST with key data exported */
1245eb6ad696SGarrett Wollman 
1246664a31e4SPeter Wemm #ifdef _KERNEL
1247ce02431fSDoug Rabson #ifdef SYSCTL_DECL
1248ce02431fSDoug Rabson SYSCTL_DECL(_net_inet_tcp);
1249a55db2b6SPaul Saab SYSCTL_DECL(_net_inet_tcp_sack);
1250df541e5fSAndre Oppermann MALLOC_DECLARE(M_TCPLOG);
1251ce02431fSDoug Rabson #endif
1252ce02431fSDoug Rabson 
1253334fc582SBjoern A. Zeeb VNET_DECLARE(int, tcp_log_in_vain);
1254334fc582SBjoern A. Zeeb #define	V_tcp_log_in_vain		VNET(tcp_log_in_vain)
12553bdf4c42SGleb Smirnoff 
12563bdf4c42SGleb Smirnoff /*
12573bdf4c42SGleb Smirnoff  * Global TCP tunables shared between different stacks.
12583bdf4c42SGleb Smirnoff  * Please keep the list sorted.
12593bdf4c42SGleb Smirnoff  */
12603bdf4c42SGleb Smirnoff VNET_DECLARE(int, drop_synfin);
1261eddfbb76SRobert Watson VNET_DECLARE(int, path_mtu_discovery);
1262dbc42409SLawrence Stewart VNET_DECLARE(int, tcp_abc_l_var);
12634036380eSMichael Tuexen VNET_DECLARE(uint32_t, tcp_ack_war_cnt);
12644036380eSMichael Tuexen VNET_DECLARE(uint32_t, tcp_ack_war_time_window);
12653bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_autorcvbuf_max);
12663bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_autosndbuf_inc);
12673bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_autosndbuf_max);
12685dc99e9bSMark Johnston VNET_DECLARE(int, tcp_bind_all_fibs);
12693bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_delack_enabled);
12703bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_do_autorcvbuf);
12713bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_do_autosndbuf);
12723bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_do_ecn);
12730471a8c7SRichard Scheffenegger VNET_DECLARE(int, tcp_do_lrd);
127490cca08eSRichard Scheffenegger VNET_DECLARE(int, tcp_do_prr);
127590cca08eSRichard Scheffenegger VNET_DECLARE(int, tcp_do_prr_conservative);
1276b72e56e7SMichael Tuexen VNET_DECLARE(int, tcp_do_newcwv);
12773bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_do_rfc1323);
1278d2b3ceddSMichael Tuexen VNET_DECLARE(int, tcp_tolerate_missing_ts);
12793bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_do_rfc3042);
12803bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_do_rfc3390);
12813bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_do_rfc3465);
1282d1de2b05SRichard Scheffenegger VNET_DECLARE(int, tcp_do_newsack);
12833bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_do_sack);
12843bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_do_tso);
12853bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_ecn_maxretries);
12863bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_initcwnd_segments);
12873bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_insecure_rst);
12883bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_insecure_syn);
1289646c28eaSMichael Tuexen VNET_DECLARE(int, tcp_insecure_ack);
12901cf55767SRandall Stewart VNET_DECLARE(uint32_t, tcp_map_entries_limit);
12911cf55767SRandall Stewart VNET_DECLARE(uint32_t, tcp_map_split_limit);
12923bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_minmss);
12933bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_mssdflt);
1294adc56f5aSEdward Tomasz Napierala #ifdef STATS
1295adc56f5aSEdward Tomasz Napierala VNET_DECLARE(int, tcp_perconn_stats_dflt_tpl);
1296adc56f5aSEdward Tomasz Napierala VNET_DECLARE(int, tcp_perconn_stats_enable);
1297adc56f5aSEdward Tomasz Napierala #endif /* STATS */
12983bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_recvspace);
129943b117f8SRichard Scheffenegger VNET_DECLARE(int, tcp_retries);
13003bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_sack_globalholes);
13013bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_sack_globalmaxholes);
13023bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_sack_maxholes);
1303dcdfe449SRichard Scheffenegger VNET_DECLARE(int, tcp_sack_tso);
13043bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_sc_rst_sock_fail);
13053bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_sendspace);
13069e644c23SMichael Tuexen VNET_DECLARE(int, tcp_udp_tunneling_overhead);
13079e644c23SMichael Tuexen VNET_DECLARE(int, tcp_udp_tunneling_port);
13083bdf4c42SGleb Smirnoff VNET_DECLARE(struct inpcbinfo, tcbinfo);
13093bdf4c42SGleb Smirnoff 
/*
 * Accessors for the per-VNET tunables: V_<name> expands to VNET(<name>).
 */
#define	V_tcp_do_lrd			VNET(tcp_do_lrd)
#define	V_tcp_do_prr			VNET(tcp_do_prr)
#define	V_tcp_do_newcwv			VNET(tcp_do_newcwv)
#define	V_drop_synfin			VNET(drop_synfin)
#define	V_path_mtu_discovery		VNET(path_mtu_discovery)
#define	V_tcbinfo			VNET(tcbinfo)
#define	V_tcp_abc_l_var			VNET(tcp_abc_l_var)
#define	V_tcp_ack_war_cnt		VNET(tcp_ack_war_cnt)
#define	V_tcp_ack_war_time_window	VNET(tcp_ack_war_time_window)
#define	V_tcp_autorcvbuf_max		VNET(tcp_autorcvbuf_max)
#define	V_tcp_autosndbuf_inc		VNET(tcp_autosndbuf_inc)
#define	V_tcp_autosndbuf_max		VNET(tcp_autosndbuf_max)
#define	V_tcp_bind_all_fibs		VNET(tcp_bind_all_fibs)
#define	V_tcp_delack_enabled		VNET(tcp_delack_enabled)
#define	V_tcp_do_autorcvbuf		VNET(tcp_do_autorcvbuf)
#define	V_tcp_do_autosndbuf		VNET(tcp_do_autosndbuf)
#define	V_tcp_do_ecn			VNET(tcp_do_ecn)
#define	V_tcp_do_rfc1323		VNET(tcp_do_rfc1323)
#define	V_tcp_tolerate_missing_ts	VNET(tcp_tolerate_missing_ts)
/*
 * NOTE(review): no VNET_DECLARE() for tcp_ts_offset_per_conn is visible in
 * this part of the header; confirm it is declared wherever this accessor
 * is used.
 */
#define	V_tcp_ts_offset_per_conn	VNET(tcp_ts_offset_per_conn)
#define	V_tcp_do_rfc3042		VNET(tcp_do_rfc3042)
#define	V_tcp_do_rfc3390		VNET(tcp_do_rfc3390)
#define	V_tcp_do_rfc3465		VNET(tcp_do_rfc3465)
#define	V_tcp_do_newsack		VNET(tcp_do_newsack)
#define	V_tcp_do_sack			VNET(tcp_do_sack)
#define	V_tcp_do_tso			VNET(tcp_do_tso)
#define	V_tcp_ecn_maxretries		VNET(tcp_ecn_maxretries)
#define	V_tcp_initcwnd_segments		VNET(tcp_initcwnd_segments)
#define	V_tcp_insecure_rst		VNET(tcp_insecure_rst)
#define	V_tcp_insecure_syn		VNET(tcp_insecure_syn)
#define	V_tcp_insecure_ack		VNET(tcp_insecure_ack)
#define	V_tcp_map_entries_limit		VNET(tcp_map_entries_limit)
#define	V_tcp_map_split_limit		VNET(tcp_map_split_limit)
#define	V_tcp_minmss			VNET(tcp_minmss)
#define	V_tcp_mssdflt			VNET(tcp_mssdflt)
#ifdef STATS
#define	V_tcp_perconn_stats_dflt_tpl	VNET(tcp_perconn_stats_dflt_tpl)
#define	V_tcp_perconn_stats_enable	VNET(tcp_perconn_stats_enable)
#endif /* STATS */
#define	V_tcp_recvspace			VNET(tcp_recvspace)
#define	V_tcp_retries			VNET(tcp_retries)
#define	V_tcp_sack_globalholes		VNET(tcp_sack_globalholes)
#define	V_tcp_sack_globalmaxholes	VNET(tcp_sack_globalmaxholes)
#define	V_tcp_sack_maxholes		VNET(tcp_sack_maxholes)
#define	V_tcp_sack_tso			VNET(tcp_sack_tso)
#define	V_tcp_sc_rst_sock_fail		VNET(tcp_sc_rst_sock_fail)
#define	V_tcp_sendspace			VNET(tcp_sendspace)
#define	V_tcp_udp_tunneling_overhead	VNET(tcp_udp_tunneling_overhead)
#define	V_tcp_udp_tunneling_port	VNET(tcp_udp_tunneling_port)

#ifdef TCP_HHOOK
/* One hook head pointer per helper hook point (HHOOK_TCP_LAST + 1 slots). */
VNET_DECLARE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST + 1]);
#define	V_tcp_hhh		VNET(tcp_hhh)
#endif /* TCP_HHOOK */
136439bc9de5SLawrence Stewart 
1365219a6ca9SGleb Smirnoff void	tcp_account_for_send(struct tcpcb *, uint32_t, uint8_t, uint8_t, bool);
136602a1a643SAndre Oppermann int	 tcp_addoptions(struct tcpopt *, u_char *);
1367df8bae1dSRodney W. Grimes struct tcpcb *
13684d77a549SAlfred Perlstein 	 tcp_close(struct tcpcb *);
1369623dce13SRobert Watson void	 tcp_discardcb(struct tcpcb *);
1370340c35deSJonathan Lemon void	 tcp_twstart(struct tcpcb *);
13714d77a549SAlfred Perlstein int	 tcp_ctloutput(struct socket *, struct sockopt *);
1372c1537ef0SMike Silbersack void	 tcp_fini(void *);
1373700a395cSJohn Baldwin char	*tcp_log_addrs(struct in_conninfo *, struct tcphdr *, const void *,
13749fb5d4c0SPeter Wemm 	    const void *);
1375700a395cSJohn Baldwin char	*tcp_log_vain(struct in_conninfo *, struct tcphdr *, const void *,
1376b7d747ecSAndre Oppermann 	    const void *);
13774d0770f1SRichard Scheffenegger int	 tcp_reass(struct tcpcb *, struct tcphdr *, tcp_seq *, int *,
13784d0770f1SRichard Scheffenegger 	    struct mbuf *);
13794741bfcbSPatrick Kelsey void	 tcp_reass_global_init(void);
13800c236c4eSLawrence Stewart void	 tcp_reass_flush(struct tcpcb *);
138155bceb1eSRandall Stewart void	 tcp_dooptions(struct tcpopt *, u_char *, int, int);
138255bceb1eSRandall Stewart void	tcp_dropwithreset(struct mbuf *, struct tcphdr *,
138355bceb1eSRandall Stewart 		     struct tcpcb *, int, int);
138455bceb1eSRandall Stewart void	tcp_pulloutofband(struct socket *,
138555bceb1eSRandall Stewart 		     struct tcphdr *, struct mbuf *, int);
138655bceb1eSRandall Stewart void	tcp_xmit_timer(struct tcpcb *, int);
138755bceb1eSRandall Stewart void	tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *);
138855bceb1eSRandall Stewart void	cc_ack_received(struct tcpcb *tp, struct tcphdr *th,
13894b7b743cSLawrence Stewart 			    uint16_t nsegs, uint16_t type);
139055bceb1eSRandall Stewart void 	cc_conn_init(struct tcpcb *tp);
139155bceb1eSRandall Stewart void 	cc_post_recovery(struct tcpcb *tp, struct tcphdr *th);
13924ad24737SRandall Stewart void    cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos);
13935d8fd932SRandall Stewart void	cc_ecnpkt_handler_flags(struct tcpcb *tp, uint16_t flags, uint8_t iptos);
139455bceb1eSRandall Stewart void	cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type);
1395bd79708dSJonathan T. Looney #ifdef TCP_HHOOK
139655bceb1eSRandall Stewart void	hhook_run_tcp_est_in(struct tcpcb *tp,
139755bceb1eSRandall Stewart 			    struct tcphdr *th, struct tcpopt *to);
1398bd79708dSJonathan T. Looney #endif
139955bceb1eSRandall Stewart 
int	 tcp_input(struct mbuf **, int *, int);
int	 tcp_autorcvbuf(struct mbuf *, struct tcphdr *, struct socket *,
	    struct tcpcb *, int);
int	 tcp_input_with_port(struct mbuf **, int *, int, uint16_t);
void	 tcp_do_segment(struct tcpcb *, struct mbuf *, struct tcphdr *, int,
	    int, uint8_t);

/*
 * Registration, lookup and removal of struct tcp_function_block instances.
 */
int register_tcp_functions(struct tcp_function_block *blk, int wait);
int register_tcp_functions_as_names(struct tcp_function_block *blk,
    int wait, const char *names[], int *num_names);
int register_tcp_functions_as_name(struct tcp_function_block *blk,
    const char *name, int wait);
int deregister_tcp_functions(struct tcp_function_block *blk, bool quiesce,
    bool force);
struct tcp_function_block *
find_and_ref_tcp_functions(struct tcp_function_set *fs);
int find_tcp_function_alias(struct tcp_function_block *blk,
    struct tcp_function_set *fs);
uint32_t tcp_get_srtt(struct tcpcb *tp, int granularity);
void tcp_switch_back_to_default(struct tcpcb *tp);
struct tcp_function_block *
find_and_ref_tcp_fb(struct tcp_function_block *fs);
int tcp_default_ctloutput(struct tcpcb *tp, struct sockopt *sopt);
int tcp_ctloutput_set(struct inpcb *inp, struct sockopt *sopt);
void tcp_log_socket_option(struct tcpcb *tp, uint32_t option_num,
    uint32_t option_val, int err);

1426af9b9e0dSRandall Stewart extern counter_u64_t tcp_inp_lro_direct_queue;
1427af9b9e0dSRandall Stewart extern counter_u64_t tcp_inp_lro_wokeup_queue;
1428af9b9e0dSRandall Stewart extern counter_u64_t tcp_inp_lro_compressed;
1429af9b9e0dSRandall Stewart extern counter_u64_t tcp_inp_lro_locks_taken;
143069a34e8dSRandall Stewart extern counter_u64_t tcp_extra_mbuf;
143169a34e8dSRandall Stewart extern counter_u64_t tcp_would_have_but;
143269a34e8dSRandall Stewart extern counter_u64_t tcp_comp_total;
143369a34e8dSRandall Stewart extern counter_u64_t tcp_uncomp_total;
1434ca1a7e10SRandall Stewart extern counter_u64_t tcp_bad_csums;
1435af9b9e0dSRandall Stewart 
14365d8fd932SRandall Stewart extern uint32_t tcp_ack_war_time_window;
14375d8fd932SRandall Stewart extern uint32_t tcp_ack_war_cnt;
14381cf55767SRandall Stewart 
1439e4315bbcSGleb Smirnoff /*
1440e4315bbcSGleb Smirnoff  * Used by tcp_maxmtu() to communicate interface specific features
1441e4315bbcSGleb Smirnoff  * and limits at the time of connection setup.
1442e4315bbcSGleb Smirnoff  */
1443e4315bbcSGleb Smirnoff struct tcp_ifcap {
1444e4315bbcSGleb Smirnoff 	int	ifcap;
1445e4315bbcSGleb Smirnoff 	u_int	tsomax;
1446e4315bbcSGleb Smirnoff 	u_int	tsomaxsegcount;
1447e4315bbcSGleb Smirnoff 	u_int	tsomaxsegsize;
1448b6919741SKonstantin Belousov 	bool	ipsec_tso;
1449e4315bbcSGleb Smirnoff };
14503ac12506SJonathan T. Looney uint32_t tcp_maxmtu(struct in_conninfo *, struct tcp_ifcap *);
14513ac12506SJonathan T. Looney uint32_t tcp_maxmtu6(struct in_conninfo *, struct tcp_ifcap *);
1452e4315bbcSGleb Smirnoff 
1453f581a26eSGleb Smirnoff void	 tcp6_use_min_mtu(struct tcpcb *);
14540c39d38dSGleb Smirnoff u_int	 tcp_maxseg(const struct tcpcb *);
14555d8fd932SRandall Stewart u_int	 tcp_fixed_maxseg(const struct tcpcb *);
1456ef341ee1SGleb Smirnoff void	 tcp_mss_update(struct tcpcb *, int, int, struct hc_metrics_lite *,
14573c914c54SAndre Oppermann 	    struct tcp_ifcap *);
14584d77a549SAlfred Perlstein void	 tcp_mss(struct tcpcb *, int);
145997d8d152SAndre Oppermann int	 tcp_mssopt(struct in_conninfo *);
1460df8bae1dSRodney W. Grimes struct tcpcb *
1461baee801cSMichael Tuexen 	 tcp_newtcpcb(struct inpcb *, struct tcpcb *);
14625b08b46aSGleb Smirnoff int	 tcp_default_output(struct tcpcb *);
146357f60867SMark Johnston void	 tcp_state_change(struct tcpcb *, int);
14644d77a549SAlfred Perlstein void	 tcp_respond(struct tcpcb *, void *,
146583c1ec92SRichard Scheffenegger 	    struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, uint16_t);
146640299c55SMichael Tuexen void	 tcp_send_challenge_ack(struct tcpcb *, struct tcphdr *, struct mbuf *);
14670d744519SGleb Smirnoff bool	 tcp_twcheck(struct inpcb *, struct tcpopt *, struct tcphdr *,
14682104448fSAndre Oppermann 	    struct mbuf *, int);
14694d77a549SAlfred Perlstein void	 tcp_setpersist(struct tcpcb *);
1470a36230f7SRandall Stewart void	 tcp_record_dsack(struct tcpcb *tp, tcp_seq start, tcp_seq end, int tlp);
1471fb59c426SYoshinobu Inoue struct tcptemp *
147279909384SJonathan Lemon 	 tcpip_maketemplate(struct inpcb *);
14739e644c23SMichael Tuexen void	 tcpip_fillheaders(struct inpcb *, uint16_t, void *, void *);
1474446ccdd0SGleb Smirnoff void	 tcp_timer_activate(struct tcpcb *, tt_which, u_int);
1475446ccdd0SGleb Smirnoff bool	 tcp_timer_active(struct tcpcb *, tt_which);
1476446ccdd0SGleb Smirnoff void	 tcp_timer_stop(struct tcpcb *);
147789e560f4SRandall Stewart int	 inp_to_cpuid(struct inpcb *inp);
147897d8d152SAndre Oppermann /*
147997d8d152SAndre Oppermann  * All tcp_hc_* functions are IPv4 and IPv6 (via in_conninfo)
148097d8d152SAndre Oppermann  */
148197d8d152SAndre Oppermann void	 tcp_hc_init(void);
1482bc29160dSMarko Zec #ifdef VIMAGE
1483bc29160dSMarko Zec void	 tcp_hc_destroy(void);
1484bc29160dSMarko Zec #endif
1485b80c06ccSGleb Smirnoff void	 tcp_hc_get(const struct in_conninfo *, struct hc_metrics_lite *);
1486b80c06ccSGleb Smirnoff uint32_t tcp_hc_getmtu(const struct in_conninfo *);
1487b80c06ccSGleb Smirnoff void	 tcp_hc_updatemtu(const struct in_conninfo *, uint32_t);
1488b80c06ccSGleb Smirnoff void	 tcp_hc_update(const struct in_conninfo *, struct hc_metrics_lite *);
1489cd84e78fSRandall Stewart void 	 cc_after_idle(struct tcpcb *tp);
1490dd224982SGarrett Wollman 
1491e7d02be1SGleb Smirnoff extern	struct protosw tcp_protosw;		/* shared for TOE */
1492e7d02be1SGleb Smirnoff extern	struct protosw tcp6_protosw;		/* shared for TOE */
14938e02b4e0SMichael Tuexen 
14948e02b4e0SMichael Tuexen uint32_t tcp_new_ts_offset(struct in_conninfo *);
14958e02b4e0SMichael Tuexen tcp_seq	 tcp_new_isn(struct in_conninfo *);
1496dd224982SGarrett Wollman 
149749a6fbe3SRichard Scheffenegger sackstatus_t
149849a6fbe3SRichard Scheffenegger 	 tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
14995d8fd932SRandall Stewart int	 tcp_dsack_block_exists(struct tcpcb *);
1500fe5dee73SMichael Tuexen void	 tcp_update_dsack_list(struct tcpcb *, tcp_seq, tcp_seq);
1501c7c325d0SRichard Scheffenegger void	 tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart,
1502c7c325d0SRichard Scheffenegger 	    tcp_seq rcv_lastend);
1503e5926fd3SRandall Stewart void	 tcp_clean_dsack_blocks(struct tcpcb *tp);
15046d90faf3SPaul Saab void	 tcp_clean_sackreport(struct tcpcb *tp);
1505440f4ba1SRichard Scheffenegger int	 tcp_sack_adjust(struct tcpcb *tp);
1506a55db2b6SPaul Saab struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt);
1507c7c325d0SRichard Scheffenegger void	 tcp_do_prr_ack(struct tcpcb *, struct tcphdr *, struct tcpopt *,
1508c7c325d0SRichard Scheffenegger 	    sackstatus_t, u_int *);
15090471a8c7SRichard Scheffenegger void	 tcp_lost_retransmission(struct tcpcb *, struct tcphdr *);
1510c7c325d0SRichard Scheffenegger void	 tcp_sack_partialack(struct tcpcb *, struct tcphdr *, u_int *);
151130409ecdSRichard Scheffenegger void	 tcp_resend_sackholes(struct tcpcb *tp);
15126d90faf3SPaul Saab void	 tcp_free_sackholes(struct tcpcb *tp);
15130471a8c7SRichard Scheffenegger void	 tcp_sack_lost_retransmission(struct tcpcb *, struct tcphdr *);
15146d90faf3SPaul Saab int	 tcp_newreno(struct tcpcb *, struct tcphdr *);
151512eeb81fSHiren Panchasara int	 tcp_compute_pipe(struct tcpcb *);
15167dc90a1dSMichael Tuexen uint32_t tcp_compute_initwnd(uint32_t);
151766492feaSGleb Smirnoff void	 tcp_sndbuf_autoscale(struct tcpcb *, struct socket *, uint32_t);
1518adc56f5aSEdward Tomasz Napierala int	 tcp_stats_sample_rollthedice(struct tcpcb *tp, void *seed_bytes,
1519adc56f5aSEdward Tomasz Napierala     size_t seed_len);
15205d8fd932SRandall Stewart int tcp_can_enable_pacing(void);
1521e18b97bdSRandall Stewart int tcp_incr_dgp_pacing_cnt(void);
1522e18b97bdSRandall Stewart void tcp_dec_dgp_pacing_cnt(void);
15235d8fd932SRandall Stewart void tcp_decrement_paced_conn(void);
152473ee5756SRandall Stewart void tcp_change_time_units(struct tcpcb *, int);
152573ee5756SRandall Stewart void tcp_handle_orphaned_packets(struct tcpcb *);
15265d8fd932SRandall Stewart 
struct mbuf *
	 tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *plen,
	    int32_t seglimit, int32_t segsize, struct sockbuf *sb,
	    bool hw_tls);

int	 tcp_stats_init(void);
void	 tcp_log_end_status(struct tcpcb *tp, uint8_t status);
/* Request-tracking prototypes; only declared when TCP_REQUEST_TRK is set. */
#ifdef TCP_REQUEST_TRK
void tcp_req_free_a_slot(struct tcpcb *tp, struct tcp_sendfile_track *ent);
struct tcp_sendfile_track *
tcp_req_find_a_req_that_is_completed_by(struct tcpcb *tp, tcp_seq th_ack,
    int *ip);
int tcp_req_check_for_comp(struct tcpcb *tp, tcp_seq ack_point);
int
tcp_req_is_entry_comp(struct tcpcb *tp, struct tcp_sendfile_track *ent,
    tcp_seq ack_point);
struct tcp_sendfile_track *
tcp_req_find_req_for_seq(struct tcpcb *tp, tcp_seq seq);
void
tcp_req_log_req_info(struct tcpcb *tp,
    struct tcp_sendfile_track *req, uint16_t slot,
    uint8_t val, uint64_t offset, uint64_t nbytes);

uint32_t
tcp_estimate_tls_overhead(struct socket *so, uint64_t tls_usr_bytes);
void
tcp_req_alloc_req(struct tcpcb *tp, union tcp_log_userdata *user,
    uint64_t ts);

struct tcp_sendfile_track *
tcp_req_alloc_req_full(struct tcpcb *tp, struct tcp_snd_req *req, uint64_t ts,
    int rec_dups);

#endif /* TCP_REQUEST_TRK */
#ifdef TCP_ACCOUNTING
int tcp_do_ack_accounting(struct tcpcb *tp, struct tcphdr *th,
    struct tcpopt *to, uint32_t tiwin, int mss);
#endif /* TCP_ACCOUNTING */

1562c3c20de3SGleb Smirnoff static inline void
tcp_lro_features_off(struct tcpcb * tp)1563c3c20de3SGleb Smirnoff tcp_lro_features_off(struct tcpcb *tp)
1564c3c20de3SGleb Smirnoff {
1565c3c20de3SGleb Smirnoff 	tp->t_flags2 &= ~(TF2_SUPPORTS_MBUFQ|
1566c3c20de3SGleb Smirnoff 	    TF2_MBUF_QUEUE_READY|
1567c3c20de3SGleb Smirnoff 	    TF2_DONT_SACK_QUEUE|
1568c3c20de3SGleb Smirnoff 	    TF2_MBUF_ACKCMP|
1569c3c20de3SGleb Smirnoff 	    TF2_MBUF_L_ACKS);
1570c3c20de3SGleb Smirnoff }
15716d90faf3SPaul Saab 
1572255cd9fdSBjoern A. Zeeb static inline void
tcp_fields_to_host(struct tcphdr * th)1573255cd9fdSBjoern A. Zeeb tcp_fields_to_host(struct tcphdr *th)
1574255cd9fdSBjoern A. Zeeb {
1575255cd9fdSBjoern A. Zeeb 
1576255cd9fdSBjoern A. Zeeb 	th->th_seq = ntohl(th->th_seq);
1577255cd9fdSBjoern A. Zeeb 	th->th_ack = ntohl(th->th_ack);
1578255cd9fdSBjoern A. Zeeb 	th->th_win = ntohs(th->th_win);
1579255cd9fdSBjoern A. Zeeb 	th->th_urp = ntohs(th->th_urp);
1580255cd9fdSBjoern A. Zeeb }
1581255cd9fdSBjoern A. Zeeb 
1582cfff3743SGleb Smirnoff static inline void
tcp_fields_to_net(struct tcphdr * th)1583cfff3743SGleb Smirnoff tcp_fields_to_net(struct tcphdr *th)
1584cfff3743SGleb Smirnoff {
1585cfff3743SGleb Smirnoff 
1586cfff3743SGleb Smirnoff 	th->th_seq = htonl(th->th_seq);
1587cfff3743SGleb Smirnoff 	th->th_ack = htonl(th->th_ack);
1588cfff3743SGleb Smirnoff 	th->th_win = htons(th->th_win);
1589cfff3743SGleb Smirnoff 	th->th_urp = htons(th->th_urp);
1590cfff3743SGleb Smirnoff }
#endif /* _KERNEL */

#endif /* _NETINET_TCP_VAR_H_ */