/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1982, 1986, 1993, 1994, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef _NETINET_TCP_VAR_H_
#define _NETINET_TCP_VAR_H_

#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>

#ifdef _KERNEL
#include <net/vnet.h>
#include <sys/mbuf.h>
#include <sys/ktls.h>
#endif

#define TCP_END_BYTE_INFO 8	/* Bytes that make up the "end information array" */
/* Types of ending byte info */
#define TCP_EI_EMPTY_SLOT	0
#define TCP_EI_STATUS_CLIENT_FIN	0x1
#define TCP_EI_STATUS_CLIENT_RST	0x2
#define TCP_EI_STATUS_SERVER_FIN	0x3
#define TCP_EI_STATUS_SERVER_RST	0x4
#define TCP_EI_STATUS_RETRAN		0x5
#define TCP_EI_STATUS_PROGRESS		0x6
#define TCP_EI_STATUS_PERSIST_MAX	0x7
#define TCP_EI_STATUS_KEEP_MAX		0x8
#define TCP_EI_STATUS_DATA_A_CLOSE	0x9
#define TCP_EI_STATUS_RST_IN_FRONT	0xa
#define TCP_EI_STATUS_2MSL		0xb
#define TCP_EI_STATUS_MAX_VALUE		0xb

#define TCP_TRK_REQ_LOG_NEW		0x01
#define TCP_TRK_REQ_LOG_COMPLETE	0x02
#define TCP_TRK_REQ_LOG_FREED		0x03
#define TCP_TRK_REQ_LOG_ALLOCFAIL	0x04
#define TCP_TRK_REQ_LOG_MOREYET		0x05
#define TCP_TRK_REQ_LOG_FORCEFREE	0x06
#define TCP_TRK_REQ_LOG_STALE		0x07
#define TCP_TRK_REQ_LOG_SEARCH		0x08

/************************************************/
/* Status bits we track to ensure no duplicates.
 * These bits are not consumed by the code itself; they are
 * kept for human-readable representation.  The bit for a
 * status value is obtained by shifting 1 left by (value - 1),
 * for values 1-8 (see the illustrative macro below).
 */
/************************************************/
#define TCP_EI_BITS_CLIENT_FIN	0x001
#define TCP_EI_BITS_CLIENT_RST	0x002
#define TCP_EI_BITS_SERVER_FIN	0x004
#define TCP_EI_BITS_SERVER_RST	0x008
#define TCP_EI_BITS_RETRAN	0x010
#define TCP_EI_BITS_PROGRESS	0x020
#define TCP_EI_BITS_PRESIST_MAX	0x040
#define TCP_EI_BITS_KEEP_MAX	0x080
#define TCP_EI_BITS_DATA_A_CLO	0x100
#define TCP_EI_BITS_RST_IN_FR	0x200	/* a front state reset */
#define TCP_EI_BITS_2MS_TIMER	0x400	/* 2 MSL timer expired */
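
/*
 * Illustrative sketch only (TCP_EI_STATUS_TO_BITS is a hypothetical name,
 * not part of the tree): the mapping from an end-status value above to its
 * duplicate-tracking bit, e.g. TCP_EI_STATUS_RETRAN (0x5) ->
 * TCP_EI_BITS_RETRAN (0x010).
 */
#define TCP_EI_STATUS_TO_BITS(status)	(1U << ((status) - 1))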

#if defined(_KERNEL) || defined(_WANT_TCPCB)
#include <sys/_callout.h>
#include <sys/osd.h>

#include <netinet/cc/cc.h>

/* TCP segment queue entry */
struct tseg_qent {
	TAILQ_ENTRY(tseg_qent) tqe_q;
	struct mbuf	*tqe_m;		/* mbuf contains packet */
	struct mbuf	*tqe_last;	/* last mbuf in chain */
	tcp_seq		tqe_start;	/* TCP Sequence number start */
	int		tqe_len;	/* TCP segment data length */
	uint32_t	tqe_flags;	/* The flags from tcp_get_flags() */
	uint32_t	tqe_mbuf_cnt;	/* Count of mbuf overhead */
};
TAILQ_HEAD(tsegqe_head, tseg_qent);

struct sackblk {
	tcp_seq start;		/* start seq no. of sack block */
	tcp_seq end;		/* end seq no. */
};

struct sackhole {
	tcp_seq start;		/* start seq no. of hole */
	tcp_seq end;		/* end seq no. */
	tcp_seq rxmit;		/* next seq. no in hole to be retransmitted */
	TAILQ_ENTRY(sackhole) scblink;	/* scoreboard linkage */
};

struct sackhint {
	struct sackhole	*nexthole;
	int32_t		sack_bytes_rexmit;
	tcp_seq		last_sack_ack;	/* Most recent/largest sacked ack */

	int32_t		delivered_data;	/* Newly acked data from last SACK */

	int32_t		sacked_bytes;	/* Total sacked bytes reported by the
					 * receiver via sack option
					 */
	uint32_t	recover_fs;	/* Flight Size at the start of Loss recovery */
	uint32_t	prr_delivered;	/* Total bytes delivered using PRR */
	uint32_t	prr_out;	/* Bytes sent during IN_RECOVERY */
	int32_t		hole_bytes;	/* current number of bytes in scoreboard holes */
	int32_t		lost_bytes;	/* number of rfc6675 IsLost() bytes */
};

#define SEGQ_EMPTY(tp) TAILQ_EMPTY(&(tp)->t_segq)

STAILQ_HEAD(tcp_log_stailq, tcp_log_mem);

#define TCP_TRK_TRACK_FLG_EMPTY	0x00	/* Available */
#define TCP_TRK_TRACK_FLG_USED	0x01	/* In use */
#define TCP_TRK_TRACK_FLG_OPEN	0x02	/* End is not valid (open range request) */
#define TCP_TRK_TRACK_FLG_SEQV	0x04	/* We had a sendfile that touched it */
#define TCP_TRK_TRACK_FLG_COMP	0x08	/* Sendfile has placed the last bits (range req only) */
#define TCP_TRK_TRACK_FLG_FSND	0x10	/* First send has been done into the seq space */
#define TCP_TRK_TRACK_FLG_LSND	0x20	/* We were able to set the Last Sent */
#define MAX_TCP_TRK_REQ 5		/* Max we will have at once */

struct tcp_sendfile_track {
	uint64_t timestamp;	/* User sent timestamp */
	uint64_t start;		/* Start of sendfile offset */
	uint64_t end;		/* End if not open-range req */
	uint64_t localtime;	/* Time we actually got the req */
	uint64_t deadline;	/* If in CU mode, deadline for delivery */
	uint64_t first_send;	/* Time of first send in the range */
	uint64_t cspr;		/* Client suggested pace rate */
	uint64_t sent_at_fs;	/* What t_sndbytes was when we began sending */
	uint64_t rxt_at_fs;	/* What t_snd_rxt_bytes was when we began sending */
	uint64_t sent_at_ls;	/* Sent value at the last send */
	uint64_t rxt_at_ls;	/* Retransmit value at the last send */
	tcp_seq start_seq;	/* First TCP Seq assigned */
	tcp_seq end_seq;	/* If range req last seq */
	uint32_t flags;		/* Type of request open etc */
	uint32_t sbcc_at_s;	/* When we allocate what is the sb_cc */
	uint32_t hint_maxseg;	/* Client hinted maxseg */
	uint32_t playout_ms;	/* Client playout ms */
	uint32_t hybrid_flags;	/* Hybrid flags on this request */
};

/*
 * Change-query responses for a stack switch.  We create a structure
 * that allows query responses from the new stack to the old one, if
 * supported.
 *
 * There are three queries currently defined.
 *  - sendmap
 *  - timers
 *  - rack_times
 *
 * For the sendmap query the caller fills in the
 * req and the req_param as the first seq (usually
 * snd_una). When the response comes back indicating
 * that there was data (return value 1), then the caller
 * can build a sendmap entry based on the range and the
 * times. The next query would then be done at the
 * newly created sendmap_end. This is repeated until
 * sendmap_end == snd_max (see the illustrative sendmap-walk
 * sketch following the tcp_function declarations below).
 *
 * Flags in sendmap_flags are defined below as well.
 *
 * For timers the standard PACE_TMR_XXXX flags are returned, indicating
 * a pacing timer (possibly) and one other timer. If a pacing timer is
 * set, then the expiration timeout time in microseconds is in
 * timer_pacing_to.  The value used with whatever other timer (if a
 * flag is set) is in timer_rxt. If no timers are running, 0 is
 * returned and of course no flags are set in timer_hpts_flags.
 *
 * The rack_times are a miscellaneous collection of information that
 * the old stack might possibly fill in. Of course it's possible
 * that an old stack may not have a piece of information; if so,
 * setting that value to zero is advised.  A timestamp should be
 * reported as zero only when it is unfilled.  This may mean that a
 * time is off by a microsecond, but this is ok in the grand scheme
 * of things.
 *
 * When switching stacks it is desirable to get as much information
 * from the old stack to the new stack as possible, though the stacks
 * will not always be compatible in the types of information they keep.
 * The init() function needs to take care when it begins changing
 * things such as inp_flags2 and the timer units to position these
 * changes at a point where it is unlikely they will fail after
 * making such changes. A stack optionally can have an "undo"
 * function.
 *
 * To transfer information to the old stack from the new in
 * respect to LRO and the inp_flags2, the new stack should set
 * the inp_flags2 to what it supports. The old stack in its
 * fini() function should call the tcp_handle_orphaned_packets()
 * to clean up any packets. Note that a new stack should attempt
 * to declare its mbuf-queuing support (see below) so that such
 * queued packets need not be discarded.
 */

/* Query types */
#define TCP_QUERY_SENDMAP	1
#define TCP_QUERY_TIMERS_UP	2
#define TCP_QUERY_RACK_TIMES	3

/* Flags returned in sendmap_flags */
#define SNDMAP_ACKED		0x000001 /* The remote endpoint acked this */
#define SNDMAP_OVERMAX		0x000008 /* We have more retransmits than we can fit */
#define SNDMAP_SACK_PASSED	0x000010 /* A sack was done above this block */
#define SNDMAP_HAS_FIN		0x000040 /* segment is sent with fin */
#define SNDMAP_TLP		0x000080 /* segment sent as tail-loss-probe */
#define SNDMAP_HAS_SYN		0x000800 /* SYN is on this segment */
#define SNDMAP_HAD_PUSH		0x008000 /* Push was sent on original send */
#define SNDMAP_MASK  (SNDMAP_ACKED|SNDMAP_OVERMAX|SNDMAP_SACK_PASSED|SNDMAP_HAS_FIN\
		      |SNDMAP_TLP|SNDMAP_HAS_SYN|SNDMAP_HAD_PUSH)
#define SNDMAP_NRTX 3

struct tcp_query_resp {
	int req;
	uint32_t req_param;
	union {
		struct {
			tcp_seq sendmap_start;
			tcp_seq sendmap_end;
			int sendmap_send_cnt;
			uint64_t sendmap_time[SNDMAP_NRTX];
			uint64_t sendmap_ack_arrival;
			int sendmap_flags;
			uint32_t sendmap_r_rtr_bytes;
			/* sendmap_fas: flight size at send (FAS), if available; 0 if not */
			uint32_t sendmap_fas;
			uint8_t sendmap_dupacks;
		};
		struct {
			uint32_t timer_hpts_flags;
			uint32_t timer_pacing_to;
			uint32_t timer_timer_exp;
		};
		struct {
			/* Timestamps and rtt's */
			uint32_t rack_reorder_ts;	/* Last uscts that reordering was seen */
			uint32_t rack_num_dsacks;	/* Num of dsacks seen */
			uint32_t rack_rxt_last_time;	/* Last time a RXT/TLP or rack tmr went off */
			uint32_t rack_min_rtt;		/* never 0 smallest rtt seen */
			uint32_t rack_rtt;		/* Last rtt used by rack */
			uint32_t rack_tmit_time;	/* The time the rtt seg was transmitted */
			uint32_t rack_time_went_idle;	/* If in persist the time we went idle */
			/* Prr data */
			uint32_t rack_sacked;
			uint32_t rack_holes_rxt;
			uint32_t rack_prr_delivered;
			uint32_t rack_prr_recovery_fs;
			uint32_t rack_prr_out;
			uint32_t rack_prr_sndcnt;
			/* TLP data */
			uint16_t rack_tlp_cnt_out;	/* How many tlp's have been sent */
			/* Various bits */
			uint8_t rack_tlp_out;		/* Is a TLP outstanding */
			uint8_t rack_srtt_measured;	/* The previous stack has measured srtt */
			uint8_t rack_in_persist;	/* Is the old stack in persists? */
			uint8_t rack_wanted_output;	/* Did the previous stack have a want-output set */
		};
	};
};

#define TCP_TMR_GRANULARITY_TICKS	1	/* TCP timers are in ticks (msec if hz=1000) */
#define TCP_TMR_GRANULARITY_USEC	2	/* TCP timers are in microseconds */

typedef enum {
	TT_REXMT = 0,
	TT_PERSIST,
	TT_KEEP,
	TT_2MSL,
	TT_DELACK,
	TT_N,
} tt_which;

typedef enum {
	TT_PROCESSING = 0,
	TT_PROCESSED,
	TT_STARTING,
	TT_STOPPING,
} tt_what;

/*
 * Tcp control block, one per tcp connection.
 */
struct tcpcb {
	struct inpcb t_inpcb;		/* embedded protocol independent cb */
#define	t_start_zero	t_fb
#define	t_zero_size	(sizeof(struct tcpcb) - \
			    offsetof(struct tcpcb, t_start_zero))
	struct tcp_function_block *t_fb;/* TCP function call block */
	void	*t_fb_ptr;		/* Pointer to t_fb specific data */

	struct callout t_callout;
	sbintime_t t_timers[TT_N];
	sbintime_t t_precisions[TT_N];

	/* HPTS. Used by BBR and Rack stacks. See tcp_hpts.c for more info. */
	TAILQ_ENTRY(tcpcb)	t_hpts;		/* linkage to HPTS ring */
	STAILQ_HEAD(, mbuf)	t_inqueue;	/* HPTS input packets queue */
	uint32_t t_hpts_request;	/* Current hpts request, zero if
					 * fits in the pacing window. */
	uint32_t t_hpts_slot;		/* HPTS wheel slot this tcb is. */
	uint32_t t_hpts_drop_reas;	/* Reason we are dropping the pcb. */
	uint32_t t_hpts_gencnt;
	uint16_t t_hpts_cpu;		/* CPU chosen by hpts_cpuid(). */
	uint16_t t_lro_cpu;		/* CPU derived from LRO. */
#define	HPTS_CPU_NONE	((uint16_t)-1)
	enum {
		IHPTS_NONE = 0,
		IHPTS_ONQUEUE,
		IHPTS_MOVING,
	} t_in_hpts;			/* Is it linked into HPTS? */

	uint32_t t_maxseg:24,		/* maximum segment size */
		_t_logstate:8;		/* State of "black box" logging */
	uint32_t t_port:16,		/* Tunneling (over udp) port */
		t_state:4,		/* state of this connection */
		t_idle_reduce : 1,
		t_delayed_ack: 7,	/* Delayed ack variable */
		t_fin_is_rst: 1,	/* Are fin's treated as resets */
		t_log_state_set: 1,
		bits_spare : 2;
	u_int	t_flags;
	tcp_seq	snd_una;		/* sent but unacknowledged */
	tcp_seq	snd_max;		/* highest sequence number sent;
					 * used to recognize retransmits
					 */
	tcp_seq	snd_nxt;		/* send next */
	tcp_seq	snd_up;			/* send urgent pointer */
	uint32_t snd_wnd;		/* send window */
	uint32_t snd_cwnd;		/* congestion-controlled window */
	uint32_t ts_offset;		/* our timestamp offset */
	uint32_t rfbuf_ts;		/* recv buffer autoscaling timestamp */
	int	rcv_numsacks;		/* # distinct sack blks present */
	u_int	t_tsomax;		/* TSO total burst length limit */
	u_int	t_tsomaxsegcount;	/* TSO maximum segment count */
	u_int	t_tsomaxsegsize;	/* TSO maximum segment size in bytes */
	tcp_seq	rcv_nxt;		/* receive next */
	tcp_seq	rcv_adv;		/* advertised window */
	uint32_t rcv_wnd;		/* receive window */
	u_int	t_flags2;		/* More tcpcb flags storage */
	int	t_srtt;			/* smoothed round-trip time */
	int	t_rttvar;		/* variance in round-trip time */
	uint32_t ts_recent;		/* timestamp echo data */
	u_char	snd_scale;		/* window scaling for send window */
	u_char	rcv_scale;		/* window scaling for recv window */
	u_char	snd_limited;		/* segments limited transmitted */
	u_char	request_r_scale;	/* pending window scaling */
	tcp_seq	last_ack_sent;
	u_int	t_rcvtime;		/* inactivity time */
	tcp_seq	rcv_up;			/* receive urgent pointer */
	int	t_segqlen;		/* segment reassembly queue length */
	uint32_t t_segqmbuflen;		/* total reassembly queue byte length */
	struct	tsegqe_head t_segq;	/* segment reassembly queue */
	uint32_t snd_ssthresh;		/* snd_cwnd size threshold for
					 * slow start exponential to
					 * linear switch
					 */
	tcp_seq	snd_wl1;		/* window update seg seq number */
	tcp_seq	snd_wl2;		/* window update seg ack number */

	tcp_seq	irs;			/* initial receive sequence number */
	tcp_seq	iss;			/* initial send sequence number */
	u_int	t_acktime;		/* RACK and BBR incoming new data was acked */
	u_int	t_sndtime;		/* time last data was sent */
	u_int	ts_recent_age;		/* when last updated */
	tcp_seq	snd_recover;		/* for use in NewReno Fast Recovery */
	char	t_oobflags;		/* have some */
	char	t_iobc;			/* input character */
	uint8_t	t_nic_ktls_xmit:1,	/* active nic ktls xmit sessions */
		t_nic_ktls_xmit_dis:1,	/* disabled nic xmit ktls? */
		t_nic_ktls_spare:6;	/* spare nic ktls */
	int	t_rxtcur;		/* current retransmit value (ticks) */

	int	t_rxtshift;		/* log(2) of rexmt exp. backoff */
	u_int	t_rtttime;		/* RTT measurement start time */

	tcp_seq	t_rtseq;		/* sequence number being timed */
	u_int	t_starttime;		/* time connection was established */
	u_int	t_fbyte_in;		/* ticks time first byte queued in */
	u_int	t_fbyte_out;		/* ticks time first byte queued out */

	u_int	t_pmtud_saved_maxseg;	/* pre-blackhole MSS */
	int	t_blackhole_enter;	/* when to enter blackhole detection */
	int	t_blackhole_exit;	/* when to exit blackhole detection */
	u_int	t_rttmin;		/* minimum rtt allowed */

	int	t_softerror;		/* possible error not yet reported */
	uint32_t max_sndwnd;		/* largest window peer has offered */
	uint32_t snd_cwnd_prev;		/* cwnd prior to retransmit */
	uint32_t snd_ssthresh_prev;	/* ssthresh prior to retransmit */
	tcp_seq	snd_recover_prev;	/* snd_recover prior to retransmit */
	int	t_sndzerowin;		/* zero-window updates sent */
	int	snd_numholes;		/* number of holes seen by sender */
	u_int	t_badrxtwin;		/* window for retransmit recovery */
	TAILQ_HEAD(sackhole_head, sackhole) snd_holes;
					/* SACK scoreboard (sorted) */
	tcp_seq	snd_fack;		/* last seq number(+1) sack'd by rcv'r*/
	struct sackblk sackblks[MAX_SACK_BLKS]; /* seq nos. of sack blocks */
	struct sackhint	sackhint;	/* SACK scoreboard hint */
	int	t_rttlow;		/* smallest observed RTT */
	int	rfbuf_cnt;		/* recv buffer autoscaling byte count */
	struct toedev	*tod;		/* toedev handling this connection */
	int	t_sndrexmitpack;	/* retransmit packets sent */
	int	t_rcvoopack;		/* out-of-order packets received */
	void	*t_toe;			/* TOE pcb pointer */
	struct cc_algo	*t_cc;		/* congestion control algorithm */
	struct cc_var	t_ccv;		/* congestion control specific vars */
	int	t_bytes_acked;		/* # bytes acked during current RTT */
	u_int	t_maxunacktime;
	u_int	t_keepinit;		/* time to establish connection */
	u_int	t_keepidle;		/* time before keepalive probes begin */
	u_int	t_keepintvl;		/* interval between keepalives */
	u_int	t_keepcnt;		/* number of keepalives before close */
	int	t_dupacks;		/* consecutive dup acks recd */
	int	t_lognum;		/* Number of log entries */
	int	t_loglimit;		/* Maximum number of log entries */
	uint32_t t_rcep;		/* Number of received CE marked pkts */
	uint32_t t_scep;		/* Synced number of delivered CE pkts */
	int64_t	t_pacing_rate;		/* bytes / sec, -1 => unlimited */
	struct tcp_log_stailq t_logs;	/* Log buffer */
	struct tcp_log_id_node *t_lin;
	struct tcp_log_id_bucket *t_lib;
	const char *t_output_caller;	/* Function that called tcp_output */
	struct statsblob *t_stats;	/* Per-connection stats */
	/* Should these be a pointer to the arrays or an array? */
	uint32_t t_logsn;		/* Log "serial number" */
	uint32_t gput_ts;		/* Time goodput measurement started */
	tcp_seq	gput_seq;		/* Outbound measurement seq */
	tcp_seq	gput_ack;		/* Inbound measurement ack */
	int32_t	t_stats_gput_prev;	/* XXXLAS: Prev gput measurement */
	uint32_t t_sndtlppack;		/* tail loss probe packets sent */
	uint64_t t_sndtlpbyte;		/* total tail loss probe bytes sent */
	uint64_t t_sndbytes;		/* total bytes sent */
	uint64_t t_snd_rxt_bytes;	/* total bytes retransmitted */
	uint32_t t_dsack_bytes;		/* dsack bytes received */
	uint32_t t_dsack_tlp_bytes;	/* dsack bytes received for TLPs sent */
	uint32_t t_dsack_pack;		/* dsack packets we have received */
	uint8_t t_tmr_granularity;	/* Granularity of all timers srtt etc */
	uint8_t t_rttupdated;		/* number of times rtt sampled */
	/* TCP Fast Open */
	uint8_t t_tfo_client_cookie_len; /* TFO client cookie length */
	uint32_t t_end_info_status;	/* Status flag of end info */
	sbintime_t t_challenge_ack_end;	/* End of the challenge ack epoch */
	uint32_t t_challenge_ack_cnt;	/* Number of challenge ACKs sent in
					 * current epoch
					 */

	unsigned int *t_tfo_pending;	/* TFO server pending counter */
	union {
		uint8_t client[TCP_FASTOPEN_MAX_COOKIE_LEN];
		uint64_t server;
	} t_tfo_cookie;			/* TCP Fast Open cookie to send */
	union {
		uint8_t t_end_info_bytes[TCP_END_BYTE_INFO];
		uint64_t t_end_info;
	};
	struct osd	t_osd;		/* storage for Khelp module data */
	uint8_t _t_logpoint;		/* Used when a BB log point is enabled */
	/*
	 * Keep all #ifdef'ed components at the end of the structure!
	 * This is important to minimize problems when compiling modules
	 * using this structure from within the modules' directory.
	 */
#ifdef TCP_REQUEST_TRK
	/* Response tracking addons. */
	uint8_t t_tcpreq_req;		/* Request count */
	uint8_t t_tcpreq_open;		/* Number of open range requests */
	uint8_t t_tcpreq_closed;	/* Number of closed range requests */
	uint32_t tcp_hybrid_start;	/* Num of times we started hybrid pacing */
	uint32_t tcp_hybrid_stop;	/* Num of times we stopped hybrid pacing */
	uint32_t tcp_hybrid_error;	/* Num of times we failed to start hybrid pacing */
	struct tcp_sendfile_track t_tcpreq_info[MAX_TCP_TRK_REQ];
#endif
#ifdef TCP_ACCOUNTING
	uint64_t tcp_cnt_counters[TCP_NUM_CNT_COUNTERS];
	uint64_t tcp_proc_time[TCP_NUM_CNT_COUNTERS];
#endif
};
#endif	/* _KERNEL || _WANT_TCPCB */

#ifdef _KERNEL
struct tcptemp {
	u_char	tt_ipgen[40];		/* the size must be of max ip header, now IPv6 */
	struct	tcphdr tt_t;
};

/* SACK scoreboard update status */
typedef enum {
	SACK_NOCHANGE = 0,
	SACK_CHANGE,
	SACK_NEWLOSS
} sackstatus_t;

/* Enable TCP/UDP tunneling port */
#define TCP_TUNNELING_PORT_MIN		0
#define TCP_TUNNELING_PORT_MAX		65535
#define TCP_TUNNELING_PORT_DEFAULT	0

/* TCP/UDP tunneling overhead */
#define TCP_TUNNELING_OVERHEAD_MIN	sizeof(struct udphdr)
#define TCP_TUNNELING_OVERHEAD_MAX	1024
#define TCP_TUNNELING_OVERHEAD_DEFAULT	TCP_TUNNELING_OVERHEAD_MIN

/* Minimum map entries limit value, if set */
#define TCP_MIN_MAP_ENTRIES_LIMIT	128

/* Flags for tcp functions */
#define	TCP_FUNC_BEING_REMOVED	0x01	/* Can no longer be referenced */
#define	TCP_FUNC_OUTPUT_CANDROP	0x02	/* tfb_tcp_output may ask tcp_drop */
#define	TCP_FUNC_DEFAULT_OK	0x04	/* Can be used as default */

/**
 * tfb_tcp_handoff_ok is a mandatory function that allows a stack to be
 * queried about whether it can take over a tcpcb.  Return 0 to indicate
 * the stack can take over and run, or a non-zero error number to decline.
 *
 * tfb_tcp_fb_init is used to allow the new stack to
 * setup its control block. Among the things it must
 * do is:
 * a) Make sure that the inp_flags2 is setup correctly
 *    for LRO. There are two flags that the previous
 *    stack may have set INP_MBUF_ACKCMP and
 *    INP_SUPPORTS_MBUFQ. If the new stack does not
 *    support these it *should* clear the flags.
 * b) Make sure that the timers are in the proper
 *    granularity that the stack wants. The stack
 *    should check the t_tmr_granularity field. Currently
 *    there are two values that it may hold
 *    TCP_TMR_GRANULARITY_TICKS and TCP_TMR_GRANULARITY_USEC.
 *    Use the function tcp_timer_convert(tp, granularity)
 *    to move the timers to the correct format for your stack.
 *
 * The new stack may also optionally query the tfb_chg_query
 * function if the old stack has one. The new stack may ask
 * for one of three entries and can also state to the old
 * stack its support for the INP_MBUF_ACKCMP and
 * INP_SUPPORTS_MBUFQ. This is important since if there are
 * queued ACKs without that statement the old stack will
 * be forced to discard the queued ACKs. The requests that
 * can be made for information by the new stacks are:
 *
 * Note also that the tfb_tcp_fb_init() when called can
 * determine if a query is needed by looking at the
 * value passed in the ptr. The ptr is designed to be
 * set in with any allocated memory, but the address
 * of the condition (ptr == &tp->t_fb_ptr) will be
 * true if this is not a stack switch but the initial
 * setup of a tcb (which means no query would be needed).
 * If, however, the value is not t_fb_ptr, then the caller
 * is in the middle of a stack switch and is the new stack.
 * A query would be appropriate (if the new stack supports
 * the query mechanism).  An illustrative tfb_tcp_fb_init()
 * sketch follows struct tcp_function_block below.
 *
 * TCP_QUERY_SENDMAP	- Query of outstanding data.
 * TCP_QUERY_TIMERS_UP	- Query about running timers.
 * TCP_SUPPORTED_LRO	- Declaration in req_param of
 *			  the inp_flags2 supported by
 *			  the new stack.
 * TCP_QUERY_RACK_TIMES	- Enquire about various timestamps
 *			  and states the old stack may be in.
 *
 * tfb_tcp_fb_fini is changed to add a flag to tell
 * the old stack if the tcb is being destroyed or
 * not. A one in the flag means the TCB is being
 * destroyed, a zero indicates it is transitioning to
 * another stack (via socket option). The
 * tfb_tcp_fb_fini() function itself should not change timers
 * or inp_flags2 (the tfb_tcp_fb_init() must do that). However,
 * if the old stack supports the LRO mbuf queuing, and the new
 * stack does not communicate via chg messages that it too does,
 * it must assume it does not and free any queued mbufs.
 */
struct tcp_function_block {
	char	tfb_tcp_block_name[TCP_FUNCTION_NAME_LEN_MAX];
	int	(*tfb_tcp_output)(struct tcpcb *);
	void	(*tfb_tcp_do_segment)(struct tcpcb *, struct mbuf *,
		    struct tcphdr *, int, int, uint8_t);
	int	(*tfb_do_segment_nounlock)(struct tcpcb *, struct mbuf *,
		    struct tcphdr *, int, int, uint8_t, int, struct timeval *);
	int	(*tfb_do_queued_segments)(struct tcpcb *, int);
	int	(*tfb_tcp_ctloutput)(struct tcpcb *, struct sockopt *);
	/* Optional memory allocation/free routine */
	int	(*tfb_tcp_fb_init)(struct tcpcb *, void **);
	void	(*tfb_tcp_fb_fini)(struct tcpcb *, int);
	/* Optional timers, must define all if you define one */
	int	(*tfb_tcp_timer_stop_all)(struct tcpcb *);
	void	(*tfb_tcp_rexmit_tmr)(struct tcpcb *);
	int	(*tfb_tcp_handoff_ok)(struct tcpcb *);
	void	(*tfb_tcp_mtu_chg)(struct tcpcb *tp);
	int	(*tfb_pru_options)(struct tcpcb *, int);
	void	(*tfb_hwtls_change)(struct tcpcb *, int);
	int	(*tfb_chg_query)(struct tcpcb *, struct tcp_query_resp *);
	void	(*tfb_switch_failed)(struct tcpcb *);
	bool	(*tfb_early_wake_check)(struct tcpcb *);
	int	(*tfb_compute_pipe)(struct tcpcb *tp);
	int	(*tfb_stack_info)(struct tcpcb *tp, struct stack_specific_info *);
	void	(*tfb_inherit)(struct tcpcb *tp, struct inpcb *h_inp);
	volatile uint32_t tfb_refcnt;
	uint32_t tfb_flags;
	uint8_t	tfb_id;
};
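
/*
 * Illustrative sketch only (tcp_example_fb_init is a hypothetical name, not
 * part of the tree): the tfb_tcp_fb_init() duties described in the comment
 * above, for an imaginary stack that wants tick-granularity timers and does
 * not support the LRO mbuf-queueing modes.
 */
static inline int
tcp_example_fb_init(struct tcpcb *tp, void **ptr)
{
	/*
	 * ptr == &tp->t_fb_ptr means initial setup of a tcb; anything else
	 * means we are mid stack-switch and may issue change queries
	 * against the outgoing stack (see the sketch further below).
	 */
	if (ptr != &tp->t_fb_ptr) {
		/* Stack switch: optionally query the old stack here. */
	}
	if (tp->t_tmr_granularity != TCP_TMR_GRANULARITY_TICKS) {
		/* tcp_timer_convert(tp, TCP_TMR_GRANULARITY_TICKS); */
	}
	/* This imaginary stack supports neither LRO queueing flag. */
	tp->t_inpcb.inp_flags2 &= ~(INP_SUPPORTS_MBUFQ | INP_MBUF_ACKCMP);
	return (0);
}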

/* Maximum number of names each TCP function block can be registered with. */
#define	TCP_FUNCTION_NAME_NUM_MAX	8

struct tcp_function {
	TAILQ_ENTRY(tcp_function)	tf_next;
	char				tf_name[TCP_FUNCTION_NAME_LEN_MAX];
	struct tcp_function_block	*tf_fb;
};

TAILQ_HEAD(tcp_funchead, tcp_function);
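
/*
 * Illustrative sketch only (tcp_example_sendmap_walk is a hypothetical name,
 * not part of the tree): the change-query loop described further above, in
 * which a new stack rebuilds its send map from the outgoing stack during a
 * stack switch.  The outgoing stack's function block is assumed to be made
 * available by the switching code.
 */
static inline void
tcp_example_sendmap_walk(struct tcpcb *tp,
    const struct tcp_function_block *old_fb)
{
	struct tcp_query_resp qr = { 0 };
	tcp_seq at = tp->snd_una;

	if (old_fb->tfb_chg_query == NULL)
		return;
	while (at != tp->snd_max) {
		qr.req = TCP_QUERY_SENDMAP;
		qr.req_param = at;	/* first seq of interest, usually snd_una */
		if (old_fb->tfb_chg_query(tp, &qr) != 1)
			break;		/* no data reported for this range */
		/* ... build a local sendmap entry from qr.sendmap_* here ... */
		at = qr.sendmap_end;	/* next query starts at the new end */
	}
}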

struct tcpcb * tcp_drop(struct tcpcb *, int);

#ifdef _NETINET_IN_PCB_H_
#define	intotcpcb(inp)	__containerof((inp), struct tcpcb, t_inpcb)
#define	sototcpcb(so)	intotcpcb(sotoinpcb(so))
#define	tptoinpcb(tp)	(&(tp)->t_inpcb)
#define	tptosocket(tp)	(tp)->t_inpcb.inp_socket

/*
 * tcp_output()
 * Handles tcp_drop request from advanced stacks and reports that inpcb is
 * gone with negative return code.
 * Drop in replacement for the default stack.
 */
static inline int
tcp_output(struct tcpcb *tp)
{
	struct inpcb *inp = tptoinpcb(tp);
	int rv;

	INP_WLOCK_ASSERT(inp);

	rv = tp->t_fb->tfb_tcp_output(tp);
	if (rv < 0) {
		KASSERT(tp->t_fb->tfb_flags & TCP_FUNC_OUTPUT_CANDROP,
		    ("TCP stack %s requested tcp_drop(%p)",
		    tp->t_fb->tfb_tcp_block_name, tp));
		tp = tcp_drop(tp, -rv);
		if (tp)
			INP_WUNLOCK(inp);
	}

	return (rv);
}

/*
 * tcp_output_unlock()
 * Always returns unlocked, handles drop request from advanced stacks.
 * Always returns positive error code.
 */
static inline int
tcp_output_unlock(struct tcpcb *tp)
{
	struct inpcb *inp = tptoinpcb(tp);
	int rv;

	INP_WLOCK_ASSERT(inp);

	rv = tp->t_fb->tfb_tcp_output(tp);
	if (rv < 0) {
		KASSERT(tp->t_fb->tfb_flags & TCP_FUNC_OUTPUT_CANDROP,
		    ("TCP stack %s requested tcp_drop(%p)",
		    tp->t_fb->tfb_tcp_block_name, tp));
		rv = -rv;
		tp = tcp_drop(tp, rv);
		if (tp)
			INP_WUNLOCK(inp);
	} else
		INP_WUNLOCK(inp);

	return (rv);
}

/*
 * tcp_output_nodrop()
 * Always returns locked. It is caller's responsibility to run tcp_drop()!
 * Useful in syscall implementations, when we want to perform some logging
 * and/or tracing with tcpcb before calling tcp_drop(). To be used with
 * tcp_unlock_or_drop() later.
 *
 * XXXGL: maybe don't allow stacks to return a drop request at certain
 * TCP states? Why would it do in connect(2)? In recv(2)?
 */
static inline int
tcp_output_nodrop(struct tcpcb *tp)
{
	int rv;

	INP_WLOCK_ASSERT(tptoinpcb(tp));

	rv = tp->t_fb->tfb_tcp_output(tp);
	KASSERT(rv >= 0 || tp->t_fb->tfb_flags & TCP_FUNC_OUTPUT_CANDROP,
	    ("TCP stack %s requested tcp_drop(%p)",
	    tp->t_fb->tfb_tcp_block_name, tp));
	return (rv);
}

/*
 * tcp_unlock_or_drop()
 * Handle return code from tfb_tcp_output() after we have logged/traced,
 * to be used with tcp_output_nodrop().
 */
static inline int
tcp_unlock_or_drop(struct tcpcb *tp, int tcp_output_retval)
{
	struct inpcb *inp = tptoinpcb(tp);

	INP_WLOCK_ASSERT(inp);

	if (tcp_output_retval < 0) {
		tcp_output_retval = -tcp_output_retval;
		if (tcp_drop(tp, tcp_output_retval) != NULL)
			INP_WUNLOCK(inp);
	} else
		INP_WUNLOCK(inp);

	return (tcp_output_retval);
}
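
/*
 * Illustrative usage sketch only (tcp_example_output_logged is a
 * hypothetical name, not part of the tree): pairing tcp_output_nodrop()
 * with tcp_unlock_or_drop() so that logging or tracing can be done on the
 * still-locked tcpcb before any requested drop is carried out.
 */
static inline int
tcp_example_output_logged(struct tcpcb *tp)
{
	int error;

	error = tcp_output_nodrop(tp);
	/* ... log/trace with the still-valid, locked tp here ... */
	return (tcp_unlock_or_drop(tp, error));
}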
#endif /* _NETINET_IN_PCB_H_ */

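/*
 * Number of segments newly covered by this ACK: the acked byte count
 * divided by the maximum segment size, rounded up for any partial segment.
 */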
static int inline
tcp_packets_this_ack(struct tcpcb *tp, tcp_seq ack)
75237bf391dSRichard Scheffenegger {
75337bf391dSRichard Scheffenegger return ((ack - tp->snd_una) / tp->t_maxseg +
75437bf391dSRichard Scheffenegger ((((ack - tp->snd_una) % tp->t_maxseg) != 0) ? 1 : 0));
75537bf391dSRichard Scheffenegger }
756cc65eb4eSGleb Smirnoff #endif /* _KERNEL */
757df8bae1dSRodney W. Grimes
758c3ce7a79SRobert Watson /*
759c3ce7a79SRobert Watson * Flags and utility macros for the t_flags field.
760c3ce7a79SRobert Watson */
76177aabfd9SMichael Tuexen #define TF_ACKNOW 0x00000001 /* ack peer immediately */
76277aabfd9SMichael Tuexen #define TF_DELACK 0x00000002 /* ack, but try to delay it */
76377aabfd9SMichael Tuexen #define TF_NODELAY 0x00000004 /* don't delay packets to coalesce */
76477aabfd9SMichael Tuexen #define TF_NOOPT 0x00000008 /* don't use tcp options */
76577aabfd9SMichael Tuexen #define TF_SENTFIN 0x00000010 /* have sent FIN */
76677aabfd9SMichael Tuexen #define TF_REQ_SCALE 0x00000020 /* have/will request window scaling */
76777aabfd9SMichael Tuexen #define TF_RCVD_SCALE 0x00000040 /* other side has requested scaling */
76877aabfd9SMichael Tuexen #define TF_REQ_TSTMP 0x00000080 /* have/will request timestamps */
76977aabfd9SMichael Tuexen #define TF_RCVD_TSTMP 0x00000100 /* a timestamp was received in SYN */
77077aabfd9SMichael Tuexen #define TF_SACK_PERMIT 0x00000200 /* other side said I could SACK */
77177aabfd9SMichael Tuexen #define TF_NEEDSYN 0x00000400 /* send SYN (implicit state) */
77277aabfd9SMichael Tuexen #define TF_NEEDFIN 0x00000800 /* send FIN (implicit state) */
77377aabfd9SMichael Tuexen #define TF_NOPUSH 0x00001000 /* don't push */
77468e623c3SRichard Scheffenegger #define TF_PREVVALID 0x00002000 /* saved values for bad rxmit valid
77568e623c3SRichard Scheffenegger * Note: accessing and restoring from
77668e623c3SRichard Scheffenegger * these may only be done in the 1st
77768e623c3SRichard Scheffenegger * RTO recovery round (t_rxtshift == 1)
77868e623c3SRichard Scheffenegger */
7794d0770f1SRichard Scheffenegger #define TF_WAKESOR 0x00004000 /* wake up receive socket */
780adc56f5aSEdward Tomasz Napierala #define TF_GPUTINPROG 0x00008000 /* Goodput measurement in progress */
78177aabfd9SMichael Tuexen #define TF_MORETOCOME 0x00010000 /* More data to be appended to sock */
782493105c2SGleb Smirnoff #define TF_SONOTCONN 0x00020000 /* needs soisconnected() on ESTAB */
78377aabfd9SMichael Tuexen #define TF_LASTIDLE 0x00040000 /* connection was previously idle */
78477aabfd9SMichael Tuexen #define TF_RXWIN0SENT 0x00080000 /* sent a receiver win 0 in response */
78577aabfd9SMichael Tuexen #define TF_FASTRECOVERY 0x00100000 /* in NewReno Fast Recovery */
78677aabfd9SMichael Tuexen #define TF_WASFRECOVERY 0x00200000 /* was in NewReno Fast Recovery */
78777aabfd9SMichael Tuexen #define TF_SIGNATURE 0x00400000 /* require MD5 digests (RFC2385) */
78877aabfd9SMichael Tuexen #define TF_FORCEDATA 0x00800000 /* force out a byte */
78977aabfd9SMichael Tuexen #define TF_TSO 0x01000000 /* TSO enabled on this connection */
79077aabfd9SMichael Tuexen #define TF_TOE 0x02000000 /* this connection is offloaded */
79174703901SGleb Smirnoff #define TF_CLOSED 0x04000000 /* close(2) called on socket */
792e18b97bdSRandall Stewart #define TF_SENTSYN 0x08000000 /* At least one syn has been sent */
7930471a8c7SRichard Scheffenegger #define TF_LRD 0x10000000 /* Lost Retransmission Detection */
794dbc42409SLawrence Stewart #define TF_CONGRECOVERY 0x20000000 /* congestion recovery mode */
795dbc42409SLawrence Stewart #define TF_WASCRECOVERY 0x40000000 /* was in congestion recovery */
796281a0fd4SPatrick Kelsey #define TF_FASTOPEN 0x80000000 /* TCP Fast Open indication */
797c3ce7a79SRobert Watson
798dbc42409SLawrence Stewart #define IN_FASTRECOVERY(t_flags) (t_flags & TF_FASTRECOVERY)
799dbc42409SLawrence Stewart #define ENTER_FASTRECOVERY(t_flags) t_flags |= TF_FASTRECOVERY
800dbc42409SLawrence Stewart #define EXIT_FASTRECOVERY(t_flags) t_flags &= ~TF_FASTRECOVERY
801dbc42409SLawrence Stewart
802dbc42409SLawrence Stewart #define IN_CONGRECOVERY(t_flags) (t_flags & TF_CONGRECOVERY)
803dbc42409SLawrence Stewart #define ENTER_CONGRECOVERY(t_flags) t_flags |= TF_CONGRECOVERY
804dbc42409SLawrence Stewart #define EXIT_CONGRECOVERY(t_flags) t_flags &= ~TF_CONGRECOVERY
805dbc42409SLawrence Stewart
806dbc42409SLawrence Stewart #define IN_RECOVERY(t_flags) (t_flags & (TF_CONGRECOVERY | TF_FASTRECOVERY))
807dbc42409SLawrence Stewart #define ENTER_RECOVERY(t_flags) t_flags |= (TF_CONGRECOVERY | TF_FASTRECOVERY)
808dbc42409SLawrence Stewart #define EXIT_RECOVERY(t_flags) t_flags &= ~(TF_CONGRECOVERY | TF_FASTRECOVERY)
809dbc42409SLawrence Stewart
810dbc42409SLawrence Stewart #define BYTES_THIS_ACK(tp, th) (th->th_ack - tp->snd_una)
8119d11646dSJeffrey Hsu
812c3ce7a79SRobert Watson /*
813c3ce7a79SRobert Watson * Flags for the t_oobflags field.
814c3ce7a79SRobert Watson */
815c3ce7a79SRobert Watson #define TCPOOB_HAVEDATA 0x01
816c3ce7a79SRobert Watson #define TCPOOB_HADDATA 0x02
817c3ce7a79SRobert Watson
818eb6ad696SGarrett Wollman /*
8192529f56eSJonathan T. Looney * Flags for the extended TCP flags field, t_flags2
820f6f6703fSSean Bruno */
821f6f6703fSSean Bruno #define TF2_PLPMTU_BLACKHOLE 0x00000001 /* Possible PLPMTUD Black Hole. */
822f6f6703fSSean Bruno #define TF2_PLPMTU_PMTUD 0x00000002 /* Allowed to attempt PLPMTUD. */
823f6f6703fSSean Bruno #define TF2_PLPMTU_MAXSEGSNT 0x00000004 /* Last seg sent was full seg. */
8242529f56eSJonathan T. Looney #define TF2_LOG_AUTO 0x00000008 /* Session is auto-logging. */
82589e560f4SRandall Stewart #define TF2_DROP_AF_DATA 0x00000010 /* Drop after all data ack'd */
8263cf38784SMichael Tuexen #define TF2_ECN_PERMIT 0x00000020 /* connection ECN-ready */
8273cf38784SMichael Tuexen #define TF2_ECN_SND_CWR 0x00000040 /* ECN CWR in queue */
8283cf38784SMichael Tuexen #define TF2_ECN_SND_ECE 0x00000080 /* ECN ECE in queue */
8293cf38784SMichael Tuexen #define TF2_ACE_PERMIT 0x00000100 /* Accurate ECN mode */
830c2a69e84SGleb Smirnoff #define TF2_HPTS_CPU_SET 0x00000200 /* t_hpts_cpu is not random */
831e854dd38SRandall Stewart #define TF2_FBYTES_COMPLETE 0x00000400 /* We have first bytes in and out */
832dc9daa04SRichard Scheffenegger #define TF2_ECN_USE_ECT1 0x00000800 /* Use ECT(1) marking on session */
8338aa2be69SCheng Cui #define TF2_TCP_ACCOUNTING 0x00001000 /* Do TCP accounting */
834c2a69e84SGleb Smirnoff #define TF2_HPTS_CALLS 0x00002000 /* tcp_output() called via HPTS */
835c3c20de3SGleb Smirnoff #define TF2_MBUF_L_ACKS 0x00004000 /* large mbufs for ack compression */
836c3c20de3SGleb Smirnoff #define TF2_MBUF_ACKCMP 0x00008000 /* mbuf ack compression ok */
837c3c20de3SGleb Smirnoff #define TF2_SUPPORTS_MBUFQ 0x00010000 /* Supports the mbuf queue method */
838c3c20de3SGleb Smirnoff #define TF2_MBUF_QUEUE_READY 0x00020000 /* Inputs can be queued */
839c3c20de3SGleb Smirnoff #define TF2_DONT_SACK_QUEUE 0x00040000 /* Don't wake on sack */
840c3c20de3SGleb Smirnoff #define TF2_CANNOT_DO_ECN 0x00080000 /* The stack does not do ECN */
841fce03f85SRandall Stewart #define TF2_PROC_SACK_PROHIBIT 0x00100000 /* Due to small MSS size do not process sack's */
842b6919741SKonstantin Belousov #define TF2_IPSEC_TSO 0x00200000 /* IPSEC + TSO supported */
84352eacec9SMichael Tuexen #define TF2_NO_ISS_CHECK 0x00400000 /* Don't check SEG.ACK against ISS */
844dc9daa04SRichard Scheffenegger
845f6f6703fSSean Bruno /*
846eb6ad696SGarrett Wollman * Structure to hold TCP options that are only used during segment
847eb6ad696SGarrett Wollman * processing (in tcp_input), but not held in the tcpcb.
848eb6ad696SGarrett Wollman * It's basically used to reduce the number of parameters
84902a1a643SAndre Oppermann * to tcp_dooptions and tcp_addoptions.
85002a1a643SAndre Oppermann * The binary order of the to_flags is relevant for packing of the
85102a1a643SAndre Oppermann * options in tcp_addoptions.
852eb6ad696SGarrett Wollman */
853eb6ad696SGarrett Wollman struct tcpopt {
8545d20f974SJonathan T. Looney u_int32_t to_flags; /* which options are present */
85502a1a643SAndre Oppermann #define TOF_MSS 0x0001 /* maximum segment size */
85602a1a643SAndre Oppermann #define TOF_SCALE 0x0002 /* window scaling */
857032fae41SBjoern A. Zeeb #define TOF_SACKPERM 0x0004 /* SACK permitted */
85802a1a643SAndre Oppermann #define TOF_TS 0x0010 /* timestamp */
859032fae41SBjoern A. Zeeb #define TOF_SIGNATURE 0x0040 /* TCP-MD5 signature option (RFC2385) */
860df47e437SAndre Oppermann #define TOF_SACK 0x0080 /* Peer sent SACK option */
861281a0fd4SPatrick Kelsey #define TOF_FASTOPEN 0x0100 /* TCP Fast Open (TFO) cookie */
862281a0fd4SPatrick Kelsey #define TOF_MAXOPT 0x0200
863df47e437SAndre Oppermann u_int32_t to_tsval; /* new timestamp */
86402a1a643SAndre Oppermann u_int32_t to_tsecr; /* reflected timestamp */
865237fbe0aSLawrence Stewart u_char *to_sacks; /* pointer to the first SACK block */
866237fbe0aSLawrence Stewart u_char *to_signature; /* pointer to the TCP-MD5 signature */
867c560df6fSPatrick Kelsey u_int8_t *to_tfo_cookie; /* pointer to the TFO cookie */
86802a1a643SAndre Oppermann u_int16_t to_mss; /* maximum segment size */
86902a1a643SAndre Oppermann u_int8_t to_wscale; /* window scaling */
8705a53ca16SPaul Saab u_int8_t to_nsacks; /* number of SACK blocks */
871281a0fd4SPatrick Kelsey u_int8_t to_tfo_len; /* TFO cookie length */
872d9a36286SBjoern A. Zeeb u_int32_t to_spare; /* UTO */
873be2ac88cSJonathan Lemon };
874be2ac88cSJonathan Lemon
875f72167f4SAndre Oppermann /*
876f72167f4SAndre Oppermann * Flags for tcp_dooptions.
877f72167f4SAndre Oppermann */
878f72167f4SAndre Oppermann #define TO_SYN 0x01 /* parse SYN-only options */
879f72167f4SAndre Oppermann
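
/*
 * Illustrative sketch of how a consumer uses struct tcpopt together with
 * tcp_dooptions() (declared further below).  'optp', 'optlen', 'thflags' and
 * the receiving locals are assumptions of this example (the segment's option
 * pointer, option length and TCP header flags), not part of the interface:
 *
 *	struct tcpopt to;
 *
 *	tcp_dooptions(&to, optp, optlen, (thflags & TH_SYN) ? TO_SYN : 0);
 *	if (to.to_flags & TOF_MSS)
 *		mss = to.to_mss;
 *	if (to.to_flags & TOF_SCALE)
 *		wscale = to.to_wscale;
 *	if (to.to_flags & TOF_TS)
 *		ts_ecr = to.to_tsecr;
 */
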
88097d8d152SAndre Oppermann struct hc_metrics_lite { /* must stay in sync with hc_metrics */
88109000cc1SGleb Smirnoff uint32_t hc_mtu; /* MTU for this path */
88209000cc1SGleb Smirnoff uint32_t hc_ssthresh; /* outbound gateway buffer limit */
88309000cc1SGleb Smirnoff uint32_t hc_rtt; /* estimated round trip time */
88409000cc1SGleb Smirnoff uint32_t hc_rttvar; /* estimated rtt variance */
88509000cc1SGleb Smirnoff uint32_t hc_cwnd; /* congestion window */
88609000cc1SGleb Smirnoff uint32_t hc_sendpipe; /* outbound delay-bandwidth product */
88709000cc1SGleb Smirnoff uint32_t hc_recvpipe; /* inbound delay-bandwidth product */
88897d8d152SAndre Oppermann };
88997d8d152SAndre Oppermann
8908411d000SAndre Oppermann #ifndef _NETINET_IN_PCB_H_
8918411d000SAndre Oppermann struct in_conninfo;
8928411d000SAndre Oppermann #endif /* _NETINET_IN_PCB_H_ */
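
/*
 * Illustrative sketch: an hc_metrics_lite is normally filled from the TCP
 * hostcache with tcp_hc_get() (declared further below); a zero field means
 * the cache has no data for that metric.  'inp' and 'hdr_len' are assumed
 * names for the connection's inpcb and the caller's IP+TCP header allowance:
 *
 *	struct hc_metrics_lite metrics;
 *
 *	tcp_hc_get(&inp->inp_inc, &metrics);
 *	if (metrics.hc_mtu != 0)
 *		mss = metrics.hc_mtu - hdr_len;	(use the cached path MTU)
 */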
8938411d000SAndre Oppermann
894df8bae1dSRodney W. Grimes /*
895df8bae1dSRodney W. Grimes * The smoothed round-trip time and estimated variance
896df8bae1dSRodney W. Grimes * are stored as fixed point numbers scaled by the values below.
897df8bae1dSRodney W. Grimes * The smoothing gain is 2^TCP_DELTA_SHIFT divided by the scale, i.e. 1/8 for
898df8bae1dSRodney W. Grimes * srtt (an "ALPHA" of 0.875) and 1/4 for rttvar (0.75); see the use of
899df8bae1dSRodney W. Grimes * TCP_DELTA_SHIFT in tcp_xmit_timer().
900df8bae1dSRodney W. Grimes * With these scales, srtt has 5 bits to the right of the binary point
901df8bae1dSRodney W. Grimes * and rttvar has 4 bits.
902df8bae1dSRodney W. Grimes */
9035a268d86SMichael Tuexen #define TCP_RTT_SCALE 32 /* multiplier for srtt; 5 bits frac. */
9045a268d86SMichael Tuexen #define TCP_RTT_SHIFT 5 /* shift for srtt; 5 bits frac. */
9055a268d86SMichael Tuexen #define TCP_RTTVAR_SCALE 16 /* multiplier for rttvar; 4 bits */
9065a268d86SMichael Tuexen #define TCP_RTTVAR_SHIFT 4 /* shift for rttvar; 4 bits */
907233e8c18SGarrett Wollman #define TCP_DELTA_SHIFT 2 /* see tcp_input.c */
908df8bae1dSRodney W. Grimes
909df8bae1dSRodney W. Grimes /*
910df8bae1dSRodney W. Grimes * The initial retransmission should happen at rtt + 4 * rttvar.
911df8bae1dSRodney W. Grimes * Because of the way we do the smoothing, srtt and rttvar
912df8bae1dSRodney W. Grimes * will each average +1/2 tick of bias. When we compute
913df8bae1dSRodney W. Grimes * the retransmit timer, we want 1/2 tick of rounding and
914df8bae1dSRodney W. Grimes * 1 extra tick because of +-1/2 tick uncertainty in the
915df8bae1dSRodney W. Grimes * firing of the timer. The bias will give us exactly the
916df8bae1dSRodney W. Grimes * 1.5 tick we need. But, because the bias is
917df8bae1dSRodney W. Grimes * statistical, we have to test that we don't drop below
918df8bae1dSRodney W. Grimes * the minimum feasible timer (which is 2 ticks).
919233e8c18SGarrett Wollman * This version of the macro is adapted from a paper by Lawrence
920233e8c18SGarrett Wollman * Brakmo and Larry Peterson which outlines a problem caused
921233e8c18SGarrett Wollman * by insufficient precision in the original implementation,
922233e8c18SGarrett Wollman * which results in inappropriately large RTO values for very
923233e8c18SGarrett Wollman * fast networks.
924df8bae1dSRodney W. Grimes */
925233e8c18SGarrett Wollman #define TCP_REXMTVAL(tp) \
926552b7df4SDavid Greenman max((tp)->t_rttmin, (((tp)->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT)) \
9276da5712bSGarrett Wollman + (tp)->t_rttvar) >> TCP_DELTA_SHIFT)
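
/*
 * Worked example with illustrative values: t_srtt and t_rttvar are stored
 * scaled by TCP_RTT_SCALE (32) and TCP_RTTVAR_SCALE (16), so a smoothed RTT
 * of 8 ticks and an rttvar of 2 ticks are stored as t_srtt = 256 and
 * t_rttvar = 32.  With t_rttmin = 2 ticks:
 *
 *	TCP_REXMTVAL(tp) = max(2, ((256 >> 3) + 32) >> 2)
 *			 = max(2, (32 + 32) >> 2) = 16 ticks,
 *
 * i.e. srtt + 4 * rttvar (8 + 4 * 2).  The shifts line up because
 * t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT) is srtt scaled by 4 while
 * t_rttvar is (4 * rttvar) scaled by 4, and the final >> TCP_DELTA_SHIFT
 * removes the common factor of 4.
 */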
928df8bae1dSRodney W. Grimes
929df8bae1dSRodney W. Grimes /*
930b59753f1SGleb Smirnoff * Global (per-VNET) TCP statistics.  The structure below represents what we
931b59753f1SGleb Smirnoff * export to userland, but in the kernel we have an array of counter_u64_t
932b59753f1SGleb Smirnoff * with as many elements as there are members in the structure.  The counters
933*46023d54SGleb Smirnoff * shall be increased by TCPSTAT_INC() or KMOD_TCPSTAT_INC().  Adding a new
934*46023d54SGleb Smirnoff * counter also requires adding corresponding SDT probes to in_kdtrace.h and
935*46023d54SGleb Smirnoff * in_kdtrace.c.
936df8bae1dSRodney W. Grimes */
937df8bae1dSRodney W. Grimes struct tcpstat {
9385923c293SGleb Smirnoff uint64_t tcps_connattempt; /* connections initiated */
9395923c293SGleb Smirnoff uint64_t tcps_accepts; /* connections accepted */
9405923c293SGleb Smirnoff uint64_t tcps_connects; /* connections established */
9415923c293SGleb Smirnoff uint64_t tcps_drops; /* connections dropped */
9425923c293SGleb Smirnoff uint64_t tcps_conndrops; /* embryonic connections dropped */
9435923c293SGleb Smirnoff uint64_t tcps_minmssdrops; /* drops due to average segment size below minmss */
9445923c293SGleb Smirnoff uint64_t tcps_closed; /* conn. closed (includes drops) */
9455923c293SGleb Smirnoff uint64_t tcps_segstimed; /* segs where we tried to get rtt */
9465923c293SGleb Smirnoff uint64_t tcps_rttupdated; /* times we succeeded */
9475923c293SGleb Smirnoff uint64_t tcps_delack; /* delayed acks sent */
9485923c293SGleb Smirnoff uint64_t tcps_timeoutdrop; /* conn. dropped in rxmt timeout */
9495923c293SGleb Smirnoff uint64_t tcps_rexmttimeo; /* retransmit timeouts */
9505923c293SGleb Smirnoff uint64_t tcps_persisttimeo; /* persist timeouts */
9515923c293SGleb Smirnoff uint64_t tcps_keeptimeo; /* keepalive timeouts */
9525923c293SGleb Smirnoff uint64_t tcps_keepprobe; /* keepalive probes sent */
9535923c293SGleb Smirnoff uint64_t tcps_keepdrops; /* connections dropped in keepalive */
95408af8aacSRandall Stewart uint64_t tcps_progdrops; /* drops due to no progress */
955df8bae1dSRodney W. Grimes
9565923c293SGleb Smirnoff uint64_t tcps_sndtotal; /* total packets sent */
9575923c293SGleb Smirnoff uint64_t tcps_sndpack; /* data packets sent */
9585923c293SGleb Smirnoff uint64_t tcps_sndbyte; /* data bytes sent */
9595923c293SGleb Smirnoff uint64_t tcps_sndrexmitpack; /* data packets retransmitted */
9605923c293SGleb Smirnoff uint64_t tcps_sndrexmitbyte; /* data bytes retransmitted */
9615923c293SGleb Smirnoff uint64_t tcps_sndrexmitbad; /* unnecessary packet retransmissions */
9625923c293SGleb Smirnoff uint64_t tcps_sndacks; /* ack-only packets sent */
9635923c293SGleb Smirnoff uint64_t tcps_sndprobe; /* window probes sent */
9645923c293SGleb Smirnoff uint64_t tcps_sndurg; /* packets sent with URG only */
9655923c293SGleb Smirnoff uint64_t tcps_sndwinup; /* window update-only packets sent */
9665923c293SGleb Smirnoff uint64_t tcps_sndctrl; /* control (SYN|FIN|RST) packets sent */
967df8bae1dSRodney W. Grimes
9685923c293SGleb Smirnoff uint64_t tcps_rcvtotal; /* total packets received */
9695923c293SGleb Smirnoff uint64_t tcps_rcvpack; /* packets received in sequence */
9705923c293SGleb Smirnoff uint64_t tcps_rcvbyte; /* bytes received in sequence */
9715923c293SGleb Smirnoff uint64_t tcps_rcvbadsum; /* packets received with cksum errs */
9725923c293SGleb Smirnoff uint64_t tcps_rcvbadoff; /* packets received with bad offset */
973c669105dSGleb Smirnoff uint64_t tcps_rcvreassfull; /* packets dropped for no reass space */
9745923c293SGleb Smirnoff uint64_t tcps_rcvshort; /* packets received too short */
9755923c293SGleb Smirnoff uint64_t tcps_rcvduppack; /* duplicate-only packets received */
9765923c293SGleb Smirnoff uint64_t tcps_rcvdupbyte; /* duplicate-only bytes received */
9775923c293SGleb Smirnoff uint64_t tcps_rcvpartduppack; /* packets with some duplicate data */
9785923c293SGleb Smirnoff uint64_t tcps_rcvpartdupbyte; /* dup. bytes in part-dup. packets */
9795923c293SGleb Smirnoff uint64_t tcps_rcvoopack; /* out-of-order packets received */
9805923c293SGleb Smirnoff uint64_t tcps_rcvoobyte; /* out-of-order bytes received */
9815923c293SGleb Smirnoff uint64_t tcps_rcvpackafterwin; /* packets with data after window */
9825923c293SGleb Smirnoff uint64_t tcps_rcvbyteafterwin; /* bytes rcvd after window */
9835923c293SGleb Smirnoff uint64_t tcps_rcvafterclose; /* packets rcvd after "close" */
9845923c293SGleb Smirnoff uint64_t tcps_rcvwinprobe; /* rcvd window probe packets */
9855923c293SGleb Smirnoff uint64_t tcps_rcvdupack; /* rcvd duplicate acks */
9865923c293SGleb Smirnoff uint64_t tcps_rcvacktoomuch; /* rcvd acks for unsent data */
9875923c293SGleb Smirnoff uint64_t tcps_rcvackpack; /* rcvd ack packets */
9885923c293SGleb Smirnoff uint64_t tcps_rcvackbyte; /* bytes acked by rcvd acks */
9895923c293SGleb Smirnoff uint64_t tcps_rcvwinupd; /* rcvd window update packets */
9905923c293SGleb Smirnoff uint64_t tcps_pawsdrop; /* segments dropped due to PAWS */
9915923c293SGleb Smirnoff uint64_t tcps_predack; /* times hdr predict ok for acks */
9925923c293SGleb Smirnoff uint64_t tcps_preddat; /* times hdr predict ok for data pkts */
9935923c293SGleb Smirnoff uint64_t tcps_pcbcachemiss;
9945923c293SGleb Smirnoff uint64_t tcps_cachedrtt; /* times cached RTT in route updated */
9955923c293SGleb Smirnoff uint64_t tcps_cachedrttvar; /* times cached rttvar updated */
9965923c293SGleb Smirnoff uint64_t tcps_cachedssthresh; /* times cached ssthresh updated */
9975923c293SGleb Smirnoff uint64_t tcps_usedrtt; /* times RTT initialized from route */
9985923c293SGleb Smirnoff uint64_t tcps_usedrttvar; /* times RTTVAR initialized from rt */
9995923c293SGleb Smirnoff uint64_t tcps_usedssthresh; /* times ssthresh initialized from rt*/
10005923c293SGleb Smirnoff uint64_t tcps_persistdrop; /* timeout in persist state */
10015923c293SGleb Smirnoff uint64_t tcps_badsyn; /* bogus SYN, e.g. premature ACK */
10025923c293SGleb Smirnoff uint64_t tcps_mturesent; /* resends due to MTU discovery */
10035923c293SGleb Smirnoff uint64_t tcps_listendrop; /* listen queue overflows */
10045923c293SGleb Smirnoff uint64_t tcps_badrst; /* ignored RSTs in the window */
1005be2ac88cSJonathan Lemon
10065923c293SGleb Smirnoff uint64_t tcps_sc_added; /* entry added to syncache */
10075923c293SGleb Smirnoff uint64_t tcps_sc_retransmitted; /* syncache entry was retransmitted */
10085923c293SGleb Smirnoff uint64_t tcps_sc_dupsyn; /* duplicate SYN packet */
10095923c293SGleb Smirnoff uint64_t tcps_sc_dropped; /* could not reply to packet */
10105923c293SGleb Smirnoff uint64_t tcps_sc_completed; /* successful extraction of entry */
10115923c293SGleb Smirnoff uint64_t tcps_sc_bucketoverflow;/* syncache per-bucket limit hit */
10125923c293SGleb Smirnoff uint64_t tcps_sc_cacheoverflow; /* syncache cache limit hit */
10135923c293SGleb Smirnoff uint64_t tcps_sc_reset; /* RST removed entry from syncache */
10145923c293SGleb Smirnoff uint64_t tcps_sc_stale; /* timed out or listen socket gone */
10155923c293SGleb Smirnoff uint64_t tcps_sc_aborted; /* syncache entry aborted */
10165923c293SGleb Smirnoff uint64_t tcps_sc_badack; /* removed due to bad ACK */
10175923c293SGleb Smirnoff uint64_t tcps_sc_unreach; /* ICMP unreachable received */
10185923c293SGleb Smirnoff uint64_t tcps_sc_zonefail; /* zalloc() failed */
10195923c293SGleb Smirnoff uint64_t tcps_sc_sendcookie; /* SYN cookie sent */
10205923c293SGleb Smirnoff uint64_t tcps_sc_recvcookie; /* SYN cookie received */
10211396e87aSGleb Smirnoff uint64_t tcps_sc_spurcookie; /* SYN cookie spurious, rejected */
10221396e87aSGleb Smirnoff uint64_t tcps_sc_failcookie; /* SYN cookie failed, rejected */
102397d8d152SAndre Oppermann
10245923c293SGleb Smirnoff uint64_t tcps_hc_added; /* entry added to hostcache */
10255923c293SGleb Smirnoff uint64_t tcps_hc_bucketoverflow;/* hostcache per bucket limit hit */
10266d90faf3SPaul Saab
10275923c293SGleb Smirnoff uint64_t tcps_finwait2_drops; /* Drop FIN_WAIT_2 connection after time limit */
10287c72af87SMohan Srinivasan
10296d90faf3SPaul Saab /* SACK related stats */
10305923c293SGleb Smirnoff uint64_t tcps_sack_recovery_episode; /* SACK recovery episodes */
10315923c293SGleb Smirnoff uint64_t tcps_sack_rexmits; /* SACK rexmit segments */
10322a9aae9eSRichard Scheffenegger uint64_t tcps_sack_rexmits_tso; /* SACK rexmit TSO chunks */
10335923c293SGleb Smirnoff uint64_t tcps_sack_rexmit_bytes; /* SACK rexmit bytes */
10345923c293SGleb Smirnoff uint64_t tcps_sack_rcv_blocks; /* SACK blocks (options) received */
10355923c293SGleb Smirnoff uint64_t tcps_sack_send_blocks; /* SACK blocks (options) sent */
10360471a8c7SRichard Scheffenegger uint64_t tcps_sack_lostrexmt; /* SACK lost retransmission recovered */
10375923c293SGleb Smirnoff uint64_t tcps_sack_sboverflow; /* times scoreboard overflowed */
1038f2512ba1SRui Paulo
1039f2512ba1SRui Paulo /* ECN related stats */
10401a70101aSRichard Scheffenegger uint64_t tcps_ecn_rcvce; /* ECN Congestion Experienced */
10411a70101aSRichard Scheffenegger uint64_t tcps_ecn_rcvect0; /* packets received with ECT(0) */
10421a70101aSRichard Scheffenegger uint64_t tcps_ecn_rcvect1; /* packets received with ECT(1) */
10435923c293SGleb Smirnoff uint64_t tcps_ecn_shs; /* ECN successful handshakes */
10445923c293SGleb Smirnoff uint64_t tcps_ecn_rcwnd; /* # times ECN reduced the cwnd */
1045962ebef8SLawrence Stewart
10462903309aSAttilio Rao /* TCP_SIGNATURE related stats */
10475923c293SGleb Smirnoff uint64_t tcps_sig_rcvgoodsig; /* Total matching signature received */
10485923c293SGleb Smirnoff uint64_t tcps_sig_rcvbadsig; /* Total bad signature received */
1049fcf59617SAndrey V. Elsukov uint64_t tcps_sig_err_buildsig; /* Failed to make signature */
10505923c293SGleb Smirnoff uint64_t tcps_sig_err_sigopt; /* No signature expected by socket */
10515923c293SGleb Smirnoff uint64_t tcps_sig_err_nosigopt; /* No signature provided by segment */
10522903309aSAttilio Rao
105332a04bb8SSean Bruno /* Path MTU Discovery Black Hole Detection related stats */
105432a04bb8SSean Bruno uint64_t tcps_pmtud_blackhole_activated; /* Black Hole Count */
105532a04bb8SSean Bruno uint64_t tcps_pmtud_blackhole_activated_min_mss; /* BH at min MSS Count */
105632a04bb8SSean Bruno uint64_t tcps_pmtud_blackhole_failed; /* Black Hole Failure Count */
105732a04bb8SSean Bruno
10589e644c23SMichael Tuexen uint64_t tcps_tunneled_pkts; /* UDP-encapsulated packets received */
10599e644c23SMichael Tuexen uint64_t tcps_tunneled_errs; /* UDP-encapsulated packets received with errors */
10609e644c23SMichael Tuexen
1061a36230f7SRandall Stewart /* Dsack related stats */
1062a36230f7SRandall Stewart uint64_t tcps_dsack_count; /* Number of ACKs arriving with DSACKs */
1063a36230f7SRandall Stewart uint64_t tcps_dsack_bytes; /* Number of bytes DSACK'ed, not due to TLPs */
1064a36230f7SRandall Stewart uint64_t tcps_dsack_tlp_bytes; /* Number of bytes DSACK'ed due to TLPs */
1065a36230f7SRandall Stewart
106671d2d5adSGleb Smirnoff /* TCPS_TIME_WAIT usage stats */
106771d2d5adSGleb Smirnoff uint64_t tcps_tw_recycles; /* Times time-wait was recycled. */
106871d2d5adSGleb Smirnoff uint64_t tcps_tw_resets; /* Times time-wait sent a reset. */
106971d2d5adSGleb Smirnoff uint64_t tcps_tw_responds; /* Times time-wait sent a valid ack. */
107071d2d5adSGleb Smirnoff
10713f169c54SRichard Scheffenegger /* Accurate ECN Handshake stats */
10723f169c54SRichard Scheffenegger uint64_t tcps_ace_nect; /* ACE SYN packet with Non-ECT */
10733f169c54SRichard Scheffenegger uint64_t tcps_ace_ect1; /* ACE SYN packet with ECT1 */
10743f169c54SRichard Scheffenegger uint64_t tcps_ace_ect0; /* ACE SYN packet with ECT0 */
10753f169c54SRichard Scheffenegger uint64_t tcps_ace_ce; /* ACE SYN packet with CE */
10763f169c54SRichard Scheffenegger
10771a70101aSRichard Scheffenegger /* ECN related stats */
10781a70101aSRichard Scheffenegger uint64_t tcps_ecn_sndect0; /* packets sent with ECT(0) */
10791a70101aSRichard Scheffenegger uint64_t tcps_ecn_sndect1; /* packets sent with ECT(1) */
10801a70101aSRichard Scheffenegger
1081945f9a7cSRandall Stewart /*
1082945f9a7cSRandall Stewart * BBR and Rack implement TLPs; these values count TLP bytes in
1083945f9a7cSRandall Stewart * two categories: bytes that were retransmitted and bytes that
1084945f9a7cSRandall Stewart * were newly transmitted. Both types can serve as TLPs, but they
1085945f9a7cSRandall Stewart * are accounted differently.
1086945f9a7cSRandall Stewart */
1087945f9a7cSRandall Stewart uint64_t tcps_tlpresends; /* number of tlp resends */
1088945f9a7cSRandall Stewart uint64_t tcps_tlpresend_bytes; /* number of bytes resent by tlp */
1089945f9a7cSRandall Stewart
1090646c28eaSMichael Tuexen /* SEG.ACK validation failures */
1091646c28eaSMichael Tuexen uint64_t tcps_rcvghostack; /* received ACK for data never sent */
1092646c28eaSMichael Tuexen uint64_t tcps_rcvacktooold; /* received ACK for data too long ago */
1093945f9a7cSRandall Stewart
1094646c28eaSMichael Tuexen
1095646c28eaSMichael Tuexen uint64_t _pad[1]; /* 1 TBD placeholder for STABLE */
1096df8bae1dSRodney W. Grimes };
1097df8bae1dSRodney W. Grimes
1098b1a41566SGleb Smirnoff #define tcps_rcvmemdrop tcps_rcvreassfull /* compat */
1099b1a41566SGleb Smirnoff
1100de231a06SRobert Watson #ifdef _KERNEL
11015923c293SGleb Smirnoff #include <sys/counter.h>
110260d8dbbeSKristof Provost #include <netinet/in_kdtrace.h>
11035923c293SGleb Smirnoff
11045da0521fSAndrey V. Elsukov VNET_PCPUSTAT_DECLARE(struct tcpstat, tcpstat); /* tcp statistics */
1105315e3e38SRobert Watson /*
1106315e3e38SRobert Watson * In-kernel consumers can use these accessor macros directly to update
1107315e3e38SRobert Watson * stats.
1108315e3e38SRobert Watson */
11095da0521fSAndrey V. Elsukov #define TCPSTAT_ADD(name, val) \
111060d8dbbeSKristof Provost do { \
111160d8dbbeSKristof Provost MIB_SDT_PROBE1(tcp, count, name, (val)); \
111260d8dbbeSKristof Provost VNET_PCPUSTAT_ADD(struct tcpstat, tcpstat, name, (val)); \
111360d8dbbeSKristof Provost } while (0)
111478b50714SRobert Watson #define TCPSTAT_INC(name) TCPSTAT_ADD(name, 1)
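
/*
 * For example (illustrative call sites only), code compiled into the kernel
 * bumps the per-VNET counters like this; 'tlen' stands for a segment's
 * payload length and is an assumption of the example:
 *
 *	TCPSTAT_INC(tcps_connattempt);
 *	TCPSTAT_ADD(tcps_rcvbyte, tlen);
 */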
1115315e3e38SRobert Watson
1116315e3e38SRobert Watson /*
1117315e3e38SRobert Watson * Kernel module consumers must use this accessor macro.
1118315e3e38SRobert Watson */
11197ca6e296SMichael Tuexen void kmod_tcpstat_add(int statnum, int val);
11207ca6e296SMichael Tuexen #define KMOD_TCPSTAT_ADD(name, val) \
112160d8dbbeSKristof Provost do { \
112260d8dbbeSKristof Provost MIB_SDT_PROBE1(tcp, count, name, (val)); \
112360d8dbbeSKristof Provost kmod_tcpstat_add(offsetof(struct tcpstat, name) / \
112460d8dbbeSKristof Provost sizeof(uint64_t), \
112560d8dbbeSKristof Provost val); \
112660d8dbbeSKristof Provost } while (0)
11277ca6e296SMichael Tuexen #define KMOD_TCPSTAT_INC(name) KMOD_TCPSTAT_ADD(name, 1)
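
/*
 * For example (illustrative only), a TCP stack built as a module uses the
 * KMOD_ variants, which hand the counter's offset to kmod_tcpstat_add()
 * instead of touching the counter array directly; 'acked' is an assumed
 * local holding the number of newly acked bytes:
 *
 *	KMOD_TCPSTAT_INC(tcps_rcvackpack);
 *	KMOD_TCPSTAT_ADD(tcps_rcvackbyte, acked);
 */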
112839bc9de5SLawrence Stewart
112939bc9de5SLawrence Stewart /*
1130bf840a17SGleb Smirnoff * Running TCP connection count by state.
1131bf840a17SGleb Smirnoff */
1132bf840a17SGleb Smirnoff VNET_DECLARE(counter_u64_t, tcps_states[TCP_NSTATES]);
1133f59d975eSGleb Smirnoff #define V_tcps_states VNET(tcps_states)
1134f59d975eSGleb Smirnoff #define TCPSTATES_INC(state) counter_u64_add(V_tcps_states[state], 1)
1135f59d975eSGleb Smirnoff #define TCPSTATES_DEC(state) counter_u64_add(V_tcps_states[state], -1)
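
/*
 * Illustrative only: tcp_state_change() (declared further below) keeps these
 * counters balanced on every state transition, roughly:
 *
 *	TCPSTATES_DEC(tp->t_state);	(leave the old state)
 *	TCPSTATES_INC(newstate);	(enter the new state)
 *	tp->t_state = newstate;
 */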
1136bf840a17SGleb Smirnoff
1137bf840a17SGleb Smirnoff /*
113839bc9de5SLawrence Stewart * TCP specific helper hook point identifiers.
113939bc9de5SLawrence Stewart */
114039bc9de5SLawrence Stewart #define HHOOK_TCP_EST_IN 0
114139bc9de5SLawrence Stewart #define HHOOK_TCP_EST_OUT 1
114239bc9de5SLawrence Stewart #define HHOOK_TCP_LAST HHOOK_TCP_EST_OUT
114339bc9de5SLawrence Stewart
114439bc9de5SLawrence Stewart struct tcp_hhook_data {
114539bc9de5SLawrence Stewart struct tcpcb *tp;
114639bc9de5SLawrence Stewart struct tcphdr *th;
114739bc9de5SLawrence Stewart struct tcpopt *to;
11483ac12506SJonathan T. Looney uint32_t len;
114939bc9de5SLawrence Stewart int tso;
115039bc9de5SLawrence Stewart tcp_seq curack;
115139bc9de5SLawrence Stewart };
115289e560f4SRandall Stewart #ifdef TCP_HHOOK
115389e560f4SRandall Stewart void hhook_run_tcp_est_out(struct tcpcb *tp,
115489e560f4SRandall Stewart struct tcphdr *th, struct tcpopt *to,
115589e560f4SRandall Stewart uint32_t len, int tso);
115689e560f4SRandall Stewart #endif
1157de231a06SRobert Watson #endif
115878b50714SRobert Watson
1159eb6ad696SGarrett Wollman /*
116098271db4SGarrett Wollman * TCB structure exported to user-land via sysctl(3).
1161cc65eb4eSGleb Smirnoff *
1162cc65eb4eSGleb Smirnoff * Fields prefixed with "xt_" are unique to the export structure, and fields
1163cc65eb4eSGleb Smirnoff * with "t_" or other prefixes match corresponding fields of 'struct tcpcb'.
1164cc65eb4eSGleb Smirnoff *
1165cc65eb4eSGleb Smirnoff * Legend:
1166cc65eb4eSGleb Smirnoff * (s) - used by userland utilities in src
1167cc65eb4eSGleb Smirnoff * (p) - used by utilities in ports
1168cc65eb4eSGleb Smirnoff * (3) - is known to be used by third party software not in ports
1169cc65eb4eSGleb Smirnoff * (n) - no known usage
1170cc65eb4eSGleb Smirnoff *
1171a910fdcbSJohn Hay * Evil hack: declare only if in_pcb.h and sys/socketvar.h have been
1172a910fdcbSJohn Hay * included. Not all of our clients do.
117398271db4SGarrett Wollman */
1174a910fdcbSJohn Hay #if defined(_NETINET_IN_PCB_H_) && defined(_SYS_SOCKETVAR_H_)
117598271db4SGarrett Wollman struct xtcpcb {
1176f38b68aeSBrooks Davis ksize_t xt_len; /* length of this structure */
1177cc65eb4eSGleb Smirnoff struct xinpcb xt_inp;
1178e5cccc35SMichael Tuexen char xt_stack[TCP_FUNCTION_NAME_LEN_MAX]; /* (s) */
11792529f56eSJonathan T. Looney char xt_logid[TCP_LOG_ID_LEN]; /* (s) */
118042d75607SMichael Tuexen char xt_cc[TCP_CA_NAME_MAX]; /* (s) */
118142d75607SMichael Tuexen int64_t spare64[6];
1182cc65eb4eSGleb Smirnoff int32_t t_state; /* (s,p) */
1183cc65eb4eSGleb Smirnoff uint32_t t_flags; /* (s,p) */
1184cc65eb4eSGleb Smirnoff int32_t t_sndzerowin; /* (s) */
1185cc65eb4eSGleb Smirnoff int32_t t_sndrexmitpack; /* (s) */
1186cc65eb4eSGleb Smirnoff int32_t t_rcvoopack; /* (s) */
1187cc65eb4eSGleb Smirnoff int32_t t_rcvtime; /* (s) */
1188cc65eb4eSGleb Smirnoff int32_t tt_rexmt; /* (s) */
1189cc65eb4eSGleb Smirnoff int32_t tt_persist; /* (s) */
1190cc65eb4eSGleb Smirnoff int32_t tt_keep; /* (s) */
1191cc65eb4eSGleb Smirnoff int32_t tt_2msl; /* (s) */
1192cc65eb4eSGleb Smirnoff int32_t tt_delack; /* (s) */
11932529f56eSJonathan T. Looney int32_t t_logstate; /* (3) */
119454321200SRichard Scheffenegger uint32_t t_snd_cwnd; /* (s) */
119554321200SRichard Scheffenegger uint32_t t_snd_ssthresh; /* (s) */
119654321200SRichard Scheffenegger uint32_t t_maxseg; /* (s) */
119754321200SRichard Scheffenegger uint32_t t_rcv_wnd; /* (s) */
119854321200SRichard Scheffenegger uint32_t t_snd_wnd; /* (s) */
119954321200SRichard Scheffenegger uint32_t xt_ecn; /* (s) */
1200a36230f7SRandall Stewart uint32_t t_dsack_bytes; /* (n) */
1201a36230f7SRandall Stewart uint32_t t_dsack_tlp_bytes; /* (n) */
1202a36230f7SRandall Stewart uint32_t t_dsack_pack; /* (n) */
12039e644c23SMichael Tuexen uint16_t xt_encaps_port; /* (s) */
12049e644c23SMichael Tuexen int16_t spare16;
1205a36230f7SRandall Stewart int32_t spare32[22];
1206cc65eb4eSGleb Smirnoff } __aligned(8);
12072529f56eSJonathan T. Looney
1208cc65eb4eSGleb Smirnoff #ifdef _KERNEL
1209cc65eb4eSGleb Smirnoff void tcp_inptoxtp(const struct inpcb *, struct xtcpcb *);
1210cc65eb4eSGleb Smirnoff #endif
121198271db4SGarrett Wollman #endif
121298271db4SGarrett Wollman
121398271db4SGarrett Wollman /*
1214f8979519SJonathan T. Looney * TCP function information (name-to-id mapping, aliases, and refcnt)
1215f8979519SJonathan T. Looney * exported to user-land via sysctl(3).
12162529f56eSJonathan T. Looney */
1217f8979519SJonathan T. Looney struct tcp_function_info {
1218f8979519SJonathan T. Looney uint32_t tfi_refcnt;
12192529f56eSJonathan T. Looney uint8_t tfi_id;
12202529f56eSJonathan T. Looney char tfi_name[TCP_FUNCTION_NAME_LEN_MAX];
1221f8979519SJonathan T. Looney char tfi_alias[TCP_FUNCTION_NAME_LEN_MAX];
12222529f56eSJonathan T. Looney };
12232529f56eSJonathan T. Looney
12242529f56eSJonathan T. Looney /*
12255b26ea5dSJohn Baldwin * Identifiers for TCP sysctl nodes
1226eb6ad696SGarrett Wollman */
1227eb6ad696SGarrett Wollman #define TCPCTL_DO_RFC1323 1 /* use RFC-1323 extensions */
1228eb6ad696SGarrett Wollman #define TCPCTL_MSSDFLT 3 /* MSS default */
12292f06d2abSGleb Smirnoff #define TCPCTL_STATS 4 /* statistics */
1230f2ea20e6SGarrett Wollman #define TCPCTL_RTTDFLT 5 /* default RTT estimate */
1231f2ea20e6SGarrett Wollman #define TCPCTL_KEEPIDLE 6 /* keepalive idle timer */
1232f2ea20e6SGarrett Wollman #define TCPCTL_KEEPINTVL 7 /* interval to send keepalives */
1233561c2ad3SPaul Traina #define TCPCTL_SENDSPACE 8 /* send buffer space */
1234561c2ad3SPaul Traina #define TCPCTL_RECVSPACE 9 /* receive buffer space */
1235571214d4SSheldon Hearn #define TCPCTL_KEEPINIT 10 /* timeout for establishing syn */
123698271db4SGarrett Wollman #define TCPCTL_PCBLIST 11 /* list of all outstanding PCBs */
12379b8b58e0SJonathan Lemon #define TCPCTL_DELACKTIME 12 /* time before sending delayed ACK */
123876429de4SYoshinobu Inoue #define TCPCTL_V6MSSDFLT 13 /* MSS default for IPv6 */
12396d90faf3SPaul Saab #define TCPCTL_SACK 14 /* Selective Acknowledgement,rfc 2018 */
1240212a79b0SMaxim Konovalov #define TCPCTL_DROP 15 /* drop tcp connection */
1241bf840a17SGleb Smirnoff #define TCPCTL_STATES 16 /* connection counts by TCP state */
12425fea0d9eSKonstantin Belousov #define TCPCTL_KTLSLIST 17 /* connections with active ktls
12435fea0d9eSKonstantin Belousov session */
12445fea0d9eSKonstantin Belousov #define TCPCTL_KTLSLIST_WKEYS 18 /* KTLSLIST with key data exported */
1245eb6ad696SGarrett Wollman
1246664a31e4SPeter Wemm #ifdef _KERNEL
1247ce02431fSDoug Rabson #ifdef SYSCTL_DECL
1248ce02431fSDoug Rabson SYSCTL_DECL(_net_inet_tcp);
1249a55db2b6SPaul Saab SYSCTL_DECL(_net_inet_tcp_sack);
1250df541e5fSAndre Oppermann MALLOC_DECLARE(M_TCPLOG);
1251ce02431fSDoug Rabson #endif
1252ce02431fSDoug Rabson
1253334fc582SBjoern A. Zeeb VNET_DECLARE(int, tcp_log_in_vain);
1254334fc582SBjoern A. Zeeb #define V_tcp_log_in_vain VNET(tcp_log_in_vain)
12553bdf4c42SGleb Smirnoff
12563bdf4c42SGleb Smirnoff /*
12573bdf4c42SGleb Smirnoff * Global TCP tunables shared between different stacks.
12583bdf4c42SGleb Smirnoff * Please keep the list sorted.
12593bdf4c42SGleb Smirnoff */
12603bdf4c42SGleb Smirnoff VNET_DECLARE(int, drop_synfin);
1261eddfbb76SRobert Watson VNET_DECLARE(int, path_mtu_discovery);
1262dbc42409SLawrence Stewart VNET_DECLARE(int, tcp_abc_l_var);
12634036380eSMichael Tuexen VNET_DECLARE(uint32_t, tcp_ack_war_cnt);
12644036380eSMichael Tuexen VNET_DECLARE(uint32_t, tcp_ack_war_time_window);
12653bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_autorcvbuf_max);
12663bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_autosndbuf_inc);
12673bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_autosndbuf_max);
12685dc99e9bSMark Johnston VNET_DECLARE(int, tcp_bind_all_fibs);
12693bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_delack_enabled);
12703bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_do_autorcvbuf);
12713bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_do_autosndbuf);
12723bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_do_ecn);
12730471a8c7SRichard Scheffenegger VNET_DECLARE(int, tcp_do_lrd);
127490cca08eSRichard Scheffenegger VNET_DECLARE(int, tcp_do_prr);
127590cca08eSRichard Scheffenegger VNET_DECLARE(int, tcp_do_prr_conservative);
1276b72e56e7SMichael Tuexen VNET_DECLARE(int, tcp_do_newcwv);
12773bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_do_rfc1323);
1278d2b3ceddSMichael Tuexen VNET_DECLARE(int, tcp_tolerate_missing_ts);
12793bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_do_rfc3042);
12803bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_do_rfc3390);
12813bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_do_rfc3465);
1282d1de2b05SRichard Scheffenegger VNET_DECLARE(int, tcp_do_newsack);
12833bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_do_sack);
12843bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_do_tso);
12853bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_ecn_maxretries);
12863bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_initcwnd_segments);
12873bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_insecure_rst);
12883bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_insecure_syn);
1289646c28eaSMichael Tuexen VNET_DECLARE(int, tcp_insecure_ack);
12901cf55767SRandall Stewart VNET_DECLARE(uint32_t, tcp_map_entries_limit);
12911cf55767SRandall Stewart VNET_DECLARE(uint32_t, tcp_map_split_limit);
12923bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_minmss);
12933bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_mssdflt);
1294adc56f5aSEdward Tomasz Napierala #ifdef STATS
1295adc56f5aSEdward Tomasz Napierala VNET_DECLARE(int, tcp_perconn_stats_dflt_tpl);
1296adc56f5aSEdward Tomasz Napierala VNET_DECLARE(int, tcp_perconn_stats_enable);
1297adc56f5aSEdward Tomasz Napierala #endif /* STATS */
12983bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_recvspace);
129943b117f8SRichard Scheffenegger VNET_DECLARE(int, tcp_retries);
13003bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_sack_globalholes);
13013bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_sack_globalmaxholes);
13023bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_sack_maxholes);
1303dcdfe449SRichard Scheffenegger VNET_DECLARE(int, tcp_sack_tso);
13043bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_sc_rst_sock_fail);
13053bdf4c42SGleb Smirnoff VNET_DECLARE(int, tcp_sendspace);
13069e644c23SMichael Tuexen VNET_DECLARE(int, tcp_udp_tunneling_overhead);
13079e644c23SMichael Tuexen VNET_DECLARE(int, tcp_udp_tunneling_port);
13083bdf4c42SGleb Smirnoff VNET_DECLARE(struct inpcbinfo, tcbinfo);
13093bdf4c42SGleb Smirnoff
13100471a8c7SRichard Scheffenegger #define V_tcp_do_lrd VNET(tcp_do_lrd)
13110e1d7c25SRichard Scheffenegger #define V_tcp_do_prr VNET(tcp_do_prr)
1312b72e56e7SMichael Tuexen #define V_tcp_do_newcwv VNET(tcp_do_newcwv)
13133bdf4c42SGleb Smirnoff #define V_drop_synfin VNET(drop_synfin)
13143bdf4c42SGleb Smirnoff #define V_path_mtu_discovery VNET(path_mtu_discovery)
13151e77c105SRobert Watson #define V_tcbinfo VNET(tcbinfo)
1316dbc42409SLawrence Stewart #define V_tcp_abc_l_var VNET(tcp_abc_l_var)
13174036380eSMichael Tuexen #define V_tcp_ack_war_cnt VNET(tcp_ack_war_cnt)
13184036380eSMichael Tuexen #define V_tcp_ack_war_time_window VNET(tcp_ack_war_time_window)
13193bdf4c42SGleb Smirnoff #define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max)
13203bdf4c42SGleb Smirnoff #define V_tcp_autosndbuf_inc VNET(tcp_autosndbuf_inc)
13213bdf4c42SGleb Smirnoff #define V_tcp_autosndbuf_max VNET(tcp_autosndbuf_max)
13225dc99e9bSMark Johnston #define V_tcp_bind_all_fibs VNET(tcp_bind_all_fibs)
13233bdf4c42SGleb Smirnoff #define V_tcp_delack_enabled VNET(tcp_delack_enabled)
13243bdf4c42SGleb Smirnoff #define V_tcp_do_autorcvbuf VNET(tcp_do_autorcvbuf)
13253bdf4c42SGleb Smirnoff #define V_tcp_do_autosndbuf VNET(tcp_do_autosndbuf)
13261e77c105SRobert Watson #define V_tcp_do_ecn VNET(tcp_do_ecn)
13273bdf4c42SGleb Smirnoff #define V_tcp_do_rfc1323 VNET(tcp_do_rfc1323)
1328d2b3ceddSMichael Tuexen #define V_tcp_tolerate_missing_ts VNET(tcp_tolerate_missing_ts)
1329d21036e0SMichael Tuexen #define V_tcp_ts_offset_per_conn VNET(tcp_ts_offset_per_conn)
13303bdf4c42SGleb Smirnoff #define V_tcp_do_rfc3042 VNET(tcp_do_rfc3042)
13313bdf4c42SGleb Smirnoff #define V_tcp_do_rfc3390 VNET(tcp_do_rfc3390)
13323bdf4c42SGleb Smirnoff #define V_tcp_do_rfc3465 VNET(tcp_do_rfc3465)
1333d1de2b05SRichard Scheffenegger #define V_tcp_do_newsack VNET(tcp_do_newsack)
13343bdf4c42SGleb Smirnoff #define V_tcp_do_sack VNET(tcp_do_sack)
13353bdf4c42SGleb Smirnoff #define V_tcp_do_tso VNET(tcp_do_tso)
13361e77c105SRobert Watson #define V_tcp_ecn_maxretries VNET(tcp_ecn_maxretries)
13373bdf4c42SGleb Smirnoff #define V_tcp_initcwnd_segments VNET(tcp_initcwnd_segments)
13383bdf4c42SGleb Smirnoff #define V_tcp_insecure_rst VNET(tcp_insecure_rst)
13393bdf4c42SGleb Smirnoff #define V_tcp_insecure_syn VNET(tcp_insecure_syn)
1340646c28eaSMichael Tuexen #define V_tcp_insecure_ack VNET(tcp_insecure_ack)
13411cf55767SRandall Stewart #define V_tcp_map_entries_limit VNET(tcp_map_entries_limit)
13421cf55767SRandall Stewart #define V_tcp_map_split_limit VNET(tcp_map_split_limit)
13433bdf4c42SGleb Smirnoff #define V_tcp_minmss VNET(tcp_minmss)
13443bdf4c42SGleb Smirnoff #define V_tcp_mssdflt VNET(tcp_mssdflt)
1345adc56f5aSEdward Tomasz Napierala #ifdef STATS
1346adc56f5aSEdward Tomasz Napierala #define V_tcp_perconn_stats_dflt_tpl VNET(tcp_perconn_stats_dflt_tpl)
1347adc56f5aSEdward Tomasz Napierala #define V_tcp_perconn_stats_enable VNET(tcp_perconn_stats_enable)
1348adc56f5aSEdward Tomasz Napierala #endif /* STATS */
13493bdf4c42SGleb Smirnoff #define V_tcp_recvspace VNET(tcp_recvspace)
135043b117f8SRichard Scheffenegger #define V_tcp_retries VNET(tcp_retries)
13513bdf4c42SGleb Smirnoff #define V_tcp_sack_globalholes VNET(tcp_sack_globalholes)
13523bdf4c42SGleb Smirnoff #define V_tcp_sack_globalmaxholes VNET(tcp_sack_globalmaxholes)
13533bdf4c42SGleb Smirnoff #define V_tcp_sack_maxholes VNET(tcp_sack_maxholes)
1354dcdfe449SRichard Scheffenegger #define V_tcp_sack_tso VNET(tcp_sack_tso)
13553bdf4c42SGleb Smirnoff #define V_tcp_sc_rst_sock_fail VNET(tcp_sc_rst_sock_fail)
13563bdf4c42SGleb Smirnoff #define V_tcp_sendspace VNET(tcp_sendspace)
135789e560f4SRandall Stewart #define V_tcp_udp_tunneling_overhead VNET(tcp_udp_tunneling_overhead)
135889e560f4SRandall Stewart #define V_tcp_udp_tunneling_port VNET(tcp_udp_tunneling_port)
135989e560f4SRandall Stewart
1360bd79708dSJonathan T. Looney #ifdef TCP_HHOOK
136139bc9de5SLawrence Stewart VNET_DECLARE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST + 1]);
136239bc9de5SLawrence Stewart #define V_tcp_hhh VNET(tcp_hhh)
1363bd79708dSJonathan T. Looney #endif
136439bc9de5SLawrence Stewart
1365219a6ca9SGleb Smirnoff void tcp_account_for_send(struct tcpcb *, uint32_t, uint8_t, uint8_t, bool);
136602a1a643SAndre Oppermann int tcp_addoptions(struct tcpopt *, u_char *);
1367df8bae1dSRodney W. Grimes struct tcpcb *
13684d77a549SAlfred Perlstein tcp_close(struct tcpcb *);
1369623dce13SRobert Watson void tcp_discardcb(struct tcpcb *);
1370340c35deSJonathan Lemon void tcp_twstart(struct tcpcb *);
13714d77a549SAlfred Perlstein int tcp_ctloutput(struct socket *, struct sockopt *);
1372c1537ef0SMike Silbersack void tcp_fini(void *);
1373700a395cSJohn Baldwin char *tcp_log_addrs(struct in_conninfo *, struct tcphdr *, const void *,
13749fb5d4c0SPeter Wemm const void *);
1375700a395cSJohn Baldwin char *tcp_log_vain(struct in_conninfo *, struct tcphdr *, const void *,
1376b7d747ecSAndre Oppermann const void *);
13774d0770f1SRichard Scheffenegger int tcp_reass(struct tcpcb *, struct tcphdr *, tcp_seq *, int *,
13784d0770f1SRichard Scheffenegger struct mbuf *);
13794741bfcbSPatrick Kelsey void tcp_reass_global_init(void);
13800c236c4eSLawrence Stewart void tcp_reass_flush(struct tcpcb *);
138155bceb1eSRandall Stewart void tcp_dooptions(struct tcpopt *, u_char *, int, int);
138255bceb1eSRandall Stewart void tcp_dropwithreset(struct mbuf *, struct tcphdr *,
138355bceb1eSRandall Stewart struct tcpcb *, int, int);
138455bceb1eSRandall Stewart void tcp_pulloutofband(struct socket *,
138555bceb1eSRandall Stewart struct tcphdr *, struct mbuf *, int);
138655bceb1eSRandall Stewart void tcp_xmit_timer(struct tcpcb *, int);
138755bceb1eSRandall Stewart void tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *);
138855bceb1eSRandall Stewart void cc_ack_received(struct tcpcb *tp, struct tcphdr *th,
13894b7b743cSLawrence Stewart uint16_t nsegs, uint16_t type);
139055bceb1eSRandall Stewart void cc_conn_init(struct tcpcb *tp);
139155bceb1eSRandall Stewart void cc_post_recovery(struct tcpcb *tp, struct tcphdr *th);
13924ad24737SRandall Stewart void cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos);
13935d8fd932SRandall Stewart void cc_ecnpkt_handler_flags(struct tcpcb *tp, uint16_t flags, uint8_t iptos);
139455bceb1eSRandall Stewart void cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type);
1395bd79708dSJonathan T. Looney #ifdef TCP_HHOOK
139655bceb1eSRandall Stewart void hhook_run_tcp_est_in(struct tcpcb *tp,
139755bceb1eSRandall Stewart struct tcphdr *th, struct tcpopt *to);
1398bd79708dSJonathan T. Looney #endif
139955bceb1eSRandall Stewart
14008f5a8818SKevin Lo int tcp_input(struct mbuf **, int *, int);
1401e44c1887SSteven Hartland int tcp_autorcvbuf(struct mbuf *, struct tcphdr *, struct socket *,
1402e44c1887SSteven Hartland struct tcpcb *, int);
14039e644c23SMichael Tuexen int tcp_input_with_port(struct mbuf **, int *, int, uint16_t);
140435bc0bccSGleb Smirnoff void tcp_do_segment(struct tcpcb *, struct mbuf *, struct tcphdr *, int,
140535bc0bccSGleb Smirnoff int, uint8_t);
140655bceb1eSRandall Stewart
140755bceb1eSRandall Stewart int register_tcp_functions(struct tcp_function_block *blk, int wait);
1408dc6a41b9SJonathan T. Looney int register_tcp_functions_as_names(struct tcp_function_block *blk,
1409dc6a41b9SJonathan T. Looney int wait, const char *names[], int *num_names);
1410dc6a41b9SJonathan T. Looney int register_tcp_functions_as_name(struct tcp_function_block *blk,
1411dc6a41b9SJonathan T. Looney const char *name, int wait);
14123ee9c3c4SRandall Stewart int deregister_tcp_functions(struct tcp_function_block *blk, bool quiesce,
14133ee9c3c4SRandall Stewart bool force);
141455bceb1eSRandall Stewart struct tcp_function_block *find_and_ref_tcp_functions(struct tcp_function_set *fs);
1415e2833083SPeter Lei int find_tcp_function_alias(struct tcp_function_block *blk, struct tcp_function_set *fs);
1416ec6d620bSRandall Stewart uint32_t tcp_get_srtt(struct tcpcb *tp, int granularity);
14173ee9c3c4SRandall Stewart void tcp_switch_back_to_default(struct tcpcb *tp);
14183ee9c3c4SRandall Stewart struct tcp_function_block *
14193ee9c3c4SRandall Stewart find_and_ref_tcp_fb(struct tcp_function_block *fs);
142066fbc19fSGleb Smirnoff int tcp_default_ctloutput(struct tcpcb *tp, struct sockopt *sopt);
1421fd7daa72SMichael Tuexen int tcp_ctloutput_set(struct inpcb *inp, struct sockopt *sopt);
1422945f9a7cSRandall Stewart void tcp_log_socket_option(struct tcpcb *tp, uint32_t option_num,
1423945f9a7cSRandall Stewart uint32_t option_val, int err);
1424945f9a7cSRandall Stewart
142555bceb1eSRandall Stewart
1426af9b9e0dSRandall Stewart extern counter_u64_t tcp_inp_lro_direct_queue;
1427af9b9e0dSRandall Stewart extern counter_u64_t tcp_inp_lro_wokeup_queue;
1428af9b9e0dSRandall Stewart extern counter_u64_t tcp_inp_lro_compressed;
1429af9b9e0dSRandall Stewart extern counter_u64_t tcp_inp_lro_locks_taken;
143069a34e8dSRandall Stewart extern counter_u64_t tcp_extra_mbuf;
143169a34e8dSRandall Stewart extern counter_u64_t tcp_would_have_but;
143269a34e8dSRandall Stewart extern counter_u64_t tcp_comp_total;
143369a34e8dSRandall Stewart extern counter_u64_t tcp_uncomp_total;
1434ca1a7e10SRandall Stewart extern counter_u64_t tcp_bad_csums;
1435af9b9e0dSRandall Stewart
14365d8fd932SRandall Stewart extern uint32_t tcp_ack_war_time_window;
14375d8fd932SRandall Stewart extern uint32_t tcp_ack_war_cnt;
14381cf55767SRandall Stewart
1439e4315bbcSGleb Smirnoff /*
1440e4315bbcSGleb Smirnoff * Used by tcp_maxmtu() to communicate interface specific features
1441e4315bbcSGleb Smirnoff * and limits at the time of connection setup.
1442e4315bbcSGleb Smirnoff */
1443e4315bbcSGleb Smirnoff struct tcp_ifcap {
1444e4315bbcSGleb Smirnoff int ifcap;
1445e4315bbcSGleb Smirnoff u_int tsomax;
1446e4315bbcSGleb Smirnoff u_int tsomaxsegcount;
1447e4315bbcSGleb Smirnoff u_int tsomaxsegsize;
1448b6919741SKonstantin Belousov bool ipsec_tso;
1449e4315bbcSGleb Smirnoff };
14503ac12506SJonathan T. Looney uint32_t tcp_maxmtu(struct in_conninfo *, struct tcp_ifcap *);
14513ac12506SJonathan T. Looney uint32_t tcp_maxmtu6(struct in_conninfo *, struct tcp_ifcap *);
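
/*
 * Illustrative sketch: callers typically zero a struct tcp_ifcap, pass it to
 * tcp_maxmtu()/tcp_maxmtu6() along with the connection's in_conninfo, and
 * then consult both the returned MTU and the capability fields ('inp' is an
 * assumed inpcb pointer):
 *
 *	struct tcp_ifcap cap = { 0 };
 *	uint32_t maxmtu;
 *
 *	maxmtu = tcp_maxmtu(&inp->inp_inc, &cap);
 *	if (cap.ifcap & CSUM_TSO)
 *		...	(interface can do TSO; honor cap.tsomax limits)
 */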
1452e4315bbcSGleb Smirnoff
1453f581a26eSGleb Smirnoff void tcp6_use_min_mtu(struct tcpcb *);
14540c39d38dSGleb Smirnoff u_int tcp_maxseg(const struct tcpcb *);
14555d8fd932SRandall Stewart u_int tcp_fixed_maxseg(const struct tcpcb *);
1456ef341ee1SGleb Smirnoff void tcp_mss_update(struct tcpcb *, int, int, struct hc_metrics_lite *,
14573c914c54SAndre Oppermann struct tcp_ifcap *);
14584d77a549SAlfred Perlstein void tcp_mss(struct tcpcb *, int);
145997d8d152SAndre Oppermann int tcp_mssopt(struct in_conninfo *);
1460df8bae1dSRodney W. Grimes struct tcpcb *
1461baee801cSMichael Tuexen tcp_newtcpcb(struct inpcb *, struct tcpcb *);
14625b08b46aSGleb Smirnoff int tcp_default_output(struct tcpcb *);
146357f60867SMark Johnston void tcp_state_change(struct tcpcb *, int);
14644d77a549SAlfred Perlstein void tcp_respond(struct tcpcb *, void *,
146583c1ec92SRichard Scheffenegger struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, uint16_t);
146640299c55SMichael Tuexen void tcp_send_challenge_ack(struct tcpcb *, struct tcphdr *, struct mbuf *);
14670d744519SGleb Smirnoff bool tcp_twcheck(struct inpcb *, struct tcpopt *, struct tcphdr *,
14682104448fSAndre Oppermann struct mbuf *, int);
14694d77a549SAlfred Perlstein void tcp_setpersist(struct tcpcb *);
1470a36230f7SRandall Stewart void tcp_record_dsack(struct tcpcb *tp, tcp_seq start, tcp_seq end, int tlp);
1471fb59c426SYoshinobu Inoue struct tcptemp *
147279909384SJonathan Lemon tcpip_maketemplate(struct inpcb *);
14739e644c23SMichael Tuexen void tcpip_fillheaders(struct inpcb *, uint16_t, void *, void *);
1474446ccdd0SGleb Smirnoff void tcp_timer_activate(struct tcpcb *, tt_which, u_int);
1475446ccdd0SGleb Smirnoff bool tcp_timer_active(struct tcpcb *, tt_which);
1476446ccdd0SGleb Smirnoff void tcp_timer_stop(struct tcpcb *);
147789e560f4SRandall Stewart int inp_to_cpuid(struct inpcb *inp);
147897d8d152SAndre Oppermann /*
147997d8d152SAndre Oppermann * All tcp_hc_* functions are IPv4 and IPv6 (via in_conninfo)
148097d8d152SAndre Oppermann */
148197d8d152SAndre Oppermann void tcp_hc_init(void);
1482bc29160dSMarko Zec #ifdef VIMAGE
1483bc29160dSMarko Zec void tcp_hc_destroy(void);
1484bc29160dSMarko Zec #endif
1485b80c06ccSGleb Smirnoff void tcp_hc_get(const struct in_conninfo *, struct hc_metrics_lite *);
1486b80c06ccSGleb Smirnoff uint32_t tcp_hc_getmtu(const struct in_conninfo *);
1487b80c06ccSGleb Smirnoff void tcp_hc_updatemtu(const struct in_conninfo *, uint32_t);
1488b80c06ccSGleb Smirnoff void tcp_hc_update(const struct in_conninfo *, struct hc_metrics_lite *);
1489cd84e78fSRandall Stewart void cc_after_idle(struct tcpcb *tp);
1490dd224982SGarrett Wollman
1491e7d02be1SGleb Smirnoff extern struct protosw tcp_protosw; /* shared for TOE */
1492e7d02be1SGleb Smirnoff extern struct protosw tcp6_protosw; /* shared for TOE */
14938e02b4e0SMichael Tuexen
14948e02b4e0SMichael Tuexen uint32_t tcp_new_ts_offset(struct in_conninfo *);
14958e02b4e0SMichael Tuexen tcp_seq tcp_new_isn(struct in_conninfo *);
1496dd224982SGarrett Wollman
149749a6fbe3SRichard Scheffenegger sackstatus_t
149849a6fbe3SRichard Scheffenegger tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
14995d8fd932SRandall Stewart int tcp_dsack_block_exists(struct tcpcb *);
1500fe5dee73SMichael Tuexen void tcp_update_dsack_list(struct tcpcb *, tcp_seq, tcp_seq);
1501c7c325d0SRichard Scheffenegger void tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart,
1502c7c325d0SRichard Scheffenegger tcp_seq rcv_lastend);
1503e5926fd3SRandall Stewart void tcp_clean_dsack_blocks(struct tcpcb *tp);
15046d90faf3SPaul Saab void tcp_clean_sackreport(struct tcpcb *tp);
1505440f4ba1SRichard Scheffenegger int tcp_sack_adjust(struct tcpcb *tp);
1506a55db2b6SPaul Saab struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt);
1507c7c325d0SRichard Scheffenegger void tcp_do_prr_ack(struct tcpcb *, struct tcphdr *, struct tcpopt *,
1508c7c325d0SRichard Scheffenegger sackstatus_t, u_int *);
15090471a8c7SRichard Scheffenegger void tcp_lost_retransmission(struct tcpcb *, struct tcphdr *);
1510c7c325d0SRichard Scheffenegger void tcp_sack_partialack(struct tcpcb *, struct tcphdr *, u_int *);
151130409ecdSRichard Scheffenegger void tcp_resend_sackholes(struct tcpcb *tp);
15126d90faf3SPaul Saab void tcp_free_sackholes(struct tcpcb *tp);
15130471a8c7SRichard Scheffenegger void tcp_sack_lost_retransmission(struct tcpcb *, struct tcphdr *);
15146d90faf3SPaul Saab int tcp_newreno(struct tcpcb *, struct tcphdr *);
151512eeb81fSHiren Panchasara int tcp_compute_pipe(struct tcpcb *);
15167dc90a1dSMichael Tuexen uint32_t tcp_compute_initwnd(uint32_t);
151766492feaSGleb Smirnoff void tcp_sndbuf_autoscale(struct tcpcb *, struct socket *, uint32_t);
1518adc56f5aSEdward Tomasz Napierala int tcp_stats_sample_rollthedice(struct tcpcb *tp, void *seed_bytes,
1519adc56f5aSEdward Tomasz Napierala size_t seed_len);
15205d8fd932SRandall Stewart int tcp_can_enable_pacing(void);
1521e18b97bdSRandall Stewart int tcp_incr_dgp_pacing_cnt(void);
1522e18b97bdSRandall Stewart void tcp_dec_dgp_pacing_cnt(void);
15235d8fd932SRandall Stewart void tcp_decrement_paced_conn(void);
152473ee5756SRandall Stewart void tcp_change_time_units(struct tcpcb *, int);
152573ee5756SRandall Stewart void tcp_handle_orphaned_packets(struct tcpcb *);
15265d8fd932SRandall Stewart
152789e560f4SRandall Stewart struct mbuf *
152889e560f4SRandall Stewart tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *plen,
1529b2e60773SJohn Baldwin int32_t seglimit, int32_t segsize, struct sockbuf *sb, bool hw_tls);
153089e560f4SRandall Stewart
1531adc56f5aSEdward Tomasz Napierala int tcp_stats_init(void);
1532e570d231SRandall Stewart void tcp_log_end_status(struct tcpcb *tp, uint8_t status);
153373ee5756SRandall Stewart #ifdef TCP_REQUEST_TRK
153457a3a161SRandall Stewart void tcp_req_free_a_slot(struct tcpcb *tp, struct tcp_sendfile_track *ent);
153557a3a161SRandall Stewart struct tcp_sendfile_track *
153657a3a161SRandall Stewart tcp_req_find_a_req_that_is_completed_by(struct tcpcb *tp, tcp_seq th_ack, int *ip);
153757a3a161SRandall Stewart int tcp_req_check_for_comp(struct tcpcb *tp, tcp_seq ack_point);
153873ee5756SRandall Stewart int
153957a3a161SRandall Stewart tcp_req_is_entry_comp(struct tcpcb *tp, struct tcp_sendfile_track *ent, tcp_seq ack_point);
154057a3a161SRandall Stewart struct tcp_sendfile_track *
154157a3a161SRandall Stewart tcp_req_find_req_for_seq(struct tcpcb *tp, tcp_seq seq);
154273ee5756SRandall Stewart void
154357a3a161SRandall Stewart tcp_req_log_req_info(struct tcpcb *tp,
154457a3a161SRandall Stewart struct tcp_sendfile_track *req, uint16_t slot,
154573ee5756SRandall Stewart uint8_t val, uint64_t offset, uint64_t nbytes);
154673ee5756SRandall Stewart
154773ee5756SRandall Stewart uint32_t
154873ee5756SRandall Stewart tcp_estimate_tls_overhead(struct socket *so, uint64_t tls_usr_bytes);
154973ee5756SRandall Stewart void
155057a3a161SRandall Stewart tcp_req_alloc_req(struct tcpcb *tp, union tcp_log_userdata *user,
155173ee5756SRandall Stewart uint64_t ts);
155273ee5756SRandall Stewart
155357a3a161SRandall Stewart struct tcp_sendfile_track *
155457a3a161SRandall Stewart tcp_req_alloc_req_full(struct tcpcb *tp, struct tcp_snd_req *req, uint64_t ts, int rec_dups);
155573ee5756SRandall Stewart
155673ee5756SRandall Stewart
155773ee5756SRandall Stewart #endif
155869c7c811SRandall Stewart #ifdef TCP_ACCOUNTING
155969c7c811SRandall Stewart int tcp_do_ack_accounting(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to, uint32_t tiwin, int mss);
156069c7c811SRandall Stewart #endif
156169c7c811SRandall Stewart
1562c3c20de3SGleb Smirnoff static inline void
1563c3c20de3SGleb Smirnoff tcp_lro_features_off(struct tcpcb *tp)
1564c3c20de3SGleb Smirnoff {
1565c3c20de3SGleb Smirnoff tp->t_flags2 &= ~(TF2_SUPPORTS_MBUFQ|
1566c3c20de3SGleb Smirnoff TF2_MBUF_QUEUE_READY|
1567c3c20de3SGleb Smirnoff TF2_DONT_SACK_QUEUE|
1568c3c20de3SGleb Smirnoff TF2_MBUF_ACKCMP|
1569c3c20de3SGleb Smirnoff TF2_MBUF_L_ACKS);
1570c3c20de3SGleb Smirnoff }
15716d90faf3SPaul Saab
1572255cd9fdSBjoern A. Zeeb static inline void
1573255cd9fdSBjoern A. Zeeb tcp_fields_to_host(struct tcphdr *th)
1574255cd9fdSBjoern A. Zeeb {
1575255cd9fdSBjoern A. Zeeb
1576255cd9fdSBjoern A. Zeeb th->th_seq = ntohl(th->th_seq);
1577255cd9fdSBjoern A. Zeeb th->th_ack = ntohl(th->th_ack);
1578255cd9fdSBjoern A. Zeeb th->th_win = ntohs(th->th_win);
1579255cd9fdSBjoern A. Zeeb th->th_urp = ntohs(th->th_urp);
1580255cd9fdSBjoern A. Zeeb }
1581255cd9fdSBjoern A. Zeeb
1582cfff3743SGleb Smirnoff static inline void
1583cfff3743SGleb Smirnoff tcp_fields_to_net(struct tcphdr *th)
1584cfff3743SGleb Smirnoff {
1585cfff3743SGleb Smirnoff
1586cfff3743SGleb Smirnoff th->th_seq = htonl(th->th_seq);
1587cfff3743SGleb Smirnoff th->th_ack = htonl(th->th_ack);
1588cfff3743SGleb Smirnoff th->th_win = htons(th->th_win);
1589cfff3743SGleb Smirnoff th->th_urp = htons(th->th_urp);
1590cfff3743SGleb Smirnoff }
15918717c306SRichard Scheffenegger #endif /* _KERNEL */
159267e89281SRandall Stewart
15932f96f1f4SGarrett Wollman #endif /* _NETINET_TCP_VAR_H_ */
1594