xref: /illumos-gate/usr/src/uts/common/inet/cc.h (revision 069e6b7e31ba5dcbc5441b98af272714d9a5455c)
1 /*
2  * Copyright (c) 2007-2008
3  *	Swinburne University of Technology, Melbourne, Australia.
4  * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
5  * Copyright (c) 2010 The FreeBSD Foundation
6  * All rights reserved.
7  * Copyright (c) 2017 by Delphix. All rights reserved.
8  *
9  * This software was developed at the Centre for Advanced Internet
10  * Architectures, Swinburne University of Technology, by Lawrence Stewart and
11  * James Healy, made possible in part by a grant from the Cisco University
12  * Research Program Fund at Community Foundation Silicon Valley.
13  *
14  * Portions of this software were developed at the Centre for Advanced
15  * Internet Architectures, Swinburne University of Technology, Melbourne,
16  * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
17  *
18  * Redistribution and use in source and binary forms, with or without
19  * modification, are permitted provided that the following conditions
20  * are met:
21  * 1. Redistributions of source code must retain the above copyright
22  *    notice, this list of conditions and the following disclaimer.
23  * 2. Redistributions in binary form must reproduce the above copyright
24  *    notice, this list of conditions and the following disclaimer in the
25  *    documentation and/or other materials provided with the distribution.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37  * SUCH DAMAGE.
38  *
39  * $FreeBSD$
40  */
41 
42 /*
43  * This software was first released in 2007 by James Healy and Lawrence Stewart
44  * whilst working on the NewTCP research project at Swinburne University of
45  * Technology's Centre for Advanced Internet Architectures, Melbourne,
46  * Australia, which was made possible in part by a grant from the Cisco
47  * University Research Program Fund at Community Foundation Silicon Valley.
48  * More details are available at:
49  *   http://caia.swin.edu.au/urp/newtcp/
50  */
51 
52 #ifndef _NETINET_CC_H_
53 #define	_NETINET_CC_H_
54 
55 #if (defined(_KERNEL) || defined(_KMEMUSER))
56 
57 #ifdef	__cplusplus
58 extern "C" {
59 #endif
60 
61 #include <netinet/tcp.h>
62 #include <sys/queue.h>
63 #include <sys/rwlock.h>
64 
/* Size of the name[] buffer in struct cc_algo (max CC name length). */
#define	CC_ALGO_NAME_MAX	16

/* Name of the default congestion control algorithm. */
#define	CC_DEFAULT_ALGO_NAME	"sunreno"

/*
 * Forward declarations.  struct cc_algo is declared explicitly so that its
 * tag has file scope regardless of the order of the prototypes below; a tag
 * first seen inside a parameter list would only have prototype scope and
 * would not match the later definition.
 */
struct tcp_s;
struct sctp_s;
struct cc_algo;

/* CC housekeeping functions. */
extern struct cc_algo *cc_load_algo(const char *name);
extern int	cc_register_algo(struct cc_algo *add_cc);
extern int	cc_deregister_algo(struct cc_algo *remove_cc);
76 
77 /*
78  * Wrapper around transport structs that contain same-named congestion
79  * control variables. Allows algos to be shared amongst multiple CC aware
80  * transports.
81  *
82  * In theory, this code (from FreeBSD) can be used to support pluggable
83  * congestion control for sctp as well as tcp.  However, the support for sctp
84  * in FreeBSD is incomplete, and in practice "type" is ignored.  cc_module.h
85  * provides a CCV macro which implementations can use to get a variable out of
86  * the protocol-appropriate structure.
87  *
88  * If FreeBSD eventually does extend support for pluggable congestion control
89  * to sctp, we'll need to make sure we're setting "type" appropriately or use
90  * a definition of CCV that ignores it.
91  */
92 struct cc_var {
93 	void		*cc_data; /* Per-connection private algorithm data. */
94 	int		bytes_this_ack; /* # bytes acked by the current ACK. */
95 	int		t_bytes_acked; /* # bytes acked during current RTT */
96 	tcp_seq		curack; /* Most recent ACK. */
97 	uint32_t	flags; /* Flags for cc_var (see below) */
98 	int		type; /* Indicates which ptr is valid in ccvc. */
99 	union ccv_container {
100 		struct tcp_s	*tcp;
101 		struct sctp_s	*sctp;
102 	} ccvc;
103 	uint16_t	nsegs; /* # segments coalesced into current chain. */
104 };
105 
/*
 * cc_var flags.
 *
 * CCF_ABC_SENTAWND is set when a full congestion window of data has been ACKed
 *   according to the Appropriate Byte Counting spec, defined in RFC 3465.
 */
#define	CCF_ABC_SENTAWND	0x0001	/* ABC counted cwnd worth of bytes? */
#define	CCF_CWND_LIMITED	0x0002	/* Are we currently cwnd limited? */
#define	CCF_FASTRECOVERY	0x0004	/* in NewReno Fast Recovery */
#define	CCF_WASFRECOVERY	0x0008	/* was in NewReno Fast Recovery */
#define	CCF_CONGRECOVERY	0x0010	/* congestion recovery mode */
#define	CCF_WASCRECOVERY	0x0020	/* was in congestion recovery */
/*
 * In slow-start due to a retransmission timeout. This flag is enabled for the
 * duration of the slow-start phase.
 */
#define	CCF_RTO			0x0040	/* in slow-start due to timeout */

/*
 * Helpers to test (IN_*), set (ENTER_*) and clear (EXIT_*) the recovery bits
 * in a flags word.
 *
 * Both the argument and the whole expansion are parenthesized, so each macro
 * behaves as a single expression: an argument such as (a | b) or
 * (c ? x : y) is handled correctly, and ENTER_ and EXIT_ can be used inside
 * a larger expression as well as a plain statement.  The argument is
 * evaluated exactly once; for ENTER_ and EXIT_ it must be a modifiable
 * lvalue.
 */
#define	IN_FASTRECOVERY(flags)		((flags) & CCF_FASTRECOVERY)
#define	ENTER_FASTRECOVERY(flags)	((flags) |= CCF_FASTRECOVERY)
#define	EXIT_FASTRECOVERY(flags)	((flags) &= ~CCF_FASTRECOVERY)

#define	IN_CONGRECOVERY(flags)		((flags) & CCF_CONGRECOVERY)
#define	ENTER_CONGRECOVERY(flags)	((flags) |= CCF_CONGRECOVERY)
#define	EXIT_CONGRECOVERY(flags)	((flags) &= ~CCF_CONGRECOVERY)

/* Either (IN_) or both (ENTER_/EXIT_) of fast and congestion recovery. */
#define	IN_RECOVERY(flags)	\
	((flags) & (CCF_CONGRECOVERY | CCF_FASTRECOVERY))
#define	ENTER_RECOVERY(flags)	\
	((flags) |= (CCF_CONGRECOVERY | CCF_FASTRECOVERY))
#define	EXIT_RECOVERY(flags)	\
	((flags) &= ~(CCF_CONGRECOVERY | CCF_FASTRECOVERY))
135 
/*
 * ACK types passed to the ack_received() hook.
 *
 * CC_ACK: the ACK acknowledges previously unACKed data.
 * CC_DUPACK: a duplicate ACK was received.  The conditions under which an
 *   ACK is considered a duplicate ACK are defined in RFC 5681.
 */
#define	CC_ACK		0x0001	/* Regular in sequence ACK. */
#define	CC_DUPACK	0x0002	/* Duplicate ACK. */
#define	CC_PARTIALACK	0x0004	/* Partial ACK -- not yet. */
#define	CC_SACK		0x0008	/* SACK -- not yet. */
147 
/*
 * Congestion signal types passed to the cong_signal() hook.  The highest
 * order 8 bits (0x01000000 - 0x80000000) are reserved for CC algos to declare
 * their own congestion signal types.
 *
 * The congestion signals defined here cover the following situations:
 * CC_ECN: A packet with an Explicit Congestion Notification was received.
 *   See RFC 3168.
 * CC_RTO: A round-trip timeout occurred.
 * CC_RTO_ERR: An ACK was received for a sequence number after we fired an RTO
 *   for that sequence number.
 * CC_NDUPACK: Trigger fast retransmit based on the assumption that receiving
 *   N duplicate ACKs indicates packet loss rather than reordering.  Fast
 *   retransmit is followed by fast recovery.  Fast retransmit and recovery
 *   were originally described in RFC 2581 and were updated by RFC 3782
 *   (NewReno).  In both RFC 2581 and RFC 3782, N is 3.
 */
#define	CC_ECN		0x00000001	/* ECN marked packet received. */
#define	CC_RTO		0x00000002	/* RTO fired. */
#define	CC_RTO_ERR	0x00000004	/* RTO fired in error. */
#define	CC_NDUPACK	0x00000008	/* Threshold of dupack's reached. */

/* Mask to check if a signal is private to a CC algo (top 8 bits). */
#define	CC_SIGPRIVMASK	0xFF000000
171 
172 /*
173  * Structure to hold data and function pointers that together represent a
174  * congestion control algorithm.
175  */
176 struct cc_algo {
177 	char	name[CC_ALGO_NAME_MAX];
178 
179 	/* Init CC state for a new control block. */
180 	int	(*cb_init)(struct cc_var *ccv);
181 
182 	/* Cleanup CC state for a terminating control block. */
183 	void	(*cb_destroy)(struct cc_var *ccv);
184 
185 	/* Init variables for a newly established connection. */
186 	void	(*conn_init)(struct cc_var *ccv);
187 
188 	/* Called on receipt of an ack. */
189 	void	(*ack_received)(struct cc_var *ccv, uint16_t type);
190 
191 	/* Called on detection of a congestion signal. */
192 	void	(*cong_signal)(struct cc_var *ccv, uint32_t type);
193 
194 	/* Called after exiting congestion recovery. */
195 	void	(*post_recovery)(struct cc_var *ccv);
196 
197 	/* Called when data transfer resumes after an idle period. */
198 	void	(*after_idle)(struct cc_var *ccv);
199 
200 	STAILQ_ENTRY(cc_algo) entries;
201 };
202 
/*
 * Callback type accepted by cc_walk_algos(): takes an opaque pointer and a
 * cc_algo, and returns an int.
 * NOTE(review): presumably the void * handed to cc_walk_algos() is what the
 * callback receives as its first argument -- confirm against the
 * implementation.
 */
typedef int cc_walk_func_t(void *arg, struct cc_algo *algo);
extern int	cc_walk_algos(cc_walk_func_t *func, void *arg);
205 
/*
 * Accessors for the congestion control state kept in a TCP control block.
 * Both take a pointer (tp) and expand to an lvalue.
 */

/* The CC algo's struct pointer: tp->tcp_cc_algo. */
#define	CC_ALGO(tp)	((tp)->tcp_cc_algo)

/* The CC algo's private data pointer: tp->tcp_ccv.cc_data. */
#define	CC_DATA(tp)	((tp)->tcp_ccv.cc_data)
211 
212 #ifdef	__cplusplus
213 }
214 #endif
215 
216 #endif	/* (defined(_KERNEL) || defined(_KMEMUSER)) */
217 
218 #endif /* _NETINET_CC_H_ */
219