xref: /freebsd/sys/netinet/cc/cc.h (revision b6c137de0af1a2bbd4c73bbc9cd96325268f3b0e)
1 /*-
2  * Copyright (c) 2007-2008
3  * 	Swinburne University of Technology, Melbourne, Australia.
4  * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
5  * Copyright (c) 2010 The FreeBSD Foundation
6  * All rights reserved.
7  *
8  * This software was developed at the Centre for Advanced Internet
9  * Architectures, Swinburne University of Technology, by Lawrence Stewart and
10  * James Healy, made possible in part by a grant from the Cisco University
11  * Research Program Fund at Community Foundation Silicon Valley.
12  *
13  * Portions of this software were developed at the Centre for Advanced
14  * Internet Architectures, Swinburne University of Technology, Melbourne,
15  * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
16  *
17  * Redistribution and use in source and binary forms, with or without
18  * modification, are permitted provided that the following conditions
19  * are met:
20  * 1. Redistributions of source code must retain the above copyright
21  *    notice, this list of conditions and the following disclaimer.
22  * 2. Redistributions in binary form must reproduce the above copyright
23  *    notice, this list of conditions and the following disclaimer in the
24  *    documentation and/or other materials provided with the distribution.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  */
38 
39 /*
40  * This software was first released in 2007 by James Healy and Lawrence Stewart
41  * whilst working on the NewTCP research project at Swinburne University of
42  * Technology's Centre for Advanced Internet Architectures, Melbourne,
43  * Australia, which was made possible in part by a grant from the Cisco
44  * University Research Program Fund at Community Foundation Silicon Valley.
45  * More details are available at:
46  *   http://caia.swin.edu.au/urp/newtcp/
47  */
48 
49 #ifndef _NETINET_CC_CC_H_
50 #define _NETINET_CC_CC_H_
51 
52 #ifdef _KERNEL
53 
54 MALLOC_DECLARE(M_CC_MEM);
55 
56 /* Global CC vars. */
57 extern STAILQ_HEAD(cc_head, cc_algo) cc_list;
58 extern const int tcprexmtthresh;
59 
60 /* Per-netstack bits. */
61 VNET_DECLARE(struct cc_algo *, default_cc_ptr);
62 #define	V_default_cc_ptr VNET(default_cc_ptr)
63 
64 VNET_DECLARE(int, cc_do_abe);
65 #define	V_cc_do_abe			VNET(cc_do_abe)
66 
67 VNET_DECLARE(int, cc_abe_frlossreduce);
68 #define	V_cc_abe_frlossreduce		VNET(cc_abe_frlossreduce)
69 
70 /* Define the new net.inet.tcp.cc sysctl tree. */
71 #ifdef _SYS_SYSCTL_H_
72 SYSCTL_DECL(_net_inet_tcp_cc);
73 #endif
74 
/*
 * Variables for CC modules that use hystart++.  They are declared here
 * only; the definitions live outside this header.
 */
extern uint32_t	hystart_lowcwnd;
extern uint32_t	hystart_minrtt_thresh;
extern uint32_t	hystart_maxrtt_thresh;
extern uint32_t	hystart_n_rttsamples;
extern uint32_t	hystart_css_growth_div;
extern uint32_t	hystart_css_rounds;
extern uint32_t	hystart_bblogs;
83 
/*
 * CC housekeeping functions: register and deregister a struct cc_algo.
 * Both return an int; the meaning of the value is not defined in this
 * header.
 */
int	cc_register_algo(struct cc_algo *add_cc);
int	cc_deregister_algo(struct cc_algo *remove_cc);
87 #endif /* _KERNEL */
88 
89 #if defined(_KERNEL) || defined(_WANT_TCPCB)
90 struct cc_var {
91 	void		*cc_data; /* Per-connection private CC algorithm data. */
92 	int		bytes_this_ack; /* # bytes acked by the current ACK. */
93 	tcp_seq		curack; /* Most recent ACK. */
94 	uint32_t	flags; /* Flags for cc_var (see below) */
95 	struct tcpcb	*tp; /* Pointer to tcpcb */
96 	uint16_t	nsegs; /* # segments coalesced into current chain. */
97 	uint8_t		labc;  /* Dont use system abc use passed in */
98 };
99 
/*
 * Bit values for the flags member of struct cc_var.
 */
#define	CCF_ABC_SENTAWND	0x0001	/* Has ABC counted a cwnd worth of bytes? */
#define	CCF_CWND_LIMITED	0x0002	/* Are we currently cwnd limited? */
#define	CCF_USE_LOCAL_ABC	0x0004	/* Don't use the system l_abc value. */
#define	CCF_ACKNOW		0x0008	/* Will this ack be sent now? */
#define	CCF_IPHDR_CE		0x0010	/* Does this packet set the CE bit? */
#define	CCF_TCPHDR_CWR		0x0020	/* Does this packet set the CWR bit? */
#define	CCF_UNUSED1		0x0040
#define	CCF_UNUSED2		0x0080
#define	CCF_UNUSED3		0x0100
#define	CCF_UNUSED4		0x0200
#define	CCF_HYSTART_ALLOWED	0x0400	/* Hystart is allowed, if the CC supports it. */
#define	CCF_HYSTART_CAN_SH_CWND	0x0800	/* Can hystart slam the cwnd when going CSS -> CA? */
#define	CCF_HYSTART_CONS_SSTH	0x1000	/* Should hystart use the more conservative ssthresh? */
114 
115 #endif /* defined(_KERNEL) || defined(_WANT_TCPCB) */
/*
 * Type codes handed to the CC hooks.  The ACK types are only visible
 * when _KERNEL or _WANT_TCPCB is defined; the congestion signal types
 * are always visible.
 */
typedef enum {
#if defined(_KERNEL) || defined(_WANT_TCPCB)
	/* ACK types, passed to the ack_received() hook. */
	CC_ACK		= 0x0001,	/* Regular in sequence ACK. */
	CC_DUPACK	= 0x0002,	/* Duplicate ACK. */
	CC_PARTIALACK	= 0x0004,	/* Not yet. */
	CC_SACK		= 0x0008,	/* Not yet. */
#endif /* defined(_KERNEL) || defined(_WANT_TCPCB) */
	/* Congestion signal types, passed to the cong_signal() hook. */
	CC_ECN		= 0x0100,	/* ECN marked packet received. */
	CC_RTO		= 0x0200,	/* RTO fired. */
	CC_RTO_ERR	= 0x0400,	/* RTO fired in error. */
	CC_NDUPACK	= 0x0800,	/* Threshold of dupacks reached. */
	/*
	 * The top 8 bits (0x01000000 - 0x80000000) are set aside so that
	 * CC algos can declare their own congestion signal types.
	 */
	CC_SIGPRIVMASK	= 0xFF000000	/* Mask to check if sig is private. */
} ccsignal_t;
135 
136 #ifdef _KERNEL
137 /*
138  * Structure to hold data and function pointers that together represent a
139  * congestion control algorithm.
140  */
141 struct cc_algo {
142 	char	name[TCP_CA_NAME_MAX];
143 
144 	/* Init global module state on kldload. */
145 	int	(*mod_init)(void);
146 
147 	/* Cleanup global module state on kldunload. */
148 	int	(*mod_destroy)(void);
149 
150 	/* Return the size of the void pointer the CC needs for state */
151 	size_t  (*cc_data_sz)(void);
152 
153 	/*
154 	 * Init CC state for a new control block. The CC
155 	 * module may be passed a NULL ptr indicating that
156 	 * it must allocate the memory. If it is passed a
157 	 * non-null pointer it is pre-allocated memory by
158 	 * the caller and the cb_init is expected to use that memory.
159 	 * It is not expected to fail if memory is passed in and
160 	 * all currently defined modules do not.
161 	 */
162 	int	(*cb_init)(struct cc_var *ccv, void *ptr);
163 
164 	/* Cleanup CC state for a terminating control block. */
165 	void	(*cb_destroy)(struct cc_var *ccv);
166 
167 	/* Init variables for a newly established connection. */
168 	void	(*conn_init)(struct cc_var *ccv);
169 
170 	/* Called on receipt of an ack. */
171 	void	(*ack_received)(struct cc_var *ccv, ccsignal_t type);
172 
173 	/* Called on detection of a congestion signal. */
174 	void	(*cong_signal)(struct cc_var *ccv, ccsignal_t type);
175 
176 	/* Called after exiting congestion recovery. */
177 	void	(*post_recovery)(struct cc_var *ccv);
178 
179 	/* Called when data transfer resumes after an idle period. */
180 	void	(*after_idle)(struct cc_var *ccv);
181 
182 	/* Called for an additional ECN processing apart from RFC3168. */
183 	void	(*ecnpkt_handler)(struct cc_var *ccv);
184 
185 	/* Called when a new "round" begins, if the transport is tracking rounds.  */
186 	void	(*newround)(struct cc_var *ccv, uint32_t round_cnt);
187 
188 	/*
189 	 *  Called when a RTT sample is made (fas = flight at send, if you dont have it
190 	 *  send the cwnd in).
191 	 */
192 	void	(*rttsample)(struct cc_var *ccv, uint32_t usec_rtt, uint32_t rxtcnt, uint32_t fas);
193 
194 	/* Called for {get|set}sockopt() on a TCP socket with TCP_CCALGOOPT. */
195 	int     (*ctl_output)(struct cc_var *, struct sockopt *, void *);
196 
197 	STAILQ_ENTRY (cc_algo) entries;
198 	u_int	cc_refcount;
199 	uint8_t flags;
200 };
201 
#define	CC_MODULE_BEING_REMOVED	0x01	/* The module is being removed. */

/* Obtain the CC algo's struct pointer from a tcpcb pointer. */
#define	CC_ALGO(tp)		((tp)->t_cc)

/* Obtain the CC algo's private data pointer from a tcpcb pointer. */
#define	CC_DATA(tp)		((tp)->t_ccv.cc_data)

/* Obtain the system default CC algo's struct pointer. */
#define	CC_DEFAULT_ALGO()	V_default_cc_ptr
212 
/*
 * Read/write lock named "cc_list", plus thin wrappers around the rw_*()
 * calls that operate on it.  CC_LIST_LOCK_ASSERT() checks for RA_LOCKED.
 */
extern struct rwlock cc_list_lock;
#define	CC_LIST_LOCK_INIT()	rw_init(&cc_list_lock, "cc_list")
#define	CC_LIST_LOCK_DESTROY()	rw_destroy(&cc_list_lock)
#define	CC_LIST_RLOCK()		rw_rlock(&cc_list_lock)
#define	CC_LIST_RUNLOCK()	rw_runlock(&cc_list_lock)
#define	CC_LIST_WLOCK()		rw_wlock(&cc_list_lock)
#define	CC_LIST_WUNLOCK()	rw_wunlock(&cc_list_lock)
#define	CC_LIST_LOCK_ASSERT()	rw_assert(&cc_list_lock, RA_LOCKED)

/* NOTE(review): presumably a size limit for TCP_CCALGOOPT data - confirm. */
#define	CC_ALGOOPT_LIMIT	2048
223 
224 /*
225  * These routines give NewReno behavior to the caller
226  * they require no state and can be used by any other CC
227  * module that wishes to use NewReno type behaviour (along
228  * with anything else they may add on, pre or post call).
229  */
230 void newreno_cc_post_recovery(struct cc_var *);
231 void newreno_cc_after_idle(struct cc_var *);
232 void newreno_cc_cong_signal(struct cc_var *, ccsignal_t);
233 void newreno_cc_ack_received(struct cc_var *, ccsignal_t);
234 u_int newreno_cc_cwnd_on_multiplicative_decrease(struct cc_var *ccv, uint32_t mss);
235 u_int newreno_cc_cwnd_in_cong_avoid(struct cc_var *ccv);
236 u_int newreno_cc_cwnd_in_slow_start(struct cc_var *ccv);
237 
/* Temporarily keep an algo from going away while a change is made. */
void	cc_refer(struct cc_algo *algo);
/* Release the temporary hold. */
void	cc_release(struct cc_algo *algo);

/* Attach a CC algorithm to a tcpcb. */
void	cc_attach(struct tcpcb *tp, struct cc_algo *algo);
/* Detach a CC algorithm from a tcpcb. */
void	cc_detach(struct tcpcb *tp);
247 
248 #endif /* _KERNEL */
249 #endif /* _NETINET_CC_CC_H_ */
250