xref: /freebsd/sys/netinet/cc/cc_cubic.h (revision a2f579635f6726b2eb3e7f622985fde1abcd0d82)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2008-2010 Lawrence Stewart <lstewart@freebsd.org>
5  * Copyright (c) 2010 The FreeBSD Foundation
6  * All rights reserved.
7  *
8  * This software was developed by Lawrence Stewart while studying at the Centre
9  * for Advanced Internet Architectures, Swinburne University of Technology, made
10  * possible in part by a grant from the Cisco University Research Program Fund
11  * at Community Foundation Silicon Valley.
12  *
13  * Portions of this software were developed at the Centre for Advanced
14  * Internet Architectures, Swinburne University of Technology, Melbourne,
15  * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
16  *
17  * Redistribution and use in source and binary forms, with or without
18  * modification, are permitted provided that the following conditions
19  * are met:
20  * 1. Redistributions of source code must retain the above copyright
21  *    notice, this list of conditions and the following disclaimer.
22  * 2. Redistributions in binary form must reproduce the above copyright
23  *    notice, this list of conditions and the following disclaimer in the
24  *    documentation and/or other materials provided with the distribution.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  */
38 
39 #ifndef _NETINET_CC_CUBIC_H_
40 #define _NETINET_CC_CUBIC_H_
41 
42 #include <sys/limits.h>
43 
44 /* Number of bits of precision for fixed point math calcs. */
45 #define	CUBIC_SHIFT		8
46 
47 #define	CUBIC_SHIFT_4		32
48 
49 /* 0.5 << CUBIC_SHIFT. */
50 #define	RENO_BETA		128
51 
52 /* ~0.7 << CUBIC_SHIFT. */
53 #define	CUBIC_BETA		179
54 
55 /* ~0.3 << CUBIC_SHIFT. */
56 #define	ONE_SUB_CUBIC_BETA	77
57 
58 /* 3 * ONE_SUB_CUBIC_BETA. */
59 #define	THREE_X_PT3		231
60 
61 /* (2 << CUBIC_SHIFT) - ONE_SUB_CUBIC_BETA. */
62 #define	TWO_SUB_PT3		435
63 
64 /* ~0.4 << CUBIC_SHIFT. */
65 #define	CUBIC_C_FACTOR		102
66 
67 /* CUBIC fast convergence factor: (1+beta_cubic)/2. */
68 #define	CUBIC_FC_FACTOR		217
69 
70 /* Don't trust s_rtt until this many rtt samples have been taken. */
71 #define	CUBIC_MIN_RTT_SAMPLES	8
72 
73 /*
74  * (2^21)^3 is long max. Dividing (2^63) by Cubic_C_factor
75  * and taking cube-root yields 448845 as the effective useful limit
76  */
77 #define	CUBED_ROOT_MAX_ULONG	448845
78 
79 /* Flags used in the cubic structure */
80 #define CUBICFLAG_CONG_EVENT		0x00000001	/* congestion experienced */
81 #define CUBICFLAG_IN_SLOWSTART		0x00000002	/* in slow start */
82 #define CUBICFLAG_IN_APPLIMIT		0x00000004	/* application limited */
83 #define CUBICFLAG_RTO_EVENT		0x00000008	/* RTO experienced */
84 #define CUBICFLAG_HYSTART_ENABLED	0x00000010	/* Hystart++ is enabled */
85 #define CUBICFLAG_HYSTART_IN_CSS	0x00000020	/* We are in Hystart++ CSS */
86 #define CUBICFLAG_IN_TF			0x00000040	/* We are in TCP friendly region */
87 
88 /* Kernel only bits */
89 #ifdef _KERNEL
90 struct cubic {
91 	/*
92 	 * CUBIC K in fixed point form with CUBIC_SHIFT worth of precision.
93 	 * Also means the time period in seconds it takes to increase the
94 	 * congestion window size at the beginning of the current congestion
95 	 * avoidance stage to W_max.
96 	 */
97 	int64_t		K;
98 	/* Sum of RTT samples across an epoch in usecs. */
99 	int64_t		sum_rtt_usecs;
100 	/* Size of cwnd (in bytes) just before cwnd was reduced in the last congestion event. */
101 	uint32_t	W_max;
102 	/* An estimate (in bytes) for the congestion window in the Reno-friendly region */
103 	uint32_t	W_est;
104 	/* An estimate (in bytes) for the congestion window in the CUBIC region */
105 	uint32_t	W_cubic;
106 	/* The cwnd (in bytes) at the beginning of the current congestion avoidance stage. */
107 	uint32_t	cwnd_epoch;
108 	/* various flags */
109 	uint32_t	flags;
110 	/* Minimum observed rtt in usecs. */
111 	int		min_rtt_usecs;
112 	/* Mean observed rtt between congestion epochs. */
113 	int		mean_rtt_usecs;
114 	/* ACKs since last congestion event. */
115 	int		epoch_ack_count;
116 	/* Timestamp (in ticks) at which the current CA epoch started. */
117 	int		t_epoch;
118 	/* Timestamp (in ticks) at which the previous CA epoch started. */
119 	int		undo_t_epoch;
120 	/* Few variables to restore the state after RTO_ERR */
121 	int64_t		undo_K;
122 	uint32_t	undo_W_max;
123 	uint32_t	undo_cwnd_epoch;
124 	uint32_t css_baseline_minrtt;
125 	uint32_t css_current_round_minrtt;
126 	uint32_t css_lastround_minrtt;
127 	uint32_t css_rttsample_count;
128 	uint32_t css_entered_at_round;
129 	uint32_t css_current_round;
130 	uint32_t css_fas_at_css_entry;
131 	uint32_t css_lowrtt_fas;
132 	uint32_t css_last_fas;
133 };
134 #endif
135 
136 /* Userland only bits. */
137 #ifndef _KERNEL
138 
139 extern int hz;
140 
141 /*
142  * Implementation based on the formulas in RFC9438.
143  *
144  */
145 
146 
147 /*
148  * Returns K, the time period in seconds it takes to increase the congestion
149  * window size at the beginning of the current congestion avoidance stage to
150  * W_max.
151  */
152 static inline float
theoretical_cubic_k(uint32_t wmax_segs,uint32_t cwnd_epoch_segs)153 theoretical_cubic_k(uint32_t wmax_segs, uint32_t cwnd_epoch_segs)
154 {
155 	double C;
156 
157 	C = 0.4;
158 	if (wmax_segs <= cwnd_epoch_segs)
159 		return 0.0;
160 
161 	/*
162 	 * Figure 2: K = ((W_max - cwnd_epoch) / C)^(1/3)
163 	 */
164 	return (pow((wmax_segs - cwnd_epoch_segs) / C, (1.0 / 3.0)) * pow(2, CUBIC_SHIFT));
165 }
166 
167 /*
168  * Returns the congestion window in segments at time t in seconds based on the
169  * cubic increase function, where t is the elapsed time in seconds from the
170  * beginning of the current congestion avoidance stage, as described in RFC9438
171  * Section 4.2.
172  */
173 static inline unsigned long
theoretical_cubic_cwnd(int ticks_elapsed,uint32_t wmax_segs,uint32_t cwnd_epoch_segs)174 theoretical_cubic_cwnd(int ticks_elapsed, uint32_t wmax_segs, uint32_t cwnd_epoch_segs)
175 {
176 	double C, t;
177 	float K;
178 
179 	C = 0.4;
180 	t = ticks_elapsed / (double)hz;
181 	K = theoretical_cubic_k(wmax_segs, cwnd_epoch_segs);
182 
183 	/*
184 	 * Figure 1: W_cubic(t) = C * (t - K)^3 + W_max
185 	 */
186 	return (C * pow(t - K / pow(2, CUBIC_SHIFT), 3.0) + wmax_segs);
187 }
188 
189 /*
190  * Returns estimated Reno congestion window in segments.
191  */
192 static inline unsigned long
theoretical_reno_cwnd(int ticks_elapsed,int rtt_ticks,uint32_t wmax_segs)193 theoretical_reno_cwnd(int ticks_elapsed, int rtt_ticks, uint32_t wmax_segs)
194 {
195 
196 	return (wmax_segs * 0.5 + ticks_elapsed / (float)rtt_ticks);
197 }
198 
199 /*
200  * Returns an estimate for the congestion window in segments in the
201  * Reno-friendly region -- that is, an estimate for the congestion window of
202  * Reno, as described in RFC9438 Section 4.3, where:
203  * cwnd: Current congestion window in segments.
204  * cwnd_prior: Size of cwnd in segments at the time of setting ssthresh most
205  *             recently, either upon exiting the first slow start or just before
206  *             cwnd was reduced in the last congestion event.
207  * W_est: An estimate for the congestion window in segments in the Reno-friendly
208  *        region -- that is, an estimate for the congestion window of Reno.
209  */
210 static inline unsigned long
theoretical_tf_cwnd(unsigned long W_est,unsigned long segs_acked,unsigned long cwnd,unsigned long cwnd_prior)211 theoretical_tf_cwnd(unsigned long W_est, unsigned long segs_acked, unsigned long cwnd,
212     unsigned long cwnd_prior)
213 {
214 	float cubic_alpha, cubic_beta;
215 
216 	/* RFC9438 Section 4.6: The parameter β_cubic SHOULD be set to 0.7. */
217 	cubic_beta = 0.7;
218 
219 	if (W_est >= cwnd_prior)
220 		cubic_alpha = 1.0;
221 	else
222 		cubic_alpha = (3.0 * (1.0 - cubic_beta)) / (1.0 + cubic_beta);
223 
224 	/*
225 	 * Figure 4: W_est = W_est + α_cubic * segments_acked / cwnd
226 	 */
227 	return (W_est + cubic_alpha * segs_acked / cwnd);
228 }
229 
230 #endif /* !_KERNEL */
231 
232 /*
233  * Compute the CUBIC K value used in the cwnd calculation, using an
234  * implementation mentioned in Figure. 2 of RFC9438.
235  * The method used here is adapted from Apple Computer Technical Report #KT-32.
236  */
237 static inline int64_t
cubic_k(uint32_t wmax_segs,uint32_t cwnd_epoch_segs)238 cubic_k(uint32_t wmax_segs, uint32_t cwnd_epoch_segs)
239 {
240 	int64_t s, K;
241 	uint16_t p;
242 
243 	K = s = 0;
244 	p = 0;
245 
246 	/* Handle the corner case where W_max <= cwnd_epoch */
247 	if (wmax_segs <= cwnd_epoch_segs) {
248 		return 0;
249 	}
250 
251 	/* (wmax - cwnd_epoch) / C with CUBIC_SHIFT worth of precision. */
252 	s = ((wmax_segs - cwnd_epoch_segs) << (2 * CUBIC_SHIFT)) / CUBIC_C_FACTOR;
253 
254 	/* Rebase s to be between 1 and 1/8 with a shift of CUBIC_SHIFT. */
255 	while (s >= 256) {
256 		s >>= 3;
257 		p++;
258 	}
259 
260 	/*
261 	 * Some magic constants taken from the Apple TR with appropriate
262 	 * shifts: 275 == 1.072302 << CUBIC_SHIFT, 98 == 0.3812513 <<
263 	 * CUBIC_SHIFT, 120 == 0.46946116 << CUBIC_SHIFT.
264 	 */
265 	K = (((s * 275) >> CUBIC_SHIFT) + 98) -
266 	    (((s * s * 120) >> CUBIC_SHIFT) >> CUBIC_SHIFT);
267 
268 	/* Multiply by 2^p to undo the rebasing of s from above. */
269 	return (K <<= p);
270 }
271 
272 /*
273  * Compute and return the new cwnd value in bytes using an implementation
274  * mentioned in Figure. 1 of RFC9438.
275  * Thanks to Kip Macy for help debugging this function.
276  *
277  * XXXLAS: Characterise bounds for overflow.
278  */
279 static inline uint32_t
cubic_cwnd(int usecs_since_epoch,uint32_t wmax,uint32_t smss,int64_t K)280 cubic_cwnd(int usecs_since_epoch, uint32_t wmax, uint32_t smss, int64_t K)
281 {
282 	int64_t cwnd;
283 
284 	/* K is in fixed point form with CUBIC_SHIFT worth of precision. */
285 
286 	/* t - K, with CUBIC_SHIFT worth of precision. */
287 	cwnd = (((int64_t)usecs_since_epoch << CUBIC_SHIFT) - (K * hz * tick)) /
288 	       (hz * tick);
289 
290 	if (cwnd > CUBED_ROOT_MAX_ULONG)
291 		return INT_MAX;
292 	if (cwnd < -CUBED_ROOT_MAX_ULONG)
293 		return 0;
294 
295 	/* (t - K)^3, with CUBIC_SHIFT^3 worth of precision. */
296 	cwnd *= (cwnd * cwnd);
297 
298 	/*
299 	 * Figure 1: C * (t - K)^3 + wmax
300 	 * The down shift by CUBIC_SHIFT_4 is because cwnd has 4 lots of
301 	 * CUBIC_SHIFT included in the value. 3 from the cubing of cwnd above,
302 	 * and an extra from multiplying through by CUBIC_C_FACTOR.
303 	 */
304 
305 	cwnd = ((cwnd * CUBIC_C_FACTOR) >> CUBIC_SHIFT_4) * smss + wmax;
306 
307 	/*
308 	 * for negative cwnd, limiting to zero as lower bound
309 	 */
310 	return (lmax(0,cwnd));
311 }
312 
313 /*
314  * Compute the "TCP friendly" cwnd by newreno in congestion avoidance state.
315  */
316 static inline uint32_t
tf_cwnd(struct cc_var * ccv)317 tf_cwnd(struct cc_var *ccv)
318 {
319 	/* newreno is "TCP friendly" */
320 	return newreno_cc_cwnd_in_cong_avoid(ccv);
321 }
322 
323 #endif /* _NETINET_CC_CUBIC_H_ */
324