1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2008-2010 Lawrence Stewart <lstewart@freebsd.org>
5 * Copyright (c) 2010 The FreeBSD Foundation
6 * All rights reserved.
7 *
8 * This software was developed by Lawrence Stewart while studying at the Centre
9 * for Advanced Internet Architectures, Swinburne University of Technology, made
10 * possible in part by a grant from the Cisco University Research Program Fund
11 * at Community Foundation Silicon Valley.
12 *
13 * Portions of this software were developed at the Centre for Advanced
14 * Internet Architectures, Swinburne University of Technology, Melbourne,
15 * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions
19 * are met:
20 * 1. Redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer.
22 * 2. Redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 */
38
39 #ifndef _NETINET_CC_CUBIC_H_
40 #define _NETINET_CC_CUBIC_H_
41
42 #include <sys/limits.h>
43
44 /* Number of bits of precision for fixed point math calcs. */
45 #define CUBIC_SHIFT 8
46
47 #define CUBIC_SHIFT_4 32
48
49 /* 0.5 << CUBIC_SHIFT. */
50 #define RENO_BETA 128
51
52 /* ~0.7 << CUBIC_SHIFT. */
53 #define CUBIC_BETA 179
54
55 /* ~0.3 << CUBIC_SHIFT. */
56 #define ONE_SUB_CUBIC_BETA 77
57
58 /* 3 * ONE_SUB_CUBIC_BETA. */
59 #define THREE_X_PT3 231
60
61 /* (2 << CUBIC_SHIFT) - ONE_SUB_CUBIC_BETA. */
62 #define TWO_SUB_PT3 435
63
64 /* ~0.4 << CUBIC_SHIFT. */
65 #define CUBIC_C_FACTOR 102
66
67 /* CUBIC fast convergence factor: (1+beta_cubic)/2. */
68 #define CUBIC_FC_FACTOR 217
69
70 /* Don't trust s_rtt until this many rtt samples have been taken. */
71 #define CUBIC_MIN_RTT_SAMPLES 8
72
73 /*
74 * (2^21)^3 is long max. Dividing (2^63) by Cubic_C_factor
75 * and taking cube-root yields 448845 as the effective useful limit
76 */
77 #define CUBED_ROOT_MAX_ULONG 448845
78
79 /* Flags used in the cubic structure */
80 #define CUBICFLAG_CONG_EVENT 0x00000001 /* congestion experienced */
81 #define CUBICFLAG_IN_SLOWSTART 0x00000002 /* in slow start */
82 #define CUBICFLAG_IN_APPLIMIT 0x00000004 /* application limited */
83 #define CUBICFLAG_RTO_EVENT 0x00000008 /* RTO experienced */
84 #define CUBICFLAG_HYSTART_ENABLED 0x00000010 /* Hystart++ is enabled */
85 #define CUBICFLAG_HYSTART_IN_CSS 0x00000020 /* We are in Hystart++ CSS */
86 #define CUBICFLAG_IN_TF 0x00000040 /* We are in TCP friendly region */
87
88 /* Kernel only bits */
89 #ifdef _KERNEL
90 struct cubic {
91 /*
92 * CUBIC K in fixed point form with CUBIC_SHIFT worth of precision.
93 * Also means the time period in seconds it takes to increase the
94 * congestion window size at the beginning of the current congestion
95 * avoidance stage to W_max.
96 */
97 int64_t K;
98 /* Sum of RTT samples across an epoch in usecs. */
99 int64_t sum_rtt_usecs;
100 /* Size of cwnd (in bytes) just before cwnd was reduced in the last congestion event. */
101 uint32_t W_max;
102 /* An estimate (in bytes) for the congestion window in the Reno-friendly region */
103 uint32_t W_est;
104 /* An estimate (in bytes) for the congestion window in the CUBIC region */
105 uint32_t W_cubic;
106 /* The cwnd (in bytes) at the beginning of the current congestion avoidance stage. */
107 uint32_t cwnd_epoch;
108 /* various flags */
109 uint32_t flags;
110 /* Minimum observed rtt in usecs. */
111 int min_rtt_usecs;
112 /* Mean observed rtt between congestion epochs. */
113 int mean_rtt_usecs;
114 /* ACKs since last congestion event. */
115 int epoch_ack_count;
116 /* Timestamp (in ticks) at which the current CA epoch started. */
117 int t_epoch;
118 /* Timestamp (in ticks) at which the previous CA epoch started. */
119 int undo_t_epoch;
120 /* Few variables to restore the state after RTO_ERR */
121 int64_t undo_K;
122 uint32_t undo_W_max;
123 uint32_t undo_cwnd_epoch;
124 uint32_t css_baseline_minrtt;
125 uint32_t css_current_round_minrtt;
126 uint32_t css_lastround_minrtt;
127 uint32_t css_rttsample_count;
128 uint32_t css_entered_at_round;
129 uint32_t css_current_round;
130 uint32_t css_fas_at_css_entry;
131 uint32_t css_lowrtt_fas;
132 uint32_t css_last_fas;
133 };
134 #endif
135
136 /* Userland only bits. */
137 #ifndef _KERNEL
138
139 extern int hz;
140
141 /*
142 * Implementation based on the formulas in RFC9438.
143 *
144 */
145
146
147 /*
148 * Returns K, the time period in seconds it takes to increase the congestion
149 * window size at the beginning of the current congestion avoidance stage to
150 * W_max.
151 */
152 static inline float
theoretical_cubic_k(uint32_t wmax_segs,uint32_t cwnd_epoch_segs)153 theoretical_cubic_k(uint32_t wmax_segs, uint32_t cwnd_epoch_segs)
154 {
155 double C;
156
157 C = 0.4;
158 if (wmax_segs <= cwnd_epoch_segs)
159 return 0.0;
160
161 /*
162 * Figure 2: K = ((W_max - cwnd_epoch) / C)^(1/3)
163 */
164 return (pow((wmax_segs - cwnd_epoch_segs) / C, (1.0 / 3.0)) * pow(2, CUBIC_SHIFT));
165 }
166
167 /*
168 * Returns the congestion window in segments at time t in seconds based on the
169 * cubic increase function, where t is the elapsed time in seconds from the
170 * beginning of the current congestion avoidance stage, as described in RFC9438
171 * Section 4.2.
172 */
173 static inline unsigned long
theoretical_cubic_cwnd(int ticks_elapsed,uint32_t wmax_segs,uint32_t cwnd_epoch_segs)174 theoretical_cubic_cwnd(int ticks_elapsed, uint32_t wmax_segs, uint32_t cwnd_epoch_segs)
175 {
176 double C, t;
177 float K;
178
179 C = 0.4;
180 t = ticks_elapsed / (double)hz;
181 K = theoretical_cubic_k(wmax_segs, cwnd_epoch_segs);
182
183 /*
184 * Figure 1: W_cubic(t) = C * (t - K)^3 + W_max
185 */
186 return (C * pow(t - K / pow(2, CUBIC_SHIFT), 3.0) + wmax_segs);
187 }
188
189 /*
190 * Returns estimated Reno congestion window in segments.
191 */
192 static inline unsigned long
theoretical_reno_cwnd(int ticks_elapsed,int rtt_ticks,uint32_t wmax_segs)193 theoretical_reno_cwnd(int ticks_elapsed, int rtt_ticks, uint32_t wmax_segs)
194 {
195
196 return (wmax_segs * 0.5 + ticks_elapsed / (float)rtt_ticks);
197 }
198
199 /*
200 * Returns an estimate for the congestion window in segments in the
201 * Reno-friendly region -- that is, an estimate for the congestion window of
202 * Reno, as described in RFC9438 Section 4.3, where:
203 * cwnd: Current congestion window in segments.
204 * cwnd_prior: Size of cwnd in segments at the time of setting ssthresh most
205 * recently, either upon exiting the first slow start or just before
206 * cwnd was reduced in the last congestion event.
207 * W_est: An estimate for the congestion window in segments in the Reno-friendly
208 * region -- that is, an estimate for the congestion window of Reno.
209 */
210 static inline unsigned long
theoretical_tf_cwnd(unsigned long W_est,unsigned long segs_acked,unsigned long cwnd,unsigned long cwnd_prior)211 theoretical_tf_cwnd(unsigned long W_est, unsigned long segs_acked, unsigned long cwnd,
212 unsigned long cwnd_prior)
213 {
214 float cubic_alpha, cubic_beta;
215
216 /* RFC9438 Section 4.6: The parameter β_cubic SHOULD be set to 0.7. */
217 cubic_beta = 0.7;
218
219 if (W_est >= cwnd_prior)
220 cubic_alpha = 1.0;
221 else
222 cubic_alpha = (3.0 * (1.0 - cubic_beta)) / (1.0 + cubic_beta);
223
224 /*
225 * Figure 4: W_est = W_est + α_cubic * segments_acked / cwnd
226 */
227 return (W_est + cubic_alpha * segs_acked / cwnd);
228 }
229
230 #endif /* !_KERNEL */
231
232 /*
233 * Compute the CUBIC K value used in the cwnd calculation, using an
234 * implementation mentioned in Figure. 2 of RFC9438.
235 * The method used here is adapted from Apple Computer Technical Report #KT-32.
236 */
237 static inline int64_t
cubic_k(uint32_t wmax_segs,uint32_t cwnd_epoch_segs)238 cubic_k(uint32_t wmax_segs, uint32_t cwnd_epoch_segs)
239 {
240 int64_t s, K;
241 uint16_t p;
242
243 K = s = 0;
244 p = 0;
245
246 /* Handle the corner case where W_max <= cwnd_epoch */
247 if (wmax_segs <= cwnd_epoch_segs) {
248 return 0;
249 }
250
251 /* (wmax - cwnd_epoch) / C with CUBIC_SHIFT worth of precision. */
252 s = ((wmax_segs - cwnd_epoch_segs) << (2 * CUBIC_SHIFT)) / CUBIC_C_FACTOR;
253
254 /* Rebase s to be between 1 and 1/8 with a shift of CUBIC_SHIFT. */
255 while (s >= 256) {
256 s >>= 3;
257 p++;
258 }
259
260 /*
261 * Some magic constants taken from the Apple TR with appropriate
262 * shifts: 275 == 1.072302 << CUBIC_SHIFT, 98 == 0.3812513 <<
263 * CUBIC_SHIFT, 120 == 0.46946116 << CUBIC_SHIFT.
264 */
265 K = (((s * 275) >> CUBIC_SHIFT) + 98) -
266 (((s * s * 120) >> CUBIC_SHIFT) >> CUBIC_SHIFT);
267
268 /* Multiply by 2^p to undo the rebasing of s from above. */
269 return (K <<= p);
270 }
271
272 /*
273 * Compute and return the new cwnd value in bytes using an implementation
274 * mentioned in Figure. 1 of RFC9438.
275 * Thanks to Kip Macy for help debugging this function.
276 *
277 * XXXLAS: Characterise bounds for overflow.
278 */
279 static inline uint32_t
cubic_cwnd(int usecs_since_epoch,uint32_t wmax,uint32_t smss,int64_t K)280 cubic_cwnd(int usecs_since_epoch, uint32_t wmax, uint32_t smss, int64_t K)
281 {
282 int64_t cwnd;
283
284 /* K is in fixed point form with CUBIC_SHIFT worth of precision. */
285
286 /* t - K, with CUBIC_SHIFT worth of precision. */
287 cwnd = (((int64_t)usecs_since_epoch << CUBIC_SHIFT) - (K * hz * tick)) /
288 (hz * tick);
289
290 if (cwnd > CUBED_ROOT_MAX_ULONG)
291 return INT_MAX;
292 if (cwnd < -CUBED_ROOT_MAX_ULONG)
293 return 0;
294
295 /* (t - K)^3, with CUBIC_SHIFT^3 worth of precision. */
296 cwnd *= (cwnd * cwnd);
297
298 /*
299 * Figure 1: C * (t - K)^3 + wmax
300 * The down shift by CUBIC_SHIFT_4 is because cwnd has 4 lots of
301 * CUBIC_SHIFT included in the value. 3 from the cubing of cwnd above,
302 * and an extra from multiplying through by CUBIC_C_FACTOR.
303 */
304
305 cwnd = ((cwnd * CUBIC_C_FACTOR) >> CUBIC_SHIFT_4) * smss + wmax;
306
307 /*
308 * for negative cwnd, limiting to zero as lower bound
309 */
310 return (lmax(0,cwnd));
311 }
312
313 /*
314 * Compute the "TCP friendly" cwnd by newreno in congestion avoidance state.
315 */
316 static inline uint32_t
tf_cwnd(struct cc_var * ccv)317 tf_cwnd(struct cc_var *ccv)
318 {
319 /* newreno is "TCP friendly" */
320 return newreno_cc_cwnd_in_cong_avoid(ccv);
321 }
322
323 #endif /* _NETINET_CC_CUBIC_H_ */
324