1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2008-2010 Lawrence Stewart <lstewart@freebsd.org> 5 * Copyright (c) 2010 The FreeBSD Foundation 6 * All rights reserved. 7 * 8 * This software was developed by Lawrence Stewart while studying at the Centre 9 * for Advanced Internet Architectures, Swinburne University of Technology, made 10 * possible in part by a grant from the Cisco University Research Program Fund 11 * at Community Foundation Silicon Valley. 12 * 13 * Portions of this software were developed at the Centre for Advanced 14 * Internet Architectures, Swinburne University of Technology, Melbourne, 15 * Australia by David Hayes under sponsorship from the FreeBSD Foundation. 16 * 17 * Redistribution and use in source and binary forms, with or without 18 * modification, are permitted provided that the following conditions 19 * are met: 20 * 1. Redistributions of source code must retain the above copyright 21 * notice, this list of conditions and the following disclaimer. 22 * 2. Redistributions in binary form must reproduce the above copyright 23 * notice, this list of conditions and the following disclaimer in the 24 * documentation and/or other materials provided with the distribution. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39 #ifndef _NETINET_CC_CUBIC_H_ 40 #define _NETINET_CC_CUBIC_H_ 41 42 #include <sys/limits.h> 43 44 /* Number of bits of precision for fixed point math calcs. */ 45 #define CUBIC_SHIFT 8 46 47 #define CUBIC_SHIFT_4 32 48 49 /* 0.5 << CUBIC_SHIFT. */ 50 #define RENO_BETA 128 51 52 /* ~0.7 << CUBIC_SHIFT. */ 53 #define CUBIC_BETA 179 54 55 /* ~0.3 << CUBIC_SHIFT. */ 56 #define ONE_SUB_CUBIC_BETA 77 57 58 /* 3 * ONE_SUB_CUBIC_BETA. */ 59 #define THREE_X_PT3 231 60 61 /* (2 << CUBIC_SHIFT) - ONE_SUB_CUBIC_BETA. */ 62 #define TWO_SUB_PT3 435 63 64 /* ~0.4 << CUBIC_SHIFT. */ 65 #define CUBIC_C_FACTOR 102 66 67 /* CUBIC fast convergence factor: (1+beta_cubic)/2. */ 68 #define CUBIC_FC_FACTOR 217 69 70 /* Don't trust s_rtt until this many rtt samples have been taken. */ 71 #define CUBIC_MIN_RTT_SAMPLES 8 72 73 /* 74 * (2^21)^3 is long max. Dividing (2^63) by Cubic_C_factor 75 * and taking cube-root yields 448845 as the effective useful limit 76 */ 77 #define CUBED_ROOT_MAX_ULONG 448845 78 79 /* Flags used in the cubic structure */ 80 #define CUBICFLAG_CONG_EVENT 0x00000001 /* congestion experienced */ 81 #define CUBICFLAG_IN_SLOWSTART 0x00000002 /* in slow start */ 82 #define CUBICFLAG_IN_APPLIMIT 0x00000004 /* application limited */ 83 #define CUBICFLAG_RTO_EVENT 0x00000008 /* RTO experienced */ 84 #define CUBICFLAG_HYSTART_ENABLED 0x00000010 /* Hystart++ is enabled */ 85 #define CUBICFLAG_HYSTART_IN_CSS 0x00000020 /* We are in Hystart++ CSS */ 86 #define CUBICFLAG_IN_TF 0x00000040 /* We are in TCP friendly region */ 87 88 /* Kernel only bits */ 89 #ifdef _KERNEL 90 struct cubic { 91 /* 92 * CUBIC K in fixed point form with CUBIC_SHIFT worth of precision. 93 * Also means the time period in seconds it takes to increase the 94 * congestion window size at the beginning of the current congestion 95 * avoidance stage to W_max. 96 */ 97 int64_t K; 98 /* Sum of RTT samples across an epoch in usecs. */ 99 int64_t sum_rtt_usecs; 100 /* Size of cwnd (in bytes) just before cwnd was reduced in the last congestion event. */ 101 uint32_t W_max; 102 /* An estimate (in bytes) for the congestion window in the Reno-friendly region */ 103 uint32_t W_est; 104 /* An estimate (in bytes) for the congestion window in the CUBIC region */ 105 uint32_t W_cubic; 106 /* The cwnd (in bytes) at the beginning of the current congestion avoidance stage. */ 107 uint32_t cwnd_epoch; 108 /* various flags */ 109 uint32_t flags; 110 /* Minimum observed rtt in usecs. */ 111 int min_rtt_usecs; 112 /* Mean observed rtt between congestion epochs. */ 113 int mean_rtt_usecs; 114 /* ACKs since last congestion event. */ 115 int epoch_ack_count; 116 /* Timestamp (in ticks) at which the current CA epoch started. */ 117 int t_epoch; 118 /* Timestamp (in ticks) at which the previous CA epoch started. */ 119 int undo_t_epoch; 120 /* Few variables to restore the state after RTO_ERR */ 121 int64_t undo_K; 122 uint32_t undo_W_max; 123 uint32_t undo_cwnd_epoch; 124 uint32_t css_baseline_minrtt; 125 uint32_t css_current_round_minrtt; 126 uint32_t css_lastround_minrtt; 127 uint32_t css_rttsample_count; 128 uint32_t css_entered_at_round; 129 uint32_t css_current_round; 130 uint32_t css_fas_at_css_entry; 131 uint32_t css_lowrtt_fas; 132 uint32_t css_last_fas; 133 }; 134 #endif 135 136 /* Userland only bits. */ 137 #ifndef _KERNEL 138 139 extern int hz; 140 141 /* 142 * Implementation based on the formulas in RFC9438. 143 * 144 */ 145 146 147 /* 148 * Returns K, the time period in seconds it takes to increase the congestion 149 * window size at the beginning of the current congestion avoidance stage to 150 * W_max. 151 */ 152 static inline float 153 theoretical_cubic_k(uint32_t wmax_segs, uint32_t cwnd_epoch_segs) 154 { 155 double C; 156 157 C = 0.4; 158 if (wmax_segs <= cwnd_epoch_segs) 159 return 0.0; 160 161 /* 162 * Figure 2: K = ((W_max - cwnd_epoch) / C)^(1/3) 163 */ 164 return (pow((wmax_segs - cwnd_epoch_segs) / C, (1.0 / 3.0)) * pow(2, CUBIC_SHIFT)); 165 } 166 167 /* 168 * Returns the congestion window in segments at time t in seconds based on the 169 * cubic increase function, where t is the elapsed time in seconds from the 170 * beginning of the current congestion avoidance stage, as described in RFC9438 171 * Section 4.2. 172 */ 173 static inline unsigned long 174 theoretical_cubic_cwnd(int ticks_elapsed, uint32_t wmax_segs, uint32_t cwnd_epoch_segs) 175 { 176 double C, t; 177 float K; 178 179 C = 0.4; 180 t = ticks_elapsed / (double)hz; 181 K = theoretical_cubic_k(wmax_segs, cwnd_epoch_segs); 182 183 /* 184 * Figure 1: W_cubic(t) = C * (t - K)^3 + W_max 185 */ 186 return (C * pow(t - K / pow(2, CUBIC_SHIFT), 3.0) + wmax_segs); 187 } 188 189 /* 190 * Returns estimated Reno congestion window in segments. 191 */ 192 static inline unsigned long 193 theoretical_reno_cwnd(int ticks_elapsed, int rtt_ticks, uint32_t wmax_segs) 194 { 195 196 return (wmax_segs * 0.5 + ticks_elapsed / (float)rtt_ticks); 197 } 198 199 /* 200 * Returns an estimate for the congestion window in segments in the 201 * Reno-friendly region -- that is, an estimate for the congestion window of 202 * Reno, as described in RFC9438 Section 4.3, where: 203 * cwnd: Current congestion window in segments. 204 * cwnd_prior: Size of cwnd in segments at the time of setting ssthresh most 205 * recently, either upon exiting the first slow start or just before 206 * cwnd was reduced in the last congestion event. 207 * W_est: An estimate for the congestion window in segments in the Reno-friendly 208 * region -- that is, an estimate for the congestion window of Reno. 209 */ 210 static inline unsigned long 211 theoretical_tf_cwnd(unsigned long W_est, unsigned long segs_acked, unsigned long cwnd, 212 unsigned long cwnd_prior) 213 { 214 float cubic_alpha, cubic_beta; 215 216 /* RFC9438 Section 4.6: The parameter β_cubic SHOULD be set to 0.7. */ 217 cubic_beta = 0.7; 218 219 if (W_est >= cwnd_prior) 220 cubic_alpha = 1.0; 221 else 222 cubic_alpha = (3.0 * (1.0 - cubic_beta)) / (1.0 + cubic_beta); 223 224 /* 225 * Figure 4: W_est = W_est + α_cubic * segments_acked / cwnd 226 */ 227 return (W_est + cubic_alpha * segs_acked / cwnd); 228 } 229 230 #endif /* !_KERNEL */ 231 232 /* 233 * Compute the CUBIC K value used in the cwnd calculation, using an 234 * implementation mentioned in Figure. 2 of RFC9438. 235 * The method used here is adapted from Apple Computer Technical Report #KT-32. 236 */ 237 static inline int64_t 238 cubic_k(uint32_t wmax_segs, uint32_t cwnd_epoch_segs) 239 { 240 int64_t s, K; 241 uint16_t p; 242 243 K = s = 0; 244 p = 0; 245 246 /* Handle the corner case where W_max <= cwnd_epoch */ 247 if (wmax_segs <= cwnd_epoch_segs) { 248 return 0; 249 } 250 251 /* (wmax - cwnd_epoch) / C with CUBIC_SHIFT worth of precision. */ 252 s = ((wmax_segs - cwnd_epoch_segs) << (2 * CUBIC_SHIFT)) / CUBIC_C_FACTOR; 253 254 /* Rebase s to be between 1 and 1/8 with a shift of CUBIC_SHIFT. */ 255 while (s >= 256) { 256 s >>= 3; 257 p++; 258 } 259 260 /* 261 * Some magic constants taken from the Apple TR with appropriate 262 * shifts: 275 == 1.072302 << CUBIC_SHIFT, 98 == 0.3812513 << 263 * CUBIC_SHIFT, 120 == 0.46946116 << CUBIC_SHIFT. 264 */ 265 K = (((s * 275) >> CUBIC_SHIFT) + 98) - 266 (((s * s * 120) >> CUBIC_SHIFT) >> CUBIC_SHIFT); 267 268 /* Multiply by 2^p to undo the rebasing of s from above. */ 269 return (K <<= p); 270 } 271 272 /* 273 * Compute and return the new cwnd value in bytes using an implementation 274 * mentioned in Figure. 1 of RFC9438. 275 * Thanks to Kip Macy for help debugging this function. 276 * 277 * XXXLAS: Characterise bounds for overflow. 278 */ 279 static inline uint32_t 280 cubic_cwnd(int usecs_since_epoch, uint32_t wmax, uint32_t smss, int64_t K) 281 { 282 int64_t cwnd; 283 284 /* K is in fixed point form with CUBIC_SHIFT worth of precision. */ 285 286 /* t - K, with CUBIC_SHIFT worth of precision. */ 287 cwnd = (((int64_t)usecs_since_epoch << CUBIC_SHIFT) - (K * hz * tick)) / 288 (hz * tick); 289 290 if (cwnd > CUBED_ROOT_MAX_ULONG) 291 return INT_MAX; 292 if (cwnd < -CUBED_ROOT_MAX_ULONG) 293 return 0; 294 295 /* (t - K)^3, with CUBIC_SHIFT^3 worth of precision. */ 296 cwnd *= (cwnd * cwnd); 297 298 /* 299 * Figure 1: C * (t - K)^3 + wmax 300 * The down shift by CUBIC_SHIFT_4 is because cwnd has 4 lots of 301 * CUBIC_SHIFT included in the value. 3 from the cubing of cwnd above, 302 * and an extra from multiplying through by CUBIC_C_FACTOR. 303 */ 304 305 cwnd = ((cwnd * CUBIC_C_FACTOR) >> CUBIC_SHIFT_4) * smss + wmax; 306 307 /* 308 * for negative cwnd, limiting to zero as lower bound 309 */ 310 return (lmax(0,cwnd)); 311 } 312 313 /* 314 * Compute the "TCP friendly" cwnd by newreno in congestion avoidance state. 315 */ 316 static inline uint32_t 317 tf_cwnd(struct cc_var *ccv) 318 { 319 /* newreno is "TCP friendly" */ 320 return newreno_cc_cwnd_in_cong_avoid(ccv); 321 } 322 323 #endif /* _NETINET_CC_CUBIC_H_ */ 324