1 /* 2 * Copyright (c) 2008-2010 Lawrence Stewart <lstewart@freebsd.org> 3 * Copyright (c) 2010 The FreeBSD Foundation 4 * All rights reserved. 5 * Copyright (c) 2017 by Delphix. All rights reserved. 6 * Copyright 2019 Joyent, Inc. 7 * Copyright 2020 RackTop Systems, Inc. 8 * 9 * This software was developed by Lawrence Stewart while studying at the Centre 10 * for Advanced Internet Architectures, Swinburne University of Technology, made 11 * possible in part by a grant from the Cisco University Research Program Fund 12 * at Community Foundation Silicon Valley. 13 * 14 * Portions of this software were developed at the Centre for Advanced 15 * Internet Architectures, Swinburne University of Technology, Melbourne, 16 * Australia by David Hayes under sponsorship from the FreeBSD Foundation. 17 * 18 * Redistribution and use in source and binary forms, with or without 19 * modification, are permitted provided that the following conditions 20 * are met: 21 * 1. Redistributions of source code must retain the above copyright 22 * notice, this list of conditions and the following disclaimer. 23 * 2. Redistributions in binary form must reproduce the above copyright 24 * notice, this list of conditions and the following disclaimer in the 25 * documentation and/or other materials provided with the distribution. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 * 39 * $FreeBSD$ 40 */ 41 42 #ifndef _NETINET_CC_CUBIC_H_ 43 #define _NETINET_CC_CUBIC_H_ 44 45 /* Number of bits of precision for fixed point math calcs. */ 46 #define CUBIC_SHIFT 8 47 48 #define CUBIC_SHIFT_4 32 49 50 /* 0.5 << CUBIC_SHIFT. */ 51 #define RENO_BETA 128 52 53 /* ~0.8 << CUBIC_SHIFT. */ 54 #define CUBIC_BETA 204 55 56 /* ~0.2 << CUBIC_SHIFT. */ 57 #define ONE_SUB_CUBIC_BETA 51 58 59 /* 3 * ONE_SUB_CUBIC_BETA. */ 60 #define THREE_X_PT2 153 61 62 /* (2 << CUBIC_SHIFT) - ONE_SUB_CUBIC_BETA. */ 63 #define TWO_SUB_PT2 461 64 65 /* ~0.4 << CUBIC_SHIFT. */ 66 #define CUBIC_C_FACTOR 102 67 68 /* CUBIC fast convergence factor: ~0.9 << CUBIC_SHIFT. */ 69 #define CUBIC_FC_FACTOR 230 70 71 /* Don't trust s_rtt until this many rtt samples have been taken. */ 72 #define CUBIC_MIN_RTT_SAMPLES 8 73 74 /* 75 * (2^21)^3 is long max. Dividing (2^63) by Cubic_C_factor 76 * and taking cube-root yields 448845 as the effective useful limit 77 */ 78 #define CUBED_ROOT_MAX_ULONG 448845 79 80 /* Userland only bits. */ 81 #ifndef _KERNEL 82 83 extern int hz; 84 85 /* 86 * Implementation based on the formulae found in the CUBIC Internet Draft 87 * "draft-rhee-tcpm-cubic-02". 88 * 89 * Note BETA used in cc_cubic is equal to (1-beta) in the I-D 90 */ 91 92 static __inline float 93 theoretical_cubic_k(double wmax_pkts) 94 { 95 double C; 96 97 C = 0.4; 98 99 return (pow((wmax_pkts * 0.2) / C, (1.0 / 3.0)) * pow(2, CUBIC_SHIFT)); 100 } 101 102 static __inline uint32_t 103 theoretical_cubic_cwnd(int ticks_since_cong, uint32_t wmax, uint32_t smss) 104 { 105 double C, wmax_pkts; 106 107 C = 0.4; 108 wmax_pkts = wmax / (double)smss; 109 110 return (smss * (wmax_pkts + 111 (C * pow(ticks_since_cong / (double)hz - 112 theoretical_cubic_k(wmax_pkts) / pow(2, CUBIC_SHIFT), 3.0)))); 113 } 114 115 static __inline uint32_t 116 theoretical_reno_cwnd(int ticks_since_cong, int rtt_ticks, uint32_t wmax, 117 uint32_t smss) 118 { 119 120 return ((wmax * 0.5) + ((ticks_since_cong / (float)rtt_ticks) * smss)); 121 } 122 123 static __inline uint32_t 124 theoretical_tf_cwnd(int ticks_since_cong, int rtt_ticks, unsigned long wmax, 125 uint32_t smss) 126 { 127 128 return ((wmax * 0.8) + ((3 * 0.2) / (2 - 0.2) * 129 (ticks_since_cong / (float)rtt_ticks) * smss)); 130 } 131 132 #endif /* !_KERNEL */ 133 134 /* 135 * Compute the CUBIC K value used in the cwnd calculation, using an 136 * implementation of eqn 2 in the I-D. The method used 137 * here is adapted from Apple Computer Technical Report #KT-32. 138 */ 139 static __inline int64_t 140 cubic_k(uint32_t wmax_pkts) 141 { 142 int64_t s, K; 143 uint16_t p; 144 145 K = s = 0; 146 p = 0; 147 148 /* (wmax * beta)/C with CUBIC_SHIFT worth of precision. */ 149 s = ((wmax_pkts * ONE_SUB_CUBIC_BETA) << CUBIC_SHIFT) / CUBIC_C_FACTOR; 150 151 /* Rebase s to be between 1 and 1/8 with a shift of CUBIC_SHIFT. */ 152 while (s >= 256) { 153 s >>= 3; 154 p++; 155 } 156 157 /* 158 * Some magic constants taken from the Apple TR with appropriate 159 * shifts: 275 == 1.072302 << CUBIC_SHIFT, 98 == 0.3812513 << 160 * CUBIC_SHIFT, 120 == 0.46946116 << CUBIC_SHIFT. 161 */ 162 K = (((s * 275) >> CUBIC_SHIFT) + 98) - 163 (((s * s * 120) >> CUBIC_SHIFT) >> CUBIC_SHIFT); 164 165 /* Multiply by 2^p to undo the rebasing of s from above. */ 166 return (K <<= p); 167 } 168 169 /* 170 * Compute the new cwnd value using an implementation of eqn 1 from the I-D. 171 * Thanks to Kip Macy for help debugging this function. 172 * 173 * XXXLAS: Characterise bounds for overflow. 174 */ 175 static __inline uint32_t 176 cubic_cwnd(hrtime_t nsecs_since_cong, uint32_t wmax, uint32_t smss, int64_t K) 177 { 178 int64_t t, cwnd; 179 180 /* 181 * Convert nsecs_since_cong to milliseconds, with CUBIC_SHIFT worth 182 * of precision. 183 */ 184 t = NSEC2MSEC(nsecs_since_cong << CUBIC_SHIFT); 185 186 /* 187 * K is the time period in seconds that it will take to reach wmax. The 188 * value is kept in fixed point form with CUBIC_SHIFT worth of 189 * precision. 190 * 191 * For comparison with t, we convert K to milliseconds, and then convert 192 * the result back to seconds. 193 * 194 * cwnd = t - K, with CUBIC_SHIFT worth of precision. 195 */ 196 cwnd = (t - K * MILLISEC) / MILLISEC; 197 198 if (cwnd > CUBED_ROOT_MAX_ULONG) 199 return (INT_MAX); 200 if (cwnd < -CUBED_ROOT_MAX_ULONG) 201 return (0); 202 203 /* cwnd = (t - K)^3, with CUBIC_SHIFT^3 worth of precision. */ 204 cwnd *= (cwnd * cwnd); 205 206 /* 207 * C(t - K)^3 + wmax 208 * The down shift by CUBIC_SHIFT_4 is because cwnd has 4 lots of 209 * CUBIC_SHIFT included in the value. 3 from the cubing of cwnd above, 210 * and an extra from multiplying through by CUBIC_C_FACTOR. 211 */ 212 cwnd = ((cwnd * CUBIC_C_FACTOR * smss) >> CUBIC_SHIFT_4) + wmax; 213 214 /* 215 * for negative cwnd, limiting to zero as lower bound 216 */ 217 return (max(0, cwnd)); 218 } 219 220 /* 221 * Compute an approximation of the "TCP friendly" cwnd some number of 222 * nanoseconds after a congestion event that is designed to yield the same 223 * average cwnd as NewReno while using CUBIC's beta of 0.8. RTT should be the 224 * average RTT estimate for the path measured over the previous congestion 225 * epoch and wmax is the value of cwnd at the last congestion event. 226 */ 227 static __inline uint32_t 228 tf_cwnd(hrtime_t nsecs_since_cong, hrtime_t rtt_nsecs, uint32_t wmax, 229 uint32_t smss) 230 { 231 232 /* Equation 4 of I-D. */ 233 return (((wmax * CUBIC_BETA) + (((THREE_X_PT2 * nsecs_since_cong * 234 smss) << CUBIC_SHIFT) / TWO_SUB_PT2 / rtt_nsecs)) >> CUBIC_SHIFT); 235 } 236 237 #endif /* _NETINET_CC_CUBIC_H_ */ 238