167fef78bSLawrence Stewart /*- 24d846d26SWarner Losh * SPDX-License-Identifier: BSD-2-Clause 3fe267a55SPedro F. Giffuni * 467fef78bSLawrence Stewart * Copyright (c) 2008-2010 Lawrence Stewart <lstewart@freebsd.org> 567fef78bSLawrence Stewart * Copyright (c) 2010 The FreeBSD Foundation 667fef78bSLawrence Stewart * All rights reserved. 767fef78bSLawrence Stewart * 867fef78bSLawrence Stewart * This software was developed by Lawrence Stewart while studying at the Centre 9891b8ed4SLawrence Stewart * for Advanced Internet Architectures, Swinburne University of Technology, made 10891b8ed4SLawrence Stewart * possible in part by a grant from the Cisco University Research Program Fund 11891b8ed4SLawrence Stewart * at Community Foundation Silicon Valley. 1267fef78bSLawrence Stewart * 1367fef78bSLawrence Stewart * Portions of this software were developed at the Centre for Advanced 1467fef78bSLawrence Stewart * Internet Architectures, Swinburne University of Technology, Melbourne, 1567fef78bSLawrence Stewart * Australia by David Hayes under sponsorship from the FreeBSD Foundation. 1667fef78bSLawrence Stewart * 1767fef78bSLawrence Stewart * Redistribution and use in source and binary forms, with or without 1867fef78bSLawrence Stewart * modification, are permitted provided that the following conditions 1967fef78bSLawrence Stewart * are met: 2067fef78bSLawrence Stewart * 1. Redistributions of source code must retain the above copyright 2167fef78bSLawrence Stewart * notice, this list of conditions and the following disclaimer. 2267fef78bSLawrence Stewart * 2. Redistributions in binary form must reproduce the above copyright 2367fef78bSLawrence Stewart * notice, this list of conditions and the following disclaimer in the 2467fef78bSLawrence Stewart * documentation and/or other materials provided with the distribution. 2567fef78bSLawrence Stewart * 2667fef78bSLawrence Stewart * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 2767fef78bSLawrence Stewart * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2867fef78bSLawrence Stewart * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2967fef78bSLawrence Stewart * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 3067fef78bSLawrence Stewart * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 3167fef78bSLawrence Stewart * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 3267fef78bSLawrence Stewart * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 3367fef78bSLawrence Stewart * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 3467fef78bSLawrence Stewart * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 3567fef78bSLawrence Stewart * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3667fef78bSLawrence Stewart * SUCH DAMAGE. 3767fef78bSLawrence Stewart * 3867fef78bSLawrence Stewart * $FreeBSD$ 3967fef78bSLawrence Stewart */ 4067fef78bSLawrence Stewart 4167fef78bSLawrence Stewart #ifndef _NETINET_CC_CUBIC_H_ 4267fef78bSLawrence Stewart #define _NETINET_CC_CUBIC_H_ 4367fef78bSLawrence Stewart 44c968c769SMichael Tuexen #include <sys/limits.h> 45c968c769SMichael Tuexen 4667fef78bSLawrence Stewart /* Number of bits of precision for fixed point math calcs. */ 4767fef78bSLawrence Stewart #define CUBIC_SHIFT 8 4867fef78bSLawrence Stewart 4967fef78bSLawrence Stewart #define CUBIC_SHIFT_4 32 5067fef78bSLawrence Stewart 5167fef78bSLawrence Stewart /* 0.5 << CUBIC_SHIFT. */ 5267fef78bSLawrence Stewart #define RENO_BETA 128 5367fef78bSLawrence Stewart 5468ff29afSSean Bruno /* ~0.7 << CUBIC_SHIFT. */ 5568ff29afSSean Bruno #define CUBIC_BETA 179 5667fef78bSLawrence Stewart 5768ff29afSSean Bruno /* ~0.3 << CUBIC_SHIFT. */ 5868ff29afSSean Bruno #define ONE_SUB_CUBIC_BETA 77 5967fef78bSLawrence Stewart 6067fef78bSLawrence Stewart /* 3 * ONE_SUB_CUBIC_BETA. */ 6168ff29afSSean Bruno #define THREE_X_PT3 231 6267fef78bSLawrence Stewart 6367fef78bSLawrence Stewart /* (2 << CUBIC_SHIFT) - ONE_SUB_CUBIC_BETA. */ 6468ff29afSSean Bruno #define TWO_SUB_PT3 435 6567fef78bSLawrence Stewart 6667fef78bSLawrence Stewart /* ~0.4 << CUBIC_SHIFT. */ 6767fef78bSLawrence Stewart #define CUBIC_C_FACTOR 102 6867fef78bSLawrence Stewart 6968ff29afSSean Bruno /* CUBIC fast convergence factor: (1+beta_cubic)/2. */ 7068ff29afSSean Bruno #define CUBIC_FC_FACTOR 217 7167fef78bSLawrence Stewart 7267fef78bSLawrence Stewart /* Don't trust s_rtt until this many rtt samples have been taken. */ 7367fef78bSLawrence Stewart #define CUBIC_MIN_RTT_SAMPLES 8 7467fef78bSLawrence Stewart 75c968c769SMichael Tuexen /* 76c968c769SMichael Tuexen * (2^21)^3 is long max. Dividing (2^63) by Cubic_C_factor 77c968c769SMichael Tuexen * and taking cube-root yields 448845 as the effective useful limit 78c968c769SMichael Tuexen */ 79c968c769SMichael Tuexen #define CUBED_ROOT_MAX_ULONG 448845 80c968c769SMichael Tuexen 81a9696510SRandall Stewart /* Flags used in the cubic structure */ 82a9696510SRandall Stewart #define CUBICFLAG_CONG_EVENT 0x00000001 /* congestion experienced */ 83a9696510SRandall Stewart #define CUBICFLAG_IN_SLOWSTART 0x00000002 /* in slow start */ 84a9696510SRandall Stewart #define CUBICFLAG_IN_APPLIMIT 0x00000004 /* application limited */ 85a9696510SRandall Stewart #define CUBICFLAG_RTO_EVENT 0x00000008 /* RTO experienced */ 86a9696510SRandall Stewart #define CUBICFLAG_HYSTART_ENABLED 0x00000010 /* Hystart++ is enabled */ 87a9696510SRandall Stewart #define CUBICFLAG_HYSTART_IN_CSS 0x00000020 /* We are in Hystart++ CSS */ 88a9696510SRandall Stewart 89a9696510SRandall Stewart /* Kernel only bits */ 90a9696510SRandall Stewart #ifdef _KERNEL 91a9696510SRandall Stewart struct cubic { 92ea6d0de2SRichard Scheffenegger /* CUBIC K in fixed point form with CUBIC_SHIFT worth of precision. */ 93a9696510SRandall Stewart int64_t K; 94a3aa6f65SCheng Cui /* Sum of RTT samples across an epoch in usecs. */ 95a3aa6f65SCheng Cui int64_t sum_rtt_usecs; 96*eb5bfdd0SRichard Scheffenegger /* Size of cwnd just before cwnd was reduced in the last congestion event */ 97*eb5bfdd0SRichard Scheffenegger uint64_t W_max; 98*eb5bfdd0SRichard Scheffenegger /* An estimate for the congestion window in the Reno-friendly region */ 99*eb5bfdd0SRichard Scheffenegger uint64_t W_est; 100*eb5bfdd0SRichard Scheffenegger /* The cwnd at the beginning of the current congestion avoidance stage */ 101*eb5bfdd0SRichard Scheffenegger uint64_t cwnd_epoch; 102*eb5bfdd0SRichard Scheffenegger /* 103*eb5bfdd0SRichard Scheffenegger * Size of cwnd at the time of setting ssthresh most recently, 104*eb5bfdd0SRichard Scheffenegger * either upon exiting the first slow start, or just before cwnd 105*eb5bfdd0SRichard Scheffenegger * was reduced in the last congestion event 106*eb5bfdd0SRichard Scheffenegger */ 107*eb5bfdd0SRichard Scheffenegger uint64_t cwnd_prior; 108a9696510SRandall Stewart /* various flags */ 109a9696510SRandall Stewart uint32_t flags; 110a3aa6f65SCheng Cui /* Minimum observed rtt in usecs. */ 111a3aa6f65SCheng Cui int min_rtt_usecs; 112a9696510SRandall Stewart /* Mean observed rtt between congestion epochs. */ 113a3aa6f65SCheng Cui int mean_rtt_usecs; 114a9696510SRandall Stewart /* ACKs since last congestion event. */ 115a9696510SRandall Stewart int epoch_ack_count; 116*eb5bfdd0SRichard Scheffenegger /* Timestamp (in ticks) at which the current CA epoch started. */ 117*eb5bfdd0SRichard Scheffenegger int t_epoch; 118*eb5bfdd0SRichard Scheffenegger /* Timestamp (in ticks) at which the previous CA epoch started. */ 119*eb5bfdd0SRichard Scheffenegger int undo_t_epoch; 120*eb5bfdd0SRichard Scheffenegger /* Few variables to restore the state after RTO_ERR */ 121*eb5bfdd0SRichard Scheffenegger int64_t undo_K; 122*eb5bfdd0SRichard Scheffenegger uint64_t undo_cwnd_prior; 123*eb5bfdd0SRichard Scheffenegger uint64_t undo_W_max; 124*eb5bfdd0SRichard Scheffenegger uint64_t undo_W_est; 125*eb5bfdd0SRichard Scheffenegger uint64_t undo_cwnd_epoch; 126*eb5bfdd0SRichard Scheffenegger /* Number of congestion events experienced */ 127*eb5bfdd0SRichard Scheffenegger uint64_t num_cong_events; 128a9696510SRandall Stewart uint32_t css_baseline_minrtt; 129a9696510SRandall Stewart uint32_t css_current_round_minrtt; 130a9696510SRandall Stewart uint32_t css_lastround_minrtt; 131a9696510SRandall Stewart uint32_t css_rttsample_count; 132a9696510SRandall Stewart uint32_t css_entered_at_round; 133a9696510SRandall Stewart uint32_t css_current_round; 134a9696510SRandall Stewart uint32_t css_fas_at_css_entry; 135a9696510SRandall Stewart uint32_t css_lowrtt_fas; 136a9696510SRandall Stewart uint32_t css_last_fas; 137a9696510SRandall Stewart }; 138a9696510SRandall Stewart #endif 139a9696510SRandall Stewart 14067fef78bSLawrence Stewart /* Userland only bits. */ 14167fef78bSLawrence Stewart #ifndef _KERNEL 14267fef78bSLawrence Stewart 14367fef78bSLawrence Stewart extern int hz; 14467fef78bSLawrence Stewart 14567fef78bSLawrence Stewart /* 14667fef78bSLawrence Stewart * Implementation based on the formulae found in the CUBIC Internet Draft 14768ff29afSSean Bruno * "draft-ietf-tcpm-cubic-04". 14867fef78bSLawrence Stewart * 14967fef78bSLawrence Stewart */ 15067fef78bSLawrence Stewart 15167fef78bSLawrence Stewart static __inline float 15267fef78bSLawrence Stewart theoretical_cubic_k(double wmax_pkts) 15367fef78bSLawrence Stewart { 15467fef78bSLawrence Stewart double C; 15567fef78bSLawrence Stewart 15667fef78bSLawrence Stewart C = 0.4; 15767fef78bSLawrence Stewart 15868ff29afSSean Bruno return (pow((wmax_pkts * 0.3) / C, (1.0 / 3.0)) * pow(2, CUBIC_SHIFT)); 15967fef78bSLawrence Stewart } 16067fef78bSLawrence Stewart 16167fef78bSLawrence Stewart static __inline unsigned long 162*eb5bfdd0SRichard Scheffenegger theoretical_cubic_cwnd(int ticks_since_epoch, unsigned long wmax, uint32_t smss) 16367fef78bSLawrence Stewart { 16467fef78bSLawrence Stewart double C, wmax_pkts; 16567fef78bSLawrence Stewart 16667fef78bSLawrence Stewart C = 0.4; 16767fef78bSLawrence Stewart wmax_pkts = wmax / (double)smss; 16867fef78bSLawrence Stewart 16967fef78bSLawrence Stewart return (smss * (wmax_pkts + 170*eb5bfdd0SRichard Scheffenegger (C * pow(ticks_since_epoch / (double)hz - 17167fef78bSLawrence Stewart theoretical_cubic_k(wmax_pkts) / pow(2, CUBIC_SHIFT), 3.0)))); 17267fef78bSLawrence Stewart } 17367fef78bSLawrence Stewart 17467fef78bSLawrence Stewart static __inline unsigned long 175*eb5bfdd0SRichard Scheffenegger theoretical_reno_cwnd(int ticks_since_epoch, int rtt_ticks, unsigned long wmax, 17667fef78bSLawrence Stewart uint32_t smss) 17767fef78bSLawrence Stewart { 17867fef78bSLawrence Stewart 179*eb5bfdd0SRichard Scheffenegger return ((wmax * 0.5) + ((ticks_since_epoch / (float)rtt_ticks) * smss)); 18067fef78bSLawrence Stewart } 18167fef78bSLawrence Stewart 18267fef78bSLawrence Stewart static __inline unsigned long 183*eb5bfdd0SRichard Scheffenegger theoretical_tf_cwnd(int ticks_since_epoch, int rtt_ticks, unsigned long wmax, 18467fef78bSLawrence Stewart uint32_t smss) 18567fef78bSLawrence Stewart { 18667fef78bSLawrence Stewart 18768ff29afSSean Bruno return ((wmax * 0.7) + ((3 * 0.3) / (2 - 0.3) * 188*eb5bfdd0SRichard Scheffenegger (ticks_since_epoch / (float)rtt_ticks) * smss)); 18967fef78bSLawrence Stewart } 19067fef78bSLawrence Stewart 19167fef78bSLawrence Stewart #endif /* !_KERNEL */ 19267fef78bSLawrence Stewart 19367fef78bSLawrence Stewart /* 19467fef78bSLawrence Stewart * Compute the CUBIC K value used in the cwnd calculation, using an 19567fef78bSLawrence Stewart * implementation of eqn 2 in the I-D. The method used 19667fef78bSLawrence Stewart * here is adapted from Apple Computer Technical Report #KT-32. 19767fef78bSLawrence Stewart */ 19867fef78bSLawrence Stewart static __inline int64_t 19967fef78bSLawrence Stewart cubic_k(unsigned long wmax_pkts) 20067fef78bSLawrence Stewart { 20167fef78bSLawrence Stewart int64_t s, K; 20267fef78bSLawrence Stewart uint16_t p; 20367fef78bSLawrence Stewart 20467fef78bSLawrence Stewart K = s = 0; 20567fef78bSLawrence Stewart p = 0; 20667fef78bSLawrence Stewart 20767fef78bSLawrence Stewart /* (wmax * beta)/C with CUBIC_SHIFT worth of precision. */ 20867fef78bSLawrence Stewart s = ((wmax_pkts * ONE_SUB_CUBIC_BETA) << CUBIC_SHIFT) / CUBIC_C_FACTOR; 20967fef78bSLawrence Stewart 21067fef78bSLawrence Stewart /* Rebase s to be between 1 and 1/8 with a shift of CUBIC_SHIFT. */ 21167fef78bSLawrence Stewart while (s >= 256) { 21267fef78bSLawrence Stewart s >>= 3; 21367fef78bSLawrence Stewart p++; 21467fef78bSLawrence Stewart } 21567fef78bSLawrence Stewart 21667fef78bSLawrence Stewart /* 21767fef78bSLawrence Stewart * Some magic constants taken from the Apple TR with appropriate 21867fef78bSLawrence Stewart * shifts: 275 == 1.072302 << CUBIC_SHIFT, 98 == 0.3812513 << 21967fef78bSLawrence Stewart * CUBIC_SHIFT, 120 == 0.46946116 << CUBIC_SHIFT. 22067fef78bSLawrence Stewart */ 22167fef78bSLawrence Stewart K = (((s * 275) >> CUBIC_SHIFT) + 98) - 22267fef78bSLawrence Stewart (((s * s * 120) >> CUBIC_SHIFT) >> CUBIC_SHIFT); 22367fef78bSLawrence Stewart 22467fef78bSLawrence Stewart /* Multiply by 2^p to undo the rebasing of s from above. */ 22567fef78bSLawrence Stewart return (K <<= p); 22667fef78bSLawrence Stewart } 22767fef78bSLawrence Stewart 22867fef78bSLawrence Stewart /* 22967fef78bSLawrence Stewart * Compute the new cwnd value using an implementation of eqn 1 from the I-D. 23067fef78bSLawrence Stewart * Thanks to Kip Macy for help debugging this function. 23151e712f8SHiren Panchasara * 23251e712f8SHiren Panchasara * XXXLAS: Characterise bounds for overflow. 23367fef78bSLawrence Stewart */ 23467fef78bSLawrence Stewart static __inline unsigned long 235*eb5bfdd0SRichard Scheffenegger cubic_cwnd(int usecs_since_epoch, unsigned long wmax, uint32_t smss, int64_t K) 23667fef78bSLawrence Stewart { 23767fef78bSLawrence Stewart int64_t cwnd; 23867fef78bSLawrence Stewart 23967fef78bSLawrence Stewart /* K is in fixed point form with CUBIC_SHIFT worth of precision. */ 24067fef78bSLawrence Stewart 24167fef78bSLawrence Stewart /* t - K, with CUBIC_SHIFT worth of precision. */ 242*eb5bfdd0SRichard Scheffenegger cwnd = (((int64_t)usecs_since_epoch << CUBIC_SHIFT) - (K * hz * tick)) / 243a3aa6f65SCheng Cui (hz * tick); 244c968c769SMichael Tuexen 245c968c769SMichael Tuexen if (cwnd > CUBED_ROOT_MAX_ULONG) 246c968c769SMichael Tuexen return INT_MAX; 247c968c769SMichael Tuexen if (cwnd < -CUBED_ROOT_MAX_ULONG) 248c968c769SMichael Tuexen return 0; 24967fef78bSLawrence Stewart 25067fef78bSLawrence Stewart /* (t - K)^3, with CUBIC_SHIFT^3 worth of precision. */ 25167fef78bSLawrence Stewart cwnd *= (cwnd * cwnd); 25267fef78bSLawrence Stewart 25367fef78bSLawrence Stewart /* 25467fef78bSLawrence Stewart * C(t - K)^3 + wmax 25567fef78bSLawrence Stewart * The down shift by CUBIC_SHIFT_4 is because cwnd has 4 lots of 25667fef78bSLawrence Stewart * CUBIC_SHIFT included in the value. 3 from the cubing of cwnd above, 25767fef78bSLawrence Stewart * and an extra from multiplying through by CUBIC_C_FACTOR. 25867fef78bSLawrence Stewart */ 25967fef78bSLawrence Stewart 260c968c769SMichael Tuexen cwnd = ((cwnd * CUBIC_C_FACTOR) >> CUBIC_SHIFT_4) * smss + wmax; 261c968c769SMichael Tuexen 262c968c769SMichael Tuexen /* 263c968c769SMichael Tuexen * for negative cwnd, limiting to zero as lower bound 264c968c769SMichael Tuexen */ 265c968c769SMichael Tuexen return (lmax(0,cwnd)); 26667fef78bSLawrence Stewart } 26767fef78bSLawrence Stewart 26867fef78bSLawrence Stewart /* 269a3aa6f65SCheng Cui * Compute an approximation of the NewReno cwnd some number of usecs after a 27067fef78bSLawrence Stewart * congestion event. RTT should be the average RTT estimate for the path 27167fef78bSLawrence Stewart * measured over the previous congestion epoch and wmax is the value of cwnd at 27267fef78bSLawrence Stewart * the last congestion event. The "TCP friendly" concept in the CUBIC I-D is 27367fef78bSLawrence Stewart * rather tricky to understand and it turns out this function is not required. 27467fef78bSLawrence Stewart * It is left here for reference. 275a3aa6f65SCheng Cui * 276a3aa6f65SCheng Cui * XXX: Not used 27767fef78bSLawrence Stewart */ 27867fef78bSLawrence Stewart static __inline unsigned long 279*eb5bfdd0SRichard Scheffenegger reno_cwnd(int usecs_since_epoch, int rtt_usecs, unsigned long wmax, 28067fef78bSLawrence Stewart uint32_t smss) 28167fef78bSLawrence Stewart { 28267fef78bSLawrence Stewart 28367fef78bSLawrence Stewart /* 28467fef78bSLawrence Stewart * For NewReno, beta = 0.5, therefore: W_tcp(t) = wmax*0.5 + t/RTT 28567fef78bSLawrence Stewart * W_tcp(t) deals with cwnd/wmax in pkts, so because our cwnd is in 28667fef78bSLawrence Stewart * bytes, we have to multiply by smss. 28767fef78bSLawrence Stewart */ 288*eb5bfdd0SRichard Scheffenegger return (((wmax * RENO_BETA) + (((usecs_since_epoch * smss) 289a3aa6f65SCheng Cui << CUBIC_SHIFT) / rtt_usecs)) >> CUBIC_SHIFT); 29067fef78bSLawrence Stewart } 29167fef78bSLawrence Stewart 29267fef78bSLawrence Stewart /* 293a3aa6f65SCheng Cui * Compute an approximation of the "TCP friendly" cwnd some number of usecs 29467fef78bSLawrence Stewart * after a congestion event that is designed to yield the same average cwnd as 29568ff29afSSean Bruno * NewReno while using CUBIC's beta of 0.7. RTT should be the average RTT 29667fef78bSLawrence Stewart * estimate for the path measured over the previous congestion epoch and wmax is 29767fef78bSLawrence Stewart * the value of cwnd at the last congestion event. 29867fef78bSLawrence Stewart */ 29967fef78bSLawrence Stewart static __inline unsigned long 300*eb5bfdd0SRichard Scheffenegger tf_cwnd(int usecs_since_epoch, int rtt_usecs, unsigned long wmax, 30167fef78bSLawrence Stewart uint32_t smss) 30267fef78bSLawrence Stewart { 30367fef78bSLawrence Stewart 30467fef78bSLawrence Stewart /* Equation 4 of I-D. */ 305c968c769SMichael Tuexen return (((wmax * CUBIC_BETA) + 306*eb5bfdd0SRichard Scheffenegger (((THREE_X_PT3 * (unsigned long)usecs_since_epoch * 307a3aa6f65SCheng Cui (unsigned long)smss) << CUBIC_SHIFT) / (TWO_SUB_PT3 * rtt_usecs))) 308c968c769SMichael Tuexen >> CUBIC_SHIFT); 30967fef78bSLawrence Stewart } 31067fef78bSLawrence Stewart 31167fef78bSLawrence Stewart #endif /* _NETINET_CC_CUBIC_H_ */ 312