/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2008-2010 Lawrence Stewart <lstewart@freebsd.org>
 * Copyright (c) 2010 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Lawrence Stewart while studying at the Centre
 * for Advanced Internet Architectures, Swinburne University of Technology, made
 * possible in part by a grant from the Cisco University Research Program Fund
 * at Community Foundation Silicon Valley.
 *
 * Portions of this software were developed at the Centre for Advanced
 * Internet Architectures, Swinburne University of Technology, Melbourne,
 * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * An implementation of the CUBIC congestion control algorithm for FreeBSD,
 * based on the Internet Draft "draft-rhee-tcpm-cubic-02" by Rhee, Xu and Ha.
 * Originally released as part of the NewTCP research project at Swinburne
 * University of Technology's Centre for Advanced Internet Architectures,
 * Melbourne, Australia, which was made possible in part by a grant from the
 * Cisco University Research Program Fund at Community Foundation Silicon
 * Valley.
 * More details are available at:
 *   http://caia.swin.edu.au/urp/newtcp/
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <net/vnet.h>

#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/cc/cc.h>
#include <netinet/cc/cc_cubic.h>
#include <netinet/cc/cc_module.h>

static void	cubic_ack_received(struct cc_var *ccv, uint16_t type);
static void	cubic_cb_destroy(struct cc_var *ccv);
static int	cubic_cb_init(struct cc_var *ccv);
static void	cubic_cong_signal(struct cc_var *ccv, uint32_t type);
static void	cubic_conn_init(struct cc_var *ccv);
static int	cubic_mod_init(void);
static void	cubic_post_recovery(struct cc_var *ccv);
static void	cubic_record_rtt(struct cc_var *ccv);
static void	cubic_ssthresh_update(struct cc_var *ccv);
static void	cubic_after_idle(struct cc_var *ccv);

/* Per-connection CUBIC state, hung off ccv->cc_data. */
struct cubic {
	/* Cubic K in fixed point form with CUBIC_SHIFT worth of precision. */
	int64_t		K;
	/* Sum of RTT samples across an epoch in ticks. */
	int64_t		sum_rtt_ticks;
	/* cwnd at the most recent congestion event. */
	unsigned long	max_cwnd;
	/* cwnd at the previous congestion event. */
	unsigned long	prev_max_cwnd;
	/* various flags */
	uint32_t	flags;
#define CUBICFLAG_CONG_EVENT	0x00000001	/* congestion experienced */
#define CUBICFLAG_IN_SLOWSTART	0x00000002	/* in slow start */
#define CUBICFLAG_IN_APPLIMIT	0x00000004	/* application limited */
	/* Minimum observed rtt in ticks. */
	int		min_rtt_ticks;
	/* Mean observed rtt between congestion epochs. */
	int		mean_rtt_ticks;
	/* ACKs since last congestion event. */
	int		epoch_ack_count;
	/* Time of last congestion event in ticks. */
	int		t_last_cong;
};

static MALLOC_DEFINE(M_CUBIC, "cubic data",
    "Per connection data required for the CUBIC congestion control algorithm");

/* Hook table registering CUBIC with the modular cc(4) framework. */
struct cc_algo cubic_cc_algo = {
	.name = "cubic",
	.ack_received = cubic_ack_received,
	.cb_destroy = cubic_cb_destroy,
	.cb_init = cubic_cb_init,
	.cong_signal = cubic_cong_signal,
	.conn_init = cubic_conn_init,
	.mod_init = cubic_mod_init,
	.post_recovery = cubic_post_recovery,
	.after_idle = cubic_after_idle,
};

/*
 * Per-ACK cwnd update.  During slow start (or before enough RTT samples
 * exist) growth is delegated to NewReno's ack_received(); otherwise cwnd
 * follows whichever of the TCP-friendly (w_tf) or CUBIC (w_cubic_next)
 * window curves applies, recomputing K when leaving slow start or an
 * application-limited period.
 */
static void
cubic_ack_received(struct cc_var *ccv, uint16_t type)
{
	struct cubic *cubic_data;
	unsigned long w_tf, w_cubic_next;
	int ticks_since_cong;

	cubic_data = ccv->cc_data;
	cubic_record_rtt(ccv);

	/*
	 * Regular ACK and we're not in cong/fast recovery and we're cwnd
	 * limited and we're either not doing ABC or are just coming out
	 * from slow-start or were application limited or are slow starting
	 * or are doing ABC and we've sent a cwnd's worth of bytes.
	 */
	if (type == CC_ACK && !IN_RECOVERY(CCV(ccv, t_flags)) &&
	    (ccv->flags & CCF_CWND_LIMITED) && (!V_tcp_do_rfc3465 ||
	    (cubic_data->flags & (CUBICFLAG_IN_SLOWSTART | CUBICFLAG_IN_APPLIMIT)) ||
	    CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh) ||
	    (V_tcp_do_rfc3465 && (ccv->flags & CCF_ABC_SENTAWND)))) {
		/* Use the logic in NewReno ack_received() for slow start. */
		if (CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh) ||
		    cubic_data->min_rtt_ticks == TCPTV_SRTTBASE) {
			cubic_data->flags |= CUBICFLAG_IN_SLOWSTART;
			newreno_cc_algo.ack_received(ccv, type);
		} else {
			/*
			 * First congestion-avoidance ACK after slow start or
			 * an app-limited period: restart the cubic epoch and
			 * recompute K from the current max_cwnd.
			 */
			if (cubic_data->flags & (CUBICFLAG_IN_SLOWSTART |
			    CUBICFLAG_IN_APPLIMIT)) {
				cubic_data->flags &= ~(CUBICFLAG_IN_SLOWSTART |
				    CUBICFLAG_IN_APPLIMIT);
				cubic_data->t_last_cong = ticks;
				cubic_data->K = cubic_k(cubic_data->max_cwnd /
				    CCV(ccv, t_maxseg));
			}
			/* Guard against the ticks counter wrapping. */
			if ((ticks_since_cong =
			    ticks - cubic_data->t_last_cong) < 0) {
				/*
				 * dragging t_last_cong along
				 */
				ticks_since_cong = INT_MAX;
				cubic_data->t_last_cong = ticks - INT_MAX;
			}
			/*
			 * The mean RTT is used to best reflect the equations in
			 * the I-D. Using min_rtt in the tf_cwnd calculation
			 * causes w_tf to grow much faster than it should if the
			 * RTT is dominated by network buffering rather than
			 * propagation delay.
			 */
			w_tf = tf_cwnd(ticks_since_cong,
			    cubic_data->mean_rtt_ticks, cubic_data->max_cwnd,
			    CCV(ccv, t_maxseg));

			w_cubic_next = cubic_cwnd(ticks_since_cong +
			    cubic_data->mean_rtt_ticks, cubic_data->max_cwnd,
			    CCV(ccv, t_maxseg), cubic_data->K);

			ccv->flags &= ~CCF_ABC_SENTAWND;

			if (w_cubic_next < w_tf) {
				/*
				 * TCP-friendly region, follow tf
				 * cwnd growth.
				 */
				if (CCV(ccv, snd_cwnd) < w_tf)
					CCV(ccv, snd_cwnd) = ulmin(w_tf, INT_MAX);
			} else if (CCV(ccv, snd_cwnd) < w_cubic_next) {
				/*
				 * Concave or convex region, follow CUBIC
				 * cwnd growth.
				 * Only update snd_cwnd, if it doesn't shrink.
				 */
				if (V_tcp_do_rfc3465)
					CCV(ccv, snd_cwnd) = ulmin(w_cubic_next,
					    INT_MAX);
				else
					CCV(ccv, snd_cwnd) += ulmax(1,
					    ((ulmin(w_cubic_next, INT_MAX) -
					    CCV(ccv, snd_cwnd)) *
					    CCV(ccv, t_maxseg)) /
					    CCV(ccv, snd_cwnd));
			}

			/*
			 * If we're not in slow start and we're probing for a
			 * new cwnd limit at the start of a connection
			 * (happens when hostcache has a relevant entry),
			 * keep updating our current estimate of the
			 * max_cwnd.
			 */
			if (((cubic_data->flags & CUBICFLAG_CONG_EVENT) == 0) &&
			    cubic_data->max_cwnd < CCV(ccv, snd_cwnd)) {
				cubic_data->max_cwnd = CCV(ccv, snd_cwnd);
				cubic_data->K = cubic_k(cubic_data->max_cwnd /
				    CCV(ccv, t_maxseg));
			}
		}
	} else if (type == CC_ACK && !IN_RECOVERY(CCV(ccv, t_flags)) &&
	    !(ccv->flags & CCF_CWND_LIMITED)) {
		/* Remember that we were app limited; consumed above. */
		cubic_data->flags |= CUBICFLAG_IN_APPLIMIT;
	}
}
/*
 * This is a Cubic specific implementation of after_idle.
 * - Reset cwnd by calling New Reno implementation of after_idle.
 * - Reset t_last_cong.
 * max_cwnd/K are refreshed first so the cubic curve resumes from the
 * pre-idle window rather than the NewReno-reset one.
 */
static void
cubic_after_idle(struct cc_var *ccv)
{
	struct cubic *cubic_data;

	cubic_data = ccv->cc_data;

	cubic_data->max_cwnd = ulmax(cubic_data->max_cwnd, CCV(ccv, snd_cwnd));
	cubic_data->K = cubic_k(cubic_data->max_cwnd / CCV(ccv, t_maxseg));

	newreno_cc_algo.after_idle(ccv);
	cubic_data->t_last_cong = ticks;
}

/* Release the per-connection CUBIC state. */
static void
cubic_cb_destroy(struct cc_var *ccv)
{
	free(ccv->cc_data, M_CUBIC);
}

/*
 * Allocate and initialise per-connection CUBIC state.  Returns ENOMEM if
 * the M_NOWAIT allocation fails.
 */
static int
cubic_cb_init(struct cc_var *ccv)
{
	struct cubic *cubic_data;

	cubic_data = malloc(sizeof(struct cubic), M_CUBIC, M_NOWAIT|M_ZERO);

	if (cubic_data == NULL)
		return (ENOMEM);

	/* Init some key variables with sensible defaults. */
	cubic_data->t_last_cong = ticks;
	cubic_data->min_rtt_ticks = TCPTV_SRTTBASE;
	cubic_data->mean_rtt_ticks = 1;

	ccv->cc_data = cubic_data;

	return (0);
}

/*
 * Perform any necessary tasks before we enter congestion recovery.
 */
static void
cubic_cong_signal(struct cc_var *ccv, uint32_t type)
{
	struct cubic *cubic_data;

	cubic_data = ccv->cc_data;

	switch (type) {
	case CC_NDUPACK:
		/* Triple duplicate ACK: enter fast recovery once. */
		if (!IN_FASTRECOVERY(CCV(ccv, t_flags))) {
			if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) {
				cubic_ssthresh_update(ccv);
				cubic_data->flags |= CUBICFLAG_CONG_EVENT;
				cubic_data->prev_max_cwnd = cubic_data->max_cwnd;
				cubic_data->max_cwnd = CCV(ccv, snd_cwnd);
				cubic_data->K = cubic_k(cubic_data->max_cwnd / CCV(ccv, t_maxseg));
			}
			ENTER_RECOVERY(CCV(ccv, t_flags));
		}
		break;

	case CC_ECN:
		/* ECN echo: cut cwnd immediately and start a new epoch. */
		if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) {
			cubic_ssthresh_update(ccv);
			cubic_data->flags |= CUBICFLAG_CONG_EVENT;
			cubic_data->prev_max_cwnd = cubic_data->max_cwnd;
			cubic_data->max_cwnd = CCV(ccv, snd_cwnd);
			cubic_data->t_last_cong = ticks;
			cubic_data->K = cubic_k(cubic_data->max_cwnd / CCV(ccv, t_maxseg));
			CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh);
			ENTER_CONGRECOVERY(CCV(ccv, t_flags));
		}
		break;

	case CC_RTO:
		/*
		 * Grab the current time and record it so we know when the
		 * most recent congestion event was. Only record it when the
		 * timeout has fired more than once, as there is a reasonable
		 * chance the first one is a false alarm and may not indicate
		 * congestion.
		 * This will put Cubic firmly into the concave / TCP friendly
		 * region, for a slower ramp-up after two consecutive RTOs.
		 */
		if (CCV(ccv, t_rxtshift) >= 2) {
			cubic_data->flags |= CUBICFLAG_CONG_EVENT;
			cubic_data->t_last_cong = ticks;
			cubic_data->max_cwnd = CCV(ccv, snd_cwnd_prev);
			cubic_data->K = cubic_k(cubic_data->max_cwnd /
			    CCV(ccv, t_maxseg));
		}
		break;
	}
}

static void
cubic_conn_init(struct cc_var *ccv)
{
	struct cubic *cubic_data;

	cubic_data = ccv->cc_data;

	/*
	 * Ensure we have a sane initial value for max_cwnd recorded. Without
	 * this here bad things happen when entries from the TCP hostcache
	 * get used.
	 */
	cubic_data->max_cwnd = CCV(ccv, snd_cwnd);
}

/* Module load hook; CUBIC needs no global initialisation. */
static int
cubic_mod_init(void)
{
	return (0);
}
/*
 * Perform any necessary tasks before we exit congestion recovery.
 */
static void
cubic_post_recovery(struct cc_var *ccv)
{
	struct cubic *cubic_data;
	int pipe;

	cubic_data = ccv->cc_data;
	pipe = 0;

	/* Fast convergence heuristic. */
	if (cubic_data->max_cwnd < cubic_data->prev_max_cwnd)
		cubic_data->max_cwnd = (cubic_data->max_cwnd * CUBIC_FC_FACTOR)
		    >> CUBIC_SHIFT;

	if (IN_FASTRECOVERY(CCV(ccv, t_flags))) {
		/*
		 * If inflight data is less than ssthresh, set cwnd
		 * conservatively to avoid a burst of data, as suggested in
		 * the NewReno RFC. Otherwise, use the CUBIC method.
		 *
		 * XXXLAS: Find a way to do this without needing curack
		 */
		if (V_tcp_do_rfc6675_pipe)
			pipe = tcp_compute_pipe(ccv->ccvc.tcp);
		else
			pipe = CCV(ccv, snd_max) - ccv->curack;

		if (pipe < CCV(ccv, snd_ssthresh))
			/*
			 * Ensure that cwnd does not collapse to 1 MSS under
			 * adverse conditions. Implements RFC6582
			 */
			CCV(ccv, snd_cwnd) = max(pipe, CCV(ccv, t_maxseg)) +
			    CCV(ccv, t_maxseg);
		else
			/* Update cwnd based on beta and adjusted max_cwnd. */
			CCV(ccv, snd_cwnd) = max(((uint64_t)cubic_data->max_cwnd *
			    CUBIC_BETA) >> CUBIC_SHIFT,
			    2 * CCV(ccv, t_maxseg));
	}
	cubic_data->t_last_cong = ticks;

	/* Calculate the average RTT between congestion epochs. */
	if (cubic_data->epoch_ack_count > 0 &&
	    cubic_data->sum_rtt_ticks >= cubic_data->epoch_ack_count) {
		cubic_data->mean_rtt_ticks = (int)(cubic_data->sum_rtt_ticks /
		    cubic_data->epoch_ack_count);
	}

	cubic_data->epoch_ack_count = 0;
	cubic_data->sum_rtt_ticks = 0;
	cubic_data->K = cubic_k(cubic_data->max_cwnd / CCV(ccv, t_maxseg));
}

/*
 * Record the min RTT and sum samples for the epoch average RTT calculation.
 */
static void
cubic_record_rtt(struct cc_var *ccv)
{
	struct cubic *cubic_data;
	int t_srtt_ticks;

	/* Ignore srtt until a min number of samples have been taken. */
	if (CCV(ccv, t_rttupdated) >= CUBIC_MIN_RTT_SAMPLES) {
		cubic_data = ccv->cc_data;
		t_srtt_ticks = CCV(ccv, t_srtt) / TCP_RTT_SCALE;

		/*
		 * Record the current SRTT as our minrtt if it's the smallest
		 * we've seen or minrtt is currently equal to its initialised
		 * value.
		 *
		 * XXXLAS: Should there be some hysteresis for minrtt?
		 */
		if ((t_srtt_ticks < cubic_data->min_rtt_ticks ||
		    cubic_data->min_rtt_ticks == TCPTV_SRTTBASE)) {
			/* Clamp to >= 1 so later divisions stay safe. */
			cubic_data->min_rtt_ticks = max(1, t_srtt_ticks);

			/*
			 * If the connection is within its first congestion
			 * epoch, ensure we prime mean_rtt_ticks with a
			 * reasonable value until the epoch average RTT is
			 * calculated in cubic_post_recovery().
			 */
			if (cubic_data->min_rtt_ticks >
			    cubic_data->mean_rtt_ticks)
				cubic_data->mean_rtt_ticks =
				    cubic_data->min_rtt_ticks;
		}

		/* Sum samples for epoch average RTT calculation. */
		cubic_data->sum_rtt_ticks += t_srtt_ticks;
		cubic_data->epoch_ack_count++;
	}
}

/*
 * Update the ssthresh in the event of congestion.
 */
static void
cubic_ssthresh_update(struct cc_var *ccv)
{
	struct cubic *cubic_data;
	uint32_t ssthresh;

	cubic_data = ccv->cc_data;

	/*
	 * On the first congestion event, set ssthresh to cwnd * 0.5, on
	 * subsequent congestion events, set it to cwnd * beta.
	 */
	if ((cubic_data->flags & CUBICFLAG_CONG_EVENT) == 0)
		ssthresh = CCV(ccv, snd_cwnd) >> 1;
	else
		ssthresh = ((uint64_t)CCV(ccv, snd_cwnd) *
		    CUBIC_BETA) >> CUBIC_SHIFT;
	/* Never let ssthresh fall below two segments (RFC 5681 style floor). */
	CCV(ccv, snd_ssthresh) = max(ssthresh, 2 * CCV(ccv, t_maxseg));
}


DECLARE_CC_MODULE(cubic, &cubic_cc_algo);
MODULE_VERSION(cubic, 1);