11d4ed791SLawrence Stewart /*-
24d846d26SWarner Losh * SPDX-License-Identifier: BSD-2-Clause
3fe267a55SPedro F. Giffuni *
41d4ed791SLawrence Stewart * Copyright (c) 2009-2010
51d4ed791SLawrence Stewart * Swinburne University of Technology, Melbourne, Australia
61d4ed791SLawrence Stewart * Copyright (c) 2010 Lawrence Stewart <lstewart@freebsd.org>
71d4ed791SLawrence Stewart * Copyright (c) 2010-2011 The FreeBSD Foundation
81d4ed791SLawrence Stewart * All rights reserved.
91d4ed791SLawrence Stewart *
101d4ed791SLawrence Stewart * This software was developed at the Centre for Advanced Internet
11891b8ed4SLawrence Stewart * Architectures, Swinburne University of Technology, by David Hayes and
12891b8ed4SLawrence Stewart * Lawrence Stewart, made possible in part by a grant from the Cisco University
13891b8ed4SLawrence Stewart * Research Program Fund at Community Foundation Silicon Valley.
141d4ed791SLawrence Stewart *
151d4ed791SLawrence Stewart * Portions of this software were developed at the Centre for Advanced Internet
161d4ed791SLawrence Stewart * Architectures, Swinburne University of Technology, Melbourne, Australia by
171d4ed791SLawrence Stewart * David Hayes under sponsorship from the FreeBSD Foundation.
181d4ed791SLawrence Stewart *
191d4ed791SLawrence Stewart * Redistribution and use in source and binary forms, with or without
201d4ed791SLawrence Stewart * modification, are permitted provided that the following conditions
211d4ed791SLawrence Stewart * are met:
221d4ed791SLawrence Stewart * 1. Redistributions of source code must retain the above copyright
231d4ed791SLawrence Stewart * notice, this list of conditions and the following disclaimer.
241d4ed791SLawrence Stewart * 2. Redistributions in binary form must reproduce the above copyright
251d4ed791SLawrence Stewart * notice, this list of conditions and the following disclaimer in the
261d4ed791SLawrence Stewart * documentation and/or other materials provided with the distribution.
271d4ed791SLawrence Stewart *
281d4ed791SLawrence Stewart * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
291d4ed791SLawrence Stewart * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
301d4ed791SLawrence Stewart * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
311d4ed791SLawrence Stewart * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
321d4ed791SLawrence Stewart * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
331d4ed791SLawrence Stewart * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
341d4ed791SLawrence Stewart * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
351d4ed791SLawrence Stewart * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
361d4ed791SLawrence Stewart * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
371d4ed791SLawrence Stewart * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
381d4ed791SLawrence Stewart * SUCH DAMAGE.
391d4ed791SLawrence Stewart */
401d4ed791SLawrence Stewart
411d4ed791SLawrence Stewart /*
421d4ed791SLawrence Stewart * An implementation of the Vegas congestion control algorithm for FreeBSD,
431d4ed791SLawrence Stewart * based on L. S. Brakmo and L. L. Peterson, "TCP Vegas: end to end congestion
441d4ed791SLawrence Stewart * avoidance on a global internet", IEEE J. Sel. Areas Commun., vol. 13, no. 8,
451d4ed791SLawrence Stewart * pp. 1465-1480, Oct. 1995. The original Vegas duplicate ack policy has not
46ec943febSLawrence Stewart * been implemented, since clock ticks are not as coarse as they were (i.e.
471d4ed791SLawrence Stewart * 500ms) when Vegas was designed. Also, packets are timed once per RTT as in
481d4ed791SLawrence Stewart * the original paper.
491d4ed791SLawrence Stewart *
501d4ed791SLawrence Stewart * Originally released as part of the NewTCP research project at Swinburne
51891b8ed4SLawrence Stewart * University of Technology's Centre for Advanced Internet Architectures,
52891b8ed4SLawrence Stewart * Melbourne, Australia, which was made possible in part by a grant from the
53891b8ed4SLawrence Stewart * Cisco University Research Program Fund at Community Foundation Silicon
54891b8ed4SLawrence Stewart * Valley. More details are available at:
551d4ed791SLawrence Stewart * http://caia.swin.edu.au/urp/newtcp/
561d4ed791SLawrence Stewart */
571d4ed791SLawrence Stewart
581d4ed791SLawrence Stewart #include <sys/param.h>
591d4ed791SLawrence Stewart #include <sys/kernel.h>
601d4ed791SLawrence Stewart #include <sys/khelp.h>
611d4ed791SLawrence Stewart #include <sys/malloc.h>
621d4ed791SLawrence Stewart #include <sys/module.h>
631d4ed791SLawrence Stewart #include <sys/queue.h>
641d4ed791SLawrence Stewart #include <sys/socket.h>
651d4ed791SLawrence Stewart #include <sys/socketvar.h>
661d4ed791SLawrence Stewart #include <sys/sysctl.h>
671d4ed791SLawrence Stewart #include <sys/systm.h>
681d4ed791SLawrence Stewart
691d4ed791SLawrence Stewart #include <net/vnet.h>
701d4ed791SLawrence Stewart
71b8d60729SRandall Stewart #include <net/route.h>
72b8d60729SRandall Stewart #include <net/route/nhop.h>
73b8d60729SRandall Stewart
74b8d60729SRandall Stewart #include <netinet/in_pcb.h>
752de3e790SGleb Smirnoff #include <netinet/tcp.h>
761d4ed791SLawrence Stewart #include <netinet/tcp_timer.h>
771d4ed791SLawrence Stewart #include <netinet/tcp_var.h>
784644fda3SGleb Smirnoff #include <netinet/cc/cc.h>
791d4ed791SLawrence Stewart #include <netinet/cc/cc_module.h>
801d4ed791SLawrence Stewart
811d4ed791SLawrence Stewart #include <netinet/khelp/h_ertt.h>
821d4ed791SLawrence Stewart
/*
 * Private signal type for rate based congestion signal.
 * See <netinet/cc/cc.h> for appropriate bit-range to use for private signals.
 */
87f74352fbSRichard Scheffenegger #define CC_VEGAS_RATE 0x04000000
881d4ed791SLawrence Stewart
89f74352fbSRichard Scheffenegger static void vegas_ack_received(struct cc_var *ccv, ccsignal_t ack_type);
901d4ed791SLawrence Stewart static void vegas_cb_destroy(struct cc_var *ccv);
91b8d60729SRandall Stewart static int vegas_cb_init(struct cc_var *ccv, void *ptr);
92f74352fbSRichard Scheffenegger static void vegas_cong_signal(struct cc_var *ccv, ccsignal_t signal_type);
931d4ed791SLawrence Stewart static void vegas_conn_init(struct cc_var *ccv);
941d4ed791SLawrence Stewart static int vegas_mod_init(void);
95b8d60729SRandall Stewart static size_t vegas_data_sz(void);
961d4ed791SLawrence Stewart
/*
 * Per-connection Vegas state, reached through the cc_data pointer of the
 * connection's struct cc_var.
 */
struct vegas {
	/*
	 * While non-zero, vegas_ack_received() also hands each ACK to
	 * newreno_cc_ack_received().
	 */
	int	slow_start_toggle;
};
1001d4ed791SLawrence Stewart
1011d4ed791SLawrence Stewart static int32_t ertt_id;
1021d4ed791SLawrence Stewart
1035f901c92SAndrew Turner VNET_DEFINE_STATIC(uint32_t, vegas_alpha) = 1;
1045f901c92SAndrew Turner VNET_DEFINE_STATIC(uint32_t, vegas_beta) = 3;
1051d4ed791SLawrence Stewart #define V_vegas_alpha VNET(vegas_alpha)
1061d4ed791SLawrence Stewart #define V_vegas_beta VNET(vegas_beta)
1071d4ed791SLawrence Stewart
1081d4ed791SLawrence Stewart struct cc_algo vegas_cc_algo = {
1091d4ed791SLawrence Stewart .name = "vegas",
1101d4ed791SLawrence Stewart .ack_received = vegas_ack_received,
1111d4ed791SLawrence Stewart .cb_destroy = vegas_cb_destroy,
1121d4ed791SLawrence Stewart .cb_init = vegas_cb_init,
1131d4ed791SLawrence Stewart .cong_signal = vegas_cong_signal,
1141d4ed791SLawrence Stewart .conn_init = vegas_conn_init,
115b8d60729SRandall Stewart .mod_init = vegas_mod_init,
116b8d60729SRandall Stewart .cc_data_sz = vegas_data_sz,
117b8d60729SRandall Stewart .after_idle = newreno_cc_after_idle,
118b8d60729SRandall Stewart .post_recovery = newreno_cc_post_recovery,
1191d4ed791SLawrence Stewart };
1201d4ed791SLawrence Stewart
1211d4ed791SLawrence Stewart /*
1221d4ed791SLawrence Stewart * The vegas window adjustment is done once every RTT, as indicated by the
12317628f1bSGordon Bergling * ERTT_NEW_MEASUREMENT flag. This flag is reset once the new measurement data
1241d4ed791SLawrence Stewart * has been used.
1251d4ed791SLawrence Stewart */
1261d4ed791SLawrence Stewart static void
vegas_ack_received(struct cc_var * ccv,ccsignal_t ack_type)127f74352fbSRichard Scheffenegger vegas_ack_received(struct cc_var *ccv, ccsignal_t ack_type)
1281d4ed791SLawrence Stewart {
1291d4ed791SLawrence Stewart struct ertt *e_t;
1301d4ed791SLawrence Stewart struct vegas *vegas_data;
1311d4ed791SLawrence Stewart long actual_tx_rate, expected_tx_rate, ndiff;
132*22dcc812SRichard Scheffenegger uint32_t mss = tcp_fixed_maxseg(ccv->tp);
1331d4ed791SLawrence Stewart
134e68b3792SGleb Smirnoff e_t = khelp_get_osd(&CCV(ccv, t_osd), ertt_id);
1351d4ed791SLawrence Stewart vegas_data = ccv->cc_data;
1361d4ed791SLawrence Stewart
1371d4ed791SLawrence Stewart if (e_t->flags & ERTT_NEW_MEASUREMENT) { /* Once per RTT. */
1381d4ed791SLawrence Stewart if (e_t->minrtt && e_t->markedpkt_rtt) {
1391d4ed791SLawrence Stewart expected_tx_rate = e_t->marked_snd_cwnd / e_t->minrtt;
1401d4ed791SLawrence Stewart actual_tx_rate = e_t->bytes_tx_in_marked_rtt /
1411d4ed791SLawrence Stewart e_t->markedpkt_rtt;
1421d4ed791SLawrence Stewart ndiff = (expected_tx_rate - actual_tx_rate) *
143*22dcc812SRichard Scheffenegger e_t->minrtt / mss;
1441d4ed791SLawrence Stewart
1451d4ed791SLawrence Stewart if (ndiff < V_vegas_alpha) {
1461d4ed791SLawrence Stewart if (CCV(ccv, snd_cwnd) <=
1471d4ed791SLawrence Stewart CCV(ccv, snd_ssthresh)) {
1481d4ed791SLawrence Stewart vegas_data->slow_start_toggle =
1491d4ed791SLawrence Stewart vegas_data->slow_start_toggle ?
1501d4ed791SLawrence Stewart 0 : 1;
1511d4ed791SLawrence Stewart } else {
1521d4ed791SLawrence Stewart vegas_data->slow_start_toggle = 0;
1531d4ed791SLawrence Stewart CCV(ccv, snd_cwnd) =
154*22dcc812SRichard Scheffenegger min(CCV(ccv, snd_cwnd) + mss,
1551d4ed791SLawrence Stewart TCP_MAXWIN << CCV(ccv, snd_scale));
1561d4ed791SLawrence Stewart }
1571d4ed791SLawrence Stewart } else if (ndiff > V_vegas_beta) {
1581d4ed791SLawrence Stewart /* Rate-based congestion. */
1591d4ed791SLawrence Stewart vegas_cong_signal(ccv, CC_VEGAS_RATE);
1601d4ed791SLawrence Stewart vegas_data->slow_start_toggle = 0;
1611d4ed791SLawrence Stewart }
1621d4ed791SLawrence Stewart }
1631d4ed791SLawrence Stewart e_t->flags &= ~ERTT_NEW_MEASUREMENT;
1641d4ed791SLawrence Stewart }
1651d4ed791SLawrence Stewart
1661d4ed791SLawrence Stewart if (vegas_data->slow_start_toggle)
167b8d60729SRandall Stewart newreno_cc_ack_received(ccv, ack_type);
1681d4ed791SLawrence Stewart }
1691d4ed791SLawrence Stewart
1701d4ed791SLawrence Stewart static void
vegas_cb_destroy(struct cc_var * ccv)1711d4ed791SLawrence Stewart vegas_cb_destroy(struct cc_var *ccv)
1721d4ed791SLawrence Stewart {
173b8d60729SRandall Stewart free(ccv->cc_data, M_CC_MEM);
174b8d60729SRandall Stewart }
175b8d60729SRandall Stewart
176b8d60729SRandall Stewart static size_t
vegas_data_sz(void)177b8d60729SRandall Stewart vegas_data_sz(void)
178b8d60729SRandall Stewart {
179b8d60729SRandall Stewart return (sizeof(struct vegas));
1801d4ed791SLawrence Stewart }
1811d4ed791SLawrence Stewart
1821d4ed791SLawrence Stewart static int
vegas_cb_init(struct cc_var * ccv,void * ptr)183b8d60729SRandall Stewart vegas_cb_init(struct cc_var *ccv, void *ptr)
1841d4ed791SLawrence Stewart {
1851d4ed791SLawrence Stewart struct vegas *vegas_data;
1861d4ed791SLawrence Stewart
18700d3b744SMichael Tuexen INP_WLOCK_ASSERT(tptoinpcb(ccv->tp));
188b8d60729SRandall Stewart if (ptr == NULL) {
189b8d60729SRandall Stewart vegas_data = malloc(sizeof(struct vegas), M_CC_MEM, M_NOWAIT);
1901d4ed791SLawrence Stewart if (vegas_data == NULL)
1911d4ed791SLawrence Stewart return (ENOMEM);
192b8d60729SRandall Stewart } else
193b8d60729SRandall Stewart vegas_data = ptr;
1941d4ed791SLawrence Stewart
1951d4ed791SLawrence Stewart vegas_data->slow_start_toggle = 1;
1961d4ed791SLawrence Stewart ccv->cc_data = vegas_data;
1971d4ed791SLawrence Stewart
1981d4ed791SLawrence Stewart return (0);
1991d4ed791SLawrence Stewart }
2001d4ed791SLawrence Stewart
2011d4ed791SLawrence Stewart /*
2021d4ed791SLawrence Stewart * If congestion has been triggered triggered by the Vegas measured rates, it is
2031d4ed791SLawrence Stewart * handled here, otherwise it falls back to newreno's congestion handling.
2041d4ed791SLawrence Stewart */
2051d4ed791SLawrence Stewart static void
vegas_cong_signal(struct cc_var * ccv,ccsignal_t signal_type)206f74352fbSRichard Scheffenegger vegas_cong_signal(struct cc_var *ccv, ccsignal_t signal_type)
2071d4ed791SLawrence Stewart {
2081d4ed791SLawrence Stewart struct vegas *vegas_data;
2091d4ed791SLawrence Stewart int presignalrecov;
210*22dcc812SRichard Scheffenegger uint32_t mss = tcp_fixed_maxseg(ccv->tp);
2111d4ed791SLawrence Stewart
2121d4ed791SLawrence Stewart vegas_data = ccv->cc_data;
2131d4ed791SLawrence Stewart
2141d4ed791SLawrence Stewart if (IN_RECOVERY(CCV(ccv, t_flags)))
2151d4ed791SLawrence Stewart presignalrecov = 1;
2161d4ed791SLawrence Stewart else
2171d4ed791SLawrence Stewart presignalrecov = 0;
2181d4ed791SLawrence Stewart
219f74352fbSRichard Scheffenegger switch((int)signal_type) {
2201d4ed791SLawrence Stewart case CC_VEGAS_RATE:
2211d4ed791SLawrence Stewart if (!IN_RECOVERY(CCV(ccv, t_flags))) {
222*22dcc812SRichard Scheffenegger CCV(ccv, snd_cwnd) = max(2 * mss,
223*22dcc812SRichard Scheffenegger CCV(ccv, snd_cwnd) - mss);
2241d4ed791SLawrence Stewart if (CCV(ccv, snd_cwnd) < CCV(ccv, snd_ssthresh))
2251d4ed791SLawrence Stewart /* Exit slow start. */
2261d4ed791SLawrence Stewart CCV(ccv, snd_ssthresh) = CCV(ccv, snd_cwnd);
2271d4ed791SLawrence Stewart }
2281d4ed791SLawrence Stewart break;
2291d4ed791SLawrence Stewart
2301d4ed791SLawrence Stewart default:
231b8d60729SRandall Stewart newreno_cc_cong_signal(ccv, signal_type);
232f74352fbSRichard Scheffenegger break;
2331d4ed791SLawrence Stewart }
2341d4ed791SLawrence Stewart
2351d4ed791SLawrence Stewart if (IN_RECOVERY(CCV(ccv, t_flags)) && !presignalrecov)
2361d4ed791SLawrence Stewart vegas_data->slow_start_toggle =
2371d4ed791SLawrence Stewart (CCV(ccv, snd_cwnd) < CCV(ccv, snd_ssthresh)) ? 1 : 0;
2381d4ed791SLawrence Stewart }
2391d4ed791SLawrence Stewart
2401d4ed791SLawrence Stewart static void
vegas_conn_init(struct cc_var * ccv)2411d4ed791SLawrence Stewart vegas_conn_init(struct cc_var *ccv)
2421d4ed791SLawrence Stewart {
2431d4ed791SLawrence Stewart struct vegas *vegas_data;
2441d4ed791SLawrence Stewart
2451d4ed791SLawrence Stewart vegas_data = ccv->cc_data;
2461d4ed791SLawrence Stewart vegas_data->slow_start_toggle = 1;
2471d4ed791SLawrence Stewart }
2481d4ed791SLawrence Stewart
2491d4ed791SLawrence Stewart static int
vegas_mod_init(void)2501d4ed791SLawrence Stewart vegas_mod_init(void)
2511d4ed791SLawrence Stewart {
2521d4ed791SLawrence Stewart ertt_id = khelp_get_id("ertt");
2531d4ed791SLawrence Stewart if (ertt_id <= 0) {
2541d4ed791SLawrence Stewart printf("%s: h_ertt module not found\n", __func__);
2551d4ed791SLawrence Stewart return (ENOENT);
2561d4ed791SLawrence Stewart }
2571d4ed791SLawrence Stewart return (0);
2581d4ed791SLawrence Stewart }
2591d4ed791SLawrence Stewart
2601d4ed791SLawrence Stewart static int
vegas_alpha_handler(SYSCTL_HANDLER_ARGS)2611d4ed791SLawrence Stewart vegas_alpha_handler(SYSCTL_HANDLER_ARGS)
2621d4ed791SLawrence Stewart {
2631d4ed791SLawrence Stewart int error;
2641d4ed791SLawrence Stewart uint32_t new;
2651d4ed791SLawrence Stewart
2661d4ed791SLawrence Stewart new = V_vegas_alpha;
2671d4ed791SLawrence Stewart error = sysctl_handle_int(oidp, &new, 0, req);
2681d4ed791SLawrence Stewart if (error == 0 && req->newptr != NULL) {
269855acb84SBrooks Davis if (new == 0 || new > V_vegas_beta)
2701d4ed791SLawrence Stewart error = EINVAL;
2711d4ed791SLawrence Stewart else
2721d4ed791SLawrence Stewart V_vegas_alpha = new;
2731d4ed791SLawrence Stewart }
2741d4ed791SLawrence Stewart
2751d4ed791SLawrence Stewart return (error);
2761d4ed791SLawrence Stewart }
2771d4ed791SLawrence Stewart
2781d4ed791SLawrence Stewart static int
vegas_beta_handler(SYSCTL_HANDLER_ARGS)2791d4ed791SLawrence Stewart vegas_beta_handler(SYSCTL_HANDLER_ARGS)
2801d4ed791SLawrence Stewart {
2811d4ed791SLawrence Stewart int error;
2821d4ed791SLawrence Stewart uint32_t new;
2831d4ed791SLawrence Stewart
2841d4ed791SLawrence Stewart new = V_vegas_beta;
2851d4ed791SLawrence Stewart error = sysctl_handle_int(oidp, &new, 0, req);
2861d4ed791SLawrence Stewart if (error == 0 && req->newptr != NULL) {
287855acb84SBrooks Davis if (new == 0 || new < V_vegas_alpha)
2881d4ed791SLawrence Stewart error = EINVAL;
2891d4ed791SLawrence Stewart else
2901d4ed791SLawrence Stewart V_vegas_beta = new;
2911d4ed791SLawrence Stewart }
2921d4ed791SLawrence Stewart
2931d4ed791SLawrence Stewart return (error);
2941d4ed791SLawrence Stewart }
2951d4ed791SLawrence Stewart
2961d4ed791SLawrence Stewart SYSCTL_DECL(_net_inet_tcp_cc_vegas);
2977029da5cSPawel Biernacki SYSCTL_NODE(_net_inet_tcp_cc, OID_AUTO, vegas,
2987029da5cSPawel Biernacki CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
2991d4ed791SLawrence Stewart "Vegas related settings");
3001d4ed791SLawrence Stewart
3016df8a710SGleb Smirnoff SYSCTL_PROC(_net_inet_tcp_cc_vegas, OID_AUTO, alpha,
3027029da5cSPawel Biernacki CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
3036df8a710SGleb Smirnoff &VNET_NAME(vegas_alpha), 1, &vegas_alpha_handler, "IU",
3046df8a710SGleb Smirnoff "vegas alpha, specified as number of \"buffers\" (0 < alpha < beta)");
3051d4ed791SLawrence Stewart
3066df8a710SGleb Smirnoff SYSCTL_PROC(_net_inet_tcp_cc_vegas, OID_AUTO, beta,
3077029da5cSPawel Biernacki CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
3086df8a710SGleb Smirnoff &VNET_NAME(vegas_beta), 3, &vegas_beta_handler, "IU",
3096df8a710SGleb Smirnoff "vegas beta, specified as number of \"buffers\" (0 < alpha < beta)");
3101d4ed791SLawrence Stewart
3111d4ed791SLawrence Stewart DECLARE_CC_MODULE(vegas, &vegas_cc_algo);
312b8d60729SRandall Stewart MODULE_VERSION(vegas, 2);
3131d4ed791SLawrence Stewart MODULE_DEPEND(vegas, ertt, 1, 1, 1);
314