xref: /freebsd/sys/netinet/cc/cc.c (revision a8d2bccb87d0738c91f7e6a080375ae276e4c7d5)
1dbc42409SLawrence Stewart /*-
24d846d26SWarner Losh  * SPDX-License-Identifier: BSD-2-Clause
3fe267a55SPedro F. Giffuni  *
4dbc42409SLawrence Stewart  * Copyright (c) 2007-2008
5dbc42409SLawrence Stewart  *	Swinburne University of Technology, Melbourne, Australia.
6dbc42409SLawrence Stewart  * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
7dbc42409SLawrence Stewart  * Copyright (c) 2010 The FreeBSD Foundation
8dbc42409SLawrence Stewart  * All rights reserved.
9dbc42409SLawrence Stewart  *
10dbc42409SLawrence Stewart  * This software was developed at the Centre for Advanced Internet
11891b8ed4SLawrence Stewart  * Architectures, Swinburne University of Technology, by Lawrence Stewart and
12891b8ed4SLawrence Stewart  * James Healy, made possible in part by a grant from the Cisco University
13891b8ed4SLawrence Stewart  * Research Program Fund at Community Foundation Silicon Valley.
14dbc42409SLawrence Stewart  *
15dbc42409SLawrence Stewart  * Portions of this software were developed at the Centre for Advanced
16dbc42409SLawrence Stewart  * Internet Architectures, Swinburne University of Technology, Melbourne,
17dbc42409SLawrence Stewart  * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
18dbc42409SLawrence Stewart  *
19dbc42409SLawrence Stewart  * Redistribution and use in source and binary forms, with or without
20dbc42409SLawrence Stewart  * modification, are permitted provided that the following conditions
21dbc42409SLawrence Stewart  * are met:
22dbc42409SLawrence Stewart  * 1. Redistributions of source code must retain the above copyright
23dbc42409SLawrence Stewart  *    notice, this list of conditions and the following disclaimer.
24dbc42409SLawrence Stewart  * 2. Redistributions in binary form must reproduce the above copyright
25dbc42409SLawrence Stewart  *    notice, this list of conditions and the following disclaimer in the
26dbc42409SLawrence Stewart  *    documentation and/or other materials provided with the distribution.
27dbc42409SLawrence Stewart  *
28dbc42409SLawrence Stewart  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
29dbc42409SLawrence Stewart  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30dbc42409SLawrence Stewart  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31dbc42409SLawrence Stewart  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
32dbc42409SLawrence Stewart  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33dbc42409SLawrence Stewart  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34dbc42409SLawrence Stewart  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35dbc42409SLawrence Stewart  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36dbc42409SLawrence Stewart  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37dbc42409SLawrence Stewart  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38dbc42409SLawrence Stewart  * SUCH DAMAGE.
39dbc42409SLawrence Stewart  */
40dbc42409SLawrence Stewart 
41dbc42409SLawrence Stewart /*
42dbc42409SLawrence Stewart  * This software was first released in 2007 by James Healy and Lawrence Stewart
43891b8ed4SLawrence Stewart  * whilst working on the NewTCP research project at Swinburne University of
44891b8ed4SLawrence Stewart  * Technology's Centre for Advanced Internet Architectures, Melbourne,
45891b8ed4SLawrence Stewart  * Australia, which was made possible in part by a grant from the Cisco
46891b8ed4SLawrence Stewart  * University Research Program Fund at Community Foundation Silicon Valley.
47891b8ed4SLawrence Stewart  * More details are available at:
48dbc42409SLawrence Stewart  *   http://caia.swin.edu.au/urp/newtcp/
49dbc42409SLawrence Stewart  */
50dbc42409SLawrence Stewart 
51dbc42409SLawrence Stewart #include <sys/cdefs.h>
52b8d60729SRandall Stewart #include <opt_cc.h>
53dbc42409SLawrence Stewart #include <sys/param.h>
54dbc42409SLawrence Stewart #include <sys/kernel.h>
55dbc42409SLawrence Stewart #include <sys/libkern.h>
56dbc42409SLawrence Stewart #include <sys/lock.h>
57dbc42409SLawrence Stewart #include <sys/malloc.h>
58dbc42409SLawrence Stewart #include <sys/module.h>
59dbc42409SLawrence Stewart #include <sys/mutex.h>
60dbc42409SLawrence Stewart #include <sys/queue.h>
61dbc42409SLawrence Stewart #include <sys/rwlock.h>
62dbc42409SLawrence Stewart #include <sys/sbuf.h>
63dbc42409SLawrence Stewart #include <sys/socket.h>
64dbc42409SLawrence Stewart #include <sys/socketvar.h>
65dbc42409SLawrence Stewart #include <sys/sysctl.h>
66dbc42409SLawrence Stewart 
67b66d74c1SGleb Smirnoff #include <net/vnet.h>
68dbc42409SLawrence Stewart 
69dbc42409SLawrence Stewart #include <netinet/in.h>
70dbc42409SLawrence Stewart #include <netinet/in_pcb.h>
712de3e790SGleb Smirnoff #include <netinet/tcp.h>
72b8d60729SRandall Stewart #include <netinet/tcp_seq.h>
73dbc42409SLawrence Stewart #include <netinet/tcp_var.h>
74b8d60729SRandall Stewart #include <netinet/tcp_log_buf.h>
75b8d60729SRandall Stewart #include <netinet/tcp_hpts.h>
764644fda3SGleb Smirnoff #include <netinet/cc/cc.h>
77dbc42409SLawrence Stewart #include <netinet/cc/cc_module.h>
78dbc42409SLawrence Stewart 
797e3c9ec9SWarner Losh /*
807e3c9ec9SWarner Losh  * Have a sane default if no CC_DEFAULT is specified in the kernel config file.
817e3c9ec9SWarner Losh  */
827e3c9ec9SWarner Losh #ifndef CC_DEFAULT
83bb1d472dSRichard Scheffenegger #define CC_DEFAULT "cubic"
847e3c9ec9SWarner Losh #endif
857e3c9ec9SWarner Losh 
86a9696510SRandall Stewart uint32_t hystart_minrtt_thresh = 4000;
87a9696510SRandall Stewart uint32_t hystart_maxrtt_thresh = 16000;
88a9696510SRandall Stewart uint32_t hystart_n_rttsamples = 8;
89a9696510SRandall Stewart uint32_t hystart_css_growth_div = 4;
90a9696510SRandall Stewart uint32_t hystart_css_rounds = 5;
91a9696510SRandall Stewart uint32_t hystart_bblogs = 0;
92a9696510SRandall Stewart 
93b8d60729SRandall Stewart MALLOC_DEFINE(M_CC_MEM, "CC Mem", "Congestion Control State memory");
94b8d60729SRandall Stewart 
/*
 * List of available cc algorithms on the current system. The default
 * algorithm is not determined by list position; it is tracked per vnet
 * in default_cc_ptr (see cc_register_algo() and cc_default_algo()).
 */
99dbc42409SLawrence Stewart struct cc_head cc_list = STAILQ_HEAD_INITIALIZER(cc_list);
100dbc42409SLawrence Stewart 
/* Protects the cc_list STAILQ. */
102dbc42409SLawrence Stewart struct rwlock cc_list_lock;
103dbc42409SLawrence Stewart 
104b8d60729SRandall Stewart VNET_DEFINE(struct cc_algo *, default_cc_ptr) = NULL;
105b8d60729SRandall Stewart 
106b8d60729SRandall Stewart VNET_DEFINE(uint32_t, newreno_beta) = 50;
107b8d60729SRandall Stewart #define V_newreno_beta VNET(newreno_beta)
1080fdc2472SMichael Tuexen VNET_DEFINE(uint32_t, newreno_beta_ecn) = 80;
109dbc42409SLawrence Stewart 
110ea9017fbSRandall Stewart void
cc_refer(struct cc_algo * algo)111ea9017fbSRandall Stewart cc_refer(struct cc_algo *algo)
112ea9017fbSRandall Stewart {
113ea9017fbSRandall Stewart 	CC_LIST_LOCK_ASSERT();
114ea9017fbSRandall Stewart 	refcount_acquire(&algo->cc_refcount);
115ea9017fbSRandall Stewart }
116ea9017fbSRandall Stewart 
117ea9017fbSRandall Stewart void
cc_release(struct cc_algo * algo)118ea9017fbSRandall Stewart cc_release(struct cc_algo *algo)
119ea9017fbSRandall Stewart {
120ea9017fbSRandall Stewart 	CC_LIST_LOCK_ASSERT();
121ea9017fbSRandall Stewart 	refcount_release(&algo->cc_refcount);
122ea9017fbSRandall Stewart }
123ea9017fbSRandall Stewart 
124ea9017fbSRandall Stewart 
125ea9017fbSRandall Stewart void
cc_attach(struct tcpcb * tp,struct cc_algo * algo)126ea9017fbSRandall Stewart cc_attach(struct tcpcb *tp, struct cc_algo *algo)
127ea9017fbSRandall Stewart {
128ea9017fbSRandall Stewart 	/*
129ea9017fbSRandall Stewart 	 * Attach the tcpcb to the algorithm.
130ea9017fbSRandall Stewart 	 */
131ea9017fbSRandall Stewart 	CC_LIST_RLOCK();
132ea9017fbSRandall Stewart 	CC_ALGO(tp) = algo;
133ea9017fbSRandall Stewart 	cc_refer(algo);
134ea9017fbSRandall Stewart 	CC_LIST_RUNLOCK();
135ea9017fbSRandall Stewart }
136ea9017fbSRandall Stewart 
137ea9017fbSRandall Stewart void
cc_detach(struct tcpcb * tp)138ea9017fbSRandall Stewart cc_detach(struct tcpcb *tp)
139ea9017fbSRandall Stewart {
140ea9017fbSRandall Stewart 	struct cc_algo *algo;
141ea9017fbSRandall Stewart 
142ea9017fbSRandall Stewart 	CC_LIST_RLOCK();
143ea9017fbSRandall Stewart 	algo = CC_ALGO(tp);
144ea9017fbSRandall Stewart 	CC_ALGO(tp) = NULL;
145ea9017fbSRandall Stewart 	cc_release(algo);
146ea9017fbSRandall Stewart 	CC_LIST_RUNLOCK();
147ea9017fbSRandall Stewart }
148ea9017fbSRandall Stewart 
149dbc42409SLawrence Stewart /*
150dbc42409SLawrence Stewart  * Sysctl handler to show and change the default CC algorithm.
151dbc42409SLawrence Stewart  */
152dbc42409SLawrence Stewart static int
cc_default_algo(SYSCTL_HANDLER_ARGS)153dbc42409SLawrence Stewart cc_default_algo(SYSCTL_HANDLER_ARGS)
154dbc42409SLawrence Stewart {
155ebf92e86SLawrence Stewart 	char default_cc[TCP_CA_NAME_MAX];
156dbc42409SLawrence Stewart 	struct cc_algo *funcs;
1570e1152fcSHans Petter Selasky 	int error;
158dbc42409SLawrence Stewart 
1590e1152fcSHans Petter Selasky 	/* Get the current default: */
160dbc42409SLawrence Stewart 	CC_LIST_RLOCK();
161b8d60729SRandall Stewart 	if (CC_DEFAULT_ALGO() != NULL)
162b8d60729SRandall Stewart 		strlcpy(default_cc, CC_DEFAULT_ALGO()->name, sizeof(default_cc));
163b8d60729SRandall Stewart 	else
164b8d60729SRandall Stewart 		memset(default_cc, 0, TCP_CA_NAME_MAX);
165dbc42409SLawrence Stewart 	CC_LIST_RUNLOCK();
1660e1152fcSHans Petter Selasky 
1670e1152fcSHans Petter Selasky 	error = sysctl_handle_string(oidp, default_cc, sizeof(default_cc), req);
1680e1152fcSHans Petter Selasky 
1690e1152fcSHans Petter Selasky 	/* Check for error or no change */
1700e1152fcSHans Petter Selasky 	if (error != 0 || req->newptr == NULL)
1710e1152fcSHans Petter Selasky 		goto done;
1720e1152fcSHans Petter Selasky 
1730e1152fcSHans Petter Selasky 	error = ESRCH;
174dbc42409SLawrence Stewart 	/* Find algo with specified name and set it to default. */
17578b01840SLawrence Stewart 	CC_LIST_RLOCK();
176dbc42409SLawrence Stewart 	STAILQ_FOREACH(funcs, &cc_list, entries) {
1770e1152fcSHans Petter Selasky 		if (strncmp(default_cc, funcs->name, sizeof(default_cc)))
17860a945f9SHans Petter Selasky 			continue;
179ea9017fbSRandall Stewart 		if (funcs->flags & CC_MODULE_BEING_REMOVED) {
180ea9017fbSRandall Stewart 			/* Its being removed, its not eligible */
181ea9017fbSRandall Stewart 			continue;
182ea9017fbSRandall Stewart 		}
18378b01840SLawrence Stewart 		V_default_cc_ptr = funcs;
1840e1152fcSHans Petter Selasky 		error = 0;
1850e1152fcSHans Petter Selasky 		break;
186dbc42409SLawrence Stewart 	}
18778b01840SLawrence Stewart 	CC_LIST_RUNLOCK();
1880e1152fcSHans Petter Selasky done:
1890e1152fcSHans Petter Selasky 	return (error);
190dbc42409SLawrence Stewart }
191dbc42409SLawrence Stewart 
192dbc42409SLawrence Stewart /*
193dbc42409SLawrence Stewart  * Sysctl handler to display the list of available CC algorithms.
194dbc42409SLawrence Stewart  */
195dbc42409SLawrence Stewart static int
cc_list_available(SYSCTL_HANDLER_ARGS)196dbc42409SLawrence Stewart cc_list_available(SYSCTL_HANDLER_ARGS)
197dbc42409SLawrence Stewart {
198dbc42409SLawrence Stewart 	struct cc_algo *algo;
199ea9017fbSRandall Stewart 	int error, nalgos;
200ea9017fbSRandall Stewart 	int linesz;
201ea9017fbSRandall Stewart 	char *buffer, *cp;
202ea9017fbSRandall Stewart 	size_t bufsz, outsz;
203dbc42409SLawrence Stewart 
204ea9017fbSRandall Stewart 	error = nalgos = 0;
205a66ac850SLawrence Stewart 	CC_LIST_RLOCK();
206a66ac850SLawrence Stewart 	STAILQ_FOREACH(algo, &cc_list, entries) {
207a66ac850SLawrence Stewart 		nalgos++;
208a66ac850SLawrence Stewart 	}
209a66ac850SLawrence Stewart 	CC_LIST_RUNLOCK();
210b8d60729SRandall Stewart 	if (nalgos == 0) {
211b8d60729SRandall Stewart 		return (ENOENT);
212b8d60729SRandall Stewart 	}
213ea9017fbSRandall Stewart 	bufsz = (nalgos+2) * ((TCP_CA_NAME_MAX + 13) + 1);
214ea9017fbSRandall Stewart 	buffer = malloc(bufsz, M_TEMP, M_WAITOK);
215ea9017fbSRandall Stewart 	cp = buffer;
216dbc42409SLawrence Stewart 
217ea9017fbSRandall Stewart 	linesz = snprintf(cp, bufsz, "\n%-16s%c %s\n", "CCmod", 'D',
218ea9017fbSRandall Stewart 	    "PCB count");
219ea9017fbSRandall Stewart 	cp += linesz;
220ea9017fbSRandall Stewart 	bufsz -= linesz;
221ea9017fbSRandall Stewart 	outsz = linesz;
222dbc42409SLawrence Stewart 	CC_LIST_RLOCK();
223dbc42409SLawrence Stewart 	STAILQ_FOREACH(algo, &cc_list, entries) {
224ea9017fbSRandall Stewart 		linesz = snprintf(cp, bufsz, "%-16s%c %u\n",
225ea9017fbSRandall Stewart 		    algo->name,
226ea9017fbSRandall Stewart 		    (algo == CC_DEFAULT_ALGO()) ? '*' : ' ',
227ea9017fbSRandall Stewart 		    algo->cc_refcount);
228ea9017fbSRandall Stewart 		if (linesz >= bufsz) {
229ea9017fbSRandall Stewart 			error = EOVERFLOW;
230dbc42409SLawrence Stewart 			break;
231a66ac850SLawrence Stewart 		}
232ea9017fbSRandall Stewart 		cp += linesz;
233ea9017fbSRandall Stewart 		bufsz -= linesz;
234ea9017fbSRandall Stewart 		outsz += linesz;
235dbc42409SLawrence Stewart 	}
236dbc42409SLawrence Stewart 	CC_LIST_RUNLOCK();
237ea9017fbSRandall Stewart 	if (error == 0)
238ea9017fbSRandall Stewart 		error = sysctl_handle_string(oidp, buffer, outsz + 1, req);
239ea9017fbSRandall Stewart 	free(buffer, M_TEMP);
240ea9017fbSRandall Stewart 	return (error);
241dbc42409SLawrence Stewart }
242dbc42409SLawrence Stewart 
243dbc42409SLawrence Stewart /*
244b8d60729SRandall Stewart  * Return the number of times a proposed removal_cc is
245b8d60729SRandall Stewart  * being used as the default.
24678b01840SLawrence Stewart  */
247b8d60729SRandall Stewart static int
cc_check_default(struct cc_algo * remove_cc)248b8d60729SRandall Stewart cc_check_default(struct cc_algo *remove_cc)
24978b01840SLawrence Stewart {
250b8d60729SRandall Stewart 	int cnt = 0;
25178b01840SLawrence Stewart 	VNET_ITERATOR_DECL(vnet_iter);
25278b01840SLawrence Stewart 
25378b01840SLawrence Stewart 	CC_LIST_LOCK_ASSERT();
25478b01840SLawrence Stewart 
25578b01840SLawrence Stewart 	VNET_LIST_RLOCK_NOSLEEP();
25678b01840SLawrence Stewart 	VNET_FOREACH(vnet_iter) {
25778b01840SLawrence Stewart 		CURVNET_SET(vnet_iter);
258b8d60729SRandall Stewart 		if ((CC_DEFAULT_ALGO() != NULL) &&
259b8d60729SRandall Stewart 		    strncmp(CC_DEFAULT_ALGO()->name,
260b8d60729SRandall Stewart 			    remove_cc->name,
261b8d60729SRandall Stewart 			    TCP_CA_NAME_MAX) == 0) {
262b8d60729SRandall Stewart 			cnt++;
263b8d60729SRandall Stewart 		}
26478b01840SLawrence Stewart 		CURVNET_RESTORE();
26578b01840SLawrence Stewart 	}
26678b01840SLawrence Stewart 	VNET_LIST_RUNLOCK_NOSLEEP();
267b8d60729SRandall Stewart 	return (cnt);
26878b01840SLawrence Stewart }
26978b01840SLawrence Stewart 
27078b01840SLawrence Stewart /*
271dbc42409SLawrence Stewart  * Initialise CC subsystem on system boot.
272dbc42409SLawrence Stewart  */
27314f57a8bSLawrence Stewart static void
cc_init(void)27414f57a8bSLawrence Stewart cc_init(void)
275dbc42409SLawrence Stewart {
276dbc42409SLawrence Stewart 	CC_LIST_LOCK_INIT();
277dbc42409SLawrence Stewart 	STAILQ_INIT(&cc_list);
278dbc42409SLawrence Stewart }
279dbc42409SLawrence Stewart 
280dbc42409SLawrence Stewart /*
281dbc42409SLawrence Stewart  * Returns non-zero on success, 0 on failure.
282dbc42409SLawrence Stewart  */
283ccdfd621SMichael Tuexen static int
cc_deregister_algo_locked(struct cc_algo * remove_cc)284ccdfd621SMichael Tuexen cc_deregister_algo_locked(struct cc_algo *remove_cc)
285dbc42409SLawrence Stewart {
286ea9017fbSRandall Stewart 	struct cc_algo *funcs;
287ea9017fbSRandall Stewart 	int found = 0;
288dbc42409SLawrence Stewart 
289ea9017fbSRandall Stewart 	/* This is unlikely to fail */
290ea9017fbSRandall Stewart 	STAILQ_FOREACH(funcs, &cc_list, entries) {
291ea9017fbSRandall Stewart 		if (funcs == remove_cc)
292ea9017fbSRandall Stewart 			found = 1;
293ea9017fbSRandall Stewart 	}
294ea9017fbSRandall Stewart 	if (found == 0) {
295ea9017fbSRandall Stewart 		/* Nothing to remove? */
296ea9017fbSRandall Stewart 		return (ENOENT);
297ea9017fbSRandall Stewart 	}
298ea9017fbSRandall Stewart 	/* We assert it should have been MOD_QUIESCE'd */
299ea9017fbSRandall Stewart 	KASSERT((remove_cc->flags & CC_MODULE_BEING_REMOVED),
300ea9017fbSRandall Stewart 		("remove_cc:%p does not have CC_MODULE_BEING_REMOVED flag", remove_cc));
301b8d60729SRandall Stewart 	if (cc_check_default(remove_cc)) {
302db0ac6deSCy Schubert 		return(EBUSY);
303b8d60729SRandall Stewart 	}
304ea9017fbSRandall Stewart 	if (remove_cc->cc_refcount != 0) {
305ea9017fbSRandall Stewart 		return (EBUSY);
306b8d60729SRandall Stewart 	}
307ccdfd621SMichael Tuexen 	/* Remove algo from cc_list so that new connections can't use it. */
308ea9017fbSRandall Stewart 	STAILQ_REMOVE(&cc_list, remove_cc, cc_algo, entries);
309d4290f7eSMichael Tuexen 	return (0);
310b1fe92b2SMichael Tuexen }
311b1fe92b2SMichael Tuexen 
/*
 * Deregister a CC algorithm: take the cc_list write lock and let
 * cc_deregister_algo_locked() do the work.
 *
 * Returns 0 on success, non-zero (the errno value produced by
 * cc_deregister_algo_locked()) on failure.
 */
int
cc_deregister_algo(struct cc_algo *remove_cc)
{
	int ret;

	CC_LIST_WLOCK();
	ret = cc_deregister_algo_locked(remove_cc);
	CC_LIST_WUNLOCK();
	return (ret);
}
325ccdfd621SMichael Tuexen 
326ccdfd621SMichael Tuexen /*
327dbc42409SLawrence Stewart  * Returns 0 on success, non-zero on failure.
328dbc42409SLawrence Stewart  */
329dbc42409SLawrence Stewart int
cc_register_algo(struct cc_algo * add_cc)330dbc42409SLawrence Stewart cc_register_algo(struct cc_algo *add_cc)
331dbc42409SLawrence Stewart {
332dbc42409SLawrence Stewart 	struct cc_algo *funcs;
333dbc42409SLawrence Stewart 	int err;
334dbc42409SLawrence Stewart 
335dbc42409SLawrence Stewart 	err = 0;
336dbc42409SLawrence Stewart 
337dbc42409SLawrence Stewart 	/*
338dbc42409SLawrence Stewart 	 * Iterate over list of registered CC algorithms and make sure
339dbc42409SLawrence Stewart 	 * we're not trying to add a duplicate.
340dbc42409SLawrence Stewart 	 */
341dbc42409SLawrence Stewart 	CC_LIST_WLOCK();
342dbc42409SLawrence Stewart 	STAILQ_FOREACH(funcs, &cc_list, entries) {
343b8d60729SRandall Stewart 		if (funcs == add_cc ||
344b8d60729SRandall Stewart 		    strncmp(funcs->name, add_cc->name,
345b8d60729SRandall Stewart 			    TCP_CA_NAME_MAX) == 0) {
346dbc42409SLawrence Stewart 			err = EEXIST;
347b8d60729SRandall Stewart 			break;
348dbc42409SLawrence Stewart 		}
349b8d60729SRandall Stewart 	}
350ea9017fbSRandall Stewart 	/* Init its reference count */
351ea9017fbSRandall Stewart 	if (err == 0)
352ea9017fbSRandall Stewart 		refcount_init(&add_cc->cc_refcount, 0);
353b8d60729SRandall Stewart 	/*
354b8d60729SRandall Stewart 	 * The first loaded congestion control module will become
355b8d60729SRandall Stewart 	 * the default until we find the "CC_DEFAULT" defined in
356b8d60729SRandall Stewart 	 * the config (if we do).
357b8d60729SRandall Stewart 	 */
358b8d60729SRandall Stewart 	if (!err) {
359dbc42409SLawrence Stewart 		STAILQ_INSERT_TAIL(&cc_list, add_cc, entries);
360b8d60729SRandall Stewart 		if (strcmp(add_cc->name, CC_DEFAULT) == 0) {
361b8d60729SRandall Stewart 			V_default_cc_ptr = add_cc;
362b8d60729SRandall Stewart 		} else if (V_default_cc_ptr == NULL) {
363b8d60729SRandall Stewart 			V_default_cc_ptr = add_cc;
364b8d60729SRandall Stewart 		}
365b8d60729SRandall Stewart 	}
366dbc42409SLawrence Stewart 	CC_LIST_WUNLOCK();
367dbc42409SLawrence Stewart 
368dbc42409SLawrence Stewart 	return (err);
369dbc42409SLawrence Stewart }
370dbc42409SLawrence Stewart 
371034a9240SMark Johnston static void
vnet_cc_sysinit(void * arg)372034a9240SMark Johnston vnet_cc_sysinit(void *arg)
373034a9240SMark Johnston {
374034a9240SMark Johnston 	struct cc_algo *cc;
375034a9240SMark Johnston 
376034a9240SMark Johnston 	if (IS_DEFAULT_VNET(curvnet))
377034a9240SMark Johnston 		return;
378034a9240SMark Johnston 
379034a9240SMark Johnston 	CURVNET_SET(vnet0);
380034a9240SMark Johnston 	cc = V_default_cc_ptr;
381034a9240SMark Johnston 	CURVNET_RESTORE();
382034a9240SMark Johnston 
383034a9240SMark Johnston 	V_default_cc_ptr = cc;
384034a9240SMark Johnston }
385034a9240SMark Johnston VNET_SYSINIT(vnet_cc_sysinit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
386034a9240SMark Johnston     vnet_cc_sysinit, NULL);
387034a9240SMark Johnston 
388dbc42409SLawrence Stewart /*
389b8d60729SRandall Stewart  * Perform any necessary tasks before we exit congestion recovery.
390b8d60729SRandall Stewart  */
391b8d60729SRandall Stewart void
newreno_cc_post_recovery(struct cc_var * ccv)392b8d60729SRandall Stewart newreno_cc_post_recovery(struct cc_var *ccv)
393b8d60729SRandall Stewart {
394b8d60729SRandall Stewart 	int pipe;
395*22dcc812SRichard Scheffenegger 	uint32_t mss = tcp_fixed_maxseg(ccv->tp);
396b8d60729SRandall Stewart 
397b8d60729SRandall Stewart 	if (IN_FASTRECOVERY(CCV(ccv, t_flags))) {
398b8d60729SRandall Stewart 		/*
399b8d60729SRandall Stewart 		 * Fast recovery will conclude after returning from this
400b8d60729SRandall Stewart 		 * function. Window inflation should have left us with
401b8d60729SRandall Stewart 		 * approximately snd_ssthresh outstanding data. But in case we
402b8d60729SRandall Stewart 		 * would be inclined to send a burst, better to do it via the
403b8d60729SRandall Stewart 		 * slow start mechanism.
404b8d60729SRandall Stewart 		 */
40500d3b744SMichael Tuexen 		pipe = tcp_compute_pipe(ccv->tp);
406b8d60729SRandall Stewart 		if (pipe < CCV(ccv, snd_ssthresh))
407b8d60729SRandall Stewart 			/*
408b8d60729SRandall Stewart 			 * Ensure that cwnd does not collapse to 1 MSS under
409b4fbc855SGordon Bergling 			 * adverse conditions. Implements RFC6582
410b8d60729SRandall Stewart 			 */
411*22dcc812SRichard Scheffenegger 			CCV(ccv, snd_cwnd) = max(pipe, mss) + mss;
412b8d60729SRandall Stewart 		else
413b8d60729SRandall Stewart 			CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh);
414b8d60729SRandall Stewart 	}
415b8d60729SRandall Stewart }
416b8d60729SRandall Stewart 
417b8d60729SRandall Stewart void
newreno_cc_after_idle(struct cc_var * ccv)418b8d60729SRandall Stewart newreno_cc_after_idle(struct cc_var *ccv)
419b8d60729SRandall Stewart {
420b8d60729SRandall Stewart 	uint32_t rw;
421b8d60729SRandall Stewart 	/*
422b8d60729SRandall Stewart 	 * If we've been idle for more than one retransmit timeout the old
423b8d60729SRandall Stewart 	 * congestion window is no longer current and we have to reduce it to
424b8d60729SRandall Stewart 	 * the restart window before we can transmit again.
425b8d60729SRandall Stewart 	 *
426b8d60729SRandall Stewart 	 * The restart window is the initial window or the last CWND, whichever
427b8d60729SRandall Stewart 	 * is smaller.
428b8d60729SRandall Stewart 	 *
429b8d60729SRandall Stewart 	 * This is done to prevent us from flooding the path with a full CWND at
430b8d60729SRandall Stewart 	 * wirespeed, overloading router and switch buffers along the way.
431b8d60729SRandall Stewart 	 *
432b8d60729SRandall Stewart 	 * See RFC5681 Section 4.1. "Restarting Idle Connections".
433b8d60729SRandall Stewart 	 *
434b8d60729SRandall Stewart 	 * In addition, per RFC2861 Section 2, the ssthresh is set to the
435b8d60729SRandall Stewart 	 * maximum of the former ssthresh or 3/4 of the old cwnd, to
436b8d60729SRandall Stewart 	 * not exit slow-start prematurely.
437b8d60729SRandall Stewart 	 */
43800d3b744SMichael Tuexen 	rw = tcp_compute_initwnd(tcp_fixed_maxseg(ccv->tp));
439b8d60729SRandall Stewart 
440b8d60729SRandall Stewart 	CCV(ccv, snd_ssthresh) = max(CCV(ccv, snd_ssthresh),
441b8d60729SRandall Stewart 	    CCV(ccv, snd_cwnd)-(CCV(ccv, snd_cwnd)>>2));
442b8d60729SRandall Stewart 
443b8d60729SRandall Stewart 	CCV(ccv, snd_cwnd) = min(rw, CCV(ccv, snd_cwnd));
444b8d60729SRandall Stewart }
445b8d60729SRandall Stewart 
446b8d60729SRandall Stewart /*
447b6c137deSCheng Cui  * Get a new congestion window size on a multiplicative decrease event.
448b6c137deSCheng Cui  * */
449b6c137deSCheng Cui u_int
newreno_cc_cwnd_on_multiplicative_decrease(struct cc_var * ccv,uint32_t mss)450b6c137deSCheng Cui newreno_cc_cwnd_on_multiplicative_decrease(struct cc_var *ccv, uint32_t mss)
451b8d60729SRandall Stewart {
452b6c137deSCheng Cui 	uint32_t cwin, factor;
453b8d60729SRandall Stewart 
454b8d60729SRandall Stewart 	cwin = CCV(ccv, snd_cwnd);
455b8d60729SRandall Stewart 	/*
456b8d60729SRandall Stewart 	 * Other TCP congestion controls use newreno_cong_signal(), but
457b8d60729SRandall Stewart 	 * with their own private cc_data. Make sure the cc_data is used
458b8d60729SRandall Stewart 	 * correctly.
459b8d60729SRandall Stewart 	 */
460b8d60729SRandall Stewart 	factor = V_newreno_beta;
461b8d60729SRandall Stewart 
462b6c137deSCheng Cui 	return max(((uint64_t)cwin * (uint64_t)factor) / (100ULL * (uint64_t)mss), 2) * mss;
463b6c137deSCheng Cui }
464b6c137deSCheng Cui 
465b6c137deSCheng Cui /*
466b6c137deSCheng Cui  * Perform any necessary tasks before we enter congestion recovery.
467b6c137deSCheng Cui  */
468b6c137deSCheng Cui void
newreno_cc_cong_signal(struct cc_var * ccv,ccsignal_t type)469b6c137deSCheng Cui newreno_cc_cong_signal(struct cc_var *ccv, ccsignal_t type)
470b6c137deSCheng Cui {
471b6c137deSCheng Cui 	uint32_t cwin, mss, pipe;
472b6c137deSCheng Cui 
473b6c137deSCheng Cui 	mss = tcp_fixed_maxseg(ccv->tp);
474b6c137deSCheng Cui 
475b8d60729SRandall Stewart 	/* Catch algos which mistakenly leak private signal types. */
476b8d60729SRandall Stewart 	KASSERT((type & CC_SIGPRIVMASK) == 0,
477b8d60729SRandall Stewart 	    ("%s: congestion signal type 0x%08x is private\n", __func__, type));
478b8d60729SRandall Stewart 
479b6c137deSCheng Cui 	cwin = newreno_cc_cwnd_on_multiplicative_decrease(ccv, mss);
480b8d60729SRandall Stewart 
481b8d60729SRandall Stewart 	switch (type) {
482b8d60729SRandall Stewart 	case CC_NDUPACK:
483b8d60729SRandall Stewart 		if (!IN_FASTRECOVERY(CCV(ccv, t_flags))) {
484b8d60729SRandall Stewart 			if (!IN_CONGRECOVERY(CCV(ccv, t_flags)))
485b8d60729SRandall Stewart 				CCV(ccv, snd_ssthresh) = cwin;
486b8d60729SRandall Stewart 			ENTER_RECOVERY(CCV(ccv, t_flags));
487b8d60729SRandall Stewart 		}
488b8d60729SRandall Stewart 		break;
489b8d60729SRandall Stewart 	case CC_ECN:
490b8d60729SRandall Stewart 		if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) {
491b8d60729SRandall Stewart 			CCV(ccv, snd_ssthresh) = cwin;
492b8d60729SRandall Stewart 			CCV(ccv, snd_cwnd) = cwin;
493b8d60729SRandall Stewart 			ENTER_CONGRECOVERY(CCV(ccv, t_flags));
494b8d60729SRandall Stewart 		}
495b8d60729SRandall Stewart 		break;
496b8d60729SRandall Stewart 	case CC_RTO:
49732a6df57SRichard Scheffenegger 		if (CCV(ccv, t_rxtshift) == 1) {
49800d3b744SMichael Tuexen 			pipe = tcp_compute_pipe(ccv->tp);
49932a6df57SRichard Scheffenegger 			CCV(ccv, snd_ssthresh) = max(2,
50032a6df57SRichard Scheffenegger 				min(CCV(ccv, snd_wnd), pipe) / 2 / mss) * mss;
50132a6df57SRichard Scheffenegger 		}
502b8d60729SRandall Stewart 		CCV(ccv, snd_cwnd) = mss;
503b8d60729SRandall Stewart 		break;
5048917131eSRichard Scheffenegger 	default:
5058917131eSRichard Scheffenegger 		break;
506b8d60729SRandall Stewart 	}
507b8d60729SRandall Stewart }
508b8d60729SRandall Stewart 
509b6c137deSCheng Cui u_int
newreno_cc_cwnd_in_cong_avoid(struct cc_var * ccv)510b6c137deSCheng Cui newreno_cc_cwnd_in_cong_avoid(struct cc_var *ccv)
511b8d60729SRandall Stewart {
512b8d60729SRandall Stewart 	u_int cw = CCV(ccv, snd_cwnd);
513*22dcc812SRichard Scheffenegger 	u_int incr = tcp_fixed_maxseg(ccv->tp);
514b8d60729SRandall Stewart 
515b6c137deSCheng Cui 	KASSERT(cw > CCV(ccv, snd_ssthresh),
516b6c137deSCheng Cui 		("congestion control state not in congestion avoidance\n"));
517b6c137deSCheng Cui 
518b8d60729SRandall Stewart 	/*
519b8d60729SRandall Stewart 	 * Regular in-order ACK, open the congestion window.
520b6c137deSCheng Cui 	 * The congestion control state we're in is congestion avoidance.
521b8d60729SRandall Stewart 	 *
522b6c137deSCheng Cui 	 * Check if ABC (RFC 3465) is enabled.
523b8d60729SRandall Stewart 	 * cong avoid: cwnd > ssthresh
524b8d60729SRandall Stewart 	 *
525b8d60729SRandall Stewart 	 * cong avoid and ABC (RFC 3465):
526b8d60729SRandall Stewart 	 *   Grow cwnd linearly by maxseg per RTT for each
527b8d60729SRandall Stewart 	 *   cwnd worth of ACKed data.
528b8d60729SRandall Stewart 	 *
529b8d60729SRandall Stewart 	 * cong avoid without ABC (RFC 5681):
530b8d60729SRandall Stewart 	 *   Grow cwnd linearly by approximately maxseg per RTT using
531b8d60729SRandall Stewart 	 *   maxseg^2 / cwnd per ACK as the increment.
532b8d60729SRandall Stewart 	 *   If cwnd > maxseg^2, fix the cwnd increment at 1 byte to
533b8d60729SRandall Stewart 	 *   avoid capping cwnd.
534b8d60729SRandall Stewart 	 */
535b8d60729SRandall Stewart 	if (V_tcp_do_rfc3465) {
536b8d60729SRandall Stewart 		if (ccv->flags & CCF_ABC_SENTAWND)
537b8d60729SRandall Stewart 			ccv->flags &= ~CCF_ABC_SENTAWND;
538b8d60729SRandall Stewart 		else
539b8d60729SRandall Stewart 			incr = 0;
540b8d60729SRandall Stewart 	} else
541b8d60729SRandall Stewart 		incr = max((incr * incr / cw), 1);
542b6c137deSCheng Cui 	/* ABC is on by default, so incr equals 0 frequently. */
543b6c137deSCheng Cui 	if (incr > 0)
544b6c137deSCheng Cui 		return min(cw + incr, TCP_MAXWIN << CCV(ccv, snd_scale));
545b6c137deSCheng Cui 	else
546b6c137deSCheng Cui 		return cw;
547b6c137deSCheng Cui }
548b6c137deSCheng Cui 
549b6c137deSCheng Cui u_int
newreno_cc_cwnd_in_slow_start(struct cc_var * ccv)550b6c137deSCheng Cui newreno_cc_cwnd_in_slow_start(struct cc_var *ccv)
551b6c137deSCheng Cui {
552b6c137deSCheng Cui 	u_int cw = CCV(ccv, snd_cwnd);
553*22dcc812SRichard Scheffenegger 	u_int mss = tcp_fixed_maxseg(ccv->tp);
554*22dcc812SRichard Scheffenegger 	u_int incr = mss;
555b6c137deSCheng Cui 
556b6c137deSCheng Cui 	KASSERT(cw <= CCV(ccv, snd_ssthresh),
557b6c137deSCheng Cui 		("congestion control state not in slow start\n"));
558b6c137deSCheng Cui 
559b6c137deSCheng Cui 	/*
560b6c137deSCheng Cui 	 * Regular in-order ACK, open the congestion window.
561b6c137deSCheng Cui 	 * The congestion control state we're in is slow start.
562b6c137deSCheng Cui 	 *
563b6c137deSCheng Cui 	 * slow start: cwnd <= ssthresh
564b6c137deSCheng Cui 	 *
565b6c137deSCheng Cui 	 * slow start and ABC (RFC 3465):
566b6c137deSCheng Cui 	 *   Grow cwnd exponentially by the amount of data
567b6c137deSCheng Cui 	 *   ACKed capping the max increment per ACK to
568b6c137deSCheng Cui 	 *   (abc_l_var * maxseg) bytes.
569b6c137deSCheng Cui 	 *
570b6c137deSCheng Cui 	 * slow start without ABC (RFC 5681):
571b6c137deSCheng Cui 	 *   Grow cwnd exponentially by maxseg per ACK.
572b6c137deSCheng Cui 	 */
573b6c137deSCheng Cui 	if (V_tcp_do_rfc3465) {
574b8d60729SRandall Stewart 		/*
575b8d60729SRandall Stewart 		 * In slow-start with ABC enabled and no RTO in sight?
576b8d60729SRandall Stewart 		 * (Must not use abc_l_var > 1 if slow starting after
577b8d60729SRandall Stewart 		 * an RTO. On RTO, snd_nxt = snd_una, so the
578b8d60729SRandall Stewart 		 * snd_nxt == snd_max check is sufficient to
579b8d60729SRandall Stewart 		 * handle this).
580b8d60729SRandall Stewart 		 *
581b8d60729SRandall Stewart 		 * XXXLAS: Find a way to signal SS after RTO that
582b8d60729SRandall Stewart 		 * doesn't rely on tcpcb vars.
583b8d60729SRandall Stewart 		 */
584b8d60729SRandall Stewart 		uint16_t abc_val;
585b8d60729SRandall Stewart 
586b8d60729SRandall Stewart 		if (ccv->flags & CCF_USE_LOCAL_ABC)
587b8d60729SRandall Stewart 			abc_val = ccv->labc;
588b8d60729SRandall Stewart 		else
589b8d60729SRandall Stewart 			abc_val = V_tcp_abc_l_var;
590b8d60729SRandall Stewart 		if (CCV(ccv, snd_nxt) == CCV(ccv, snd_max))
591b8d60729SRandall Stewart 			incr = min(ccv->bytes_this_ack,
592*22dcc812SRichard Scheffenegger 			           ccv->nsegs * abc_val * mss);
593b8d60729SRandall Stewart 		else
594*22dcc812SRichard Scheffenegger 			incr = min(ccv->bytes_this_ack, mss);
595b8d60729SRandall Stewart 	}
596b8d60729SRandall Stewart 	/* ABC is on by default, so incr equals 0 frequently. */
597b8d60729SRandall Stewart 	if (incr > 0)
598b6c137deSCheng Cui 		return min(cw + incr, TCP_MAXWIN << CCV(ccv, snd_scale));
599b6c137deSCheng Cui 	else
600b6c137deSCheng Cui 		return cw;
601b6c137deSCheng Cui }
602b6c137deSCheng Cui 
603b6c137deSCheng Cui void
newreno_cc_ack_received(struct cc_var * ccv,ccsignal_t type)604b6c137deSCheng Cui newreno_cc_ack_received(struct cc_var *ccv, ccsignal_t type)
605b6c137deSCheng Cui {
606b6c137deSCheng Cui 	if (type == CC_ACK && !IN_RECOVERY(CCV(ccv, t_flags)) &&
607b6c137deSCheng Cui 	    (ccv->flags & CCF_CWND_LIMITED)) {
608b6c137deSCheng Cui 		if (CCV(ccv, snd_cwnd) > CCV(ccv, snd_ssthresh)) {
609b6c137deSCheng Cui 			CCV(ccv, snd_cwnd) = newreno_cc_cwnd_in_cong_avoid(ccv);
610b6c137deSCheng Cui 		} else {
611b6c137deSCheng Cui 			CCV(ccv, snd_cwnd) = newreno_cc_cwnd_in_slow_start(ccv);
612b6c137deSCheng Cui 		}
613b8d60729SRandall Stewart 	}
614b8d60729SRandall Stewart }
615b8d60729SRandall Stewart 
616ea9017fbSRandall Stewart static int
cc_stop_new_assignments(struct cc_algo * algo)617ea9017fbSRandall Stewart cc_stop_new_assignments(struct cc_algo *algo)
618ea9017fbSRandall Stewart {
619ea9017fbSRandall Stewart 	CC_LIST_WLOCK();
620ea9017fbSRandall Stewart 	if (cc_check_default(algo)) {
621ea9017fbSRandall Stewart 		/* A default cannot be removed */
622ea9017fbSRandall Stewart 		CC_LIST_WUNLOCK();
623ea9017fbSRandall Stewart 		return (EBUSY);
624ea9017fbSRandall Stewart 	}
625ea9017fbSRandall Stewart 	algo->flags |= CC_MODULE_BEING_REMOVED;
626ea9017fbSRandall Stewart 	CC_LIST_WUNLOCK();
627ea9017fbSRandall Stewart 	return (0);
628ea9017fbSRandall Stewart }
629ea9017fbSRandall Stewart 
630b8d60729SRandall Stewart /*
631dbc42409SLawrence Stewart  * Handles kld related events. Returns 0 on success, non-zero on failure.
632dbc42409SLawrence Stewart  */
633dbc42409SLawrence Stewart int
cc_modevent(module_t mod,int event_type,void * data)634dbc42409SLawrence Stewart cc_modevent(module_t mod, int event_type, void *data)
635dbc42409SLawrence Stewart {
636dbc42409SLawrence Stewart 	struct cc_algo *algo;
637dbc42409SLawrence Stewart 	int err;
638dbc42409SLawrence Stewart 
639dbc42409SLawrence Stewart 	err = 0;
640dbc42409SLawrence Stewart 	algo = (struct cc_algo *)data;
641dbc42409SLawrence Stewart 
642dbc42409SLawrence Stewart 	switch(event_type) {
643dbc42409SLawrence Stewart 	case MOD_LOAD:
644b8d60729SRandall Stewart 		if ((algo->cc_data_sz == NULL) && (algo->cb_init != NULL)) {
645b8d60729SRandall Stewart 			/*
646b8d60729SRandall Stewart 			 * A module must have a cc_data_sz function
647b8d60729SRandall Stewart 			 * even if it has no data it should return 0.
648b8d60729SRandall Stewart 			 */
649b8d60729SRandall Stewart 			printf("Module Load Fails, it lacks a cc_data_sz() function but has a cb_init()!\n");
650b8d60729SRandall Stewart 			err = EINVAL;
651b8d60729SRandall Stewart 			break;
652b8d60729SRandall Stewart 		}
653dbc42409SLawrence Stewart 		if (algo->mod_init != NULL)
654dbc42409SLawrence Stewart 			err = algo->mod_init();
655dbc42409SLawrence Stewart 		if (!err)
656dbc42409SLawrence Stewart 			err = cc_register_algo(algo);
657dbc42409SLawrence Stewart 		break;
658dbc42409SLawrence Stewart 
659dbc42409SLawrence Stewart 	case MOD_SHUTDOWN:
660dbc42409SLawrence Stewart 		break;
661ea9017fbSRandall Stewart 	case MOD_QUIESCE:
662ea9017fbSRandall Stewart 		/* Stop any new assigments */
663ea9017fbSRandall Stewart 		err = cc_stop_new_assignments(algo);
664ea9017fbSRandall Stewart 		break;
665ea9017fbSRandall Stewart 	case MOD_UNLOAD:
666ea9017fbSRandall Stewart 		/*
667ea9017fbSRandall Stewart 		 * Deregister and remove the module from the list
668ea9017fbSRandall Stewart 		 */
669ea9017fbSRandall Stewart 		CC_LIST_WLOCK();
670ea9017fbSRandall Stewart 		/* Even with -f we can't unload if its the default */
671ea9017fbSRandall Stewart 		if (cc_check_default(algo)) {
672ea9017fbSRandall Stewart 			/* A default cannot be removed */
673ea9017fbSRandall Stewart 			CC_LIST_WUNLOCK();
674ea9017fbSRandall Stewart 			return (EBUSY);
675ea9017fbSRandall Stewart 		}
676ea9017fbSRandall Stewart 		/*
677ea9017fbSRandall Stewart 		 * If -f was used and users are still attached to
678ea9017fbSRandall Stewart 		 * the algorithm things are going to go boom.
679ea9017fbSRandall Stewart 		 */
680ccdfd621SMichael Tuexen 		err = cc_deregister_algo_locked(algo);
681ccdfd621SMichael Tuexen 		CC_LIST_WUNLOCK();
682ea9017fbSRandall Stewart 		if ((err == 0) && (algo->mod_destroy != NULL)) {
683ea9017fbSRandall Stewart 			algo->mod_destroy();
684ea9017fbSRandall Stewart 		}
685ea9017fbSRandall Stewart 		break;
686dbc42409SLawrence Stewart 	default:
687dbc42409SLawrence Stewart 		err = EINVAL;
688dbc42409SLawrence Stewart 		break;
689dbc42409SLawrence Stewart 	}
690dbc42409SLawrence Stewart 
691dbc42409SLawrence Stewart 	return (err);
692dbc42409SLawrence Stewart }
693dbc42409SLawrence Stewart 
/* Run cc_init() at SI_SUB_PROTO_IFATTACHDOMAIN, ordered SI_ORDER_FIRST. */
SYSINIT(cc, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
    cc_init, NULL);
69514f57a8bSLawrence Stewart 
696dbc42409SLawrence Stewart /* Declare sysctl tree and populate it. */
6977029da5cSPawel Biernacki SYSCTL_NODE(_net_inet_tcp, OID_AUTO, cc, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
698439e76ecSBrad Davis     "Congestion control related settings");
699dbc42409SLawrence Stewart 
7006df8a710SGleb Smirnoff SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, algorithm,
7017029da5cSPawel Biernacki     CTLFLAG_VNET | CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE,
7027029da5cSPawel Biernacki     NULL, 0, cc_default_algo, "A",
7037029da5cSPawel Biernacki     "Default congestion control algorithm");
704dbc42409SLawrence Stewart 
7057029da5cSPawel Biernacki SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, available,
7067029da5cSPawel Biernacki     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
707dbc42409SLawrence Stewart     NULL, 0, cc_list_available, "A",
708439e76ecSBrad Davis     "List available congestion control algorithms");
709370efe5aSLawrence Stewart 
710a9696510SRandall Stewart SYSCTL_NODE(_net_inet_tcp_cc, OID_AUTO, hystartplusplus,
711a9696510SRandall Stewart     CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
712a9696510SRandall Stewart     "New Reno related HyStart++ settings");
713a9696510SRandall Stewart 
714a9696510SRandall Stewart SYSCTL_UINT(_net_inet_tcp_cc_hystartplusplus, OID_AUTO, minrtt_thresh,
715a9696510SRandall Stewart     CTLFLAG_RW,
716a9696510SRandall Stewart     &hystart_minrtt_thresh, 4000,
717a9696510SRandall Stewart    "HyStarts++ minimum RTT thresh used in clamp (in microseconds)");
718a9696510SRandall Stewart 
719a9696510SRandall Stewart SYSCTL_UINT(_net_inet_tcp_cc_hystartplusplus, OID_AUTO, maxrtt_thresh,
720a9696510SRandall Stewart     CTLFLAG_RW,
721a9696510SRandall Stewart     &hystart_maxrtt_thresh, 16000,
722a9696510SRandall Stewart    "HyStarts++ maximum RTT thresh used in clamp (in microseconds)");
723a9696510SRandall Stewart 
724a9696510SRandall Stewart SYSCTL_UINT(_net_inet_tcp_cc_hystartplusplus, OID_AUTO, n_rttsamples,
725a9696510SRandall Stewart     CTLFLAG_RW,
726a9696510SRandall Stewart     &hystart_n_rttsamples, 8,
727a9696510SRandall Stewart    "The number of RTT samples that must be seen to consider HyStart++");
728a9696510SRandall Stewart 
729a9696510SRandall Stewart SYSCTL_UINT(_net_inet_tcp_cc_hystartplusplus, OID_AUTO, css_growth_div,
730a9696510SRandall Stewart     CTLFLAG_RW,
731a9696510SRandall Stewart     &hystart_css_growth_div, 4,
732a9696510SRandall Stewart    "The divisor to the growth when in Hystart++ CSS");
733a9696510SRandall Stewart 
734a9696510SRandall Stewart SYSCTL_UINT(_net_inet_tcp_cc_hystartplusplus, OID_AUTO, css_rounds,
735a9696510SRandall Stewart     CTLFLAG_RW,
736a9696510SRandall Stewart     &hystart_css_rounds, 5,
737a9696510SRandall Stewart    "The number of rounds HyStart++ lasts in CSS before falling to CA");
738a9696510SRandall Stewart 
739a9696510SRandall Stewart SYSCTL_UINT(_net_inet_tcp_cc_hystartplusplus, OID_AUTO, bblogs,
740a9696510SRandall Stewart     CTLFLAG_RW,
741a9696510SRandall Stewart     &hystart_bblogs, 0,
742a9696510SRandall Stewart    "Do we enable HyStart++ Black Box logs to be generated if BB logging is on");
743a9696510SRandall Stewart 
744370efe5aSLawrence Stewart VNET_DEFINE(int, cc_do_abe) = 0;
745370efe5aSLawrence Stewart SYSCTL_INT(_net_inet_tcp_cc, OID_AUTO, abe, CTLFLAG_VNET | CTLFLAG_RW,
746370efe5aSLawrence Stewart     &VNET_NAME(cc_do_abe), 0,
747370efe5aSLawrence Stewart     "Enable draft-ietf-tcpm-alternativebackoff-ecn (TCP Alternative Backoff with ECN)");
748370efe5aSLawrence Stewart 
749370efe5aSLawrence Stewart VNET_DEFINE(int, cc_abe_frlossreduce) = 0;
750370efe5aSLawrence Stewart SYSCTL_INT(_net_inet_tcp_cc, OID_AUTO, abe_frlossreduce, CTLFLAG_VNET | CTLFLAG_RW,
751370efe5aSLawrence Stewart     &VNET_NAME(cc_abe_frlossreduce), 0,
752370efe5aSLawrence Stewart     "Apply standard beta instead of ABE-beta during ECN-signalled congestion "
753370efe5aSLawrence Stewart     "recovery episodes if loss also needs to be repaired");
754