1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2007-2008 5 * Swinburne University of Technology, Melbourne, Australia. 6 * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org> 7 * Copyright (c) 2010 The FreeBSD Foundation 8 * All rights reserved. 9 * 10 * This software was developed at the Centre for Advanced Internet 11 * Architectures, Swinburne University of Technology, by Lawrence Stewart and 12 * James Healy, made possible in part by a grant from the Cisco University 13 * Research Program Fund at Community Foundation Silicon Valley. 14 * 15 * Portions of this software were developed at the Centre for Advanced 16 * Internet Architectures, Swinburne University of Technology, Melbourne, 17 * Australia by David Hayes under sponsorship from the FreeBSD Foundation. 18 * 19 * Redistribution and use in source and binary forms, with or without 20 * modification, are permitted provided that the following conditions 21 * are met: 22 * 1. Redistributions of source code must retain the above copyright 23 * notice, this list of conditions and the following disclaimer. 24 * 2. Redistributions in binary form must reproduce the above copyright 25 * notice, this list of conditions and the following disclaimer in the 26 * documentation and/or other materials provided with the distribution. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 31 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 38 * SUCH DAMAGE. 39 */ 40 41 /* 42 * This software was first released in 2007 by James Healy and Lawrence Stewart 43 * whilst working on the NewTCP research project at Swinburne University of 44 * Technology's Centre for Advanced Internet Architectures, Melbourne, 45 * Australia, which was made possible in part by a grant from the Cisco 46 * University Research Program Fund at Community Foundation Silicon Valley. 47 * More details are available at: 48 * http://caia.swin.edu.au/urp/newtcp/ 49 */ 50 51 #include <sys/cdefs.h> 52 __FBSDID("$FreeBSD$"); 53 54 #include <sys/param.h> 55 #include <sys/kernel.h> 56 #include <sys/libkern.h> 57 #include <sys/lock.h> 58 #include <sys/malloc.h> 59 #include <sys/module.h> 60 #include <sys/mutex.h> 61 #include <sys/queue.h> 62 #include <sys/rwlock.h> 63 #include <sys/sbuf.h> 64 #include <sys/socket.h> 65 #include <sys/socketvar.h> 66 #include <sys/sysctl.h> 67 68 #include <net/vnet.h> 69 70 #include <netinet/in.h> 71 #include <netinet/in_pcb.h> 72 #include <netinet/tcp.h> 73 #include <netinet/tcp_var.h> 74 #include <netinet/cc/cc.h> 75 76 #include <netinet/cc/cc_module.h> 77 78 /* 79 * List of available cc algorithms on the current system. First element 80 * is used as the system default CC algorithm. 81 */ 82 struct cc_head cc_list = STAILQ_HEAD_INITIALIZER(cc_list); 83 84 /* Protects the cc_list TAILQ. */ 85 struct rwlock cc_list_lock; 86 87 VNET_DEFINE(struct cc_algo *, default_cc_ptr) = &newreno_cc_algo; 88 89 /* 90 * Sysctl handler to show and change the default CC algorithm. 91 */ 92 static int 93 cc_default_algo(SYSCTL_HANDLER_ARGS) 94 { 95 char default_cc[TCP_CA_NAME_MAX]; 96 struct cc_algo *funcs; 97 int error; 98 99 /* Get the current default: */ 100 CC_LIST_RLOCK(); 101 strlcpy(default_cc, CC_DEFAULT()->name, sizeof(default_cc)); 102 CC_LIST_RUNLOCK(); 103 104 error = sysctl_handle_string(oidp, default_cc, sizeof(default_cc), req); 105 106 /* Check for error or no change */ 107 if (error != 0 || req->newptr == NULL) 108 goto done; 109 110 error = ESRCH; 111 112 /* Find algo with specified name and set it to default. */ 113 CC_LIST_RLOCK(); 114 STAILQ_FOREACH(funcs, &cc_list, entries) { 115 if (strncmp(default_cc, funcs->name, sizeof(default_cc))) 116 continue; 117 V_default_cc_ptr = funcs; 118 error = 0; 119 break; 120 } 121 CC_LIST_RUNLOCK(); 122 done: 123 return (error); 124 } 125 126 /* 127 * Sysctl handler to display the list of available CC algorithms. 128 */ 129 static int 130 cc_list_available(SYSCTL_HANDLER_ARGS) 131 { 132 struct cc_algo *algo; 133 struct sbuf *s; 134 int err, first, nalgos; 135 136 err = nalgos = 0; 137 first = 1; 138 139 CC_LIST_RLOCK(); 140 STAILQ_FOREACH(algo, &cc_list, entries) { 141 nalgos++; 142 } 143 CC_LIST_RUNLOCK(); 144 145 s = sbuf_new(NULL, NULL, nalgos * TCP_CA_NAME_MAX, SBUF_FIXEDLEN); 146 147 if (s == NULL) 148 return (ENOMEM); 149 150 /* 151 * It is theoretically possible for the CC list to have grown in size 152 * since the call to sbuf_new() and therefore for the sbuf to be too 153 * small. If this were to happen (incredibly unlikely), the sbuf will 154 * reach an overflow condition, sbuf_printf() will return an error and 155 * the sysctl will fail gracefully. 156 */ 157 CC_LIST_RLOCK(); 158 STAILQ_FOREACH(algo, &cc_list, entries) { 159 err = sbuf_printf(s, first ? "%s" : ", %s", algo->name); 160 if (err) { 161 /* Sbuf overflow condition. */ 162 err = EOVERFLOW; 163 break; 164 } 165 first = 0; 166 } 167 CC_LIST_RUNLOCK(); 168 169 if (!err) { 170 sbuf_finish(s); 171 err = sysctl_handle_string(oidp, sbuf_data(s), 0, req); 172 } 173 174 sbuf_delete(s); 175 return (err); 176 } 177 178 /* 179 * Reset the default CC algo to NewReno for any netstack which is using the algo 180 * that is about to go away as its default. 181 */ 182 static void 183 cc_checkreset_default(struct cc_algo *remove_cc) 184 { 185 VNET_ITERATOR_DECL(vnet_iter); 186 187 CC_LIST_LOCK_ASSERT(); 188 189 VNET_LIST_RLOCK_NOSLEEP(); 190 VNET_FOREACH(vnet_iter) { 191 CURVNET_SET(vnet_iter); 192 if (strncmp(CC_DEFAULT()->name, remove_cc->name, 193 TCP_CA_NAME_MAX) == 0) 194 V_default_cc_ptr = &newreno_cc_algo; 195 CURVNET_RESTORE(); 196 } 197 VNET_LIST_RUNLOCK_NOSLEEP(); 198 } 199 200 /* 201 * Initialise CC subsystem on system boot. 202 */ 203 static void 204 cc_init(void) 205 { 206 CC_LIST_LOCK_INIT(); 207 STAILQ_INIT(&cc_list); 208 } 209 210 /* 211 * Returns non-zero on success, 0 on failure. 212 */ 213 int 214 cc_deregister_algo(struct cc_algo *remove_cc) 215 { 216 struct cc_algo *funcs, *tmpfuncs; 217 int err; 218 219 err = ENOENT; 220 221 /* Never allow newreno to be deregistered. */ 222 if (&newreno_cc_algo == remove_cc) 223 return (EPERM); 224 225 /* Remove algo from cc_list so that new connections can't use it. */ 226 CC_LIST_WLOCK(); 227 STAILQ_FOREACH_SAFE(funcs, &cc_list, entries, tmpfuncs) { 228 if (funcs == remove_cc) { 229 cc_checkreset_default(remove_cc); 230 STAILQ_REMOVE(&cc_list, funcs, cc_algo, entries); 231 err = 0; 232 break; 233 } 234 } 235 CC_LIST_WUNLOCK(); 236 237 if (!err) 238 /* 239 * XXXLAS: 240 * - We may need to handle non-zero return values in future. 241 * - If we add CC framework support for protocols other than 242 * TCP, we may want a more generic way to handle this step. 243 */ 244 tcp_ccalgounload(remove_cc); 245 246 return (err); 247 } 248 249 /* 250 * Returns 0 on success, non-zero on failure. 251 */ 252 int 253 cc_register_algo(struct cc_algo *add_cc) 254 { 255 struct cc_algo *funcs; 256 int err; 257 258 err = 0; 259 260 /* 261 * Iterate over list of registered CC algorithms and make sure 262 * we're not trying to add a duplicate. 263 */ 264 CC_LIST_WLOCK(); 265 STAILQ_FOREACH(funcs, &cc_list, entries) { 266 if (funcs == add_cc || strncmp(funcs->name, add_cc->name, 267 TCP_CA_NAME_MAX) == 0) 268 err = EEXIST; 269 } 270 271 if (!err) 272 STAILQ_INSERT_TAIL(&cc_list, add_cc, entries); 273 274 CC_LIST_WUNLOCK(); 275 276 return (err); 277 } 278 279 /* 280 * Handles kld related events. Returns 0 on success, non-zero on failure. 281 */ 282 int 283 cc_modevent(module_t mod, int event_type, void *data) 284 { 285 struct cc_algo *algo; 286 int err; 287 288 err = 0; 289 algo = (struct cc_algo *)data; 290 291 switch(event_type) { 292 case MOD_LOAD: 293 if (algo->mod_init != NULL) 294 err = algo->mod_init(); 295 if (!err) 296 err = cc_register_algo(algo); 297 break; 298 299 case MOD_QUIESCE: 300 case MOD_SHUTDOWN: 301 case MOD_UNLOAD: 302 err = cc_deregister_algo(algo); 303 if (!err && algo->mod_destroy != NULL) 304 algo->mod_destroy(); 305 if (err == ENOENT) 306 err = 0; 307 break; 308 309 default: 310 err = EINVAL; 311 break; 312 } 313 314 return (err); 315 } 316 317 SYSINIT(cc, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, cc_init, NULL); 318 319 /* Declare sysctl tree and populate it. */ 320 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, cc, CTLFLAG_RW, NULL, 321 "Congestion control related settings"); 322 323 SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, algorithm, 324 CTLFLAG_VNET | CTLTYPE_STRING | CTLFLAG_RW, 325 NULL, 0, cc_default_algo, "A", "Default congestion control algorithm"); 326 327 SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, available, CTLTYPE_STRING|CTLFLAG_RD, 328 NULL, 0, cc_list_available, "A", 329 "List available congestion control algorithms"); 330 331 VNET_DEFINE(int, cc_do_abe) = 0; 332 SYSCTL_INT(_net_inet_tcp_cc, OID_AUTO, abe, CTLFLAG_VNET | CTLFLAG_RW, 333 &VNET_NAME(cc_do_abe), 0, 334 "Enable draft-ietf-tcpm-alternativebackoff-ecn (TCP Alternative Backoff with ECN)"); 335 336 VNET_DEFINE(int, cc_abe_frlossreduce) = 0; 337 SYSCTL_INT(_net_inet_tcp_cc, OID_AUTO, abe_frlossreduce, CTLFLAG_VNET | CTLFLAG_RW, 338 &VNET_NAME(cc_abe_frlossreduce), 0, 339 "Apply standard beta instead of ABE-beta during ECN-signalled congestion " 340 "recovery episodes if loss also needs to be repaired"); 341