1 /*- 2 * Copyright (c) 2007-2008 3 * Swinburne University of Technology, Melbourne, Australia. 4 * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org> 5 * Copyright (c) 2010 The FreeBSD Foundation 6 * All rights reserved. 7 * 8 * This software was developed at the Centre for Advanced Internet 9 * Architectures, Swinburne University of Technology, by Lawrence Stewart and 10 * James Healy, made possible in part by a grant from the Cisco University 11 * Research Program Fund at Community Foundation Silicon Valley. 12 * 13 * Portions of this software were developed at the Centre for Advanced 14 * Internet Architectures, Swinburne University of Technology, Melbourne, 15 * Australia by David Hayes under sponsorship from the FreeBSD Foundation. 16 * 17 * Redistribution and use in source and binary forms, with or without 18 * modification, are permitted provided that the following conditions 19 * are met: 20 * 1. Redistributions of source code must retain the above copyright 21 * notice, this list of conditions and the following disclaimer. 22 * 2. Redistributions in binary form must reproduce the above copyright 23 * notice, this list of conditions and the following disclaimer in the 24 * documentation and/or other materials provided with the distribution. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39 /* 40 * This software was first released in 2007 by James Healy and Lawrence Stewart 41 * whilst working on the NewTCP research project at Swinburne University of 42 * Technology's Centre for Advanced Internet Architectures, Melbourne, 43 * Australia, which was made possible in part by a grant from the Cisco 44 * University Research Program Fund at Community Foundation Silicon Valley. 45 * More details are available at: 46 * http://caia.swin.edu.au/urp/newtcp/ 47 */ 48 49 #include <sys/cdefs.h> 50 __FBSDID("$FreeBSD$"); 51 52 #include <sys/param.h> 53 #include <sys/kernel.h> 54 #include <sys/libkern.h> 55 #include <sys/lock.h> 56 #include <sys/malloc.h> 57 #include <sys/module.h> 58 #include <sys/mutex.h> 59 #include <sys/queue.h> 60 #include <sys/rwlock.h> 61 #include <sys/sbuf.h> 62 #include <sys/socket.h> 63 #include <sys/socketvar.h> 64 #include <sys/sysctl.h> 65 66 #include <net/vnet.h> 67 68 #include <netinet/in.h> 69 #include <netinet/in_pcb.h> 70 #include <netinet/tcp.h> 71 #include <netinet/tcp_var.h> 72 #include <netinet/cc/cc.h> 73 74 #include <netinet/cc/cc_module.h> 75 76 /* 77 * List of available cc algorithms on the current system. First element 78 * is used as the system default CC algorithm. 79 */ 80 struct cc_head cc_list = STAILQ_HEAD_INITIALIZER(cc_list); 81 82 /* Protects the cc_list TAILQ. */ 83 struct rwlock cc_list_lock; 84 85 VNET_DEFINE(struct cc_algo *, default_cc_ptr) = &newreno_cc_algo; 86 87 /* 88 * Sysctl handler to show and change the default CC algorithm. 89 */ 90 static int 91 cc_default_algo(SYSCTL_HANDLER_ARGS) 92 { 93 char default_cc[TCP_CA_NAME_MAX]; 94 struct cc_algo *funcs; 95 int error; 96 97 /* Get the current default: */ 98 CC_LIST_RLOCK(); 99 strlcpy(default_cc, CC_DEFAULT()->name, sizeof(default_cc)); 100 CC_LIST_RUNLOCK(); 101 102 error = sysctl_handle_string(oidp, default_cc, sizeof(default_cc), req); 103 104 /* Check for error or no change */ 105 if (error != 0 || req->newptr == NULL) 106 goto done; 107 108 error = ESRCH; 109 110 /* Find algo with specified name and set it to default. */ 111 CC_LIST_RLOCK(); 112 STAILQ_FOREACH(funcs, &cc_list, entries) { 113 if (strncmp(default_cc, funcs->name, sizeof(default_cc))) 114 continue; 115 V_default_cc_ptr = funcs; 116 error = 0; 117 break; 118 } 119 CC_LIST_RUNLOCK(); 120 done: 121 return (error); 122 } 123 124 /* 125 * Sysctl handler to display the list of available CC algorithms. 126 */ 127 static int 128 cc_list_available(SYSCTL_HANDLER_ARGS) 129 { 130 struct cc_algo *algo; 131 struct sbuf *s; 132 int err, first, nalgos; 133 134 err = nalgos = 0; 135 first = 1; 136 137 CC_LIST_RLOCK(); 138 STAILQ_FOREACH(algo, &cc_list, entries) { 139 nalgos++; 140 } 141 CC_LIST_RUNLOCK(); 142 143 s = sbuf_new(NULL, NULL, nalgos * TCP_CA_NAME_MAX, SBUF_FIXEDLEN); 144 145 if (s == NULL) 146 return (ENOMEM); 147 148 /* 149 * It is theoretically possible for the CC list to have grown in size 150 * since the call to sbuf_new() and therefore for the sbuf to be too 151 * small. If this were to happen (incredibly unlikely), the sbuf will 152 * reach an overflow condition, sbuf_printf() will return an error and 153 * the sysctl will fail gracefully. 154 */ 155 CC_LIST_RLOCK(); 156 STAILQ_FOREACH(algo, &cc_list, entries) { 157 err = sbuf_printf(s, first ? "%s" : ", %s", algo->name); 158 if (err) { 159 /* Sbuf overflow condition. */ 160 err = EOVERFLOW; 161 break; 162 } 163 first = 0; 164 } 165 CC_LIST_RUNLOCK(); 166 167 if (!err) { 168 sbuf_finish(s); 169 err = sysctl_handle_string(oidp, sbuf_data(s), 0, req); 170 } 171 172 sbuf_delete(s); 173 return (err); 174 } 175 176 /* 177 * Reset the default CC algo to NewReno for any netstack which is using the algo 178 * that is about to go away as its default. 179 */ 180 static void 181 cc_checkreset_default(struct cc_algo *remove_cc) 182 { 183 VNET_ITERATOR_DECL(vnet_iter); 184 185 CC_LIST_LOCK_ASSERT(); 186 187 VNET_LIST_RLOCK_NOSLEEP(); 188 VNET_FOREACH(vnet_iter) { 189 CURVNET_SET(vnet_iter); 190 if (strncmp(CC_DEFAULT()->name, remove_cc->name, 191 TCP_CA_NAME_MAX) == 0) 192 V_default_cc_ptr = &newreno_cc_algo; 193 CURVNET_RESTORE(); 194 } 195 VNET_LIST_RUNLOCK_NOSLEEP(); 196 } 197 198 /* 199 * Initialise CC subsystem on system boot. 200 */ 201 static void 202 cc_init(void) 203 { 204 CC_LIST_LOCK_INIT(); 205 STAILQ_INIT(&cc_list); 206 } 207 208 /* 209 * Returns non-zero on success, 0 on failure. 210 */ 211 int 212 cc_deregister_algo(struct cc_algo *remove_cc) 213 { 214 struct cc_algo *funcs, *tmpfuncs; 215 int err; 216 217 err = ENOENT; 218 219 /* Never allow newreno to be deregistered. */ 220 if (&newreno_cc_algo == remove_cc) 221 return (EPERM); 222 223 /* Remove algo from cc_list so that new connections can't use it. */ 224 CC_LIST_WLOCK(); 225 STAILQ_FOREACH_SAFE(funcs, &cc_list, entries, tmpfuncs) { 226 if (funcs == remove_cc) { 227 cc_checkreset_default(remove_cc); 228 STAILQ_REMOVE(&cc_list, funcs, cc_algo, entries); 229 err = 0; 230 break; 231 } 232 } 233 CC_LIST_WUNLOCK(); 234 235 if (!err) 236 /* 237 * XXXLAS: 238 * - We may need to handle non-zero return values in future. 239 * - If we add CC framework support for protocols other than 240 * TCP, we may want a more generic way to handle this step. 241 */ 242 tcp_ccalgounload(remove_cc); 243 244 return (err); 245 } 246 247 /* 248 * Returns 0 on success, non-zero on failure. 249 */ 250 int 251 cc_register_algo(struct cc_algo *add_cc) 252 { 253 struct cc_algo *funcs; 254 int err; 255 256 err = 0; 257 258 /* 259 * Iterate over list of registered CC algorithms and make sure 260 * we're not trying to add a duplicate. 261 */ 262 CC_LIST_WLOCK(); 263 STAILQ_FOREACH(funcs, &cc_list, entries) { 264 if (funcs == add_cc || strncmp(funcs->name, add_cc->name, 265 TCP_CA_NAME_MAX) == 0) 266 err = EEXIST; 267 } 268 269 if (!err) 270 STAILQ_INSERT_TAIL(&cc_list, add_cc, entries); 271 272 CC_LIST_WUNLOCK(); 273 274 return (err); 275 } 276 277 /* 278 * Handles kld related events. Returns 0 on success, non-zero on failure. 279 */ 280 int 281 cc_modevent(module_t mod, int event_type, void *data) 282 { 283 struct cc_algo *algo; 284 int err; 285 286 err = 0; 287 algo = (struct cc_algo *)data; 288 289 switch(event_type) { 290 case MOD_LOAD: 291 if (algo->mod_init != NULL) 292 err = algo->mod_init(); 293 if (!err) 294 err = cc_register_algo(algo); 295 break; 296 297 case MOD_QUIESCE: 298 case MOD_SHUTDOWN: 299 case MOD_UNLOAD: 300 err = cc_deregister_algo(algo); 301 if (!err && algo->mod_destroy != NULL) 302 algo->mod_destroy(); 303 if (err == ENOENT) 304 err = 0; 305 break; 306 307 default: 308 err = EINVAL; 309 break; 310 } 311 312 return (err); 313 } 314 315 SYSINIT(cc, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, cc_init, NULL); 316 317 /* Declare sysctl tree and populate it. */ 318 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, cc, CTLFLAG_RW, NULL, 319 "congestion control related settings"); 320 321 SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, algorithm, 322 CTLFLAG_VNET | CTLTYPE_STRING | CTLFLAG_RW, 323 NULL, 0, cc_default_algo, "A", "default congestion control algorithm"); 324 325 SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, available, CTLTYPE_STRING|CTLFLAG_RD, 326 NULL, 0, cc_list_available, "A", 327 "list available congestion control algorithms"); 328