1 /*- 2 * Copyright (c) 2007-2008 3 * Swinburne University of Technology, Melbourne, Australia. 4 * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org> 5 * Copyright (c) 2010 The FreeBSD Foundation 6 * All rights reserved. 7 * 8 * This software was developed at the Centre for Advanced Internet 9 * Architectures, Swinburne University, by Lawrence Stewart and James Healy, 10 * made possible in part by a grant from the Cisco University Research Program 11 * Fund at Community Foundation Silicon Valley. 12 * 13 * Portions of this software were developed at the Centre for Advanced 14 * Internet Architectures, Swinburne University of Technology, Melbourne, 15 * Australia by David Hayes under sponsorship from the FreeBSD Foundation. 16 * 17 * Redistribution and use in source and binary forms, with or without 18 * modification, are permitted provided that the following conditions 19 * are met: 20 * 1. Redistributions of source code must retain the above copyright 21 * notice, this list of conditions and the following disclaimer. 22 * 2. Redistributions in binary form must reproduce the above copyright 23 * notice, this list of conditions and the following disclaimer in the 24 * documentation and/or other materials provided with the distribution. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39 /* 40 * This software was first released in 2007 by James Healy and Lawrence Stewart 41 * whilst working on the NewTCP research project at Swinburne University's 42 * Centre for Advanced Internet Architectures, Melbourne, Australia, which was 43 * made possible in part by a grant from the Cisco University Research Program 44 * Fund at Community Foundation Silicon Valley. More details are available at: 45 * http://caia.swin.edu.au/urp/newtcp/ 46 */ 47 48 #include <sys/cdefs.h> 49 __FBSDID("$FreeBSD$"); 50 51 #include <sys/param.h> 52 #include <sys/kernel.h> 53 #include <sys/libkern.h> 54 #include <sys/lock.h> 55 #include <sys/malloc.h> 56 #include <sys/module.h> 57 #include <sys/mutex.h> 58 #include <sys/queue.h> 59 #include <sys/rwlock.h> 60 #include <sys/sbuf.h> 61 #include <sys/socket.h> 62 #include <sys/socketvar.h> 63 #include <sys/sysctl.h> 64 65 #include <net/if.h> 66 #include <net/if_var.h> 67 68 #include <netinet/cc.h> 69 #include <netinet/in.h> 70 #include <netinet/in_pcb.h> 71 #include <netinet/tcp_var.h> 72 73 #include <netinet/cc/cc_module.h> 74 75 /* 76 * List of available cc algorithms on the current system. First element 77 * is used as the system default CC algorithm. 78 */ 79 struct cc_head cc_list = STAILQ_HEAD_INITIALIZER(cc_list); 80 81 /* Protects the cc_list TAILQ. */ 82 struct rwlock cc_list_lock; 83 84 VNET_DEFINE(struct cc_algo *, default_cc_ptr) = &newreno_cc_algo; 85 86 /* 87 * Sysctl handler to show and change the default CC algorithm. 88 */ 89 static int 90 cc_default_algo(SYSCTL_HANDLER_ARGS) 91 { 92 char default_cc[TCP_CA_NAME_MAX]; 93 struct cc_algo *funcs; 94 int err, found; 95 96 err = found = 0; 97 98 if (req->newptr == NULL) { 99 /* Just print the current default. */ 100 CC_LIST_RLOCK(); 101 strlcpy(default_cc, CC_DEFAULT()->name, TCP_CA_NAME_MAX); 102 CC_LIST_RUNLOCK(); 103 err = sysctl_handle_string(oidp, default_cc, 1, req); 104 } else { 105 /* Find algo with specified name and set it to default. */ 106 CC_LIST_RLOCK(); 107 STAILQ_FOREACH(funcs, &cc_list, entries) { 108 if (strncmp((char *)req->newptr, funcs->name, 109 TCP_CA_NAME_MAX) == 0) { 110 found = 1; 111 V_default_cc_ptr = funcs; 112 } 113 } 114 CC_LIST_RUNLOCK(); 115 116 if (!found) 117 err = ESRCH; 118 } 119 120 return (err); 121 } 122 123 /* 124 * Sysctl handler to display the list of available CC algorithms. 125 */ 126 static int 127 cc_list_available(SYSCTL_HANDLER_ARGS) 128 { 129 struct cc_algo *algo; 130 struct sbuf *s; 131 int err, first, nalgos; 132 133 err = nalgos = 0; 134 first = 1; 135 136 CC_LIST_RLOCK(); 137 STAILQ_FOREACH(algo, &cc_list, entries) { 138 nalgos++; 139 } 140 CC_LIST_RUNLOCK(); 141 142 s = sbuf_new(NULL, NULL, nalgos * TCP_CA_NAME_MAX, SBUF_FIXEDLEN); 143 144 if (s == NULL) 145 return (ENOMEM); 146 147 /* 148 * It is theoretically possible for the CC list to have grown in size 149 * since the call to sbuf_new() and therefore for the sbuf to be too 150 * small. If this were to happen (incredibly unlikely), the sbuf will 151 * reach an overflow condition, sbuf_printf() will return an error and 152 * the sysctl will fail gracefully. 153 */ 154 CC_LIST_RLOCK(); 155 STAILQ_FOREACH(algo, &cc_list, entries) { 156 err = sbuf_printf(s, first ? "%s" : ", %s", algo->name); 157 if (err) { 158 /* Sbuf overflow condition. */ 159 err = EOVERFLOW; 160 break; 161 } 162 first = 0; 163 } 164 CC_LIST_RUNLOCK(); 165 166 if (!err) { 167 sbuf_finish(s); 168 err = sysctl_handle_string(oidp, sbuf_data(s), 1, req); 169 } 170 171 sbuf_delete(s); 172 return (err); 173 } 174 175 /* 176 * Reset the default CC algo to NewReno for any netstack which is using the algo 177 * that is about to go away as its default. 178 */ 179 static void 180 cc_checkreset_default(struct cc_algo *remove_cc) 181 { 182 VNET_ITERATOR_DECL(vnet_iter); 183 184 CC_LIST_LOCK_ASSERT(); 185 186 VNET_LIST_RLOCK_NOSLEEP(); 187 VNET_FOREACH(vnet_iter) { 188 CURVNET_SET(vnet_iter); 189 if (strncmp(CC_DEFAULT()->name, remove_cc->name, 190 TCP_CA_NAME_MAX) == 0) 191 V_default_cc_ptr = &newreno_cc_algo; 192 CURVNET_RESTORE(); 193 } 194 VNET_LIST_RUNLOCK_NOSLEEP(); 195 } 196 197 /* 198 * Initialise CC subsystem on system boot. 199 */ 200 static void 201 cc_init(void) 202 { 203 CC_LIST_LOCK_INIT(); 204 STAILQ_INIT(&cc_list); 205 } 206 207 /* 208 * Returns non-zero on success, 0 on failure. 209 */ 210 int 211 cc_deregister_algo(struct cc_algo *remove_cc) 212 { 213 struct cc_algo *funcs, *tmpfuncs; 214 int err; 215 216 err = ENOENT; 217 218 /* Never allow newreno to be deregistered. */ 219 if (&newreno_cc_algo == remove_cc) 220 return (EPERM); 221 222 /* Remove algo from cc_list so that new connections can't use it. */ 223 CC_LIST_WLOCK(); 224 STAILQ_FOREACH_SAFE(funcs, &cc_list, entries, tmpfuncs) { 225 if (funcs == remove_cc) { 226 cc_checkreset_default(remove_cc); 227 STAILQ_REMOVE(&cc_list, funcs, cc_algo, entries); 228 err = 0; 229 break; 230 } 231 } 232 CC_LIST_WUNLOCK(); 233 234 if (!err) 235 /* 236 * XXXLAS: 237 * - We may need to handle non-zero return values in future. 238 * - If we add CC framework support for protocols other than 239 * TCP, we may want a more generic way to handle this step. 240 */ 241 tcp_ccalgounload(remove_cc); 242 243 return (err); 244 } 245 246 /* 247 * Returns 0 on success, non-zero on failure. 248 */ 249 int 250 cc_register_algo(struct cc_algo *add_cc) 251 { 252 struct cc_algo *funcs; 253 int err; 254 255 err = 0; 256 257 /* 258 * Iterate over list of registered CC algorithms and make sure 259 * we're not trying to add a duplicate. 260 */ 261 CC_LIST_WLOCK(); 262 STAILQ_FOREACH(funcs, &cc_list, entries) { 263 if (funcs == add_cc || strncmp(funcs->name, add_cc->name, 264 TCP_CA_NAME_MAX) == 0) 265 err = EEXIST; 266 } 267 268 if (!err) 269 STAILQ_INSERT_TAIL(&cc_list, add_cc, entries); 270 271 CC_LIST_WUNLOCK(); 272 273 return (err); 274 } 275 276 /* 277 * Handles kld related events. Returns 0 on success, non-zero on failure. 278 */ 279 int 280 cc_modevent(module_t mod, int event_type, void *data) 281 { 282 struct cc_algo *algo; 283 int err; 284 285 err = 0; 286 algo = (struct cc_algo *)data; 287 288 switch(event_type) { 289 case MOD_LOAD: 290 if (algo->mod_init != NULL) 291 err = algo->mod_init(); 292 if (!err) 293 err = cc_register_algo(algo); 294 break; 295 296 case MOD_QUIESCE: 297 case MOD_SHUTDOWN: 298 case MOD_UNLOAD: 299 err = cc_deregister_algo(algo); 300 if (!err && algo->mod_destroy != NULL) 301 algo->mod_destroy(); 302 if (err == ENOENT) 303 err = 0; 304 break; 305 306 default: 307 err = EINVAL; 308 break; 309 } 310 311 return (err); 312 } 313 314 SYSINIT(cc, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, cc_init, NULL); 315 316 /* Declare sysctl tree and populate it. */ 317 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, cc, CTLFLAG_RW, NULL, 318 "congestion control related settings"); 319 320 SYSCTL_VNET_PROC(_net_inet_tcp_cc, OID_AUTO, algorithm, CTLTYPE_STRING|CTLFLAG_RW, 321 NULL, 0, cc_default_algo, "A", "default congestion control algorithm"); 322 323 SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, available, CTLTYPE_STRING|CTLFLAG_RD, 324 NULL, 0, cc_list_available, "A", 325 "list available congestion control algorithms"); 326