1 /*- 2 * Copyright (c) 2007-2008 3 * Swinburne University of Technology, Melbourne, Australia. 4 * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org> 5 * Copyright (c) 2010 The FreeBSD Foundation 6 * All rights reserved. 7 * 8 * This software was developed at the Centre for Advanced Internet 9 * Architectures, Swinburne University of Technology, by Lawrence Stewart and 10 * James Healy, made possible in part by a grant from the Cisco University 11 * Research Program Fund at Community Foundation Silicon Valley. 12 * 13 * Portions of this software were developed at the Centre for Advanced 14 * Internet Architectures, Swinburne University of Technology, Melbourne, 15 * Australia by David Hayes under sponsorship from the FreeBSD Foundation. 16 * 17 * Redistribution and use in source and binary forms, with or without 18 * modification, are permitted provided that the following conditions 19 * are met: 20 * 1. Redistributions of source code must retain the above copyright 21 * notice, this list of conditions and the following disclaimer. 22 * 2. Redistributions in binary form must reproduce the above copyright 23 * notice, this list of conditions and the following disclaimer in the 24 * documentation and/or other materials provided with the distribution. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39 /* 40 * This software was first released in 2007 by James Healy and Lawrence Stewart 41 * whilst working on the NewTCP research project at Swinburne University of 42 * Technology's Centre for Advanced Internet Architectures, Melbourne, 43 * Australia, which was made possible in part by a grant from the Cisco 44 * University Research Program Fund at Community Foundation Silicon Valley. 45 * More details are available at: 46 * http://caia.swin.edu.au/urp/newtcp/ 47 */ 48 49 #include <sys/cdefs.h> 50 __FBSDID("$FreeBSD$"); 51 52 #include <sys/param.h> 53 #include <sys/kernel.h> 54 #include <sys/libkern.h> 55 #include <sys/lock.h> 56 #include <sys/malloc.h> 57 #include <sys/module.h> 58 #include <sys/mutex.h> 59 #include <sys/queue.h> 60 #include <sys/rwlock.h> 61 #include <sys/sbuf.h> 62 #include <sys/socket.h> 63 #include <sys/socketvar.h> 64 #include <sys/sysctl.h> 65 66 #include <net/if.h> 67 #include <net/if_var.h> 68 69 #include <netinet/cc.h> 70 #include <netinet/in.h> 71 #include <netinet/in_pcb.h> 72 #include <netinet/tcp_var.h> 73 74 #include <netinet/cc/cc_module.h> 75 76 /* 77 * List of available cc algorithms on the current system. First element 78 * is used as the system default CC algorithm. 79 */ 80 struct cc_head cc_list = STAILQ_HEAD_INITIALIZER(cc_list); 81 82 /* Protects the cc_list TAILQ. */ 83 struct rwlock cc_list_lock; 84 85 VNET_DEFINE(struct cc_algo *, default_cc_ptr) = &newreno_cc_algo; 86 87 /* 88 * Sysctl handler to show and change the default CC algorithm. 89 */ 90 static int 91 cc_default_algo(SYSCTL_HANDLER_ARGS) 92 { 93 char default_cc[TCP_CA_NAME_MAX]; 94 struct cc_algo *funcs; 95 int err, found; 96 97 err = found = 0; 98 99 if (req->newptr == NULL) { 100 /* Just print the current default. */ 101 CC_LIST_RLOCK(); 102 strlcpy(default_cc, CC_DEFAULT()->name, TCP_CA_NAME_MAX); 103 CC_LIST_RUNLOCK(); 104 err = sysctl_handle_string(oidp, default_cc, 0, req); 105 } else { 106 /* Find algo with specified name and set it to default. */ 107 CC_LIST_RLOCK(); 108 STAILQ_FOREACH(funcs, &cc_list, entries) { 109 /* NOTE: "newptr" is not zero terminated */ 110 if (req->newlen != strnlen(funcs->name, 111 TCP_CA_NAME_MAX - 1)) 112 continue; 113 if (bcmp(req->newptr, funcs->name, req->newlen)) 114 continue; 115 found = 1; 116 V_default_cc_ptr = funcs; 117 } 118 CC_LIST_RUNLOCK(); 119 120 if (!found) 121 err = ESRCH; 122 } 123 124 return (err); 125 } 126 127 /* 128 * Sysctl handler to display the list of available CC algorithms. 129 */ 130 static int 131 cc_list_available(SYSCTL_HANDLER_ARGS) 132 { 133 struct cc_algo *algo; 134 struct sbuf *s; 135 int err, first, nalgos; 136 137 err = nalgos = 0; 138 first = 1; 139 140 CC_LIST_RLOCK(); 141 STAILQ_FOREACH(algo, &cc_list, entries) { 142 nalgos++; 143 } 144 CC_LIST_RUNLOCK(); 145 146 s = sbuf_new(NULL, NULL, nalgos * TCP_CA_NAME_MAX, SBUF_FIXEDLEN); 147 148 if (s == NULL) 149 return (ENOMEM); 150 151 /* 152 * It is theoretically possible for the CC list to have grown in size 153 * since the call to sbuf_new() and therefore for the sbuf to be too 154 * small. If this were to happen (incredibly unlikely), the sbuf will 155 * reach an overflow condition, sbuf_printf() will return an error and 156 * the sysctl will fail gracefully. 157 */ 158 CC_LIST_RLOCK(); 159 STAILQ_FOREACH(algo, &cc_list, entries) { 160 err = sbuf_printf(s, first ? "%s" : ", %s", algo->name); 161 if (err) { 162 /* Sbuf overflow condition. */ 163 err = EOVERFLOW; 164 break; 165 } 166 first = 0; 167 } 168 CC_LIST_RUNLOCK(); 169 170 if (!err) { 171 sbuf_finish(s); 172 err = sysctl_handle_string(oidp, sbuf_data(s), 0, req); 173 } 174 175 sbuf_delete(s); 176 return (err); 177 } 178 179 /* 180 * Reset the default CC algo to NewReno for any netstack which is using the algo 181 * that is about to go away as its default. 182 */ 183 static void 184 cc_checkreset_default(struct cc_algo *remove_cc) 185 { 186 VNET_ITERATOR_DECL(vnet_iter); 187 188 CC_LIST_LOCK_ASSERT(); 189 190 VNET_LIST_RLOCK_NOSLEEP(); 191 VNET_FOREACH(vnet_iter) { 192 CURVNET_SET(vnet_iter); 193 if (strncmp(CC_DEFAULT()->name, remove_cc->name, 194 TCP_CA_NAME_MAX) == 0) 195 V_default_cc_ptr = &newreno_cc_algo; 196 CURVNET_RESTORE(); 197 } 198 VNET_LIST_RUNLOCK_NOSLEEP(); 199 } 200 201 /* 202 * Initialise CC subsystem on system boot. 203 */ 204 static void 205 cc_init(void) 206 { 207 CC_LIST_LOCK_INIT(); 208 STAILQ_INIT(&cc_list); 209 } 210 211 /* 212 * Returns non-zero on success, 0 on failure. 213 */ 214 int 215 cc_deregister_algo(struct cc_algo *remove_cc) 216 { 217 struct cc_algo *funcs, *tmpfuncs; 218 int err; 219 220 err = ENOENT; 221 222 /* Never allow newreno to be deregistered. */ 223 if (&newreno_cc_algo == remove_cc) 224 return (EPERM); 225 226 /* Remove algo from cc_list so that new connections can't use it. */ 227 CC_LIST_WLOCK(); 228 STAILQ_FOREACH_SAFE(funcs, &cc_list, entries, tmpfuncs) { 229 if (funcs == remove_cc) { 230 cc_checkreset_default(remove_cc); 231 STAILQ_REMOVE(&cc_list, funcs, cc_algo, entries); 232 err = 0; 233 break; 234 } 235 } 236 CC_LIST_WUNLOCK(); 237 238 if (!err) 239 /* 240 * XXXLAS: 241 * - We may need to handle non-zero return values in future. 242 * - If we add CC framework support for protocols other than 243 * TCP, we may want a more generic way to handle this step. 244 */ 245 tcp_ccalgounload(remove_cc); 246 247 return (err); 248 } 249 250 /* 251 * Returns 0 on success, non-zero on failure. 252 */ 253 int 254 cc_register_algo(struct cc_algo *add_cc) 255 { 256 struct cc_algo *funcs; 257 int err; 258 259 err = 0; 260 261 /* 262 * Iterate over list of registered CC algorithms and make sure 263 * we're not trying to add a duplicate. 264 */ 265 CC_LIST_WLOCK(); 266 STAILQ_FOREACH(funcs, &cc_list, entries) { 267 if (funcs == add_cc || strncmp(funcs->name, add_cc->name, 268 TCP_CA_NAME_MAX) == 0) 269 err = EEXIST; 270 } 271 272 if (!err) 273 STAILQ_INSERT_TAIL(&cc_list, add_cc, entries); 274 275 CC_LIST_WUNLOCK(); 276 277 return (err); 278 } 279 280 /* 281 * Handles kld related events. Returns 0 on success, non-zero on failure. 282 */ 283 int 284 cc_modevent(module_t mod, int event_type, void *data) 285 { 286 struct cc_algo *algo; 287 int err; 288 289 err = 0; 290 algo = (struct cc_algo *)data; 291 292 switch(event_type) { 293 case MOD_LOAD: 294 if (algo->mod_init != NULL) 295 err = algo->mod_init(); 296 if (!err) 297 err = cc_register_algo(algo); 298 break; 299 300 case MOD_QUIESCE: 301 case MOD_SHUTDOWN: 302 case MOD_UNLOAD: 303 err = cc_deregister_algo(algo); 304 if (!err && algo->mod_destroy != NULL) 305 algo->mod_destroy(); 306 if (err == ENOENT) 307 err = 0; 308 break; 309 310 default: 311 err = EINVAL; 312 break; 313 } 314 315 return (err); 316 } 317 318 SYSINIT(cc, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, cc_init, NULL); 319 320 /* Declare sysctl tree and populate it. */ 321 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, cc, CTLFLAG_RW, NULL, 322 "congestion control related settings"); 323 324 SYSCTL_VNET_PROC(_net_inet_tcp_cc, OID_AUTO, algorithm, CTLTYPE_STRING|CTLFLAG_RW, 325 NULL, 0, cc_default_algo, "A", "default congestion control algorithm"); 326 327 SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, available, CTLTYPE_STRING|CTLFLAG_RD, 328 NULL, 0, cc_list_available, "A", 329 "list available congestion control algorithms"); 330