xref: /freebsd/sys/netinet/cc/cc.c (revision 1545452c5448d6e1f030b8142eb85db172d2c816)
1 /*-
2  * Copyright (c) 2007-2008
3  *	Swinburne University of Technology, Melbourne, Australia.
4  * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
5  * Copyright (c) 2010 The FreeBSD Foundation
6  * All rights reserved.
7  *
8  * This software was developed at the Centre for Advanced Internet
9  * Architectures, Swinburne University of Technology, by Lawrence Stewart and
10  * James Healy, made possible in part by a grant from the Cisco University
11  * Research Program Fund at Community Foundation Silicon Valley.
12  *
13  * Portions of this software were developed at the Centre for Advanced
14  * Internet Architectures, Swinburne University of Technology, Melbourne,
15  * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
16  *
17  * Redistribution and use in source and binary forms, with or without
18  * modification, are permitted provided that the following conditions
19  * are met:
20  * 1. Redistributions of source code must retain the above copyright
21  *    notice, this list of conditions and the following disclaimer.
22  * 2. Redistributions in binary form must reproduce the above copyright
23  *    notice, this list of conditions and the following disclaimer in the
24  *    documentation and/or other materials provided with the distribution.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  */
38 
39 /*
40  * This software was first released in 2007 by James Healy and Lawrence Stewart
41  * whilst working on the NewTCP research project at Swinburne University of
42  * Technology's Centre for Advanced Internet Architectures, Melbourne,
43  * Australia, which was made possible in part by a grant from the Cisco
44  * University Research Program Fund at Community Foundation Silicon Valley.
45  * More details are available at:
46  *   http://caia.swin.edu.au/urp/newtcp/
47  */
48 
49 #include <sys/cdefs.h>
50 __FBSDID("$FreeBSD$");
51 
52 #include <sys/param.h>
53 #include <sys/kernel.h>
54 #include <sys/libkern.h>
55 #include <sys/lock.h>
56 #include <sys/malloc.h>
57 #include <sys/module.h>
58 #include <sys/mutex.h>
59 #include <sys/queue.h>
60 #include <sys/rwlock.h>
61 #include <sys/sbuf.h>
62 #include <sys/socket.h>
63 #include <sys/socketvar.h>
64 #include <sys/sysctl.h>
65 
66 #include <net/if.h>
67 #include <net/if_var.h>
68 
69 #include <netinet/cc.h>
70 #include <netinet/in.h>
71 #include <netinet/in_pcb.h>
72 #include <netinet/tcp_var.h>
73 
74 #include <netinet/cc/cc_module.h>
75 
76 /*
 * List of available cc algorithms on the current system. Entries are
 * appended by cc_register_algo() and unlinked by cc_deregister_algo().
 *
 * The original comment states that the first element is used as the system
 * default CC algorithm.
 * NOTE(review): within this file the default is read through CC_DEFAULT()
 * and written through V_default_cc_ptr, not taken from the list head --
 * confirm whether the "first element" statement still holds.
 */
80 struct cc_head cc_list = STAILQ_HEAD_INITIALIZER(cc_list);
81 
82 /* Protects the cc_list STAILQ. */
83 struct rwlock cc_list_lock;
84 
/*
 * Pointer to the default CC algorithm, initialised to NewReno. It is changed
 * by cc_default_algo() and reset to NewReno by cc_checkreset_default().
 * Declared with VNET_DEFINE, so presumably one instance exists per vnet --
 * confirm.
 */
85 VNET_DEFINE(struct cc_algo *, default_cc_ptr) = &newreno_cc_algo;
86 
87 /*
88  * Sysctl handler to show and change the default CC algorithm.
89  */
90 static int
91 cc_default_algo(SYSCTL_HANDLER_ARGS)
92 {
93 	char default_cc[TCP_CA_NAME_MAX];
94 	struct cc_algo *funcs;
95 	int err, found;
96 
97 	err = found = 0;
98 
99 	if (req->newptr == NULL) {
100 		/* Just print the current default. */
101 		CC_LIST_RLOCK();
102 		strlcpy(default_cc, CC_DEFAULT()->name, TCP_CA_NAME_MAX);
103 		CC_LIST_RUNLOCK();
104 		err = sysctl_handle_string(oidp, default_cc, 0, req);
105 	} else {
106 		/* Find algo with specified name and set it to default. */
107 		CC_LIST_RLOCK();
108 		STAILQ_FOREACH(funcs, &cc_list, entries) {
109 			/* NOTE: "newptr" is not zero terminated */
110 			if (req->newlen != strnlen(funcs->name,
111 			    TCP_CA_NAME_MAX - 1))
112 				continue;
113 			if (bcmp(req->newptr, funcs->name, req->newlen))
114 				continue;
115 			found = 1;
116 			V_default_cc_ptr = funcs;
117 		}
118 		CC_LIST_RUNLOCK();
119 
120 		if (!found)
121 			err = ESRCH;
122 	}
123 
124 	return (err);
125 }
126 
127 /*
128  * Sysctl handler to display the list of available CC algorithms.
129  */
130 static int
131 cc_list_available(SYSCTL_HANDLER_ARGS)
132 {
133 	struct cc_algo *algo;
134 	struct sbuf *s;
135 	int err, first, nalgos;
136 
137 	err = nalgos = 0;
138 	first = 1;
139 
140 	CC_LIST_RLOCK();
141 	STAILQ_FOREACH(algo, &cc_list, entries) {
142 		nalgos++;
143 	}
144 	CC_LIST_RUNLOCK();
145 
146 	s = sbuf_new(NULL, NULL, nalgos * TCP_CA_NAME_MAX, SBUF_FIXEDLEN);
147 
148 	if (s == NULL)
149 		return (ENOMEM);
150 
151 	/*
152 	 * It is theoretically possible for the CC list to have grown in size
153 	 * since the call to sbuf_new() and therefore for the sbuf to be too
154 	 * small. If this were to happen (incredibly unlikely), the sbuf will
155 	 * reach an overflow condition, sbuf_printf() will return an error and
156 	 * the sysctl will fail gracefully.
157 	 */
158 	CC_LIST_RLOCK();
159 	STAILQ_FOREACH(algo, &cc_list, entries) {
160 		err = sbuf_printf(s, first ? "%s" : ", %s", algo->name);
161 		if (err) {
162 			/* Sbuf overflow condition. */
163 			err = EOVERFLOW;
164 			break;
165 		}
166 		first = 0;
167 	}
168 	CC_LIST_RUNLOCK();
169 
170 	if (!err) {
171 		sbuf_finish(s);
172 		err = sysctl_handle_string(oidp, sbuf_data(s), 0, req);
173 	}
174 
175 	sbuf_delete(s);
176 	return (err);
177 }
178 
179 /*
180  * Reset the default CC algo to NewReno for any netstack which is using the algo
181  * that is about to go away as its default.
182  */
183 static void
184 cc_checkreset_default(struct cc_algo *remove_cc)
185 {
186 	VNET_ITERATOR_DECL(vnet_iter);
187 
188 	CC_LIST_LOCK_ASSERT();
189 
190 	VNET_LIST_RLOCK_NOSLEEP();
191 	VNET_FOREACH(vnet_iter) {
192 		CURVNET_SET(vnet_iter);
193 		if (strncmp(CC_DEFAULT()->name, remove_cc->name,
194 		    TCP_CA_NAME_MAX) == 0)
195 			V_default_cc_ptr = &newreno_cc_algo;
196 		CURVNET_RESTORE();
197 	}
198 	VNET_LIST_RUNLOCK_NOSLEEP();
199 }
200 
201 /*
202  * Initialise CC subsystem on system boot.
203  */
204 static void
205 cc_init(void)
206 {
207 	CC_LIST_LOCK_INIT();
208 	STAILQ_INIT(&cc_list);
209 }
210 
211 /*
212  * Returns non-zero on success, 0 on failure.
213  */
214 int
215 cc_deregister_algo(struct cc_algo *remove_cc)
216 {
217 	struct cc_algo *funcs, *tmpfuncs;
218 	int err;
219 
220 	err = ENOENT;
221 
222 	/* Never allow newreno to be deregistered. */
223 	if (&newreno_cc_algo == remove_cc)
224 		return (EPERM);
225 
226 	/* Remove algo from cc_list so that new connections can't use it. */
227 	CC_LIST_WLOCK();
228 	STAILQ_FOREACH_SAFE(funcs, &cc_list, entries, tmpfuncs) {
229 		if (funcs == remove_cc) {
230 			cc_checkreset_default(remove_cc);
231 			STAILQ_REMOVE(&cc_list, funcs, cc_algo, entries);
232 			err = 0;
233 			break;
234 		}
235 	}
236 	CC_LIST_WUNLOCK();
237 
238 	if (!err)
239 		/*
240 		 * XXXLAS:
241 		 * - We may need to handle non-zero return values in future.
242 		 * - If we add CC framework support for protocols other than
243 		 *   TCP, we may want a more generic way to handle this step.
244 		 */
245 		tcp_ccalgounload(remove_cc);
246 
247 	return (err);
248 }
249 
250 /*
251  * Returns 0 on success, non-zero on failure.
252  */
253 int
254 cc_register_algo(struct cc_algo *add_cc)
255 {
256 	struct cc_algo *funcs;
257 	int err;
258 
259 	err = 0;
260 
261 	/*
262 	 * Iterate over list of registered CC algorithms and make sure
263 	 * we're not trying to add a duplicate.
264 	 */
265 	CC_LIST_WLOCK();
266 	STAILQ_FOREACH(funcs, &cc_list, entries) {
267 		if (funcs == add_cc || strncmp(funcs->name, add_cc->name,
268 		    TCP_CA_NAME_MAX) == 0)
269 			err = EEXIST;
270 	}
271 
272 	if (!err)
273 		STAILQ_INSERT_TAIL(&cc_list, add_cc, entries);
274 
275 	CC_LIST_WUNLOCK();
276 
277 	return (err);
278 }
279 
280 /*
281  * Handles kld related events. Returns 0 on success, non-zero on failure.
282  */
283 int
284 cc_modevent(module_t mod, int event_type, void *data)
285 {
286 	struct cc_algo *algo;
287 	int err;
288 
289 	err = 0;
290 	algo = (struct cc_algo *)data;
291 
292 	switch(event_type) {
293 	case MOD_LOAD:
294 		if (algo->mod_init != NULL)
295 			err = algo->mod_init();
296 		if (!err)
297 			err = cc_register_algo(algo);
298 		break;
299 
300 	case MOD_QUIESCE:
301 	case MOD_SHUTDOWN:
302 	case MOD_UNLOAD:
303 		err = cc_deregister_algo(algo);
304 		if (!err && algo->mod_destroy != NULL)
305 			algo->mod_destroy();
306 		if (err == ENOENT)
307 			err = 0;
308 		break;
309 
310 	default:
311 		err = EINVAL;
312 		break;
313 	}
314 
315 	return (err);
316 }
317 
/* Arrange for cc_init() to be invoked through the SYSINIT mechanism. */
318 SYSINIT(cc, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, cc_init, NULL);
319 
320 /* Declare sysctl tree and populate it. */
321 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, cc, CTLFLAG_RW, NULL,
322     "congestion control related settings");
323 
/* Read/write string entry "algorithm", serviced by cc_default_algo(). */
324 SYSCTL_VNET_PROC(_net_inet_tcp_cc, OID_AUTO, algorithm, CTLTYPE_STRING|CTLFLAG_RW,
325     NULL, 0, cc_default_algo, "A", "default congestion control algorithm");
326 
/* Read-only string entry "available", serviced by cc_list_available(). */
327 SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, available, CTLTYPE_STRING|CTLFLAG_RD,
328     NULL, 0, cc_list_available, "A",
329     "list available congestion control algorithms");
330