xref: /freebsd/sys/netinet/cc/cc.c (revision aa0a1e58f0189b0fde359a8bda032887e72057fa)
1 /*-
2  * Copyright (c) 2007-2008
3  *	Swinburne University of Technology, Melbourne, Australia.
4  * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
5  * Copyright (c) 2010 The FreeBSD Foundation
6  * All rights reserved.
7  *
8  * This software was developed at the Centre for Advanced Internet
9  * Architectures, Swinburne University, by Lawrence Stewart and James Healy,
10  * made possible in part by a grant from the Cisco University Research Program
11  * Fund at Community Foundation Silicon Valley.
12  *
13  * Portions of this software were developed at the Centre for Advanced
14  * Internet Architectures, Swinburne University of Technology, Melbourne,
15  * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
16  *
17  * Redistribution and use in source and binary forms, with or without
18  * modification, are permitted provided that the following conditions
19  * are met:
20  * 1. Redistributions of source code must retain the above copyright
21  *    notice, this list of conditions and the following disclaimer.
22  * 2. Redistributions in binary form must reproduce the above copyright
23  *    notice, this list of conditions and the following disclaimer in the
24  *    documentation and/or other materials provided with the distribution.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  */
38 
39 /*
40  * This software was first released in 2007 by James Healy and Lawrence Stewart
41  * whilst working on the NewTCP research project at Swinburne University's
42  * Centre for Advanced Internet Architectures, Melbourne, Australia, which was
43  * made possible in part by a grant from the Cisco University Research Program
44  * Fund at Community Foundation Silicon Valley. More details are available at:
45  *   http://caia.swin.edu.au/urp/newtcp/
46  */
47 
48 #include <sys/cdefs.h>
49 __FBSDID("$FreeBSD$");
50 
51 #include <sys/param.h>
52 #include <sys/kernel.h>
53 #include <sys/libkern.h>
54 #include <sys/lock.h>
55 #include <sys/malloc.h>
56 #include <sys/module.h>
57 #include <sys/mutex.h>
58 #include <sys/queue.h>
59 #include <sys/rwlock.h>
60 #include <sys/sbuf.h>
61 #include <sys/socket.h>
62 #include <sys/socketvar.h>
63 #include <sys/sysctl.h>
64 
65 #include <net/if.h>
66 #include <net/if_var.h>
67 
68 #include <netinet/cc.h>
69 #include <netinet/in.h>
70 #include <netinet/in_pcb.h>
71 #include <netinet/tcp_var.h>
72 
73 #include <netinet/cc/cc_module.h>
74 
/*
 * List of available cc algorithms on the current system. First element
 * is used as the system default CC algorithm.
 *
 * NOTE(review): the default is also tracked through default_cc_ptr below and
 * cc_register_algo() appends at the tail of this list; confirm whether list
 * position still determines the default.
 */
struct cc_head cc_list = STAILQ_HEAD_INITIALIZER(cc_list);

/* Protects the cc_list STAILQ. */
struct rwlock cc_list_lock;

/*
 * Pointer to the default CC algorithm, declared through VNET_DEFINE and
 * initialised to point at newreno_cc_algo.
 */
VNET_DEFINE(struct cc_algo *, default_cc_ptr) = &newreno_cc_algo;
85 
86 /*
87  * Sysctl handler to show and change the default CC algorithm.
88  */
89 static int
90 cc_default_algo(SYSCTL_HANDLER_ARGS)
91 {
92 	char default_cc[TCP_CA_NAME_MAX];
93 	struct cc_algo *funcs;
94 	int err, found;
95 
96 	err = found = 0;
97 
98 	if (req->newptr == NULL) {
99 		/* Just print the current default. */
100 		CC_LIST_RLOCK();
101 		strlcpy(default_cc, CC_DEFAULT()->name, TCP_CA_NAME_MAX);
102 		CC_LIST_RUNLOCK();
103 		err = sysctl_handle_string(oidp, default_cc, 1, req);
104 	} else {
105 		/* Find algo with specified name and set it to default. */
106 		CC_LIST_RLOCK();
107 		STAILQ_FOREACH(funcs, &cc_list, entries) {
108 			if (strncmp((char *)req->newptr, funcs->name,
109 			    TCP_CA_NAME_MAX) == 0) {
110 				found = 1;
111 				V_default_cc_ptr = funcs;
112 			}
113 		}
114 		CC_LIST_RUNLOCK();
115 
116 		if (!found)
117 			err = ESRCH;
118 	}
119 
120 	return (err);
121 }
122 
123 /*
124  * Sysctl handler to display the list of available CC algorithms.
125  */
126 static int
127 cc_list_available(SYSCTL_HANDLER_ARGS)
128 {
129 	struct cc_algo *algo;
130 	struct sbuf *s;
131 	int err, first, nalgos;
132 
133 	err = nalgos = 0;
134 	first = 1;
135 
136 	CC_LIST_RLOCK();
137 	STAILQ_FOREACH(algo, &cc_list, entries) {
138 		nalgos++;
139 	}
140 	CC_LIST_RUNLOCK();
141 
142 	s = sbuf_new(NULL, NULL, nalgos * TCP_CA_NAME_MAX, SBUF_FIXEDLEN);
143 
144 	if (s == NULL)
145 		return (ENOMEM);
146 
147 	/*
148 	 * It is theoretically possible for the CC list to have grown in size
149 	 * since the call to sbuf_new() and therefore for the sbuf to be too
150 	 * small. If this were to happen (incredibly unlikely), the sbuf will
151 	 * reach an overflow condition, sbuf_printf() will return an error and
152 	 * the sysctl will fail gracefully.
153 	 */
154 	CC_LIST_RLOCK();
155 	STAILQ_FOREACH(algo, &cc_list, entries) {
156 		err = sbuf_printf(s, first ? "%s" : ", %s", algo->name);
157 		if (err) {
158 			/* Sbuf overflow condition. */
159 			err = EOVERFLOW;
160 			break;
161 		}
162 		first = 0;
163 	}
164 	CC_LIST_RUNLOCK();
165 
166 	if (!err) {
167 		sbuf_finish(s);
168 		err = sysctl_handle_string(oidp, sbuf_data(s), 1, req);
169 	}
170 
171 	sbuf_delete(s);
172 	return (err);
173 }
174 
175 /*
176  * Reset the default CC algo to NewReno for any netstack which is using the algo
177  * that is about to go away as its default.
178  */
179 static void
180 cc_checkreset_default(struct cc_algo *remove_cc)
181 {
182 	VNET_ITERATOR_DECL(vnet_iter);
183 
184 	CC_LIST_LOCK_ASSERT();
185 
186 	VNET_LIST_RLOCK_NOSLEEP();
187 	VNET_FOREACH(vnet_iter) {
188 		CURVNET_SET(vnet_iter);
189 		if (strncmp(CC_DEFAULT()->name, remove_cc->name,
190 		    TCP_CA_NAME_MAX) == 0)
191 			V_default_cc_ptr = &newreno_cc_algo;
192 		CURVNET_RESTORE();
193 	}
194 	VNET_LIST_RUNLOCK_NOSLEEP();
195 }
196 
197 /*
198  * Initialise CC subsystem on system boot.
199  */
200 static void
201 cc_init(void)
202 {
203 	CC_LIST_LOCK_INIT();
204 	STAILQ_INIT(&cc_list);
205 }
206 
207 /*
208  * Returns non-zero on success, 0 on failure.
209  */
210 int
211 cc_deregister_algo(struct cc_algo *remove_cc)
212 {
213 	struct cc_algo *funcs, *tmpfuncs;
214 	int err;
215 
216 	err = ENOENT;
217 
218 	/* Never allow newreno to be deregistered. */
219 	if (&newreno_cc_algo == remove_cc)
220 		return (EPERM);
221 
222 	/* Remove algo from cc_list so that new connections can't use it. */
223 	CC_LIST_WLOCK();
224 	STAILQ_FOREACH_SAFE(funcs, &cc_list, entries, tmpfuncs) {
225 		if (funcs == remove_cc) {
226 			cc_checkreset_default(remove_cc);
227 			STAILQ_REMOVE(&cc_list, funcs, cc_algo, entries);
228 			err = 0;
229 			break;
230 		}
231 	}
232 	CC_LIST_WUNLOCK();
233 
234 	if (!err)
235 		/*
236 		 * XXXLAS:
237 		 * - We may need to handle non-zero return values in future.
238 		 * - If we add CC framework support for protocols other than
239 		 *   TCP, we may want a more generic way to handle this step.
240 		 */
241 		tcp_ccalgounload(remove_cc);
242 
243 	return (err);
244 }
245 
246 /*
247  * Returns 0 on success, non-zero on failure.
248  */
249 int
250 cc_register_algo(struct cc_algo *add_cc)
251 {
252 	struct cc_algo *funcs;
253 	int err;
254 
255 	err = 0;
256 
257 	/*
258 	 * Iterate over list of registered CC algorithms and make sure
259 	 * we're not trying to add a duplicate.
260 	 */
261 	CC_LIST_WLOCK();
262 	STAILQ_FOREACH(funcs, &cc_list, entries) {
263 		if (funcs == add_cc || strncmp(funcs->name, add_cc->name,
264 		    TCP_CA_NAME_MAX) == 0)
265 			err = EEXIST;
266 	}
267 
268 	if (!err)
269 		STAILQ_INSERT_TAIL(&cc_list, add_cc, entries);
270 
271 	CC_LIST_WUNLOCK();
272 
273 	return (err);
274 }
275 
276 /*
277  * Handles kld related events. Returns 0 on success, non-zero on failure.
278  */
279 int
280 cc_modevent(module_t mod, int event_type, void *data)
281 {
282 	struct cc_algo *algo;
283 	int err;
284 
285 	err = 0;
286 	algo = (struct cc_algo *)data;
287 
288 	switch(event_type) {
289 	case MOD_LOAD:
290 		if (algo->mod_init != NULL)
291 			err = algo->mod_init();
292 		if (!err)
293 			err = cc_register_algo(algo);
294 		break;
295 
296 	case MOD_QUIESCE:
297 	case MOD_SHUTDOWN:
298 	case MOD_UNLOAD:
299 		err = cc_deregister_algo(algo);
300 		if (!err && algo->mod_destroy != NULL)
301 			algo->mod_destroy();
302 		if (err == ENOENT)
303 			err = 0;
304 		break;
305 
306 	default:
307 		err = EINVAL;
308 		break;
309 	}
310 
311 	return (err);
312 }
313 
/*
 * Register cc_init() to be run via SYSINIT at SI_SUB_PROTO_IFATTACHDOMAIN,
 * SI_ORDER_FIRST.
 */
SYSINIT(cc, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, cc_init, NULL);

/* Declare sysctl tree and populate it. */
SYSCTL_NODE(_net_inet_tcp, OID_AUTO, cc, CTLFLAG_RW, NULL,
    "congestion control related settings");

/* Read/write string under the cc node, handled by cc_default_algo(). */
SYSCTL_VNET_PROC(_net_inet_tcp_cc, OID_AUTO, algorithm, CTLTYPE_STRING|CTLFLAG_RW,
    NULL, 0, cc_default_algo, "A", "default congestion control algorithm");

/* Read-only string under the cc node, handled by cc_list_available(). */
SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, available, CTLTYPE_STRING|CTLFLAG_RD,
    NULL, 0, cc_list_available, "A",
    "list available congestion control algorithms");
326