xref: /freebsd/sys/netinet/cc/cc.c (revision 6e660824a82f590542932de52f128db584029893)
1 /*-
2  * Copyright (c) 2007-2008
3  *	Swinburne University of Technology, Melbourne, Australia.
4  * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
5  * Copyright (c) 2010 The FreeBSD Foundation
6  * All rights reserved.
7  *
8  * This software was developed at the Centre for Advanced Internet
9  * Architectures, Swinburne University of Technology, by Lawrence Stewart and
10  * James Healy, made possible in part by a grant from the Cisco University
11  * Research Program Fund at Community Foundation Silicon Valley.
12  *
13  * Portions of this software were developed at the Centre for Advanced
14  * Internet Architectures, Swinburne University of Technology, Melbourne,
15  * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
16  *
17  * Redistribution and use in source and binary forms, with or without
18  * modification, are permitted provided that the following conditions
19  * are met:
20  * 1. Redistributions of source code must retain the above copyright
21  *    notice, this list of conditions and the following disclaimer.
22  * 2. Redistributions in binary form must reproduce the above copyright
23  *    notice, this list of conditions and the following disclaimer in the
24  *    documentation and/or other materials provided with the distribution.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  */
38 
39 /*
40  * This software was first released in 2007 by James Healy and Lawrence Stewart
41  * whilst working on the NewTCP research project at Swinburne University of
42  * Technology's Centre for Advanced Internet Architectures, Melbourne,
43  * Australia, which was made possible in part by a grant from the Cisco
44  * University Research Program Fund at Community Foundation Silicon Valley.
45  * More details are available at:
46  *   http://caia.swin.edu.au/urp/newtcp/
47  */
48 
49 #include <sys/cdefs.h>
50 __FBSDID("$FreeBSD$");
51 
52 #include <sys/param.h>
53 #include <sys/kernel.h>
54 #include <sys/libkern.h>
55 #include <sys/lock.h>
56 #include <sys/malloc.h>
57 #include <sys/module.h>
58 #include <sys/mutex.h>
59 #include <sys/queue.h>
60 #include <sys/rwlock.h>
61 #include <sys/sbuf.h>
62 #include <sys/socket.h>
63 #include <sys/socketvar.h>
64 #include <sys/sysctl.h>
65 
66 #include <net/if.h>
67 #include <net/if_var.h>
68 
69 #include <netinet/cc.h>
70 #include <netinet/in.h>
71 #include <netinet/in_pcb.h>
72 #include <netinet/tcp_var.h>
73 
74 #include <netinet/cc/cc_module.h>
75 
/*
 * List of available cc algorithms on the current system.
 *
 * The default algorithm is not derived from list position in this file: it
 * is held in default_cc_ptr (below), read through CC_DEFAULT() and changed by
 * the cc_default_algo() sysctl handler.
 */
struct cc_head cc_list = STAILQ_HEAD_INITIALIZER(cc_list);

/* Protects the cc_list STAILQ. */
struct rwlock cc_list_lock;

/*
 * Pointer to the default CC algorithm, declared with VNET_DEFINE and
 * initialised to NewReno.
 */
VNET_DEFINE(struct cc_algo *, default_cc_ptr) = &newreno_cc_algo;
86 
87 /*
88  * Sysctl handler to show and change the default CC algorithm.
89  */
90 static int
91 cc_default_algo(SYSCTL_HANDLER_ARGS)
92 {
93 	char default_cc[TCP_CA_NAME_MAX];
94 	struct cc_algo *funcs;
95 	int err, found;
96 
97 	err = found = 0;
98 
99 	if (req->newptr == NULL) {
100 		/* Just print the current default. */
101 		CC_LIST_RLOCK();
102 		strlcpy(default_cc, CC_DEFAULT()->name, TCP_CA_NAME_MAX);
103 		CC_LIST_RUNLOCK();
104 		err = sysctl_handle_string(oidp, default_cc, 1, req);
105 	} else {
106 		/* Find algo with specified name and set it to default. */
107 		CC_LIST_RLOCK();
108 		STAILQ_FOREACH(funcs, &cc_list, entries) {
109 			if (strncmp((char *)req->newptr, funcs->name,
110 			    TCP_CA_NAME_MAX) == 0) {
111 				found = 1;
112 				V_default_cc_ptr = funcs;
113 			}
114 		}
115 		CC_LIST_RUNLOCK();
116 
117 		if (!found)
118 			err = ESRCH;
119 	}
120 
121 	return (err);
122 }
123 
124 /*
125  * Sysctl handler to display the list of available CC algorithms.
126  */
127 static int
128 cc_list_available(SYSCTL_HANDLER_ARGS)
129 {
130 	struct cc_algo *algo;
131 	struct sbuf *s;
132 	int err, first, nalgos;
133 
134 	err = nalgos = 0;
135 	first = 1;
136 
137 	CC_LIST_RLOCK();
138 	STAILQ_FOREACH(algo, &cc_list, entries) {
139 		nalgos++;
140 	}
141 	CC_LIST_RUNLOCK();
142 
143 	s = sbuf_new(NULL, NULL, nalgos * TCP_CA_NAME_MAX, SBUF_FIXEDLEN);
144 
145 	if (s == NULL)
146 		return (ENOMEM);
147 
148 	/*
149 	 * It is theoretically possible for the CC list to have grown in size
150 	 * since the call to sbuf_new() and therefore for the sbuf to be too
151 	 * small. If this were to happen (incredibly unlikely), the sbuf will
152 	 * reach an overflow condition, sbuf_printf() will return an error and
153 	 * the sysctl will fail gracefully.
154 	 */
155 	CC_LIST_RLOCK();
156 	STAILQ_FOREACH(algo, &cc_list, entries) {
157 		err = sbuf_printf(s, first ? "%s" : ", %s", algo->name);
158 		if (err) {
159 			/* Sbuf overflow condition. */
160 			err = EOVERFLOW;
161 			break;
162 		}
163 		first = 0;
164 	}
165 	CC_LIST_RUNLOCK();
166 
167 	if (!err) {
168 		sbuf_finish(s);
169 		err = sysctl_handle_string(oidp, sbuf_data(s), 1, req);
170 	}
171 
172 	sbuf_delete(s);
173 	return (err);
174 }
175 
176 /*
177  * Reset the default CC algo to NewReno for any netstack which is using the algo
178  * that is about to go away as its default.
179  */
180 static void
181 cc_checkreset_default(struct cc_algo *remove_cc)
182 {
183 	VNET_ITERATOR_DECL(vnet_iter);
184 
185 	CC_LIST_LOCK_ASSERT();
186 
187 	VNET_LIST_RLOCK_NOSLEEP();
188 	VNET_FOREACH(vnet_iter) {
189 		CURVNET_SET(vnet_iter);
190 		if (strncmp(CC_DEFAULT()->name, remove_cc->name,
191 		    TCP_CA_NAME_MAX) == 0)
192 			V_default_cc_ptr = &newreno_cc_algo;
193 		CURVNET_RESTORE();
194 	}
195 	VNET_LIST_RUNLOCK_NOSLEEP();
196 }
197 
198 /*
199  * Initialise CC subsystem on system boot.
200  */
201 static void
202 cc_init(void)
203 {
204 	CC_LIST_LOCK_INIT();
205 	STAILQ_INIT(&cc_list);
206 }
207 
208 /*
209  * Returns non-zero on success, 0 on failure.
210  */
211 int
212 cc_deregister_algo(struct cc_algo *remove_cc)
213 {
214 	struct cc_algo *funcs, *tmpfuncs;
215 	int err;
216 
217 	err = ENOENT;
218 
219 	/* Never allow newreno to be deregistered. */
220 	if (&newreno_cc_algo == remove_cc)
221 		return (EPERM);
222 
223 	/* Remove algo from cc_list so that new connections can't use it. */
224 	CC_LIST_WLOCK();
225 	STAILQ_FOREACH_SAFE(funcs, &cc_list, entries, tmpfuncs) {
226 		if (funcs == remove_cc) {
227 			cc_checkreset_default(remove_cc);
228 			STAILQ_REMOVE(&cc_list, funcs, cc_algo, entries);
229 			err = 0;
230 			break;
231 		}
232 	}
233 	CC_LIST_WUNLOCK();
234 
235 	if (!err)
236 		/*
237 		 * XXXLAS:
238 		 * - We may need to handle non-zero return values in future.
239 		 * - If we add CC framework support for protocols other than
240 		 *   TCP, we may want a more generic way to handle this step.
241 		 */
242 		tcp_ccalgounload(remove_cc);
243 
244 	return (err);
245 }
246 
247 /*
248  * Returns 0 on success, non-zero on failure.
249  */
250 int
251 cc_register_algo(struct cc_algo *add_cc)
252 {
253 	struct cc_algo *funcs;
254 	int err;
255 
256 	err = 0;
257 
258 	/*
259 	 * Iterate over list of registered CC algorithms and make sure
260 	 * we're not trying to add a duplicate.
261 	 */
262 	CC_LIST_WLOCK();
263 	STAILQ_FOREACH(funcs, &cc_list, entries) {
264 		if (funcs == add_cc || strncmp(funcs->name, add_cc->name,
265 		    TCP_CA_NAME_MAX) == 0)
266 			err = EEXIST;
267 	}
268 
269 	if (!err)
270 		STAILQ_INSERT_TAIL(&cc_list, add_cc, entries);
271 
272 	CC_LIST_WUNLOCK();
273 
274 	return (err);
275 }
276 
277 /*
278  * Handles kld related events. Returns 0 on success, non-zero on failure.
279  */
280 int
281 cc_modevent(module_t mod, int event_type, void *data)
282 {
283 	struct cc_algo *algo;
284 	int err;
285 
286 	err = 0;
287 	algo = (struct cc_algo *)data;
288 
289 	switch(event_type) {
290 	case MOD_LOAD:
291 		if (algo->mod_init != NULL)
292 			err = algo->mod_init();
293 		if (!err)
294 			err = cc_register_algo(algo);
295 		break;
296 
297 	case MOD_QUIESCE:
298 	case MOD_SHUTDOWN:
299 	case MOD_UNLOAD:
300 		err = cc_deregister_algo(algo);
301 		if (!err && algo->mod_destroy != NULL)
302 			algo->mod_destroy();
303 		if (err == ENOENT)
304 			err = 0;
305 		break;
306 
307 	default:
308 		err = EINVAL;
309 		break;
310 	}
311 
312 	return (err);
313 }
314 
/* Register cc_init() to run at SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST. */
SYSINIT(cc, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, cc_init, NULL);

/* Declare sysctl tree and populate it. */
/* The "cc" node under _net_inet_tcp; the two entries below hang off it. */
SYSCTL_NODE(_net_inet_tcp, OID_AUTO, cc, CTLFLAG_RW, NULL,
    "congestion control related settings");

/* Read-write string "algorithm", served by cc_default_algo(). */
SYSCTL_VNET_PROC(_net_inet_tcp_cc, OID_AUTO, algorithm, CTLTYPE_STRING|CTLFLAG_RW,
    NULL, 0, cc_default_algo, "A", "default congestion control algorithm");

/* Read-only string "available", served by cc_list_available(). */
SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, available, CTLTYPE_STRING|CTLFLAG_RD,
    NULL, 0, cc_list_available, "A",
    "list available congestion control algorithms");
327