xref: /freebsd/sys/kern/uipc_domain.c (revision e39e854e27f53a784c3982cbeb68f4ad1cfd9162)
1 /*-
2  * Copyright (c) 1982, 1986, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 4. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *	@(#)uipc_domain.c	8.2 (Berkeley) 10/18/93
30  */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/socket.h>
37 #include <sys/protosw.h>
38 #include <sys/domain.h>
39 #include <sys/eventhandler.h>
40 #include <sys/mbuf.h>
41 #include <sys/kernel.h>
42 #include <sys/lock.h>
43 #include <sys/mutex.h>
44 #include <sys/socketvar.h>
45 #include <sys/systm.h>
46 
47 #include <net/vnet.h>
48 
49 #include <vm/uma.h>
50 
51 /*
52  * System initialization
53  *
54  * Note: domain initialization takes place on a per domain basis
55  * as a result of traversing a SYSINIT linker set.  Most likely,
56  * each domain would want to call DOMAIN_SET(9) itself, which
57  * would cause the domain to be added just after domaininit()
58  * is called during startup.
59  *
60  * See DOMAIN_SET(9) for details on its use.
61  */
62 
63 static void domaininit(void *);
64 SYSINIT(domain, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, domaininit, NULL);
65 
66 static void domainfinalize(void *);
67 SYSINIT(domainfin, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, domainfinalize,
68     NULL);
69 
70 static struct callout pffast_callout;
71 static struct callout pfslow_callout;
72 
73 static void	pffasttimo(void *);
74 static void	pfslowtimo(void *);
75 
76 struct domain *domains;		/* registered protocol domains */
77 int domain_init_status = 0;
78 static struct mtx dom_mtx;		/* domain list lock */
79 MTX_SYSINIT(domain, &dom_mtx, "domain list", MTX_DEF);
80 
81 /*
82  * Dummy protocol specific user requests function pointer array.
83  * All functions return EOPNOTSUPP.
84  */
85 struct pr_usrreqs nousrreqs = {
86 	.pru_accept =		pru_accept_notsupp,
87 	.pru_attach =		pru_attach_notsupp,
88 	.pru_bind =		pru_bind_notsupp,
89 	.pru_connect =		pru_connect_notsupp,
90 	.pru_connect2 =		pru_connect2_notsupp,
91 	.pru_control =		pru_control_notsupp,
92 	.pru_disconnect	=	pru_disconnect_notsupp,
93 	.pru_listen =		pru_listen_notsupp,
94 	.pru_peeraddr =		pru_peeraddr_notsupp,
95 	.pru_rcvd =		pru_rcvd_notsupp,
96 	.pru_rcvoob =		pru_rcvoob_notsupp,
97 	.pru_send =		pru_send_notsupp,
98 	.pru_sense =		pru_sense_null,
99 	.pru_shutdown =		pru_shutdown_notsupp,
100 	.pru_sockaddr =		pru_sockaddr_notsupp,
101 	.pru_sosend =		pru_sosend_notsupp,
102 	.pru_soreceive =	pru_soreceive_notsupp,
103 	.pru_sopoll =		pru_sopoll_notsupp,
104 };
105 
106 static void
107 protosw_init(struct protosw *pr)
108 {
109 	struct pr_usrreqs *pu;
110 
111 	pu = pr->pr_usrreqs;
112 	KASSERT(pu != NULL, ("protosw_init: %ssw[%d] has no usrreqs!",
113 	    pr->pr_domain->dom_name,
114 	    (int)(pr - pr->pr_domain->dom_protosw)));
115 
116 	/*
117 	 * Protocol switch methods fall into three categories: mandatory,
118 	 * mandatory but protosw_init() provides a default, and optional.
119 	 *
120 	 * For true protocols (i.e., pru_attach != NULL), KASSERT truly
121 	 * mandatory methods with no defaults, and initialize defaults for
122 	 * other mandatory methods if the protocol hasn't defined an
123 	 * implementation (NULL function pointer).
124 	 */
125 #if 0
126 	if (pu->pru_attach != NULL) {
127 		KASSERT(pu->pru_abort != NULL,
128 		    ("protosw_init: %ssw[%d] pru_abort NULL",
129 		    pr->pr_domain->dom_name,
130 		    (int)(pr - pr->pr_domain->dom_protosw)));
131 		KASSERT(pu->pru_send != NULL,
132 		    ("protosw_init: %ssw[%d] pru_send NULL",
133 		    pr->pr_domain->dom_name,
134 		    (int)(pr - pr->pr_domain->dom_protosw)));
135 	}
136 #endif
137 
138 #define DEFAULT(foo, bar)	if ((foo) == NULL)  (foo) = (bar)
139 	DEFAULT(pu->pru_accept, pru_accept_notsupp);
140 	DEFAULT(pu->pru_bind, pru_bind_notsupp);
141 	DEFAULT(pu->pru_connect, pru_connect_notsupp);
142 	DEFAULT(pu->pru_connect2, pru_connect2_notsupp);
143 	DEFAULT(pu->pru_control, pru_control_notsupp);
144 	DEFAULT(pu->pru_disconnect, pru_disconnect_notsupp);
145 	DEFAULT(pu->pru_listen, pru_listen_notsupp);
146 	DEFAULT(pu->pru_peeraddr, pru_peeraddr_notsupp);
147 	DEFAULT(pu->pru_rcvd, pru_rcvd_notsupp);
148 	DEFAULT(pu->pru_rcvoob, pru_rcvoob_notsupp);
149 	DEFAULT(pu->pru_sense, pru_sense_null);
150 	DEFAULT(pu->pru_shutdown, pru_shutdown_notsupp);
151 	DEFAULT(pu->pru_sockaddr, pru_sockaddr_notsupp);
152 	DEFAULT(pu->pru_sosend, sosend_generic);
153 	DEFAULT(pu->pru_soreceive, soreceive_generic);
154 	DEFAULT(pu->pru_sopoll, sopoll_generic);
155 #undef DEFAULT
156 	if (pr->pr_init)
157 		(*pr->pr_init)();
158 }
159 
160 /*
161  * Add a new protocol domain to the list of supported domains
162  * Note: you cant unload it again because a socket may be using it.
163  * XXX can't fail at this time.
164  */
165 void
166 domain_init(void *arg)
167 {
168 	struct domain *dp = arg;
169 	struct protosw *pr;
170 
171 	if (dp->dom_init)
172 		(*dp->dom_init)();
173 	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
174 		protosw_init(pr);
175 	/*
176 	 * update global information about maximums
177 	 */
178 	max_hdr = max_linkhdr + max_protohdr;
179 	max_datalen = MHLEN - max_hdr;
180 	if (max_datalen < 1)
181 		panic("%s: max_datalen < 1", __func__);
182 }
183 
184 #ifdef VIMAGE
/*
 * VIMAGE entry point for domain initialization.  Nothing differs from the
 * non-virtualized case, so this simply forwards arg to domain_init().
 */
void
vnet_domain_init(void *arg)
{

	domain_init(arg);
}
192 
193 void
194 vnet_domain_uninit(void *arg)
195 {
196 	struct domain *dp = arg;
197 	struct protosw *pr;
198 
199 	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
200 		if (pr->pr_destroy)
201 			(*pr->pr_destroy)();
202 	if (dp->dom_destroy)
203 		(*dp->dom_destroy)();
204 }
205 #endif
206 
207 /*
208  * Add a new protocol domain to the list of supported domains
209  * Note: you cant unload it again because a socket may be using it.
210  * XXX can't fail at this time.
211  */
212 void
213 domain_add(void *data)
214 {
215 	struct domain *dp;
216 
217 	dp = (struct domain *)data;
218 	mtx_lock(&dom_mtx);
219 	dp->dom_next = domains;
220 	domains = dp;
221 
222 	KASSERT(domain_init_status >= 1,
223 	    ("attempt to domain_add(%s) before domaininit()",
224 	    dp->dom_name));
225 #ifndef INVARIANTS
226 	if (domain_init_status < 1)
227 		printf("WARNING: attempt to domain_add(%s) before "
228 		    "domaininit()\n", dp->dom_name);
229 #endif
230 #ifdef notyet
231 	KASSERT(domain_init_status < 2,
232 	    ("attempt to domain_add(%s) after domainfinalize()",
233 	    dp->dom_name));
234 #else
235 	if (domain_init_status >= 2)
236 		printf("WARNING: attempt to domain_add(%s) after "
237 		    "domainfinalize()\n", dp->dom_name);
238 #endif
239 	mtx_unlock(&dom_mtx);
240 }
241 
242 static void
243 socket_zone_change(void *tag)
244 {
245 
246 	uma_zone_set_max(socket_zone, maxsockets);
247 }
248 
249 /* ARGSUSED*/
250 static void
251 domaininit(void *dummy)
252 {
253 
254 	/*
255 	 * Before we do any setup, make sure to initialize the
256 	 * zone allocator we get struct sockets from.
257 	 */
258 	socket_zone = uma_zcreate("socket", sizeof(struct socket), NULL, NULL,
259 	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
260 	uma_zone_set_max(socket_zone, maxsockets);
261 	EVENTHANDLER_REGISTER(maxsockets_change, socket_zone_change, NULL,
262 		EVENTHANDLER_PRI_FIRST);
263 
264 	if (max_linkhdr < 16)		/* XXX */
265 		max_linkhdr = 16;
266 
267 	callout_init(&pffast_callout, CALLOUT_MPSAFE);
268 	callout_init(&pfslow_callout, CALLOUT_MPSAFE);
269 
270 	mtx_lock(&dom_mtx);
271 	KASSERT(domain_init_status == 0, ("domaininit called too late!"));
272 	domain_init_status = 1;
273 	mtx_unlock(&dom_mtx);
274 }
275 
276 /* ARGSUSED*/
277 static void
278 domainfinalize(void *dummy)
279 {
280 
281 	mtx_lock(&dom_mtx);
282 	KASSERT(domain_init_status == 1, ("domainfinalize called too late!"));
283 	domain_init_status = 2;
284 	mtx_unlock(&dom_mtx);
285 
286 	callout_reset(&pffast_callout, 1, pffasttimo, NULL);
287 	callout_reset(&pfslow_callout, 1, pfslowtimo, NULL);
288 }
289 
290 struct protosw *
291 pffindtype(int family, int type)
292 {
293 	struct domain *dp;
294 	struct protosw *pr;
295 
296 	for (dp = domains; dp; dp = dp->dom_next)
297 		if (dp->dom_family == family)
298 			goto found;
299 	return (0);
300 found:
301 	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
302 		if (pr->pr_type && pr->pr_type == type)
303 			return (pr);
304 	return (0);
305 }
306 
307 struct protosw *
308 pffindproto(int family, int protocol, int type)
309 {
310 	struct domain *dp;
311 	struct protosw *pr;
312 	struct protosw *maybe = 0;
313 
314 	if (family == 0)
315 		return (0);
316 	for (dp = domains; dp; dp = dp->dom_next)
317 		if (dp->dom_family == family)
318 			goto found;
319 	return (0);
320 found:
321 	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
322 		if ((pr->pr_protocol == protocol) && (pr->pr_type == type))
323 			return (pr);
324 
325 		if (type == SOCK_RAW && pr->pr_type == SOCK_RAW &&
326 		    pr->pr_protocol == 0 && maybe == (struct protosw *)0)
327 			maybe = pr;
328 	}
329 	return (maybe);
330 }
331 
332 /*
333  * The caller must make sure that the new protocol is fully set up and ready to
334  * accept requests before it is registered.
335  */
336 int
337 pf_proto_register(int family, struct protosw *npr)
338 {
339 	VNET_ITERATOR_DECL(vnet_iter);
340 	struct domain *dp;
341 	struct protosw *pr, *fpr;
342 
343 	/* Sanity checks. */
344 	if (family == 0)
345 		return (EPFNOSUPPORT);
346 	if (npr->pr_type == 0)
347 		return (EPROTOTYPE);
348 	if (npr->pr_protocol == 0)
349 		return (EPROTONOSUPPORT);
350 	if (npr->pr_usrreqs == NULL)
351 		return (ENXIO);
352 
353 	/* Try to find the specified domain based on the family. */
354 	for (dp = domains; dp; dp = dp->dom_next)
355 		if (dp->dom_family == family)
356 			goto found;
357 	return (EPFNOSUPPORT);
358 
359 found:
360 	/* Initialize backpointer to struct domain. */
361 	npr->pr_domain = dp;
362 	fpr = NULL;
363 
364 	/*
365 	 * Protect us against races when two protocol registrations for
366 	 * the same protocol happen at the same time.
367 	 */
368 	mtx_lock(&dom_mtx);
369 
370 	/* The new protocol must not yet exist. */
371 	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
372 		if ((pr->pr_type == npr->pr_type) &&
373 		    (pr->pr_protocol == npr->pr_protocol)) {
374 			mtx_unlock(&dom_mtx);
375 			return (EEXIST);	/* XXX: Check only protocol? */
376 		}
377 		/* While here, remember the first free spacer. */
378 		if ((fpr == NULL) && (pr->pr_protocol == PROTO_SPACER))
379 			fpr = pr;
380 	}
381 
382 	/* If no free spacer is found we can't add the new protocol. */
383 	if (fpr == NULL) {
384 		mtx_unlock(&dom_mtx);
385 		return (ENOMEM);
386 	}
387 
388 	/* Copy the new struct protosw over the spacer. */
389 	bcopy(npr, fpr, sizeof(*fpr));
390 
391 	/* Job is done, no more protection required. */
392 	mtx_unlock(&dom_mtx);
393 
394 	/* Initialize and activate the protocol. */
395 	VNET_LIST_RLOCK();
396 	VNET_FOREACH(vnet_iter) {
397 		CURVNET_SET_QUIET(vnet_iter);
398 		protosw_init(fpr);
399 		CURVNET_RESTORE();
400 	}
401 	VNET_LIST_RUNLOCK();
402 
403 	return (0);
404 }
405 
406 /*
407  * The caller must make sure the protocol and its functions correctly shut down
408  * all sockets and release all locks and memory references.
409  */
410 int
411 pf_proto_unregister(int family, int protocol, int type)
412 {
413 	struct domain *dp;
414 	struct protosw *pr, *dpr;
415 
416 	/* Sanity checks. */
417 	if (family == 0)
418 		return (EPFNOSUPPORT);
419 	if (protocol == 0)
420 		return (EPROTONOSUPPORT);
421 	if (type == 0)
422 		return (EPROTOTYPE);
423 
424 	/* Try to find the specified domain based on the family type. */
425 	for (dp = domains; dp; dp = dp->dom_next)
426 		if (dp->dom_family == family)
427 			goto found;
428 	return (EPFNOSUPPORT);
429 
430 found:
431 	dpr = NULL;
432 
433 	/* Lock out everyone else while we are manipulating the protosw. */
434 	mtx_lock(&dom_mtx);
435 
436 	/* The protocol must exist and only once. */
437 	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
438 		if ((pr->pr_type == type) && (pr->pr_protocol == protocol)) {
439 			if (dpr != NULL) {
440 				mtx_unlock(&dom_mtx);
441 				return (EMLINK);   /* Should not happen! */
442 			} else
443 				dpr = pr;
444 		}
445 	}
446 
447 	/* Protocol does not exist. */
448 	if (dpr == NULL) {
449 		mtx_unlock(&dom_mtx);
450 		return (EPROTONOSUPPORT);
451 	}
452 
453 	/* De-orbit the protocol and make the slot available again. */
454 	dpr->pr_type = 0;
455 	dpr->pr_domain = dp;
456 	dpr->pr_protocol = PROTO_SPACER;
457 	dpr->pr_flags = 0;
458 	dpr->pr_input = NULL;
459 	dpr->pr_output = NULL;
460 	dpr->pr_ctlinput = NULL;
461 	dpr->pr_ctloutput = NULL;
462 	dpr->pr_init = NULL;
463 	dpr->pr_fasttimo = NULL;
464 	dpr->pr_slowtimo = NULL;
465 	dpr->pr_drain = NULL;
466 	dpr->pr_usrreqs = &nousrreqs;
467 
468 	/* Job is done, not more protection required. */
469 	mtx_unlock(&dom_mtx);
470 
471 	return (0);
472 }
473 
474 void
475 pfctlinput(int cmd, struct sockaddr *sa)
476 {
477 	struct domain *dp;
478 	struct protosw *pr;
479 
480 	for (dp = domains; dp; dp = dp->dom_next)
481 		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
482 			if (pr->pr_ctlinput)
483 				(*pr->pr_ctlinput)(cmd, sa, (void *)0);
484 }
485 
486 void
487 pfctlinput2(int cmd, struct sockaddr *sa, void *ctlparam)
488 {
489 	struct domain *dp;
490 	struct protosw *pr;
491 
492 	if (!sa)
493 		return;
494 	for (dp = domains; dp; dp = dp->dom_next) {
495 		/*
496 		 * the check must be made by xx_ctlinput() anyways, to
497 		 * make sure we use data item pointed to by ctlparam in
498 		 * correct way.  the following check is made just for safety.
499 		 */
500 		if (dp->dom_family != sa->sa_family)
501 			continue;
502 
503 		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
504 			if (pr->pr_ctlinput)
505 				(*pr->pr_ctlinput)(cmd, sa, ctlparam);
506 	}
507 }
508 
509 static void
510 pfslowtimo(void *arg)
511 {
512 	struct domain *dp;
513 	struct protosw *pr;
514 
515 	for (dp = domains; dp; dp = dp->dom_next)
516 		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
517 			if (pr->pr_slowtimo)
518 				(*pr->pr_slowtimo)();
519 	callout_reset(&pfslow_callout, hz/2, pfslowtimo, NULL);
520 }
521 
522 static void
523 pffasttimo(void *arg)
524 {
525 	struct domain *dp;
526 	struct protosw *pr;
527 
528 	for (dp = domains; dp; dp = dp->dom_next)
529 		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
530 			if (pr->pr_fasttimo)
531 				(*pr->pr_fasttimo)();
532 	callout_reset(&pffast_callout, hz/5, pffasttimo, NULL);
533 }
534