xref: /freebsd/sys/kern/uipc_domain.c (revision 2faf504d1ab821fe2b9df9d2afb49bb35e1334f4)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1982, 1986, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  *	@(#)uipc_domain.c	8.2 (Berkeley) 10/18/93
32  */
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 
37 #include <sys/param.h>
38 #include <sys/socket.h>
39 #include <sys/protosw.h>
40 #include <sys/domain.h>
41 #include <sys/eventhandler.h>
42 #include <sys/epoch.h>
43 #include <sys/mbuf.h>
44 #include <sys/kernel.h>
45 #include <sys/lock.h>
46 #include <sys/mutex.h>
47 #include <sys/socketvar.h>
48 #include <sys/systm.h>
49 
50 #include <machine/atomic.h>
51 
52 #include <net/vnet.h>
53 
54 /*
55  * System initialization
56  *
57  * Note: domain initialization takes place on a per domain basis
58  * as a result of traversing a SYSINIT linker set.  Most likely,
59  * each domain would want to call DOMAIN_SET(9) itself, which
60  * would cause the domain to be added just after domaininit()
61  * is called during startup.
62  *
63  * See DOMAIN_SET(9) for details on its use.
64  */
65 
66 static void domaininit(void *);
67 SYSINIT(domain, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, domaininit, NULL);
68 
69 static void domainfinalize(void *);
70 SYSINIT(domainfin, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, domainfinalize,
71     NULL);
72 
73 static struct callout pffast_callout;
74 static struct callout pfslow_callout;
75 
76 static void	pffasttimo(void *);
77 static void	pfslowtimo(void *);
78 
79 struct domain *domains;		/* registered protocol domains */
80 int domain_init_status = 0;
81 static struct mtx dom_mtx;		/* domain list lock */
82 MTX_SYSINIT(domain, &dom_mtx, "domain list", MTX_DEF);
83 
84 /*
85  * Dummy protocol specific user requests function pointer array.
86  * All functions return EOPNOTSUPP.
87  */
88 struct pr_usrreqs nousrreqs = {
89 	.pru_accept =		pru_accept_notsupp,
90 	.pru_attach =		pru_attach_notsupp,
91 	.pru_bind =		pru_bind_notsupp,
92 	.pru_connect =		pru_connect_notsupp,
93 	.pru_connect2 =		pru_connect2_notsupp,
94 	.pru_control =		pru_control_notsupp,
95 	.pru_disconnect	=	pru_disconnect_notsupp,
96 	.pru_listen =		pru_listen_notsupp,
97 	.pru_peeraddr =		pru_peeraddr_notsupp,
98 	.pru_rcvd =		pru_rcvd_notsupp,
99 	.pru_rcvoob =		pru_rcvoob_notsupp,
100 	.pru_send =		pru_send_notsupp,
101 	.pru_sense =		pru_sense_null,
102 	.pru_shutdown =		pru_shutdown_notsupp,
103 	.pru_sockaddr =		pru_sockaddr_notsupp,
104 	.pru_sosend =		pru_sosend_notsupp,
105 	.pru_soreceive =	pru_soreceive_notsupp,
106 	.pru_sopoll =		pru_sopoll_notsupp,
107 };
108 
109 static void
110 protosw_init(struct protosw *pr)
111 {
112 	struct pr_usrreqs *pu;
113 
114 	pu = pr->pr_usrreqs;
115 	KASSERT(pu != NULL, ("protosw_init: %ssw[%d] has no usrreqs!",
116 	    pr->pr_domain->dom_name,
117 	    (int)(pr - pr->pr_domain->dom_protosw)));
118 
119 	/*
120 	 * Protocol switch methods fall into three categories: mandatory,
121 	 * mandatory but protosw_init() provides a default, and optional.
122 	 *
123 	 * For true protocols (i.e., pru_attach != NULL), KASSERT truly
124 	 * mandatory methods with no defaults, and initialize defaults for
125 	 * other mandatory methods if the protocol hasn't defined an
126 	 * implementation (NULL function pointer).
127 	 */
128 #if 0
129 	if (pu->pru_attach != NULL) {
130 		KASSERT(pu->pru_abort != NULL,
131 		    ("protosw_init: %ssw[%d] pru_abort NULL",
132 		    pr->pr_domain->dom_name,
133 		    (int)(pr - pr->pr_domain->dom_protosw)));
134 		KASSERT(pu->pru_send != NULL,
135 		    ("protosw_init: %ssw[%d] pru_send NULL",
136 		    pr->pr_domain->dom_name,
137 		    (int)(pr - pr->pr_domain->dom_protosw)));
138 	}
139 #endif
140 
141 #define DEFAULT(foo, bar)	if ((foo) == NULL)  (foo) = (bar)
142 	DEFAULT(pu->pru_accept, pru_accept_notsupp);
143 	DEFAULT(pu->pru_aio_queue, pru_aio_queue_notsupp);
144 	DEFAULT(pu->pru_bind, pru_bind_notsupp);
145 	DEFAULT(pu->pru_bindat, pru_bindat_notsupp);
146 	DEFAULT(pu->pru_connect, pru_connect_notsupp);
147 	DEFAULT(pu->pru_connect2, pru_connect2_notsupp);
148 	DEFAULT(pu->pru_connectat, pru_connectat_notsupp);
149 	DEFAULT(pu->pru_control, pru_control_notsupp);
150 	DEFAULT(pu->pru_disconnect, pru_disconnect_notsupp);
151 	DEFAULT(pu->pru_listen, pru_listen_notsupp);
152 	DEFAULT(pu->pru_peeraddr, pru_peeraddr_notsupp);
153 	DEFAULT(pu->pru_rcvd, pru_rcvd_notsupp);
154 	DEFAULT(pu->pru_rcvoob, pru_rcvoob_notsupp);
155 	DEFAULT(pu->pru_sense, pru_sense_null);
156 	DEFAULT(pu->pru_shutdown, pru_shutdown_notsupp);
157 	DEFAULT(pu->pru_sockaddr, pru_sockaddr_notsupp);
158 	DEFAULT(pu->pru_sosend, sosend_generic);
159 	DEFAULT(pu->pru_soreceive, soreceive_generic);
160 	DEFAULT(pu->pru_sopoll, sopoll_generic);
161 	DEFAULT(pu->pru_ready, pru_ready_notsupp);
162 #undef DEFAULT
163 	if (pr->pr_init)
164 		(*pr->pr_init)();
165 }
166 
167 /*
168  * Add a new protocol domain to the list of supported domains
169  * Note: you cant unload it again because a socket may be using it.
170  * XXX can't fail at this time.
171  */
172 void
173 domain_init(void *arg)
174 {
175 	struct domain *dp = arg;
176 	struct protosw *pr;
177 	int flags;
178 
179 	flags = atomic_load_acq_int(&dp->dom_flags);
180 	if ((flags & DOMF_SUPPORTED) == 0)
181 		return;
182 	KASSERT((flags & DOMF_INITED) == 0 || !IS_DEFAULT_VNET(curvnet),
183 	    ("Premature initialization of domain in non-default vnet"));
184 	if (dp->dom_init)
185 		(*dp->dom_init)();
186 	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
187 		protosw_init(pr);
188 	/*
189 	 * update global information about maximums
190 	 */
191 	max_hdr = max_linkhdr + max_protohdr;
192 	max_datalen = MHLEN - max_hdr;
193 	if (max_datalen < 1)
194 		panic("%s: max_datalen < 1", __func__);
195 	if (IS_DEFAULT_VNET(curvnet))
196 		atomic_set_rel_int(&dp->dom_flags, DOMF_INITED);
197 }
198 
199 #ifdef VIMAGE
/*
 * Per-vnet initialization entry point.  The virtualized case needs nothing
 * extra, so the work is forwarded to domain_init() unchanged.
 */
void
vnet_domain_init(void *arg)
{
	domain_init(arg);
}
207 
208 void
209 vnet_domain_uninit(void *arg)
210 {
211 	struct domain *dp = arg;
212 
213 	if ((atomic_load_acq_int(&dp->dom_flags) & DOMF_SUPPORTED) == 0)
214 		return;
215 	if (dp->dom_destroy)
216 		(*dp->dom_destroy)();
217 }
218 #endif
219 
220 /*
221  * Add a new protocol domain to the list of supported domains
222  * Note: you cant unload it again because a socket may be using it.
223  * XXX can't fail at this time.
224  */
225 void
226 domain_add(void *data)
227 {
228 	struct domain *dp;
229 
230 	dp = (struct domain *)data;
231 	if (dp->dom_probe != NULL && (*dp->dom_probe)() != 0)
232 		return;
233 	atomic_set_rel_int(&dp->dom_flags, DOMF_SUPPORTED);
234 	mtx_lock(&dom_mtx);
235 	dp->dom_next = domains;
236 	domains = dp;
237 
238 	KASSERT(domain_init_status >= 1,
239 	    ("attempt to domain_add(%s) before domaininit()",
240 	    dp->dom_name));
241 #ifndef INVARIANTS
242 	if (domain_init_status < 1)
243 		printf("WARNING: attempt to domain_add(%s) before "
244 		    "domaininit()\n", dp->dom_name);
245 #endif
246 	mtx_unlock(&dom_mtx);
247 }
248 
249 /* ARGSUSED*/
250 static void
251 domaininit(void *dummy)
252 {
253 
254 	if (max_linkhdr < 16)		/* XXX */
255 		max_linkhdr = 16;
256 
257 	callout_init(&pffast_callout, 1);
258 	callout_init(&pfslow_callout, 1);
259 
260 	mtx_lock(&dom_mtx);
261 	KASSERT(domain_init_status == 0, ("domaininit called too late!"));
262 	domain_init_status = 1;
263 	mtx_unlock(&dom_mtx);
264 }
265 
266 /* ARGSUSED*/
267 static void
268 domainfinalize(void *dummy)
269 {
270 
271 	mtx_lock(&dom_mtx);
272 	KASSERT(domain_init_status == 1, ("domainfinalize called too late!"));
273 	domain_init_status = 2;
274 	mtx_unlock(&dom_mtx);
275 
276 	callout_reset(&pffast_callout, 1, pffasttimo, NULL);
277 	callout_reset(&pfslow_callout, 1, pfslowtimo, NULL);
278 }
279 
280 struct domain *
281 pffinddomain(int family)
282 {
283 	struct domain *dp;
284 
285 	for (dp = domains; dp != NULL; dp = dp->dom_next)
286 		if (dp->dom_family == family)
287 			return (dp);
288 	return (NULL);
289 }
290 
291 struct protosw *
292 pffindtype(int family, int type)
293 {
294 	struct domain *dp;
295 	struct protosw *pr;
296 
297 	dp = pffinddomain(family);
298 	if (dp == NULL)
299 		return (NULL);
300 
301 	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
302 		if (pr->pr_type && pr->pr_type == type)
303 			return (pr);
304 	return (NULL);
305 }
306 
307 struct protosw *
308 pffindproto(int family, int protocol, int type)
309 {
310 	struct domain *dp;
311 	struct protosw *pr;
312 	struct protosw *maybe;
313 
314 	maybe = NULL;
315 	if (family == 0)
316 		return (NULL);
317 
318 	dp = pffinddomain(family);
319 	if (dp == NULL)
320 		return (NULL);
321 
322 	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
323 		if ((pr->pr_protocol == protocol) && (pr->pr_type == type))
324 			return (pr);
325 
326 		if (type == SOCK_RAW && pr->pr_type == SOCK_RAW &&
327 		    pr->pr_protocol == 0 && maybe == NULL)
328 			maybe = pr;
329 	}
330 	return (maybe);
331 }
332 
333 /*
334  * The caller must make sure that the new protocol is fully set up and ready to
335  * accept requests before it is registered.
336  */
337 int
338 pf_proto_register(int family, struct protosw *npr)
339 {
340 	VNET_ITERATOR_DECL(vnet_iter);
341 	struct domain *dp;
342 	struct protosw *pr, *fpr;
343 
344 	/* Sanity checks. */
345 	if (family == 0)
346 		return (EPFNOSUPPORT);
347 	if (npr->pr_type == 0)
348 		return (EPROTOTYPE);
349 	if (npr->pr_protocol == 0)
350 		return (EPROTONOSUPPORT);
351 	if (npr->pr_usrreqs == NULL)
352 		return (ENXIO);
353 
354 	/* Try to find the specified domain based on the family. */
355 	dp = pffinddomain(family);
356 	if (dp == NULL)
357 		return (EPFNOSUPPORT);
358 
359 	/* Initialize backpointer to struct domain. */
360 	npr->pr_domain = dp;
361 	fpr = NULL;
362 
363 	/*
364 	 * Protect us against races when two protocol registrations for
365 	 * the same protocol happen at the same time.
366 	 */
367 	mtx_lock(&dom_mtx);
368 
369 	/* The new protocol must not yet exist. */
370 	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
371 		if ((pr->pr_type == npr->pr_type) &&
372 		    (pr->pr_protocol == npr->pr_protocol)) {
373 			mtx_unlock(&dom_mtx);
374 			return (EEXIST);	/* XXX: Check only protocol? */
375 		}
376 		/* While here, remember the first free spacer. */
377 		if ((fpr == NULL) && (pr->pr_protocol == PROTO_SPACER))
378 			fpr = pr;
379 	}
380 
381 	/* If no free spacer is found we can't add the new protocol. */
382 	if (fpr == NULL) {
383 		mtx_unlock(&dom_mtx);
384 		return (ENOMEM);
385 	}
386 
387 	/* Copy the new struct protosw over the spacer. */
388 	bcopy(npr, fpr, sizeof(*fpr));
389 
390 	/* Job is done, no more protection required. */
391 	mtx_unlock(&dom_mtx);
392 
393 	/* Initialize and activate the protocol. */
394 	VNET_LIST_RLOCK();
395 	VNET_FOREACH(vnet_iter) {
396 		CURVNET_SET_QUIET(vnet_iter);
397 		protosw_init(fpr);
398 		CURVNET_RESTORE();
399 	}
400 	VNET_LIST_RUNLOCK();
401 
402 	return (0);
403 }
404 
405 /*
406  * The caller must make sure the protocol and its functions correctly shut down
407  * all sockets and release all locks and memory references.
408  */
409 int
410 pf_proto_unregister(int family, int protocol, int type)
411 {
412 	struct domain *dp;
413 	struct protosw *pr, *dpr;
414 
415 	/* Sanity checks. */
416 	if (family == 0)
417 		return (EPFNOSUPPORT);
418 	if (protocol == 0)
419 		return (EPROTONOSUPPORT);
420 	if (type == 0)
421 		return (EPROTOTYPE);
422 
423 	/* Try to find the specified domain based on the family type. */
424 	dp = pffinddomain(family);
425 	if (dp == NULL)
426 		return (EPFNOSUPPORT);
427 
428 	dpr = NULL;
429 
430 	/* Lock out everyone else while we are manipulating the protosw. */
431 	mtx_lock(&dom_mtx);
432 
433 	/* The protocol must exist and only once. */
434 	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
435 		if ((pr->pr_type == type) && (pr->pr_protocol == protocol)) {
436 			if (dpr != NULL) {
437 				mtx_unlock(&dom_mtx);
438 				return (EMLINK);   /* Should not happen! */
439 			} else
440 				dpr = pr;
441 		}
442 	}
443 
444 	/* Protocol does not exist. */
445 	if (dpr == NULL) {
446 		mtx_unlock(&dom_mtx);
447 		return (EPROTONOSUPPORT);
448 	}
449 
450 	/* De-orbit the protocol and make the slot available again. */
451 	dpr->pr_type = 0;
452 	dpr->pr_domain = dp;
453 	dpr->pr_protocol = PROTO_SPACER;
454 	dpr->pr_flags = 0;
455 	dpr->pr_input = NULL;
456 	dpr->pr_output = NULL;
457 	dpr->pr_ctlinput = NULL;
458 	dpr->pr_ctloutput = NULL;
459 	dpr->pr_init = NULL;
460 	dpr->pr_fasttimo = NULL;
461 	dpr->pr_slowtimo = NULL;
462 	dpr->pr_drain = NULL;
463 	dpr->pr_usrreqs = &nousrreqs;
464 
465 	/* Job is done, not more protection required. */
466 	mtx_unlock(&dom_mtx);
467 
468 	return (0);
469 }
470 
471 void
472 pfctlinput(int cmd, struct sockaddr *sa)
473 {
474 	struct domain *dp;
475 	struct protosw *pr;
476 
477 	for (dp = domains; dp; dp = dp->dom_next)
478 		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
479 			if (pr->pr_ctlinput)
480 				(*pr->pr_ctlinput)(cmd, sa, (void *)0);
481 }
482 
483 static void
484 pfslowtimo(void *arg)
485 {
486 	struct epoch_tracker et;
487 	struct domain *dp;
488 	struct protosw *pr;
489 
490 	NET_EPOCH_ENTER(et);
491 	for (dp = domains; dp; dp = dp->dom_next) {
492 		if ((atomic_load_int(&dp->dom_flags) & DOMF_INITED) == 0)
493 			continue;
494 		atomic_thread_fence_acq();
495 		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
496 			if (pr->pr_slowtimo)
497 				(*pr->pr_slowtimo)();
498 	}
499 	NET_EPOCH_EXIT(et);
500 	callout_reset(&pfslow_callout, hz/2, pfslowtimo, NULL);
501 }
502 
503 static void
504 pffasttimo(void *arg)
505 {
506 	struct epoch_tracker et;
507 	struct domain *dp;
508 	struct protosw *pr;
509 
510 	NET_EPOCH_ENTER(et);
511 	for (dp = domains; dp; dp = dp->dom_next) {
512 		if ((atomic_load_int(&dp->dom_flags) & DOMF_INITED) == 0)
513 			continue;
514 		atomic_thread_fence_acq();
515 		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
516 			if (pr->pr_fasttimo)
517 				(*pr->pr_fasttimo)();
518 	}
519 	NET_EPOCH_EXIT(et);
520 	callout_reset(&pffast_callout, hz/5, pffasttimo, NULL);
521 }
522