xref: /freebsd/sys/kern/uipc_domain.c (revision e674ddec0b4138274539587fe9336b577ff1242a)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1982, 1986, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  *	@(#)uipc_domain.c	8.2 (Berkeley) 10/18/93
32  */
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 
37 #include <sys/param.h>
38 #include <sys/socket.h>
39 #include <sys/protosw.h>
40 #include <sys/domain.h>
41 #include <sys/eventhandler.h>
42 #include <sys/epoch.h>
43 #include <sys/mbuf.h>
44 #include <sys/kernel.h>
45 #include <sys/lock.h>
46 #include <sys/mutex.h>
47 #include <sys/rmlock.h>
48 #include <sys/socketvar.h>
49 #include <sys/systm.h>
50 
51 #include <machine/atomic.h>
52 
53 #include <net/vnet.h>
54 
55 /*
56  * System initialization
57  *
58  * Note: domain initialization takes place on a per domain basis
59  * as a result of traversing a SYSINIT linker set.  Most likely,
60  * each domain would want to call DOMAIN_SET(9) itself, which
61  * would cause the domain to be added just after domaininit()
62  * is called during startup.
63  *
64  * See DOMAIN_SET(9) for details on its use.
65  */
66 
67 static void domaininit(void *);
68 SYSINIT(domain, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, domaininit, NULL);
69 
70 static void domainfinalize(void *);
71 SYSINIT(domainfin, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, domainfinalize,
72     NULL);
73 
74 static struct callout pffast_callout;
75 static struct callout pfslow_callout;
76 
77 static void	pffasttimo(void *);
78 static void	pfslowtimo(void *);
79 
80 static struct rmlock pftimo_lock;
81 RM_SYSINIT(pftimo_lock, &pftimo_lock, "pftimo");
82 
83 static LIST_HEAD(, protosw) pffast_list =
84     LIST_HEAD_INITIALIZER(pffast_list);
85 static LIST_HEAD(, protosw) pfslow_list =
86     LIST_HEAD_INITIALIZER(pfslow_list);
87 
88 struct domain *domains;		/* registered protocol domains */
89 int domain_init_status = 0;
90 static struct mtx dom_mtx;		/* domain list lock */
91 MTX_SYSINIT(domain, &dom_mtx, "domain list", MTX_DEF);
92 
93 /*
94  * Dummy protocol specific user requests function pointer array.
95  * All functions return EOPNOTSUPP.
96  */
97 struct pr_usrreqs nousrreqs = {
98 	.pru_accept =		pru_accept_notsupp,
99 	.pru_attach =		pru_attach_notsupp,
100 	.pru_bind =		pru_bind_notsupp,
101 	.pru_connect =		pru_connect_notsupp,
102 	.pru_connect2 =		pru_connect2_notsupp,
103 	.pru_control =		pru_control_notsupp,
104 	.pru_disconnect	=	pru_disconnect_notsupp,
105 	.pru_listen =		pru_listen_notsupp,
106 	.pru_peeraddr =		pru_peeraddr_notsupp,
107 	.pru_rcvd =		pru_rcvd_notsupp,
108 	.pru_rcvoob =		pru_rcvoob_notsupp,
109 	.pru_send =		pru_send_notsupp,
110 	.pru_sense =		pru_sense_null,
111 	.pru_shutdown =		pru_shutdown_notsupp,
112 	.pru_sockaddr =		pru_sockaddr_notsupp,
113 	.pru_sosend =		pru_sosend_notsupp,
114 	.pru_soreceive =	pru_soreceive_notsupp,
115 	.pru_sopoll =		pru_sopoll_notsupp,
116 };
117 
118 static void
119 pr_usrreqs_init(struct protosw *pr)
120 {
121 	struct pr_usrreqs *pu;
122 
123 	pu = pr->pr_usrreqs;
124 	KASSERT(pu != NULL, ("%s: %ssw[%d] has no usrreqs!", __func__,
125 	    pr->pr_domain->dom_name,
126 	    (int)(pr - pr->pr_domain->dom_protosw)));
127 
128 	/*
129 	 * Protocol switch methods fall into three categories: mandatory,
130 	 * mandatory but protosw_init() provides a default, and optional.
131 	 *
132 	 * For true protocols (i.e., pru_attach != NULL), KASSERT truly
133 	 * mandatory methods with no defaults, and initialize defaults for
134 	 * other mandatory methods if the protocol hasn't defined an
135 	 * implementation (NULL function pointer).
136 	 */
137 #if 0
138 	if (pu->pru_attach != NULL) {
139 		KASSERT(pu->pru_abort != NULL,
140 		    ("protosw_init: %ssw[%d] pru_abort NULL",
141 		    pr->pr_domain->dom_name,
142 		    (int)(pr - pr->pr_domain->dom_protosw)));
143 		KASSERT(pu->pru_send != NULL,
144 		    ("protosw_init: %ssw[%d] pru_send NULL",
145 		    pr->pr_domain->dom_name,
146 		    (int)(pr - pr->pr_domain->dom_protosw)));
147 	}
148 #endif
149 
150 #define DEFAULT(foo, bar)	if ((foo) == NULL)  (foo) = (bar)
151 	DEFAULT(pu->pru_accept, pru_accept_notsupp);
152 	DEFAULT(pu->pru_aio_queue, pru_aio_queue_notsupp);
153 	DEFAULT(pu->pru_bind, pru_bind_notsupp);
154 	DEFAULT(pu->pru_bindat, pru_bindat_notsupp);
155 	DEFAULT(pu->pru_connect, pru_connect_notsupp);
156 	DEFAULT(pu->pru_connect2, pru_connect2_notsupp);
157 	DEFAULT(pu->pru_connectat, pru_connectat_notsupp);
158 	DEFAULT(pu->pru_control, pru_control_notsupp);
159 	DEFAULT(pu->pru_disconnect, pru_disconnect_notsupp);
160 	DEFAULT(pu->pru_listen, pru_listen_notsupp);
161 	DEFAULT(pu->pru_peeraddr, pru_peeraddr_notsupp);
162 	DEFAULT(pu->pru_rcvd, pru_rcvd_notsupp);
163 	DEFAULT(pu->pru_rcvoob, pru_rcvoob_notsupp);
164 	DEFAULT(pu->pru_sense, pru_sense_null);
165 	DEFAULT(pu->pru_shutdown, pru_shutdown_notsupp);
166 	DEFAULT(pu->pru_sockaddr, pru_sockaddr_notsupp);
167 	DEFAULT(pu->pru_sosend, sosend_generic);
168 	DEFAULT(pu->pru_soreceive, soreceive_generic);
169 	DEFAULT(pu->pru_sopoll, sopoll_generic);
170 	DEFAULT(pu->pru_ready, pru_ready_notsupp);
171 #undef DEFAULT
172 }
173 
174 /*
175  * Add a new protocol domain to the list of supported domains
176  * Note: you cant unload it again because a socket may be using it.
177  * XXX can't fail at this time.
178  */
179 void
180 domain_init(void *arg)
181 {
182 	struct domain *dp = arg;
183 	struct protosw *pr;
184 	int flags;
185 
186 	MPASS(IS_DEFAULT_VNET(curvnet));
187 
188 	flags = atomic_load_acq_int(&dp->dom_flags);
189 	if ((flags & DOMF_SUPPORTED) == 0)
190 		return;
191 	MPASS((flags & DOMF_INITED) == 0);
192 
193 	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
194 		pr_usrreqs_init(pr);
195 		rm_wlock(&pftimo_lock);
196 		if (pr->pr_fasttimo != NULL)
197 			LIST_INSERT_HEAD(&pffast_list, pr, pr_fasttimos);
198 		if (pr->pr_slowtimo != NULL)
199 			LIST_INSERT_HEAD(&pfslow_list, pr, pr_slowtimos);
200 		rm_wunlock(&pftimo_lock);
201 	}
202 
203 	/*
204 	 * update global information about maximums
205 	 */
206 	max_hdr = max_linkhdr + max_protohdr;
207 	max_datalen = MHLEN - max_hdr;
208 	if (max_datalen < 1)
209 		panic("%s: max_datalen < 1", __func__);
210 	atomic_set_rel_int(&dp->dom_flags, DOMF_INITED);
211 }
212 
213 /*
214  * Add a new protocol domain to the list of supported domains
215  * Note: you cant unload it again because a socket may be using it.
216  * XXX can't fail at this time.
217  */
218 void
219 domain_add(void *data)
220 {
221 	struct domain *dp;
222 
223 	dp = (struct domain *)data;
224 	if (dp->dom_probe != NULL && (*dp->dom_probe)() != 0)
225 		return;
226 	atomic_set_rel_int(&dp->dom_flags, DOMF_SUPPORTED);
227 	mtx_lock(&dom_mtx);
228 	dp->dom_next = domains;
229 	domains = dp;
230 
231 	KASSERT(domain_init_status >= 1,
232 	    ("attempt to domain_add(%s) before domaininit()",
233 	    dp->dom_name));
234 #ifndef INVARIANTS
235 	if (domain_init_status < 1)
236 		printf("WARNING: attempt to domain_add(%s) before "
237 		    "domaininit()\n", dp->dom_name);
238 #endif
239 	mtx_unlock(&dom_mtx);
240 }
241 
242 void
243 domain_remove(void *data)
244 {
245 	struct domain *dp = (struct domain *)data;
246 
247 	if ((dp->dom_flags & DOMF_UNLOADABLE) == 0)
248 		return;
249 
250 	mtx_lock(&dom_mtx);
251 	if (domains == dp) {
252 		domains = dp->dom_next;
253 	} else {
254 		struct domain *curr;
255 		for (curr = domains; curr != NULL; curr = curr->dom_next) {
256 			if (curr->dom_next == dp) {
257 				curr->dom_next = dp->dom_next;
258 				break;
259 			}
260 		}
261 	}
262 	mtx_unlock(&dom_mtx);
263 }
264 
265 /* ARGSUSED*/
266 static void
267 domaininit(void *dummy)
268 {
269 
270 	if (max_linkhdr < 16)		/* XXX */
271 		max_linkhdr = 16;
272 
273 	callout_init(&pffast_callout, 1);
274 	callout_init(&pfslow_callout, 1);
275 
276 	mtx_lock(&dom_mtx);
277 	KASSERT(domain_init_status == 0, ("domaininit called too late!"));
278 	domain_init_status = 1;
279 	mtx_unlock(&dom_mtx);
280 }
281 
282 /* ARGSUSED*/
283 static void
284 domainfinalize(void *dummy)
285 {
286 
287 	mtx_lock(&dom_mtx);
288 	KASSERT(domain_init_status == 1, ("domainfinalize called too late!"));
289 	domain_init_status = 2;
290 	mtx_unlock(&dom_mtx);
291 
292 	callout_reset(&pffast_callout, 1, pffasttimo, NULL);
293 	callout_reset(&pfslow_callout, 1, pfslowtimo, NULL);
294 }
295 
296 struct domain *
297 pffinddomain(int family)
298 {
299 	struct domain *dp;
300 
301 	for (dp = domains; dp != NULL; dp = dp->dom_next)
302 		if (dp->dom_family == family)
303 			return (dp);
304 	return (NULL);
305 }
306 
307 struct protosw *
308 pffindtype(int family, int type)
309 {
310 	struct domain *dp;
311 	struct protosw *pr;
312 
313 	dp = pffinddomain(family);
314 	if (dp == NULL)
315 		return (NULL);
316 
317 	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
318 		if (pr->pr_type && pr->pr_type == type)
319 			return (pr);
320 	return (NULL);
321 }
322 
323 struct protosw *
324 pffindproto(int family, int protocol, int type)
325 {
326 	struct domain *dp;
327 	struct protosw *pr;
328 	struct protosw *maybe;
329 
330 	maybe = NULL;
331 	if (family == 0)
332 		return (NULL);
333 
334 	dp = pffinddomain(family);
335 	if (dp == NULL)
336 		return (NULL);
337 
338 	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
339 		if ((pr->pr_protocol == protocol) && (pr->pr_type == type))
340 			return (pr);
341 
342 		if (type == SOCK_RAW && pr->pr_type == SOCK_RAW &&
343 		    pr->pr_protocol == 0 && maybe == NULL)
344 			maybe = pr;
345 	}
346 	return (maybe);
347 }
348 
349 /*
350  * The caller must make sure that the new protocol is fully set up and ready to
351  * accept requests before it is registered.
352  */
353 int
354 pf_proto_register(int family, struct protosw *npr)
355 {
356 	struct domain *dp;
357 	struct protosw *pr, *fpr;
358 
359 	/* Sanity checks. */
360 	if (family == 0)
361 		return (EPFNOSUPPORT);
362 	if (npr->pr_type == 0)
363 		return (EPROTOTYPE);
364 	if (npr->pr_protocol == 0)
365 		return (EPROTONOSUPPORT);
366 	if (npr->pr_usrreqs == NULL)
367 		return (ENXIO);
368 
369 	/* Try to find the specified domain based on the family. */
370 	dp = pffinddomain(family);
371 	if (dp == NULL)
372 		return (EPFNOSUPPORT);
373 
374 	/* Initialize backpointer to struct domain. */
375 	npr->pr_domain = dp;
376 	fpr = NULL;
377 
378 	/*
379 	 * Protect us against races when two protocol registrations for
380 	 * the same protocol happen at the same time.
381 	 */
382 	mtx_lock(&dom_mtx);
383 
384 	/* The new protocol must not yet exist. */
385 	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
386 		if ((pr->pr_type == npr->pr_type) &&
387 		    (pr->pr_protocol == npr->pr_protocol)) {
388 			mtx_unlock(&dom_mtx);
389 			return (EEXIST);	/* XXX: Check only protocol? */
390 		}
391 		/* While here, remember the first free spacer. */
392 		if ((fpr == NULL) && (pr->pr_protocol == PROTO_SPACER))
393 			fpr = pr;
394 	}
395 
396 	/* If no free spacer is found we can't add the new protocol. */
397 	if (fpr == NULL) {
398 		mtx_unlock(&dom_mtx);
399 		return (ENOMEM);
400 	}
401 
402 	/* Copy the new struct protosw over the spacer. */
403 	bcopy(npr, fpr, sizeof(*fpr));
404 
405 	pr_usrreqs_init(fpr);
406 	rm_wlock(&pftimo_lock);
407 	if (fpr->pr_fasttimo != NULL)
408 		LIST_INSERT_HEAD(&pffast_list, fpr, pr_fasttimos);
409 	if (fpr->pr_slowtimo != NULL)
410 		LIST_INSERT_HEAD(&pfslow_list, fpr, pr_slowtimos);
411 	rm_wunlock(&pftimo_lock);
412 
413 	/* Job is done, no more protection required. */
414 	mtx_unlock(&dom_mtx);
415 
416 	return (0);
417 }
418 
419 /*
420  * The caller must make sure the protocol and its functions correctly shut down
421  * all sockets and release all locks and memory references.
422  */
423 int
424 pf_proto_unregister(int family, int protocol, int type)
425 {
426 	struct domain *dp;
427 	struct protosw *pr, *dpr;
428 
429 	/* Sanity checks. */
430 	if (family == 0)
431 		return (EPFNOSUPPORT);
432 	if (protocol == 0)
433 		return (EPROTONOSUPPORT);
434 	if (type == 0)
435 		return (EPROTOTYPE);
436 
437 	/* Try to find the specified domain based on the family type. */
438 	dp = pffinddomain(family);
439 	if (dp == NULL)
440 		return (EPFNOSUPPORT);
441 
442 	dpr = NULL;
443 
444 	/* Lock out everyone else while we are manipulating the protosw. */
445 	mtx_lock(&dom_mtx);
446 
447 	/* The protocol must exist and only once. */
448 	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
449 		if ((pr->pr_type == type) && (pr->pr_protocol == protocol)) {
450 			if (dpr != NULL) {
451 				mtx_unlock(&dom_mtx);
452 				return (EMLINK);   /* Should not happen! */
453 			} else
454 				dpr = pr;
455 		}
456 	}
457 
458 	/* Protocol does not exist. */
459 	if (dpr == NULL) {
460 		mtx_unlock(&dom_mtx);
461 		return (EPROTONOSUPPORT);
462 	}
463 
464 	rm_wlock(&pftimo_lock);
465 	if (dpr->pr_fasttimo != NULL)
466 		LIST_REMOVE(dpr, pr_fasttimos);
467 	if (dpr->pr_slowtimo != NULL)
468 		LIST_REMOVE(dpr, pr_slowtimos);
469 	rm_wunlock(&pftimo_lock);
470 
471 	/* De-orbit the protocol and make the slot available again. */
472 	dpr->pr_type = 0;
473 	dpr->pr_domain = dp;
474 	dpr->pr_protocol = PROTO_SPACER;
475 	dpr->pr_flags = 0;
476 	dpr->pr_input = NULL;
477 	dpr->pr_ctlinput = NULL;
478 	dpr->pr_ctloutput = NULL;
479 	dpr->pr_fasttimo = NULL;
480 	dpr->pr_slowtimo = NULL;
481 	dpr->pr_drain = NULL;
482 	dpr->pr_usrreqs = &nousrreqs;
483 
484 	/* Job is done, not more protection required. */
485 	mtx_unlock(&dom_mtx);
486 
487 	return (0);
488 }
489 
490 static void
491 pfslowtimo(void *arg)
492 {
493 	struct rm_priotracker tracker;
494 	struct epoch_tracker et;
495 	struct protosw *pr;
496 
497 	rm_rlock(&pftimo_lock, &tracker);
498 	NET_EPOCH_ENTER(et);
499 	LIST_FOREACH(pr, &pfslow_list, pr_slowtimos) {
500 		(*pr->pr_slowtimo)();
501 	}
502 	NET_EPOCH_EXIT(et);
503 	rm_runlock(&pftimo_lock, &tracker);
504 	callout_reset(&pfslow_callout, hz / PR_SLOWHZ, pfslowtimo, NULL);
505 }
506 
507 static void
508 pffasttimo(void *arg)
509 {
510 	struct rm_priotracker tracker;
511 	struct epoch_tracker et;
512 	struct protosw *pr;
513 
514 	rm_rlock(&pftimo_lock, &tracker);
515 	NET_EPOCH_ENTER(et);
516 	LIST_FOREACH(pr, &pffast_list, pr_fasttimos) {
517 		(*pr->pr_fasttimo)();
518 	}
519 	NET_EPOCH_EXIT(et);
520 	rm_runlock(&pftimo_lock, &tracker);
521 	callout_reset(&pffast_callout, hz / PR_FASTHZ, pffasttimo, NULL);
522 }
523