xref: /freebsd/sys/netgraph/ng_socket.c (revision b7c60aadbbd5c846a250c05791fe7406d6d78bf4)
1 /*
2  * ng_socket.c
3  */
4 
5 /*-
6  * Copyright (c) 1996-1999 Whistle Communications, Inc.
7  * All rights reserved.
8  *
9  * Subject to the following obligations and disclaimer of warranty, use and
10  * redistribution of this software, in source or object code forms, with or
11  * without modifications are expressly permitted by Whistle Communications;
12  * provided, however, that:
13  * 1. Any and all reproductions of the source or object code must include the
14  *    copyright notice above and the following disclaimer of warranties; and
15  * 2. No rights are granted, in any manner or form, to use Whistle
16  *    Communications, Inc. trademarks, including the mark "WHISTLE
17  *    COMMUNICATIONS" on advertising, endorsements, or otherwise except as
18  *    such appears in the above copyright notice or in the software.
19  *
20  * THIS SOFTWARE IS BEING PROVIDED BY WHISTLE COMMUNICATIONS "AS IS", AND
21  * TO THE MAXIMUM EXTENT PERMITTED BY LAW, WHISTLE COMMUNICATIONS MAKES NO
22  * REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, REGARDING THIS SOFTWARE,
23  * INCLUDING WITHOUT LIMITATION, ANY AND ALL IMPLIED WARRANTIES OF
24  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT.
25  * WHISTLE COMMUNICATIONS DOES NOT WARRANT, GUARANTEE, OR MAKE ANY
26  * REPRESENTATIONS REGARDING THE USE OF, OR THE RESULTS OF THE USE OF THIS
27  * SOFTWARE IN TERMS OF ITS CORRECTNESS, ACCURACY, RELIABILITY OR OTHERWISE.
28  * IN NO EVENT SHALL WHISTLE COMMUNICATIONS BE LIABLE FOR ANY DAMAGES
29  * RESULTING FROM OR ARISING OUT OF ANY USE OF THIS SOFTWARE, INCLUDING
30  * WITHOUT LIMITATION, ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
31  * PUNITIVE, OR CONSEQUENTIAL DAMAGES, PROCUREMENT OF SUBSTITUTE GOODS OR
32  * SERVICES, LOSS OF USE, DATA OR PROFITS, HOWEVER CAUSED AND UNDER ANY
33  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
35  * THIS SOFTWARE, EVEN IF WHISTLE COMMUNICATIONS IS ADVISED OF THE POSSIBILITY
36  * OF SUCH DAMAGE.
37  *
38  * Author: Julian Elischer <julian@freebsd.org>
39  *
40  * $FreeBSD$
41  * $Whistle: ng_socket.c,v 1.28 1999/11/01 09:24:52 julian Exp $
42  */
43 
44 /*
45  * Netgraph socket nodes
46  *
47  * There are two types of netgraph sockets, control and data.
48  * Control sockets have a netgraph node, but data sockets are
49  * parasitic on control sockets, and have no node of their own.
50  */
51 
52 #include <sys/param.h>
53 #include <sys/domain.h>
54 #include <sys/hash.h>
55 #include <sys/kernel.h>
56 #include <sys/linker.h>
57 #include <sys/lock.h>
58 #include <sys/malloc.h>
59 #include <sys/mbuf.h>
60 #include <sys/mutex.h>
61 #include <sys/priv.h>
62 #include <sys/protosw.h>
63 #include <sys/queue.h>
64 #include <sys/socket.h>
65 #include <sys/socketvar.h>
66 #include <sys/syscallsubr.h>
67 #include <sys/sysctl.h>
68 
69 #include <net/vnet.h>
70 
71 #include <netgraph/ng_message.h>
72 #include <netgraph/netgraph.h>
73 #include <netgraph/ng_socketvar.h>
74 #include <netgraph/ng_socket.h>
75 
/*
 * Malloc types for path buffers and per-socket state.  Unless the kernel
 * is built with NG_SEPARATE_MALLOC both are aliases for M_NETGRAPH.
 */
#ifndef NG_SEPARATE_MALLOC
#define M_NETGRAPH_PATH M_NETGRAPH
#define M_NETGRAPH_SOCK M_NETGRAPH
#else
static MALLOC_DEFINE(M_NETGRAPH_PATH, "netgraph_path", "netgraph path info");
static MALLOC_DEFINE(M_NETGRAPH_SOCK, "netgraph_sock", "netgraph socket info");
#endif
83 
84 /*
85  * It's Ascii-art time!
86  *   +-------------+   +-------------+
87  *   |socket  (ctl)|   |socket (data)|
88  *   +-------------+   +-------------+
89  *          ^                 ^
90  *          |                 |
91  *          v                 v
92  *    +-----------+     +-----------+
93  *    |pcb   (ctl)|     |pcb  (data)|
94  *    +-----------+     +-----------+
95  *          ^                 ^
96  *          |                 |
97  *          v                 v
98  *      +--------------------------+
99  *      |   Socket type private    |
100  *      |       data               |
101  *      +--------------------------+
102  *                   ^
103  *                   |
104  *                   v
105  *           +----------------+
106  *           | struct ng_node |
107  *           +----------------+
108  */
109 
110 /* Netgraph node methods */
111 static ng_constructor_t	ngs_constructor;
112 static ng_rcvmsg_t	ngs_rcvmsg;
113 static ng_shutdown_t	ngs_shutdown;
114 static ng_newhook_t	ngs_newhook;
115 static ng_connect_t	ngs_connect;
116 static ng_findhook_t	ngs_findhook;
117 static ng_rcvdata_t	ngs_rcvdata;
118 static ng_disconnect_t	ngs_disconnect;
119 
120 /* Internal methods */
121 static int	ng_attach_data(struct socket *so);
122 static int	ng_attach_cntl(struct socket *so);
123 static int	ng_attach_common(struct socket *so, int type);
124 static void	ng_detach_common(struct ngpcb *pcbp, int type);
125 static void	ng_socket_free_priv(struct ngsock *priv);
126 static int	ng_connect_data(struct sockaddr *nam, struct ngpcb *pcbp);
127 static int	ng_bind(struct sockaddr *nam, struct ngpcb *pcbp);
128 
129 static int	ngs_mod_event(module_t mod, int event, void *data);
130 static void	ng_socket_item_applied(void *context, int error);
131 
132 /* Netgraph type descriptor */
133 static struct ng_type typestruct = {
134 	.version =	NG_ABI_VERSION,
135 	.name =		NG_SOCKET_NODE_TYPE,
136 	.mod_event =	ngs_mod_event,
137 	.constructor =	ngs_constructor,
138 	.rcvmsg =	ngs_rcvmsg,
139 	.shutdown =	ngs_shutdown,
140 	.newhook =	ngs_newhook,
141 	.connect =	ngs_connect,
142 	.findhook =	ngs_findhook,
143 	.rcvdata =	ngs_rcvdata,
144 	.disconnect =	ngs_disconnect,
145 };
146 NETGRAPH_INIT_ORDERED(socket, &typestruct, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY);
147 
148 /* Buffer space */
149 static u_long ngpdg_sendspace = 20 * 1024;	/* really max datagram size */
150 SYSCTL_ULONG(_net_graph, OID_AUTO, maxdgram, CTLFLAG_RW,
151     &ngpdg_sendspace , 0, "Maximum outgoing Netgraph datagram size");
152 static u_long ngpdg_recvspace = 20 * 1024;
153 SYSCTL_ULONG(_net_graph, OID_AUTO, recvspace, CTLFLAG_RW,
154     &ngpdg_recvspace , 0, "Maximum space for incoming Netgraph datagrams");
155 
156 /* List of all sockets (for netstat -f netgraph) */
157 static LIST_HEAD(, ngpcb) ngsocklist;
158 
159 static struct mtx	ngsocketlist_mtx;
160 
161 #define sotongpcb(so) ((struct ngpcb *)(so)->so_pcb)
162 
163 /* If getting unexplained errors returned, set this to "kdb_enter("X"); */
164 #ifndef TRAP_ERROR
165 #define TRAP_ERROR
166 #endif
167 
168 struct hookpriv {
169 	LIST_ENTRY(hookpriv)	next;
170 	hook_p			hook;
171 };
172 LIST_HEAD(ngshash, hookpriv);
173 
174 /* Per-node private data */
175 struct ngsock {
176 	struct ng_node	*node;		/* the associated netgraph node */
177 	struct ngpcb	*datasock;	/* optional data socket */
178 	struct ngpcb	*ctlsock;	/* optional control socket */
179 	struct ngshash	*hash;		/* hash for hook names */
180 	u_long		hmask;		/* hash mask */
181 	int	flags;
182 	int	refs;
183 	struct mtx	mtx;		/* mtx to wait on */
184 	int		error;		/* place to store error */
185 };
186 
187 #define	NGS_FLAG_NOLINGER	1	/* close with last hook */
188 
189 /***************************************************************
190 	Control sockets
191 ***************************************************************/
192 
193 static int
194 ngc_attach(struct socket *so, int proto, struct thread *td)
195 {
196 	struct ngpcb *const pcbp = sotongpcb(so);
197 	int error;
198 
199 	error = priv_check(td, PRIV_NETGRAPH_CONTROL);
200 	if (error)
201 		return (error);
202 	if (pcbp != NULL)
203 		return (EISCONN);
204 	return (ng_attach_cntl(so));
205 }
206 
207 static void
208 ngc_detach(struct socket *so)
209 {
210 	struct ngpcb *const pcbp = sotongpcb(so);
211 
212 	KASSERT(pcbp != NULL, ("ngc_detach: pcbp == NULL"));
213 	ng_detach_common(pcbp, NG_CONTROL);
214 }
215 
216 static int
217 ngc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
218 	 struct mbuf *control, struct thread *td)
219 {
220 	struct ngpcb *const pcbp = sotongpcb(so);
221 	struct ngsock *const priv = NG_NODE_PRIVATE(pcbp->sockdata->node);
222 	struct sockaddr_ng *const sap = (struct sockaddr_ng *) addr;
223 	struct ng_mesg *msg;
224 	struct mbuf *m0;
225 	item_p item;
226 	char *path = NULL;
227 	int len, error = 0;
228 	struct ng_apply_info apply;
229 
230 	if (control) {
231 		error = EINVAL;
232 		goto release;
233 	}
234 
235 	/* Require destination as there may be >= 1 hooks on this node. */
236 	if (addr == NULL) {
237 		error = EDESTADDRREQ;
238 		goto release;
239 	}
240 
241 	/*
242 	 * Allocate an expendable buffer for the path, chop off
243 	 * the sockaddr header, and make sure it's NUL terminated.
244 	 */
245 	len = sap->sg_len - 2;
246 	path = malloc(len + 1, M_NETGRAPH_PATH, M_WAITOK);
247 	bcopy(sap->sg_data, path, len);
248 	path[len] = '\0';
249 
250 	/*
251 	 * Move the actual message out of mbufs into a linear buffer.
252 	 * Start by adding up the size of the data. (could use mh_len?)
253 	 */
254 	for (len = 0, m0 = m; m0 != NULL; m0 = m0->m_next)
255 		len += m0->m_len;
256 
257 	/*
258 	 * Move the data into a linear buffer as well.
259 	 * Messages are not delivered in mbufs.
260 	 */
261 	msg = malloc(len + 1, M_NETGRAPH_MSG, M_WAITOK);
262 	m_copydata(m, 0, len, (char *)msg);
263 
264 	if (msg->header.version != NG_VERSION) {
265 		free(msg, M_NETGRAPH_MSG);
266 		error = EINVAL;
267 		goto release;
268 	}
269 
270 	/*
271 	 * Hack alert!
272 	 * We look into the message and if it mkpeers a node of unknown type, we
273 	 * try to load it. We need to do this now, in syscall thread, because if
274 	 * message gets queued and applied later we will get panic.
275 	 */
276 	if (msg->header.typecookie == NGM_GENERIC_COOKIE &&
277 	    msg->header.cmd == NGM_MKPEER) {
278 		struct ngm_mkpeer *const mkp = (struct ngm_mkpeer *) msg->data;
279 
280 		if (ng_findtype(mkp->type) == NULL) {
281 			char filename[NG_TYPESIZ + 3];
282 			int fileid;
283 
284 			/* Not found, try to load it as a loadable module. */
285 			snprintf(filename, sizeof(filename), "ng_%s",
286 			    mkp->type);
287 			error = kern_kldload(curthread, filename, &fileid);
288 			if (error != 0) {
289 				free(msg, M_NETGRAPH_MSG);
290 				goto release;
291 			}
292 
293 			/* See if type has been loaded successfully. */
294 			if (ng_findtype(mkp->type) == NULL) {
295 				free(msg, M_NETGRAPH_MSG);
296 				(void)kern_kldunload(curthread, fileid,
297 				    LINKER_UNLOAD_NORMAL);
298 				error =  ENXIO;
299 				goto release;
300 			}
301 		}
302 	}
303 
304 	item = ng_package_msg(msg, M_WAITOK);
305 	if ((error = ng_address_path((pcbp->sockdata->node), item, path, 0))
306 	    != 0) {
307 #ifdef TRACE_MESSAGES
308 		printf("ng_address_path: errx=%d\n", error);
309 #endif
310 		goto release;
311 	}
312 
313 #ifdef TRACE_MESSAGES
314 	printf("[%x]:<---------[socket]: c=<%d>cmd=%x(%s) f=%x #%d (%s)\n",
315 		item->el_dest->nd_ID,
316 		msg->header.typecookie,
317 		msg->header.cmd,
318 		msg->header.cmdstr,
319 		msg->header.flags,
320 		msg->header.token,
321 		item->el_dest->nd_type->name);
322 #endif
323 	SAVE_LINE(item);
324 	/*
325 	 * We do not want to return from syscall until the item
326 	 * is processed by destination node. We register callback
327 	 * on the item, which will update priv->error when item
328 	 * was applied.
329 	 * If ng_snd_item() has queued item, we sleep until
330 	 * callback wakes us up.
331 	 */
332 	bzero(&apply, sizeof(apply));
333 	apply.apply = ng_socket_item_applied;
334 	apply.context = priv;
335 	item->apply = &apply;
336 	priv->error = -1;
337 
338 	error = ng_snd_item(item, 0);
339 
340 	mtx_lock(&priv->mtx);
341 	if (priv->error == -1)
342 		msleep(priv, &priv->mtx, 0, "ngsock", 0);
343 	mtx_unlock(&priv->mtx);
344 	KASSERT(priv->error != -1,
345 	    ("ng_socket: priv->error wasn't updated"));
346 	error = priv->error;
347 
348 release:
349 	if (path != NULL)
350 		free(path, M_NETGRAPH_PATH);
351 	if (control != NULL)
352 		m_freem(control);
353 	if (m != NULL)
354 		m_freem(m);
355 	return (error);
356 }
357 
358 static int
359 ngc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
360 {
361 	struct ngpcb *const pcbp = sotongpcb(so);
362 
363 	if (pcbp == 0)
364 		return (EINVAL);
365 	return (ng_bind(nam, pcbp));
366 }
367 
368 static int
369 ngc_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
370 {
371 	/*
372 	 * At this time refuse to do this.. it used to
373 	 * do something but it was undocumented and not used.
374 	 */
375 	printf("program tried to connect control socket to remote node\n");
376 	return (EINVAL);
377 }
378 
379 /***************************************************************
380 	Data sockets
381 ***************************************************************/
382 
383 static int
384 ngd_attach(struct socket *so, int proto, struct thread *td)
385 {
386 	struct ngpcb *const pcbp = sotongpcb(so);
387 
388 	if (pcbp != NULL)
389 		return (EISCONN);
390 	return (ng_attach_data(so));
391 }
392 
393 static void
394 ngd_detach(struct socket *so)
395 {
396 	struct ngpcb *const pcbp = sotongpcb(so);
397 
398 	KASSERT(pcbp != NULL, ("ngd_detach: pcbp == NULL"));
399 	ng_detach_common(pcbp, NG_DATA);
400 }
401 
402 static int
403 ngd_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
404 	 struct mbuf *control, struct thread *td)
405 {
406 	struct ngpcb *const pcbp = sotongpcb(so);
407 	struct sockaddr_ng *const sap = (struct sockaddr_ng *) addr;
408 	int	len, error;
409 	hook_p  hook = NULL;
410 	char	hookname[NG_HOOKSIZ];
411 
412 	if ((pcbp == NULL) || (control != NULL)) {
413 		error = EINVAL;
414 		goto release;
415 	}
416 	if (pcbp->sockdata == NULL) {
417 		error = ENOTCONN;
418 		goto release;
419 	}
420 
421 	if (sap == NULL)
422 		len = 0;		/* Make compiler happy. */
423 	else
424 		len = sap->sg_len - 2;
425 
426 	/*
427 	 * If the user used any of these ways to not specify an address
428 	 * then handle specially.
429 	 */
430 	if ((sap == NULL) || (len <= 0) || (*sap->sg_data == '\0')) {
431 		if (NG_NODE_NUMHOOKS(pcbp->sockdata->node) != 1) {
432 			error = EDESTADDRREQ;
433 			goto release;
434 		}
435 		/*
436 		 * If exactly one hook exists, just use it.
437 		 * Special case to allow write(2) to work on an ng_socket.
438 		 */
439 		hook = LIST_FIRST(&pcbp->sockdata->node->nd_hooks);
440 	} else {
441 		if (len >= NG_HOOKSIZ) {
442 			error = EINVAL;
443 			goto release;
444 		}
445 
446 		/*
447 		 * chop off the sockaddr header, and make sure it's NUL
448 		 * terminated
449 		 */
450 		bcopy(sap->sg_data, hookname, len);
451 		hookname[len] = '\0';
452 
453 		/* Find the correct hook from 'hookname' */
454 		hook = ng_findhook(pcbp->sockdata->node, hookname);
455 		if (hook == NULL) {
456 			error = EHOSTUNREACH;
457 			goto release;
458 		}
459 	}
460 
461 	/* Send data. */
462 	NG_SEND_DATA_FLAGS(error, hook, m, NG_WAITOK);
463 
464 release:
465 	if (control != NULL)
466 		m_freem(control);
467 	if (m != NULL)
468 		m_freem(m);
469 	return (error);
470 }
471 
472 static int
473 ngd_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
474 {
475 	struct ngpcb *const pcbp = sotongpcb(so);
476 
477 	if (pcbp == 0)
478 		return (EINVAL);
479 	return (ng_connect_data(nam, pcbp));
480 }
481 
482 /*
483  * Used for both data and control sockets
484  */
485 static int
486 ng_getsockaddr(struct socket *so, struct sockaddr **addr)
487 {
488 	struct ngpcb *pcbp;
489 	struct sockaddr_ng *sg;
490 	int sg_len;
491 	int error = 0;
492 
493 	/* Why isn't sg_data a `char[1]' ? :-( */
494 	sg_len = sizeof(struct sockaddr_ng) - sizeof(sg->sg_data) + 1;
495 
496 	pcbp = sotongpcb(so);
497 	if ((pcbp == NULL) || (pcbp->sockdata == NULL))
498 		/* XXXGL: can this still happen? */
499 		return (EINVAL);
500 
501 	mtx_lock(&pcbp->sockdata->mtx);
502 	if (pcbp->sockdata->node != NULL) {
503 		node_p node = pcbp->sockdata->node;
504 		int namelen = 0;	/* silence compiler! */
505 
506 		if (NG_NODE_HAS_NAME(node))
507 			sg_len += namelen = strlen(NG_NODE_NAME(node));
508 
509 		sg = malloc(sg_len, M_SONAME, M_WAITOK | M_ZERO);
510 
511 		if (NG_NODE_HAS_NAME(node))
512 			bcopy(NG_NODE_NAME(node), sg->sg_data, namelen);
513 
514 		sg->sg_len = sg_len;
515 		sg->sg_family = AF_NETGRAPH;
516 		*addr = (struct sockaddr *)sg;
517 		mtx_unlock(&pcbp->sockdata->mtx);
518 	} else {
519 		mtx_unlock(&pcbp->sockdata->mtx);
520 		error = EINVAL;
521 	}
522 
523 	return (error);
524 }
525 
526 /*
527  * Attach a socket to it's protocol specific partner.
528  * For a control socket, actually create a netgraph node and attach
529  * to it as well.
530  */
531 
532 static int
533 ng_attach_cntl(struct socket *so)
534 {
535 	struct ngsock *priv;
536 	struct ngpcb *pcbp;
537 	node_p node;
538 	int error;
539 
540 	/* Setup protocol control block */
541 	if ((error = ng_attach_common(so, NG_CONTROL)) != 0)
542 		return (error);
543 	pcbp = sotongpcb(so);
544 
545 	/* Make the generic node components */
546 	if ((error = ng_make_node_common(&typestruct, &node)) != 0) {
547 		ng_detach_common(pcbp, NG_CONTROL);
548 		return (error);
549 	}
550 
551 	/*
552 	 * Allocate node private info and hash. We start
553 	 * with 16 hash entries, however we may grow later
554 	 * in ngs_newhook(). We can't predict how much hooks
555 	 * does this node plan to have.
556 	 */
557 	priv = malloc(sizeof(*priv), M_NETGRAPH_SOCK, M_WAITOK | M_ZERO);
558 	priv->hash = hashinit(16, M_NETGRAPH_SOCK, &priv->hmask);
559 
560 	/* Initialize mutex. */
561 	mtx_init(&priv->mtx, "ng_socket", NULL, MTX_DEF);
562 
563 	/* Link the pcb the private data. */
564 	priv->ctlsock = pcbp;
565 	pcbp->sockdata = priv;
566 	priv->refs++;
567 	priv->node = node;
568 	pcbp->node_id = node->nd_ID;	/* hint for netstat(1) */
569 
570 	/* Link the node and the private data. */
571 	NG_NODE_SET_PRIVATE(priv->node, priv);
572 	NG_NODE_REF(priv->node);
573 	priv->refs++;
574 
575 	return (0);
576 }
577 
578 static int
579 ng_attach_data(struct socket *so)
580 {
581 	return (ng_attach_common(so, NG_DATA));
582 }
583 
584 /*
585  * Set up a socket protocol control block.
586  * This code is shared between control and data sockets.
587  */
588 static int
589 ng_attach_common(struct socket *so, int type)
590 {
591 	struct ngpcb *pcbp;
592 	int error;
593 
594 	/* Standard socket setup stuff. */
595 	error = soreserve(so, ngpdg_sendspace, ngpdg_recvspace);
596 	if (error)
597 		return (error);
598 
599 	/* Allocate the pcb. */
600 	pcbp = malloc(sizeof(struct ngpcb), M_PCB, M_WAITOK | M_ZERO);
601 	pcbp->type = type;
602 
603 	/* Link the pcb and the socket. */
604 	so->so_pcb = (caddr_t)pcbp;
605 	pcbp->ng_socket = so;
606 
607 	/* Add the socket to linked list */
608 	mtx_lock(&ngsocketlist_mtx);
609 	LIST_INSERT_HEAD(&ngsocklist, pcbp, socks);
610 	mtx_unlock(&ngsocketlist_mtx);
611 	return (0);
612 }
613 
614 /*
615  * Disassociate the socket from it's protocol specific
616  * partner. If it's attached to a node's private data structure,
617  * then unlink from that too. If we were the last socket attached to it,
618  * then shut down the entire node. Shared code for control and data sockets.
619  */
620 static void
621 ng_detach_common(struct ngpcb *pcbp, int which)
622 {
623 	struct ngsock *priv = pcbp->sockdata;
624 
625 	if (priv != NULL) {
626 		mtx_lock(&priv->mtx);
627 
628 		switch (which) {
629 		case NG_CONTROL:
630 			priv->ctlsock = NULL;
631 			break;
632 		case NG_DATA:
633 			priv->datasock = NULL;
634 			break;
635 		default:
636 			panic("%s", __func__);
637 		}
638 		pcbp->sockdata = NULL;
639 		pcbp->node_id = 0;
640 
641 		ng_socket_free_priv(priv);
642 	}
643 
644 	pcbp->ng_socket->so_pcb = NULL;
645 	mtx_lock(&ngsocketlist_mtx);
646 	LIST_REMOVE(pcbp, socks);
647 	mtx_unlock(&ngsocketlist_mtx);
648 	free(pcbp, M_PCB);
649 }
650 
651 /*
652  * Remove a reference from node private data.
653  */
654 static void
655 ng_socket_free_priv(struct ngsock *priv)
656 {
657 	mtx_assert(&priv->mtx, MA_OWNED);
658 
659 	priv->refs--;
660 
661 	if (priv->refs == 0) {
662 		mtx_destroy(&priv->mtx);
663 		hashdestroy(priv->hash, M_NETGRAPH_SOCK, priv->hmask);
664 		free(priv, M_NETGRAPH_SOCK);
665 		return;
666 	}
667 
668 	if ((priv->refs == 1) && (priv->node != NULL)) {
669 		node_p node = priv->node;
670 
671 		priv->node = NULL;
672 		mtx_unlock(&priv->mtx);
673 		NG_NODE_UNREF(node);
674 		ng_rmnode_self(node);
675 	} else
676 		mtx_unlock(&priv->mtx);
677 }
678 
679 /*
680  * Connect the data socket to a named control socket node.
681  */
682 static int
683 ng_connect_data(struct sockaddr *nam, struct ngpcb *pcbp)
684 {
685 	struct sockaddr_ng *sap;
686 	node_p farnode;
687 	struct ngsock *priv;
688 	int error;
689 	item_p item;
690 
691 	/* If we are already connected, don't do it again. */
692 	if (pcbp->sockdata != NULL)
693 		return (EISCONN);
694 
695 	/*
696 	 * Find the target (victim) and check it doesn't already have
697 	 * a data socket. Also check it is a 'socket' type node.
698 	 * Use ng_package_data() and ng_address_path() to do this.
699 	 */
700 
701 	sap = (struct sockaddr_ng *) nam;
702 	/* The item will hold the node reference. */
703 	item = ng_package_data(NULL, NG_WAITOK);
704 
705 	if ((error = ng_address_path(NULL, item,  sap->sg_data, 0)))
706 		return (error); /* item is freed on failure */
707 
708 	/*
709 	 * Extract node from item and free item. Remember we now have
710 	 * a reference on the node. The item holds it for us.
711 	 * when we free the item we release the reference.
712 	 */
713 	farnode = item->el_dest; /* shortcut */
714 	if (strcmp(farnode->nd_type->name, NG_SOCKET_NODE_TYPE) != 0) {
715 		NG_FREE_ITEM(item); /* drop the reference to the node */
716 		return (EINVAL);
717 	}
718 	priv = NG_NODE_PRIVATE(farnode);
719 	if (priv->datasock != NULL) {
720 		NG_FREE_ITEM(item);	/* drop the reference to the node */
721 		return (EADDRINUSE);
722 	}
723 
724 	/*
725 	 * Link the PCB and the private data struct. and note the extra
726 	 * reference. Drop the extra reference on the node.
727 	 */
728 	mtx_lock(&priv->mtx);
729 	priv->datasock = pcbp;
730 	pcbp->sockdata = priv;
731 	pcbp->node_id = priv->node->nd_ID;	/* hint for netstat(1) */
732 	priv->refs++;
733 	mtx_unlock(&priv->mtx);
734 	NG_FREE_ITEM(item);	/* drop the reference to the node */
735 	return (0);
736 }
737 
738 /*
739  * Binding a socket means giving the corresponding node a name
740  */
741 static int
742 ng_bind(struct sockaddr *nam, struct ngpcb *pcbp)
743 {
744 	struct ngsock *const priv = pcbp->sockdata;
745 	struct sockaddr_ng *const sap = (struct sockaddr_ng *) nam;
746 
747 	if (priv == NULL) {
748 		TRAP_ERROR;
749 		return (EINVAL);
750 	}
751 	if ((sap->sg_len < 4) || (sap->sg_len > (NG_NODESIZ + 2)) ||
752 	    (sap->sg_data[0] == '\0') ||
753 	    (sap->sg_data[sap->sg_len - 3] != '\0')) {
754 		TRAP_ERROR;
755 		return (EINVAL);
756 	}
757 	return (ng_name_node(priv->node, sap->sg_data));
758 }
759 
760 /***************************************************************
761 	Netgraph node
762 ***************************************************************/
763 
764 /*
765  * You can only create new nodes from the socket end of things.
766  */
767 static int
768 ngs_constructor(node_p nodep)
769 {
770 	return (EINVAL);
771 }
772 
773 static void
774 ngs_rehash(node_p node)
775 {
776 	struct ngsock *priv = NG_NODE_PRIVATE(node);
777 	struct ngshash *new;
778 	struct hookpriv *hp;
779 	hook_p hook;
780 	uint32_t h;
781 	u_long hmask;
782 
783 	new = hashinit_flags((priv->hmask + 1) * 2, M_NETGRAPH_SOCK, &hmask,
784 	    HASH_NOWAIT);
785 	if (new == NULL)
786 		return;
787 
788 	LIST_FOREACH(hook, &node->nd_hooks, hk_hooks) {
789 		hp = NG_HOOK_PRIVATE(hook);
790 #ifdef INVARIANTS
791 		LIST_REMOVE(hp, next);
792 #endif
793 		h = hash32_str(NG_HOOK_NAME(hook), HASHINIT) & hmask;
794 		LIST_INSERT_HEAD(&new[h], hp, next);
795 	}
796 
797 	hashdestroy(priv->hash, M_NETGRAPH_SOCK, priv->hmask);
798 	priv->hash = new;
799 	priv->hmask = hmask;
800 }
801 
802 /*
803  * We allow any hook to be connected to the node.
804  * There is no per-hook private information though.
805  */
806 static int
807 ngs_newhook(node_p node, hook_p hook, const char *name)
808 {
809 	struct ngsock *const priv = NG_NODE_PRIVATE(node);
810 	struct hookpriv *hp;
811 	uint32_t h;
812 
813 	hp = malloc(sizeof(*hp), M_NETGRAPH_SOCK, M_NOWAIT);
814 	if (hp == NULL)
815 		return (ENOMEM);
816 	if (node->nd_numhooks * 2 > priv->hmask)
817 		ngs_rehash(node);
818 	hp->hook = hook;
819 	h = hash32_str(name, HASHINIT) & priv->hmask;
820 	LIST_INSERT_HEAD(&priv->hash[h], hp, next);
821 	NG_HOOK_SET_PRIVATE(hook, hp);
822 
823 	return (0);
824 }
825 
826 /*
827  * If only one hook, allow read(2) and write(2) to work.
828  */
829 static int
830 ngs_connect(hook_p hook)
831 {
832 	node_p node = NG_HOOK_NODE(hook);
833 	struct ngsock *priv = NG_NODE_PRIVATE(node);
834 
835 	if ((priv->datasock) && (priv->datasock->ng_socket)) {
836 		if (NG_NODE_NUMHOOKS(node) == 1)
837 			priv->datasock->ng_socket->so_state |= SS_ISCONNECTED;
838 		else
839 			priv->datasock->ng_socket->so_state &= ~SS_ISCONNECTED;
840 	}
841 	return (0);
842 }
843 
844 /* Look up hook by name */
845 static hook_p
846 ngs_findhook(node_p node, const char *name)
847 {
848 	struct ngsock *priv = NG_NODE_PRIVATE(node);
849 	struct hookpriv *hp;
850 	uint32_t h;
851 
852 	/*
853 	 * Microoptimisations for a ng_socket with no
854 	 * hooks, or with a single hook, which is a
855 	 * common case.
856 	 */
857 	if (node->nd_numhooks == 0)
858 		return (NULL);
859 	if (node->nd_numhooks == 1) {
860 		hook_p hook;
861 
862 		hook = LIST_FIRST(&node->nd_hooks);
863 
864 		if (strcmp(NG_HOOK_NAME(hook), name) == 0)
865 			return (hook);
866 		else
867 			return (NULL);
868 	}
869 
870 	h = hash32_str(name, HASHINIT) & priv->hmask;
871 
872 	LIST_FOREACH(hp, &priv->hash[h], next)
873 		if (strcmp(NG_HOOK_NAME(hp->hook), name) == 0)
874 			return (hp->hook);
875 
876 	return (NULL);
877 }
878 
879 /*
880  * Incoming messages get passed up to the control socket.
881  * Unless they are for us specifically (socket_type)
882  */
883 static int
884 ngs_rcvmsg(node_p node, item_p item, hook_p lasthook)
885 {
886 	struct ngsock *const priv = NG_NODE_PRIVATE(node);
887 	struct ngpcb *pcbp;
888 	struct socket *so;
889 	struct sockaddr_ng addr;
890 	struct ng_mesg *msg;
891 	struct mbuf *m;
892 	ng_ID_t	retaddr = NGI_RETADDR(item);
893 	int addrlen;
894 	int error = 0;
895 
896 	NGI_GET_MSG(item, msg);
897 	NG_FREE_ITEM(item);
898 
899 	/*
900 	 * Grab priv->mtx here to prevent destroying of control socket
901 	 * after checking that priv->ctlsock is not NULL.
902 	 */
903 	mtx_lock(&priv->mtx);
904 	pcbp = priv->ctlsock;
905 
906 	/*
907 	 * Only allow mesgs to be passed if we have the control socket.
908 	 * Data sockets can only support the generic messages.
909 	 */
910 	if (pcbp == NULL) {
911 		mtx_unlock(&priv->mtx);
912 		TRAP_ERROR;
913 		NG_FREE_MSG(msg);
914 		return (EINVAL);
915 	}
916 	so = pcbp->ng_socket;
917 	SOCKBUF_LOCK(&so->so_rcv);
918 
919 	/* As long as the race is handled, priv->mtx may be unlocked now. */
920 	mtx_unlock(&priv->mtx);
921 
922 #ifdef TRACE_MESSAGES
923 	printf("[%x]:---------->[socket]: c=<%d>cmd=%x(%s) f=%x #%d\n",
924 		retaddr,
925 		msg->header.typecookie,
926 		msg->header.cmd,
927 		msg->header.cmdstr,
928 		msg->header.flags,
929 		msg->header.token);
930 #endif
931 
932 	if (msg->header.typecookie == NGM_SOCKET_COOKIE) {
933 		switch (msg->header.cmd) {
934 		case NGM_SOCK_CMD_NOLINGER:
935 			priv->flags |= NGS_FLAG_NOLINGER;
936 			break;
937 		case NGM_SOCK_CMD_LINGER:
938 			priv->flags &= ~NGS_FLAG_NOLINGER;
939 			break;
940 		default:
941 			error = EINVAL;		/* unknown command */
942 		}
943 		SOCKBUF_UNLOCK(&so->so_rcv);
944 
945 		/* Free the message and return. */
946 		NG_FREE_MSG(msg);
947 		return (error);
948 	}
949 
950 	/* Get the return address into a sockaddr. */
951 	bzero(&addr, sizeof(addr));
952 	addr.sg_len = sizeof(addr);
953 	addr.sg_family = AF_NETGRAPH;
954 	addrlen = snprintf((char *)&addr.sg_data, sizeof(addr.sg_data),
955 	    "[%x]:", retaddr);
956 	if (addrlen < 0 || addrlen > sizeof(addr.sg_data)) {
957 		SOCKBUF_UNLOCK(&so->so_rcv);
958 		printf("%s: snprintf([%x]) failed - %d\n", __func__, retaddr,
959 		    addrlen);
960 		NG_FREE_MSG(msg);
961 		return (EINVAL);
962 	}
963 
964 	/* Copy the message itself into an mbuf chain. */
965 	m = m_devget((caddr_t)msg, sizeof(struct ng_mesg) + msg->header.arglen,
966 	    0, NULL, NULL);
967 
968 	/*
969 	 * Here we free the message. We need to do that
970 	 * regardless of whether we got mbufs.
971 	 */
972 	NG_FREE_MSG(msg);
973 
974 	if (m == NULL) {
975 		SOCKBUF_UNLOCK(&so->so_rcv);
976 		TRAP_ERROR;
977 		return (ENOBUFS);
978 	}
979 
980 	/* Send it up to the socket. */
981 	if (sbappendaddr_locked(&so->so_rcv, (struct sockaddr *)&addr, m,
982 	    NULL) == 0) {
983 		SOCKBUF_UNLOCK(&so->so_rcv);
984 		TRAP_ERROR;
985 		m_freem(m);
986 		return (ENOBUFS);
987 	}
988 	sorwakeup_locked(so);
989 
990 	return (error);
991 }
992 
993 /*
994  * Receive data on a hook
995  */
996 static int
997 ngs_rcvdata(hook_p hook, item_p item)
998 {
999 	struct ngsock *const priv = NG_NODE_PRIVATE(NG_HOOK_NODE(hook));
1000 	struct ngpcb *const pcbp = priv->datasock;
1001 	struct socket *so;
1002 	struct sockaddr_ng *addr;
1003 	char *addrbuf[NG_HOOKSIZ + 4];
1004 	int addrlen;
1005 	struct mbuf *m;
1006 
1007 	NGI_GET_M(item, m);
1008 	NG_FREE_ITEM(item);
1009 
1010 	/* If there is no data socket, black-hole it. */
1011 	if (pcbp == NULL) {
1012 		NG_FREE_M(m);
1013 		return (0);
1014 	}
1015 	so = pcbp->ng_socket;
1016 
1017 	/* Get the return address into a sockaddr. */
1018 	addrlen = strlen(NG_HOOK_NAME(hook));	/* <= NG_HOOKSIZ - 1 */
1019 	addr = (struct sockaddr_ng *) addrbuf;
1020 	addr->sg_len = addrlen + 3;
1021 	addr->sg_family = AF_NETGRAPH;
1022 	bcopy(NG_HOOK_NAME(hook), addr->sg_data, addrlen);
1023 	addr->sg_data[addrlen] = '\0';
1024 
1025 	/* Try to tell the socket which hook it came in on. */
1026 	if (sbappendaddr(&so->so_rcv, (struct sockaddr *)addr, m, NULL) == 0) {
1027 		m_freem(m);
1028 		TRAP_ERROR;
1029 		return (ENOBUFS);
1030 	}
1031 	sorwakeup(so);
1032 	return (0);
1033 }
1034 
1035 /*
1036  * Hook disconnection
1037  *
1038  * For this type, removal of the last link destroys the node
1039  * if the NOLINGER flag is set.
1040  */
1041 static int
1042 ngs_disconnect(hook_p hook)
1043 {
1044 	node_p node = NG_HOOK_NODE(hook);
1045 	struct ngsock *const priv = NG_NODE_PRIVATE(node);
1046 	struct hookpriv *hp = NG_HOOK_PRIVATE(hook);
1047 
1048 	LIST_REMOVE(hp, next);
1049 	free(hp, M_NETGRAPH_SOCK);
1050 
1051 	if ((priv->datasock) && (priv->datasock->ng_socket)) {
1052 		if (NG_NODE_NUMHOOKS(node) == 1)
1053 			priv->datasock->ng_socket->so_state |= SS_ISCONNECTED;
1054 		else
1055 			priv->datasock->ng_socket->so_state &= ~SS_ISCONNECTED;
1056 	}
1057 
1058 	if ((priv->flags & NGS_FLAG_NOLINGER) &&
1059 	    (NG_NODE_NUMHOOKS(node) == 0) && (NG_NODE_IS_VALID(node)))
1060 		ng_rmnode_self(node);
1061 
1062 	return (0);
1063 }
1064 
1065 /*
1066  * Do local shutdown processing.
1067  * In this case, that involves making sure the socket
1068  * knows we should be shutting down.
1069  */
1070 static int
1071 ngs_shutdown(node_p node)
1072 {
1073 	struct ngsock *const priv = NG_NODE_PRIVATE(node);
1074 	struct ngpcb *dpcbp, *pcbp;
1075 
1076 	mtx_lock(&priv->mtx);
1077 	dpcbp = priv->datasock;
1078 	pcbp = priv->ctlsock;
1079 
1080 	if (dpcbp != NULL)
1081 		soisdisconnected(dpcbp->ng_socket);
1082 
1083 	if (pcbp != NULL)
1084 		soisdisconnected(pcbp->ng_socket);
1085 
1086 	priv->node = NULL;
1087 	NG_NODE_SET_PRIVATE(node, NULL);
1088 	ng_socket_free_priv(priv);
1089 
1090 	NG_NODE_UNREF(node);
1091 	return (0);
1092 }
1093 
1094 static void
1095 ng_socket_item_applied(void *context, int error)
1096 {
1097 	struct ngsock *const priv = (struct ngsock *)context;
1098 
1099 	mtx_lock(&priv->mtx);
1100 	priv->error = error;
1101 	wakeup(priv);
1102 	mtx_unlock(&priv->mtx);
1103 
1104 }
1105 
/* pru_disconnect for both socket types: nothing to do, always succeeds. */
static int
dummy_disconnect(struct socket *so)
{
	const int ok = 0;

	return (ok);
}
1111 /*
1112  * Control and data socket type descriptors
1113  *
1114  * XXXRW: Perhaps _close should do something?
1115  */
1116 
1117 static struct pr_usrreqs ngc_usrreqs = {
1118 	.pru_abort =		NULL,
1119 	.pru_attach =		ngc_attach,
1120 	.pru_bind =		ngc_bind,
1121 	.pru_connect =		ngc_connect,
1122 	.pru_detach =		ngc_detach,
1123 	.pru_disconnect =	dummy_disconnect,
1124 	.pru_peeraddr =		NULL,
1125 	.pru_send =		ngc_send,
1126 	.pru_shutdown =		NULL,
1127 	.pru_sockaddr =		ng_getsockaddr,
1128 	.pru_close =		NULL,
1129 };
1130 
1131 static struct pr_usrreqs ngd_usrreqs = {
1132 	.pru_abort =		NULL,
1133 	.pru_attach =		ngd_attach,
1134 	.pru_bind =		NULL,
1135 	.pru_connect =		ngd_connect,
1136 	.pru_detach =		ngd_detach,
1137 	.pru_disconnect =	dummy_disconnect,
1138 	.pru_peeraddr =		NULL,
1139 	.pru_send =		ngd_send,
1140 	.pru_shutdown =		NULL,
1141 	.pru_sockaddr =		ng_getsockaddr,
1142 	.pru_close =		NULL,
1143 };
1144 
1145 /*
1146  * Definitions of protocols supported in the NETGRAPH domain.
1147  */
1148 
1149 extern struct domain ngdomain;		/* stop compiler warnings */
1150 
1151 static struct protosw ngsw[] = {
1152 {
1153 	.pr_type =		SOCK_DGRAM,
1154 	.pr_domain =		&ngdomain,
1155 	.pr_protocol =		NG_CONTROL,
1156 	.pr_flags =		PR_ATOMIC | PR_ADDR /* | PR_RIGHTS */,
1157 	.pr_usrreqs =		&ngc_usrreqs
1158 },
1159 {
1160 	.pr_type =		SOCK_DGRAM,
1161 	.pr_domain =		&ngdomain,
1162 	.pr_protocol =		NG_DATA,
1163 	.pr_flags =		PR_ATOMIC | PR_ADDR,
1164 	.pr_usrreqs =		&ngd_usrreqs
1165 }
1166 };
1167 
1168 struct domain ngdomain = {
1169 	.dom_family =		AF_NETGRAPH,
1170 	.dom_name =		"netgraph",
1171 	.dom_protosw =		ngsw,
1172 	.dom_protoswNPROTOSW =	&ngsw[sizeof(ngsw) / sizeof(ngsw[0])]
1173 };
1174 
1175 /*
1176  * Handle loading and unloading for this node type.
1177  * This is to handle auxiliary linkages (e.g protocol domain addition).
1178  */
1179 static int
1180 ngs_mod_event(module_t mod, int event, void *data)
1181 {
1182 	int error = 0;
1183 
1184 	switch (event) {
1185 	case MOD_LOAD:
1186 		mtx_init(&ngsocketlist_mtx, "ng_socketlist", NULL, MTX_DEF);
1187 		break;
1188 	case MOD_UNLOAD:
1189 		/* Ensure there are no open netgraph sockets. */
1190 		if (!LIST_EMPTY(&ngsocklist)) {
1191 			error = EBUSY;
1192 			break;
1193 		}
1194 #ifdef NOTYET
1195 		/* Unregister protocol domain XXX can't do this yet.. */
1196 #endif
1197 		error = EBUSY;
1198 		break;
1199 	default:
1200 		error = EOPNOTSUPP;
1201 		break;
1202 	}
1203 	return (error);
1204 }
1205 
1206 VNET_DOMAIN_SET(ng);
1207 
1208 SYSCTL_INT(_net_graph, OID_AUTO, family, CTLFLAG_RD, 0, AF_NETGRAPH, "");
1209 static SYSCTL_NODE(_net_graph, OID_AUTO, data, CTLFLAG_RW, 0, "DATA");
1210 SYSCTL_INT(_net_graph_data, OID_AUTO, proto, CTLFLAG_RD, 0, NG_DATA, "");
1211 static SYSCTL_NODE(_net_graph, OID_AUTO, control, CTLFLAG_RW, 0, "CONTROL");
1212 SYSCTL_INT(_net_graph_control, OID_AUTO, proto, CTLFLAG_RD, 0, NG_CONTROL, "");
1213 
1214