xref: /freebsd/sys/kern/uipc_usrreq.c (revision df7f5d4de4592a8948a25ce01e5bddfbb7ce39dc)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	From: @(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
34  *	$Id: uipc_usrreq.c,v 1.19 1997/02/22 09:39:29 peter Exp $
35  */
36 
37 #include <sys/param.h>
38 #include <sys/queue.h>
39 #include <sys/systm.h>
40 #include <sys/kernel.h>
41 #include <sys/domain.h>
42 #include <sys/file.h>
43 #include <sys/filedesc.h>
44 #include <sys/mbuf.h>
45 #include <sys/namei.h>
46 #include <sys/proc.h>
47 #include <sys/protosw.h>
48 #include <sys/socket.h>
49 #include <sys/socketvar.h>
50 #include <sys/stat.h>
51 #include <sys/sysctl.h>
52 #include <sys/un.h>
53 #include <sys/unpcb.h>
54 #include <sys/vnode.h>
55 
56 /*
57  * Unix communications domain.
58  *
59  * TODO:
60  *	SEQPACKET, RDM
61  *	rethink name space problems
62  *	need a proper out-of-band
63  */
64 static struct	sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL };
65 static ino_t	unp_ino;		/* prototype for fake inode numbers */
66 
67 static int     unp_attach __P((struct socket *));
68 static void    unp_detach __P((struct unpcb *));
69 static int     unp_bind __P((struct unpcb *,struct mbuf *, struct proc *));
70 static int     unp_connect __P((struct socket *,struct mbuf *, struct proc *));
71 static void    unp_disconnect __P((struct unpcb *));
72 static void    unp_shutdown __P((struct unpcb *));
73 static void    unp_drop __P((struct unpcb *, int));
74 static void    unp_gc __P((void));
75 static void    unp_scan __P((struct mbuf *, void (*)(struct file *)));
76 static void    unp_mark __P((struct file *));
77 static void    unp_discard __P((struct file *));
78 static int     unp_internalize __P((struct mbuf *, struct proc *));
79 
80 
81 /*ARGSUSED*/
82 int
83 uipc_usrreq(so, req, m, nam, control)
84 	struct socket *so;
85 	int req;
86 	struct mbuf *m, *nam, *control;
87 {
88 	struct unpcb *unp = sotounpcb(so);
89 	register struct socket *so2;
90 	register int error = 0;
91 	struct proc *p = curproc;	/* XXX */
92 
93 	if (req == PRU_CONTROL)
94 		return (EOPNOTSUPP);
95 	if (req != PRU_SEND && control && control->m_len) {
96 		error = EOPNOTSUPP;
97 		goto release;
98 	}
99 	if (unp == 0 && req != PRU_ATTACH) {
100 		error = EINVAL;
101 		goto release;
102 	}
103 	switch (req) {
104 
105 	case PRU_ATTACH:
106 		if (unp) {
107 			error = EISCONN;
108 			break;
109 		}
110 		error = unp_attach(so);
111 		break;
112 
113 	case PRU_DETACH:
114 		unp_detach(unp);
115 		break;
116 
117 	case PRU_BIND:
118 		error = unp_bind(unp, nam, p);
119 		break;
120 
121 	case PRU_LISTEN:
122 		if (unp->unp_vnode == 0)
123 			error = EINVAL;
124 		break;
125 
126 	case PRU_CONNECT:
127 		error = unp_connect(so, nam, p);
128 		break;
129 
130 	case PRU_CONNECT2:
131 		error = unp_connect2(so, (struct socket *)nam);
132 		break;
133 
134 	case PRU_DISCONNECT:
135 		unp_disconnect(unp);
136 		break;
137 
138 	case PRU_ACCEPT:
139 		/*
140 		 * Pass back name of connected socket,
141 		 * if it was bound and we are still connected
142 		 * (our peer may have closed already!).
143 		 */
144 		if (unp->unp_conn && unp->unp_conn->unp_addr) {
145 			nam->m_len = unp->unp_conn->unp_addr->m_len;
146 			bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
147 			    mtod(nam, caddr_t), (unsigned)nam->m_len);
148 		} else {
149 			nam->m_len = sizeof(sun_noname);
150 			*(mtod(nam, struct sockaddr *)) = sun_noname;
151 		}
152 		break;
153 
154 	case PRU_SHUTDOWN:
155 		socantsendmore(so);
156 		unp_shutdown(unp);
157 		break;
158 
159 	case PRU_RCVD:
160 		switch (so->so_type) {
161 
162 		case SOCK_DGRAM:
163 			panic("uipc 1");
164 			/*NOTREACHED*/
165 
166 		case SOCK_STREAM:
167 #define	rcv (&so->so_rcv)
168 #define snd (&so2->so_snd)
169 			if (unp->unp_conn == 0)
170 				break;
171 			so2 = unp->unp_conn->unp_socket;
172 			/*
173 			 * Adjust backpressure on sender
174 			 * and wakeup any waiting to write.
175 			 */
176 			snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
177 			unp->unp_mbcnt = rcv->sb_mbcnt;
178 			snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
179 			unp->unp_cc = rcv->sb_cc;
180 			sowwakeup(so2);
181 #undef snd
182 #undef rcv
183 			break;
184 
185 		default:
186 			panic("uipc 2");
187 		}
188 		break;
189 
190 	case PRU_SEND:
191 	case PRU_SEND_EOF:
192 		if (control && (error = unp_internalize(control, p)))
193 			break;
194 		switch (so->so_type) {
195 
196 		case SOCK_DGRAM: {
197 			struct sockaddr *from;
198 
199 			if (nam) {
200 				if (unp->unp_conn) {
201 					error = EISCONN;
202 					break;
203 				}
204 				error = unp_connect(so, nam, p);
205 				if (error)
206 					break;
207 			} else {
208 				if (unp->unp_conn == 0) {
209 					error = ENOTCONN;
210 					break;
211 				}
212 			}
213 			so2 = unp->unp_conn->unp_socket;
214 			if (unp->unp_addr)
215 				from = mtod(unp->unp_addr, struct sockaddr *);
216 			else
217 				from = &sun_noname;
218 			if (sbappendaddr(&so2->so_rcv, from, m, control)) {
219 				sorwakeup(so2);
220 				m = 0;
221 				control = 0;
222 			} else
223 				error = ENOBUFS;
224 			if (nam)
225 				unp_disconnect(unp);
226 			break;
227 		}
228 
229 		case SOCK_STREAM:
230 #define	rcv (&so2->so_rcv)
231 #define	snd (&so->so_snd)
232 			/* Connect if not connected yet. */
233 			/*
234 			 * Note: A better implementation would complain
235 			 * if not equal to the peer's address.
236 			 */
237 			if ((so->so_state & SS_ISCONNECTED) == 0) {
238 				if (nam) {
239 		    			error = unp_connect(so, nam, p);
240 					if (error)
241 						break;	/* XXX */
242 				} else {
243 					error = ENOTCONN;
244 					break;
245 				}
246 			}
247 
248 			if (so->so_state & SS_CANTSENDMORE) {
249 				error = EPIPE;
250 				break;
251 			}
252 			if (unp->unp_conn == 0)
253 				panic("uipc 3");
254 			so2 = unp->unp_conn->unp_socket;
255 			/*
256 			 * Send to paired receive port, and then reduce
257 			 * send buffer hiwater marks to maintain backpressure.
258 			 * Wake up readers.
259 			 */
260 			if (control) {
261 				if (sbappendcontrol(rcv, m, control))
262 					control = 0;
263 			} else
264 				sbappend(rcv, m);
265 			snd->sb_mbmax -=
266 			    rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
267 			unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
268 			snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
269 			unp->unp_conn->unp_cc = rcv->sb_cc;
270 			sorwakeup(so2);
271 			m = 0;
272 #undef snd
273 #undef rcv
274 			break;
275 
276 		default:
277 			panic("uipc 4");
278 		}
279 		/*
280 		 * SEND_EOF is equivalent to a SEND followed by
281 		 * a SHUTDOWN.
282 		 */
283 		if (req == PRU_SEND_EOF) {
284 			socantsendmore(so);
285 			unp_shutdown(unp);
286 		}
287 		break;
288 
289 	case PRU_ABORT:
290 		unp_drop(unp, ECONNABORTED);
291 		break;
292 
293 	case PRU_SENSE:
294 		((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
295 		if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
296 			so2 = unp->unp_conn->unp_socket;
297 			((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
298 		}
299 		((struct stat *) m)->st_dev = NODEV;
300 		if (unp->unp_ino == 0)
301 			unp->unp_ino = unp_ino++;
302 		((struct stat *) m)->st_ino = unp->unp_ino;
303 		return (0);
304 
305 	case PRU_RCVOOB:
306 		return (EOPNOTSUPP);
307 
308 	case PRU_SENDOOB:
309 		error = EOPNOTSUPP;
310 		break;
311 
312 	case PRU_SOCKADDR:
313 		if (unp->unp_addr) {
314 			nam->m_len = unp->unp_addr->m_len;
315 			bcopy(mtod(unp->unp_addr, caddr_t),
316 			    mtod(nam, caddr_t), (unsigned)nam->m_len);
317 		} else
318 			nam->m_len = 0;
319 		break;
320 
321 	case PRU_PEERADDR:
322 		if (unp->unp_conn && unp->unp_conn->unp_addr) {
323 			nam->m_len = unp->unp_conn->unp_addr->m_len;
324 			bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
325 			    mtod(nam, caddr_t), (unsigned)nam->m_len);
326 		} else
327 			nam->m_len = 0;
328 		break;
329 
330 	case PRU_SLOWTIMO:
331 		break;
332 
333 	default:
334 		panic("piusrreq");
335 	}
336 release:
337 	if (control)
338 		m_freem(control);
339 	if (m)
340 		m_freem(m);
341 	return (error);
342 }
343 
344 /*
345  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
346  * for stream sockets, although the total for sender and receiver is
347  * actually only PIPSIZ.
348  * Datagram sockets really use the sendspace as the maximum datagram size,
349  * and don't really want to reserve the sendspace.  Their recvspace should
350  * be large enough for at least one max-size datagram plus address.
351  */
352 #ifndef PIPSIZ
353 #define	PIPSIZ	8192
354 #endif
355 static u_long	unpst_sendspace = PIPSIZ;
356 static u_long	unpst_recvspace = PIPSIZ;
357 static u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
358 static u_long	unpdg_recvspace = 4*1024;
359 
360 static int	unp_rights;			/* file descriptors in flight */
361 
362 SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
363 	   &unpst_sendspace, 0, "");
364 SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
365 	   &unpst_recvspace, 0, "");
366 SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
367 	   &unpdg_sendspace, 0, "");
368 SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
369 	   &unpdg_recvspace, 0, "");
370 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, "");
371 
372 static int
373 unp_attach(so)
374 	struct socket *so;
375 {
376 	register struct mbuf *m;
377 	register struct unpcb *unp;
378 	int error;
379 
380 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
381 		switch (so->so_type) {
382 
383 		case SOCK_STREAM:
384 			error = soreserve(so, unpst_sendspace, unpst_recvspace);
385 			break;
386 
387 		case SOCK_DGRAM:
388 			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
389 			break;
390 
391 		default:
392 			panic("unp_attach");
393 		}
394 		if (error)
395 			return (error);
396 	}
397 	m = m_getclr(M_DONTWAIT, MT_PCB);
398 	if (m == NULL)
399 		return (ENOBUFS);
400 	unp = mtod(m, struct unpcb *);
401 	so->so_pcb = (caddr_t)unp;
402 	unp->unp_socket = so;
403 	return (0);
404 }
405 
406 static void
407 unp_detach(unp)
408 	register struct unpcb *unp;
409 {
410 
411 	if (unp->unp_vnode) {
412 		unp->unp_vnode->v_socket = 0;
413 		vrele(unp->unp_vnode);
414 		unp->unp_vnode = 0;
415 	}
416 	if (unp->unp_conn)
417 		unp_disconnect(unp);
418 	while (unp->unp_refs)
419 		unp_drop(unp->unp_refs, ECONNRESET);
420 	soisdisconnected(unp->unp_socket);
421 	unp->unp_socket->so_pcb = 0;
422 	if (unp_rights) {
423 		/*
424 		 * Normally the receive buffer is flushed later,
425 		 * in sofree, but if our receive buffer holds references
426 		 * to descriptors that are now garbage, we will dispose
427 		 * of those descriptor references after the garbage collector
428 		 * gets them (resulting in a "panic: closef: count < 0").
429 		 */
430 		sorflush(unp->unp_socket);
431 		unp_gc();
432 	}
433 	m_freem(unp->unp_addr);
434 	(void) m_free(dtom(unp));
435 }
436 
437 static int
438 unp_bind(unp, nam, p)
439 	struct unpcb *unp;
440 	struct mbuf *nam;
441 	struct proc *p;
442 {
443 	struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
444 	register struct vnode *vp;
445 	struct vattr vattr;
446 	int error;
447 	struct nameidata nd;
448 
449 	NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
450 	    soun->sun_path, p);
451 	if (unp->unp_vnode != NULL)
452 		return (EINVAL);
453 	if (nam->m_len == MLEN) {
454 		if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0)
455 			return (EINVAL);
456 	} else
457 		*(mtod(nam, caddr_t) + nam->m_len) = 0;
458 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
459 	error = namei(&nd);
460 	if (error)
461 		return (error);
462 	vp = nd.ni_vp;
463 	if (vp != NULL) {
464 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
465 		if (nd.ni_dvp == vp)
466 			vrele(nd.ni_dvp);
467 		else
468 			vput(nd.ni_dvp);
469 		vrele(vp);
470 		return (EADDRINUSE);
471 	}
472 	VATTR_NULL(&vattr);
473 	vattr.va_type = VSOCK;
474 	vattr.va_mode = ACCESSPERMS;
475 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
476 	if (error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr))
477 		return (error);
478 	vp = nd.ni_vp;
479 	vp->v_socket = unp->unp_socket;
480 	unp->unp_vnode = vp;
481 	unp->unp_addr = m_copy(nam, 0, (int)M_COPYALL);
482 	VOP_UNLOCK(vp, 0, p);
483 	return (0);
484 }
485 
/*
 * Connect a socket to the socket bound at the path named in `nam'.
 *
 * The path is looked up, must be a VSOCK vnode writable by `p', and
 * must have a socket of the same type attached.  For protocols with
 * PR_CONNREQUIRED the target must be accepting connections; a new
 * socket is spawned from it with sonewconn(), inherits a copy of its
 * bound address, and becomes the actual peer.
 *
 * Returns 0 or an errno value; the looked-up vnode is always released.
 */
static int
unp_connect(so, nam, p)
	struct socket *so;
	struct mbuf *nam;
	struct proc *p;
{
	struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
	caddr_t raw = mtod(nam, caddr_t);
	struct socket *peer, *spawned;
	struct unpcb *listenpcb, *spawnedpcb;
	struct nameidata nd;
	struct vnode *vp;
	int error;

	/* Terminate the path, or insist that it already is. */
	if (nam->m_data + nam->m_len != &nam->m_dat[MLEN])	/* XXX */
		raw[nam->m_len] = 0;
	else if (raw[nam->m_len - 1] != 0)
		return (EMSGSIZE);

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
	error = namei(&nd);
	if (error)
		return (error);
	vp = nd.ni_vp;

	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto out;
	}
	error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p);
	if (error)
		goto out;

	peer = vp->v_socket;
	if (peer == 0) {
		error = ECONNREFUSED;
		goto out;
	}
	if (peer->so_type != so->so_type) {
		error = EPROTOTYPE;
		goto out;
	}

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		if ((peer->so_options & SO_ACCEPTCONN) == 0) {
			error = ECONNREFUSED;
			goto out;
		}
		spawned = sonewconn(peer, 0);
		if (spawned == 0) {
			error = ECONNREFUSED;
			goto out;
		}
		listenpcb = sotounpcb(peer);
		spawnedpcb = sotounpcb(spawned);
		if (listenpcb->unp_addr)
			spawnedpcb->unp_addr =
			    m_copy(listenpcb->unp_addr, 0, (int)M_COPYALL);
		peer = spawned;
	}
	error = unp_connect2(so, peer);
out:
	vput(vp);
	return (error);
}
543 
544 int
545 unp_connect2(so, so2)
546 	register struct socket *so;
547 	register struct socket *so2;
548 {
549 	register struct unpcb *unp = sotounpcb(so);
550 	register struct unpcb *unp2;
551 
552 	if (so2->so_type != so->so_type)
553 		return (EPROTOTYPE);
554 	unp2 = sotounpcb(so2);
555 	unp->unp_conn = unp2;
556 	switch (so->so_type) {
557 
558 	case SOCK_DGRAM:
559 		unp->unp_nextref = unp2->unp_refs;
560 		unp2->unp_refs = unp;
561 		soisconnected(so);
562 		break;
563 
564 	case SOCK_STREAM:
565 		unp2->unp_conn = unp;
566 		soisconnected(so);
567 		soisconnected(so2);
568 		break;
569 
570 	default:
571 		panic("unp_connect2");
572 	}
573 	return (0);
574 }
575 
576 static void
577 unp_disconnect(unp)
578 	struct unpcb *unp;
579 {
580 	register struct unpcb *unp2 = unp->unp_conn;
581 
582 	if (unp2 == 0)
583 		return;
584 	unp->unp_conn = 0;
585 	switch (unp->unp_socket->so_type) {
586 
587 	case SOCK_DGRAM:
588 		if (unp2->unp_refs == unp)
589 			unp2->unp_refs = unp->unp_nextref;
590 		else {
591 			unp2 = unp2->unp_refs;
592 			for (;;) {
593 				if (unp2 == 0)
594 					panic("unp_disconnect");
595 				if (unp2->unp_nextref == unp)
596 					break;
597 				unp2 = unp2->unp_nextref;
598 			}
599 			unp2->unp_nextref = unp->unp_nextref;
600 		}
601 		unp->unp_nextref = 0;
602 		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
603 		break;
604 
605 	case SOCK_STREAM:
606 		soisdisconnected(unp->unp_socket);
607 		unp2->unp_conn = 0;
608 		soisdisconnected(unp2->unp_socket);
609 		break;
610 	}
611 }
612 
#ifdef notdef
/*
 * Compiled out: abort a socket by simply detaching its control block.
 */
void
unp_abort(unp)
	struct unpcb *unp;
{
	unp_detach(unp);
}
#endif
622 
623 static void
624 unp_shutdown(unp)
625 	struct unpcb *unp;
626 {
627 	struct socket *so;
628 
629 	if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
630 	    (so = unp->unp_conn->unp_socket))
631 		socantrcvmore(so);
632 }
633 
634 static void
635 unp_drop(unp, errno)
636 	struct unpcb *unp;
637 	int errno;
638 {
639 	struct socket *so = unp->unp_socket;
640 
641 	so->so_error = errno;
642 	unp_disconnect(unp);
643 	if (so->so_head) {
644 		so->so_pcb = (caddr_t) 0;
645 		m_freem(unp->unp_addr);
646 		(void) m_free(dtom(unp));
647 		sofree(so);
648 	}
649 }
650 
#ifdef notdef
/*
 * Compiled out: drain hook with nothing to do.
 */
void
unp_drain()
{
}
#endif
658 
659 int
660 unp_externalize(rights)
661 	struct mbuf *rights;
662 {
663 	struct proc *p = curproc;		/* XXX */
664 	register int i;
665 	register struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
666 	register struct file **rp = (struct file **)(cm + 1);
667 	register struct file *fp;
668 	int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof (int);
669 	int f;
670 
671 	/*
672 	 * if the new FD's will not fit, then we free them all
673 	 */
674 	if (!fdavail(p, newfds)) {
675 		for (i = 0; i < newfds; i++) {
676 			fp = *rp;
677 			unp_discard(fp);
678 			*rp++ = 0;
679 		}
680 		return (EMSGSIZE);
681 	}
682 	/*
683 	 * now change each pointer to an fd in the global table to
684 	 * an integer that is the index to the local fd table entry
685 	 * that we set up to point to the global one we are transferring.
686 	 * XXX this assumes a pointer and int are the same size...!
687 	 */
688 	for (i = 0; i < newfds; i++) {
689 		if (fdalloc(p, 0, &f))
690 			panic("unp_externalize");
691 		fp = *rp;
692 		p->p_fd->fd_ofiles[f] = fp;
693 		fp->f_msgcount--;
694 		unp_rights--;
695 		*(int *)rp++ = f;
696 	}
697 	return (0);
698 }
699 
700 static int
701 unp_internalize(control, p)
702 	struct mbuf *control;
703 	struct proc *p;
704 {
705 	struct filedesc *fdp = p->p_fd;
706 	register struct cmsghdr *cm = mtod(control, struct cmsghdr *);
707 	register struct file **rp;
708 	register struct file *fp;
709 	register int i, fd;
710 	int oldfds;
711 
712 	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
713 	    cm->cmsg_len != control->m_len)
714 		return (EINVAL);
715 	oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
716 	/*
717 	 * check that all the FDs passed in refer to legal OPEN files
718 	 * If not, reject the entire operation.
719 	 */
720 	rp = (struct file **)(cm + 1);
721 	for (i = 0; i < oldfds; i++) {
722 		fd = *(int *)rp++;
723 		if ((unsigned)fd >= fdp->fd_nfiles ||
724 		    fdp->fd_ofiles[fd] == NULL)
725 			return (EBADF);
726 	}
727 	/*
728 	 * Now replace the integer FDs with pointers to
729 	 * the associated global file table entry..
730 	 * XXX this assumes a pointer and an int are the same size!
731 	 */
732 	rp = (struct file **)(cm + 1);
733 	for (i = 0; i < oldfds; i++) {
734 		fp = fdp->fd_ofiles[*(int *)rp];
735 		*rp++ = fp;
736 		fp->f_count++;
737 		fp->f_msgcount++;
738 		unp_rights++;
739 	}
740 	return (0);
741 }
742 
743 static int	unp_defer, unp_gcing;
744 
745 static void
746 unp_gc()
747 {
748 	register struct file *fp, *nextfp;
749 	register struct socket *so;
750 	struct file **extra_ref, **fpp;
751 	int nunref, i;
752 
753 	if (unp_gcing)
754 		return;
755 	unp_gcing = 1;
756 	unp_defer = 0;
757 	/*
758 	 * before going through all this, set all FDs to
759 	 * be NOT defered and NOT externally accessible
760 	 */
761 	for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next)
762 		fp->f_flag &= ~(FMARK|FDEFER);
763 	do {
764 		for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) {
765 			/*
766 			 * If the file is not open, skip it
767 			 */
768 			if (fp->f_count == 0)
769 				continue;
770 			/*
771 			 * If we already marked it as 'defer'  in a
772 			 * previous pass, then try process it this time
773 			 * and un-mark it
774 			 */
775 			if (fp->f_flag & FDEFER) {
776 				fp->f_flag &= ~FDEFER;
777 				unp_defer--;
778 			} else {
779 				/*
780 				 * if it's not defered, then check if it's
781 				 * already marked.. if so skip it
782 				 */
783 				if (fp->f_flag & FMARK)
784 					continue;
785 				/*
786 				 * If all references are from messages
787 				 * in transit, then skip it. it's not
788 				 * externally accessible.
789 				 */
790 				if (fp->f_count == fp->f_msgcount)
791 					continue;
792 				/*
793 				 * If it got this far then it must be
794 				 * externally accessible.
795 				 */
796 				fp->f_flag |= FMARK;
797 			}
798 			/*
799 			 * either it was defered, or it is externally
800 			 * accessible and not already marked so.
801 			 * Now check if it is possibly one of OUR sockets.
802 			 */
803 			if (fp->f_type != DTYPE_SOCKET ||
804 			    (so = (struct socket *)fp->f_data) == 0)
805 				continue;
806 			if (so->so_proto->pr_domain != &localdomain ||
807 			    (so->so_proto->pr_flags&PR_RIGHTS) == 0)
808 				continue;
809 #ifdef notdef
810 			if (so->so_rcv.sb_flags & SB_LOCK) {
811 				/*
812 				 * This is problematical; it's not clear
813 				 * we need to wait for the sockbuf to be
814 				 * unlocked (on a uniprocessor, at least),
815 				 * and it's also not clear what to do
816 				 * if sbwait returns an error due to receipt
817 				 * of a signal.  If sbwait does return
818 				 * an error, we'll go into an infinite
819 				 * loop.  Delete all of this for now.
820 				 */
821 				(void) sbwait(&so->so_rcv);
822 				goto restart;
823 			}
824 #endif
825 			/*
826 			 * So, Ok, it's one of our sockets and it IS externally
827 			 * accessible (or was defered). Now we look
828 			 * to see if we hold any file descriptors in it's
829 			 * message buffers. Follow those links and mark them
830 			 * as accessible too.
831 			 */
832 			unp_scan(so->so_rcv.sb_mb, unp_mark);
833 		}
834 	} while (unp_defer);
835 	/*
836 	 * We grab an extra reference to each of the file table entries
837 	 * that are not otherwise accessible and then free the rights
838 	 * that are stored in messages on them.
839 	 *
840 	 * The bug in the orginal code is a little tricky, so I'll describe
841 	 * what's wrong with it here.
842 	 *
843 	 * It is incorrect to simply unp_discard each entry for f_msgcount
844 	 * times -- consider the case of sockets A and B that contain
845 	 * references to each other.  On a last close of some other socket,
846 	 * we trigger a gc since the number of outstanding rights (unp_rights)
847 	 * is non-zero.  If during the sweep phase the gc code un_discards,
848 	 * we end up doing a (full) closef on the descriptor.  A closef on A
849 	 * results in the following chain.  Closef calls soo_close, which
850 	 * calls soclose.   Soclose calls first (through the switch
851 	 * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
852 	 * returns because the previous instance had set unp_gcing, and
853 	 * we return all the way back to soclose, which marks the socket
854 	 * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
855 	 * to free up the rights that are queued in messages on the socket A,
856 	 * i.e., the reference on B.  The sorflush calls via the dom_dispose
857 	 * switch unp_dispose, which unp_scans with unp_discard.  This second
858 	 * instance of unp_discard just calls closef on B.
859 	 *
860 	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
861 	 * which results in another closef on A.  Unfortunately, A is already
862 	 * being closed, and the descriptor has already been marked with
863 	 * SS_NOFDREF, and soclose panics at this point.
864 	 *
865 	 * Here, we first take an extra reference to each inaccessible
866 	 * descriptor.  Then, we call sorflush ourself, since we know
867 	 * it is a Unix domain socket anyhow.  After we destroy all the
868 	 * rights carried in messages, we do a last closef to get rid
869 	 * of our extra reference.  This is the last close, and the
870 	 * unp_detach etc will shut down the socket.
871 	 *
872 	 * 91/09/19, bsy@cs.cmu.edu
873 	 */
874 	extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK);
875 	for (nunref = 0, fp = filehead.lh_first, fpp = extra_ref; fp != 0;
876 	    fp = nextfp) {
877 		nextfp = fp->f_list.le_next;
878 		/*
879 		 * If it's not open, skip it
880 		 */
881 		if (fp->f_count == 0)
882 			continue;
883 		/*
884 		 * If all refs are from msgs, and it's not marked accessible
885 		 * then it must be referenced from some unreachable cycle
886 		 * of (shut-down) FDs, so include it in our
887 		 * list of FDs to remove
888 		 */
889 		if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
890 			*fpp++ = fp;
891 			nunref++;
892 			fp->f_count++;
893 		}
894 	}
895 	/*
896 	 * for each FD on our hit list, do the following two things
897 	 */
898 	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
899 		sorflush((struct socket *)(*fpp)->f_data);
900 	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
901 		closef(*fpp, (struct proc *) NULL);
902 	free((caddr_t)extra_ref, M_FILE);
903 	unp_gcing = 0;
904 }
905 
906 void
907 unp_dispose(m)
908 	struct mbuf *m;
909 {
910 
911 	if (m)
912 		unp_scan(m, unp_discard);
913 }
914 
915 static void
916 unp_scan(m0, op)
917 	register struct mbuf *m0;
918 	void (*op) __P((struct file *));
919 {
920 	register struct mbuf *m;
921 	register struct file **rp;
922 	register struct cmsghdr *cm;
923 	register int i;
924 	int qfds;
925 
926 	while (m0) {
927 		for (m = m0; m; m = m->m_next)
928 			if (m->m_type == MT_CONTROL &&
929 			    m->m_len >= sizeof(*cm)) {
930 				cm = mtod(m, struct cmsghdr *);
931 				if (cm->cmsg_level != SOL_SOCKET ||
932 				    cm->cmsg_type != SCM_RIGHTS)
933 					continue;
934 				qfds = (cm->cmsg_len - sizeof *cm)
935 						/ sizeof (struct file *);
936 				rp = (struct file **)(cm + 1);
937 				for (i = 0; i < qfds; i++)
938 					(*op)(*rp++);
939 				break;		/* XXX, but saves time */
940 			}
941 		m0 = m0->m_act;
942 	}
943 }
944 
945 static void
946 unp_mark(fp)
947 	struct file *fp;
948 {
949 
950 	if (fp->f_flag & FMARK)
951 		return;
952 	unp_defer++;
953 	fp->f_flag |= (FMARK|FDEFER);
954 }
955 
956 static void
957 unp_discard(fp)
958 	struct file *fp;
959 {
960 
961 	fp->f_msgcount--;
962 	unp_rights--;
963 	(void) closef(fp, (struct proc *)NULL);
964 }
965