xref: /freebsd/sys/kern/uipc_usrreq.c (revision afe61c15161c324a7af299a9b8457aba5afc92db)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
34  */
35 
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/proc.h>
39 #include <sys/filedesc.h>
40 #include <sys/domain.h>
41 #include <sys/protosw.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
44 #include <sys/unpcb.h>
45 #include <sys/un.h>
46 #include <sys/namei.h>
47 #include <sys/vnode.h>
48 #include <sys/file.h>
49 #include <sys/stat.h>
50 #include <sys/mbuf.h>
51 
52 void	unp_detach	__P((struct unpcb *));
53 void	unp_disconnect	__P((struct unpcb *));
54 void	unp_shutdown	__P((struct unpcb *));
55 void	unp_drop	__P((struct unpcb *, int));
56 void	unp_gc		__P((void));
57 void	unp_scan	__P((struct mbuf *, void (*)(struct file *)));
58 void	unp_mark	__P((struct file *));
59 void	unp_discard	__P((struct file *));
60 
61 /*
62  * Unix communications domain.
63  *
64  * TODO:
65  *	SEQPACKET, RDM
66  *	rethink name space problems
67  *	need a proper out-of-band
68  */
69 struct	sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };
70 ino_t	unp_ino;			/* prototype for fake inode numbers */
71 
72 /*ARGSUSED*/
73 int
74 uipc_usrreq(so, req, m, nam, control)
75 	struct socket *so;
76 	int req;
77 	struct mbuf *m, *nam, *control;
78 {
79 	struct unpcb *unp = sotounpcb(so);
80 	register struct socket *so2;
81 	register int error = 0;
82 	struct proc *p = curproc;	/* XXX */
83 
84 	if (req == PRU_CONTROL)
85 		return (EOPNOTSUPP);
86 	if (req != PRU_SEND && control && control->m_len) {
87 		error = EOPNOTSUPP;
88 		goto release;
89 	}
90 	if (unp == 0 && req != PRU_ATTACH) {
91 		error = EINVAL;
92 		goto release;
93 	}
94 	switch (req) {
95 
96 	case PRU_ATTACH:
97 		if (unp) {
98 			error = EISCONN;
99 			break;
100 		}
101 		error = unp_attach(so);
102 		break;
103 
104 	case PRU_DETACH:
105 		unp_detach(unp);
106 		break;
107 
108 	case PRU_BIND:
109 		error = unp_bind(unp, nam, p);
110 		break;
111 
112 	case PRU_LISTEN:
113 		if (unp->unp_vnode == 0)
114 			error = EINVAL;
115 		break;
116 
117 	case PRU_CONNECT:
118 		error = unp_connect(so, nam, p);
119 		break;
120 
121 	case PRU_CONNECT2:
122 		error = unp_connect2(so, (struct socket *)nam);
123 		break;
124 
125 	case PRU_DISCONNECT:
126 		unp_disconnect(unp);
127 		break;
128 
129 	case PRU_ACCEPT:
130 		/*
131 		 * Pass back name of connected socket,
132 		 * if it was bound and we are still connected
133 		 * (our peer may have closed already!).
134 		 */
135 		if (unp->unp_conn && unp->unp_conn->unp_addr) {
136 			nam->m_len = unp->unp_conn->unp_addr->m_len;
137 			bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
138 			    mtod(nam, caddr_t), (unsigned)nam->m_len);
139 		} else {
140 			nam->m_len = sizeof(sun_noname);
141 			*(mtod(nam, struct sockaddr *)) = sun_noname;
142 		}
143 		break;
144 
145 	case PRU_SHUTDOWN:
146 		socantsendmore(so);
147 		unp_shutdown(unp);
148 		break;
149 
150 	case PRU_RCVD:
151 		switch (so->so_type) {
152 
153 		case SOCK_DGRAM:
154 			panic("uipc 1");
155 			/*NOTREACHED*/
156 
157 		case SOCK_STREAM:
158 #define	rcv (&so->so_rcv)
159 #define snd (&so2->so_snd)
160 			if (unp->unp_conn == 0)
161 				break;
162 			so2 = unp->unp_conn->unp_socket;
163 			/*
164 			 * Adjust backpressure on sender
165 			 * and wakeup any waiting to write.
166 			 */
167 			snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
168 			unp->unp_mbcnt = rcv->sb_mbcnt;
169 			snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
170 			unp->unp_cc = rcv->sb_cc;
171 			sowwakeup(so2);
172 #undef snd
173 #undef rcv
174 			break;
175 
176 		default:
177 			panic("uipc 2");
178 		}
179 		break;
180 
181 	case PRU_SEND:
182 		if (control && (error = unp_internalize(control, p)))
183 			break;
184 		switch (so->so_type) {
185 
186 		case SOCK_DGRAM: {
187 			struct sockaddr *from;
188 
189 			if (nam) {
190 				if (unp->unp_conn) {
191 					error = EISCONN;
192 					break;
193 				}
194 				error = unp_connect(so, nam, p);
195 				if (error)
196 					break;
197 			} else {
198 				if (unp->unp_conn == 0) {
199 					error = ENOTCONN;
200 					break;
201 				}
202 			}
203 			so2 = unp->unp_conn->unp_socket;
204 			if (unp->unp_addr)
205 				from = mtod(unp->unp_addr, struct sockaddr *);
206 			else
207 				from = &sun_noname;
208 			if (sbappendaddr(&so2->so_rcv, from, m, control)) {
209 				sorwakeup(so2);
210 				m = 0;
211 				control = 0;
212 			} else
213 				error = ENOBUFS;
214 			if (nam)
215 				unp_disconnect(unp);
216 			break;
217 		}
218 
219 		case SOCK_STREAM:
220 #define	rcv (&so2->so_rcv)
221 #define	snd (&so->so_snd)
222 			if (so->so_state & SS_CANTSENDMORE) {
223 				error = EPIPE;
224 				break;
225 			}
226 			if (unp->unp_conn == 0)
227 				panic("uipc 3");
228 			so2 = unp->unp_conn->unp_socket;
229 			/*
230 			 * Send to paired receive port, and then reduce
231 			 * send buffer hiwater marks to maintain backpressure.
232 			 * Wake up readers.
233 			 */
234 			if (control) {
235 				if (sbappendcontrol(rcv, m, control))
236 					control = 0;
237 			} else
238 				sbappend(rcv, m);
239 			snd->sb_mbmax -=
240 			    rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
241 			unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
242 			snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
243 			unp->unp_conn->unp_cc = rcv->sb_cc;
244 			sorwakeup(so2);
245 			m = 0;
246 #undef snd
247 #undef rcv
248 			break;
249 
250 		default:
251 			panic("uipc 4");
252 		}
253 		break;
254 
255 	case PRU_ABORT:
256 		unp_drop(unp, ECONNABORTED);
257 		break;
258 
259 	case PRU_SENSE:
260 		((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
261 		if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
262 			so2 = unp->unp_conn->unp_socket;
263 			((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
264 		}
265 		((struct stat *) m)->st_dev = NODEV;
266 		if (unp->unp_ino == 0)
267 			unp->unp_ino = unp_ino++;
268 		((struct stat *) m)->st_ino = unp->unp_ino;
269 		return (0);
270 
271 	case PRU_RCVOOB:
272 		return (EOPNOTSUPP);
273 
274 	case PRU_SENDOOB:
275 		error = EOPNOTSUPP;
276 		break;
277 
278 	case PRU_SOCKADDR:
279 		if (unp->unp_addr) {
280 			nam->m_len = unp->unp_addr->m_len;
281 			bcopy(mtod(unp->unp_addr, caddr_t),
282 			    mtod(nam, caddr_t), (unsigned)nam->m_len);
283 		} else
284 			nam->m_len = 0;
285 		break;
286 
287 	case PRU_PEERADDR:
288 		if (unp->unp_conn && unp->unp_conn->unp_addr) {
289 			nam->m_len = unp->unp_conn->unp_addr->m_len;
290 			bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
291 			    mtod(nam, caddr_t), (unsigned)nam->m_len);
292 		} else
293 			nam->m_len = 0;
294 		break;
295 
296 	case PRU_SLOWTIMO:
297 		break;
298 
299 	default:
300 		panic("piusrreq");
301 	}
302 release:
303 	if (control)
304 		m_freem(control);
305 	if (m)
306 		m_freem(m);
307 	return (error);
308 }
309 
310 /*
311  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
312  * for stream sockets, although the total for sender and receiver is
313  * actually only PIPSIZ.
314  * Datagram sockets really use the sendspace as the maximum datagram size,
315  * and don't really want to reserve the sendspace.  Their recvspace should
316  * be large enough for at least one max-size datagram plus address.
317  */
318 #define	PIPSIZ	4096
319 u_long	unpst_sendspace = PIPSIZ;
320 u_long	unpst_recvspace = PIPSIZ;
321 u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
322 u_long	unpdg_recvspace = 4*1024;
323 
324 int	unp_rights;			/* file descriptors in flight */
325 
326 int
327 unp_attach(so)
328 	struct socket *so;
329 {
330 	register struct mbuf *m;
331 	register struct unpcb *unp;
332 	int error;
333 
334 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
335 		switch (so->so_type) {
336 
337 		case SOCK_STREAM:
338 			error = soreserve(so, unpst_sendspace, unpst_recvspace);
339 			break;
340 
341 		case SOCK_DGRAM:
342 			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
343 			break;
344 
345 		default:
346 			panic("unp_attach");
347 		}
348 		if (error)
349 			return (error);
350 	}
351 	m = m_getclr(M_DONTWAIT, MT_PCB);
352 	if (m == NULL)
353 		return (ENOBUFS);
354 	unp = mtod(m, struct unpcb *);
355 	so->so_pcb = (caddr_t)unp;
356 	unp->unp_socket = so;
357 	return (0);
358 }
359 
360 void
361 unp_detach(unp)
362 	register struct unpcb *unp;
363 {
364 
365 	if (unp->unp_vnode) {
366 		unp->unp_vnode->v_socket = 0;
367 		vrele(unp->unp_vnode);
368 		unp->unp_vnode = 0;
369 	}
370 	if (unp->unp_conn)
371 		unp_disconnect(unp);
372 	while (unp->unp_refs)
373 		unp_drop(unp->unp_refs, ECONNRESET);
374 	soisdisconnected(unp->unp_socket);
375 	unp->unp_socket->so_pcb = 0;
376 	m_freem(unp->unp_addr);
377 	(void) m_free(dtom(unp));
378 	if (unp_rights) {
379 		/*
380 		 * Normally the receive buffer is flushed later,
381 		 * in sofree, but if our receive buffer holds references
382 		 * to descriptors that are now garbage, we will dispose
383 		 * of those descriptor references after the garbage collector
384 		 * gets them (resulting in a "panic: closef: count < 0").
385 		 */
386 		sorflush(unp->unp_socket);
387 		unp_gc();
388 	}
389 }
390 
391 int
392 unp_bind(unp, nam, p)
393 	struct unpcb *unp;
394 	struct mbuf *nam;
395 	struct proc *p;
396 {
397 	struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
398 	register struct vnode *vp;
399 	struct vattr vattr;
400 	int error;
401 	struct nameidata nd;
402 
403 	NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
404 		soun->sun_path, p);
405 	if (unp->unp_vnode != NULL)
406 		return (EINVAL);
407 	if (nam->m_len == MLEN) {
408 		if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0)
409 			return (EINVAL);
410 	} else
411 		*(mtod(nam, caddr_t) + nam->m_len) = 0;
412 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
413 	if (error = namei(&nd))
414 		return (error);
415 	vp = nd.ni_vp;
416 	if (vp != NULL) {
417 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
418 		if (nd.ni_dvp == vp)
419 			vrele(nd.ni_dvp);
420 		else
421 			vput(nd.ni_dvp);
422 		vrele(vp);
423 		return (EADDRINUSE);
424 	}
425 	VATTR_NULL(&vattr);
426 	vattr.va_type = VSOCK;
427 	vattr.va_mode = ACCESSPERMS;
428 	LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
429 	if (error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr))
430 		return (error);
431 	vp = nd.ni_vp;
432 	vp->v_socket = unp->unp_socket;
433 	unp->unp_vnode = vp;
434 	unp->unp_addr = m_copy(nam, 0, (int)M_COPYALL);
435 	VOP_UNLOCK(vp);
436 	return (0);
437 }
438 
/*
 * Connect a socket to the socket bound at the path name in nam.
 *
 *	so	socket to connect
 *	nam	mbuf holding the struct sockaddr_un of the peer
 *	p	process used for the lookup and the access check
 *
 * For protocols flagged PR_CONNREQUIRED the target must be accepting
 * connections; a fresh socket is spawned from it with sonewconn(),
 * inherits a copy of the target's address, and becomes the peer.
 * Returns 0 or an errno value.
 */
int
unp_connect(so, nam, p)
	struct socket *so;
	struct mbuf *nam;
	struct proc *p;
{
	struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
	struct vnode *vp;
	struct socket *peer, *child;
	struct unpcb *listen_pcb, *child_pcb;
	struct nameidata nd;
	caddr_t name_end;
	int error;

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);

	/* NUL terminate the path, or insist that it already is. */
	name_end = mtod(nam, caddr_t) + nam->m_len;
	if (nam->m_data + nam->m_len == &nam->m_dat[MLEN]) {	/* XXX */
		if (name_end[-1] != 0)
			return (EMSGSIZE);
	} else
		*name_end = 0;

	error = namei(&nd);
	if (error)
		return (error);
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto bad;
	}
	error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p);
	if (error)
		goto bad;
	peer = vp->v_socket;
	if (peer == 0) {
		error = ECONNREFUSED;
		goto bad;
	}
	if (so->so_type != peer->so_type) {
		error = EPROTOTYPE;
		goto bad;
	}
	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		if ((peer->so_options & SO_ACCEPTCONN) == 0) {
			error = ECONNREFUSED;
			goto bad;
		}
		child = sonewconn(peer, 0);
		if (child == 0) {
			error = ECONNREFUSED;
			goto bad;
		}
		listen_pcb = sotounpcb(peer);
		child_pcb = sotounpcb(child);
		if (listen_pcb->unp_addr)
			child_pcb->unp_addr =
			    m_copy(listen_pcb->unp_addr, 0, (int)M_COPYALL);
		peer = child;
	}
	error = unp_connect2(so, peer);
bad:
	/* The lookup returned vp locked (LOCKLEAF); unlock and release it. */
	vput(vp);
	return (error);
}
494 
495 int
496 unp_connect2(so, so2)
497 	register struct socket *so;
498 	register struct socket *so2;
499 {
500 	register struct unpcb *unp = sotounpcb(so);
501 	register struct unpcb *unp2;
502 
503 	if (so2->so_type != so->so_type)
504 		return (EPROTOTYPE);
505 	unp2 = sotounpcb(so2);
506 	unp->unp_conn = unp2;
507 	switch (so->so_type) {
508 
509 	case SOCK_DGRAM:
510 		unp->unp_nextref = unp2->unp_refs;
511 		unp2->unp_refs = unp;
512 		soisconnected(so);
513 		break;
514 
515 	case SOCK_STREAM:
516 		unp2->unp_conn = unp;
517 		soisconnected(so);
518 		soisconnected(so2);
519 		break;
520 
521 	default:
522 		panic("unp_connect2");
523 	}
524 	return (0);
525 }
526 
527 void
528 unp_disconnect(unp)
529 	struct unpcb *unp;
530 {
531 	register struct unpcb *unp2 = unp->unp_conn;
532 
533 	if (unp2 == 0)
534 		return;
535 	unp->unp_conn = 0;
536 	switch (unp->unp_socket->so_type) {
537 
538 	case SOCK_DGRAM:
539 		if (unp2->unp_refs == unp)
540 			unp2->unp_refs = unp->unp_nextref;
541 		else {
542 			unp2 = unp2->unp_refs;
543 			for (;;) {
544 				if (unp2 == 0)
545 					panic("unp_disconnect");
546 				if (unp2->unp_nextref == unp)
547 					break;
548 				unp2 = unp2->unp_nextref;
549 			}
550 			unp2->unp_nextref = unp->unp_nextref;
551 		}
552 		unp->unp_nextref = 0;
553 		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
554 		break;
555 
556 	case SOCK_STREAM:
557 		soisdisconnected(unp->unp_socket);
558 		unp2->unp_conn = 0;
559 		soisdisconnected(unp2->unp_socket);
560 		break;
561 	}
562 }
563 
#ifdef notdef
/*
 * Abort by detaching.  Compiled out: PRU_ABORT is handled with
 * unp_drop() in uipc_usrreq().
 */
void
unp_abort(unp)
	struct unpcb *unp;
{

	unp_detach(unp);
}
#endif /* notdef */
573 
574 void
575 unp_shutdown(unp)
576 	struct unpcb *unp;
577 {
578 	struct socket *so;
579 
580 	if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
581 	    (so = unp->unp_conn->unp_socket))
582 		socantrcvmore(so);
583 }
584 
585 void
586 unp_drop(unp, errno)
587 	struct unpcb *unp;
588 	int errno;
589 {
590 	struct socket *so = unp->unp_socket;
591 
592 	so->so_error = errno;
593 	unp_disconnect(unp);
594 	if (so->so_head) {
595 		so->so_pcb = (caddr_t) 0;
596 		m_freem(unp->unp_addr);
597 		(void) m_free(dtom(unp));
598 		sofree(so);
599 	}
600 }
601 
#ifdef notdef
/* Drain hook; compiled out, and it would have nothing to do anyway. */
void
unp_drain()
{
}
#endif /* notdef */
609 
610 int
611 unp_externalize(rights)
612 	struct mbuf *rights;
613 {
614 	struct proc *p = curproc;		/* XXX */
615 	register int i;
616 	register struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
617 	register struct file **rp = (struct file **)(cm + 1);
618 	register struct file *fp;
619 	int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof (int);
620 	int f;
621 
622 	if (!fdavail(p, newfds)) {
623 		for (i = 0; i < newfds; i++) {
624 			fp = *rp;
625 			unp_discard(fp);
626 			*rp++ = 0;
627 		}
628 		return (EMSGSIZE);
629 	}
630 	for (i = 0; i < newfds; i++) {
631 		if (fdalloc(p, 0, &f))
632 			panic("unp_externalize");
633 		fp = *rp;
634 		p->p_fd->fd_ofiles[f] = fp;
635 		fp->f_msgcount--;
636 		unp_rights--;
637 		*(int *)rp++ = f;
638 	}
639 	return (0);
640 }
641 
642 int
643 unp_internalize(control, p)
644 	struct mbuf *control;
645 	struct proc *p;
646 {
647 	struct filedesc *fdp = p->p_fd;
648 	register struct cmsghdr *cm = mtod(control, struct cmsghdr *);
649 	register struct file **rp;
650 	register struct file *fp;
651 	register int i, fd;
652 	int oldfds;
653 
654 	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
655 	    cm->cmsg_len != control->m_len)
656 		return (EINVAL);
657 	oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
658 	rp = (struct file **)(cm + 1);
659 	for (i = 0; i < oldfds; i++) {
660 		fd = *(int *)rp++;
661 		if ((unsigned)fd >= fdp->fd_nfiles ||
662 		    fdp->fd_ofiles[fd] == NULL)
663 			return (EBADF);
664 	}
665 	rp = (struct file **)(cm + 1);
666 	for (i = 0; i < oldfds; i++) {
667 		fp = fdp->fd_ofiles[*(int *)rp];
668 		*rp++ = fp;
669 		fp->f_count++;
670 		fp->f_msgcount++;
671 		unp_rights++;
672 	}
673 	return (0);
674 }
675 
int	unp_defer;	/* files marked by unp_mark() and not yet scanned */
int	unp_gcing;	/* set while unp_gc() is running */
extern	struct domain unixdomain;
678 
679 void
680 unp_gc()
681 {
682 	register struct file *fp, *nextfp;
683 	register struct socket *so;
684 	struct file **extra_ref, **fpp;
685 	int nunref, i;
686 
687 	if (unp_gcing)
688 		return;
689 	unp_gcing = 1;
690 	unp_defer = 0;
691 	for (fp = filehead; fp; fp = fp->f_filef)
692 		fp->f_flag &= ~(FMARK|FDEFER);
693 	do {
694 		for (fp = filehead; fp; fp = fp->f_filef) {
695 			if (fp->f_count == 0)
696 				continue;
697 			if (fp->f_flag & FDEFER) {
698 				fp->f_flag &= ~FDEFER;
699 				unp_defer--;
700 			} else {
701 				if (fp->f_flag & FMARK)
702 					continue;
703 				if (fp->f_count == fp->f_msgcount)
704 					continue;
705 				fp->f_flag |= FMARK;
706 			}
707 			if (fp->f_type != DTYPE_SOCKET ||
708 			    (so = (struct socket *)fp->f_data) == 0)
709 				continue;
710 			if (so->so_proto->pr_domain != &unixdomain ||
711 			    (so->so_proto->pr_flags&PR_RIGHTS) == 0)
712 				continue;
713 #ifdef notdef
714 			if (so->so_rcv.sb_flags & SB_LOCK) {
715 				/*
716 				 * This is problematical; it's not clear
717 				 * we need to wait for the sockbuf to be
718 				 * unlocked (on a uniprocessor, at least),
719 				 * and it's also not clear what to do
720 				 * if sbwait returns an error due to receipt
721 				 * of a signal.  If sbwait does return
722 				 * an error, we'll go into an infinite
723 				 * loop.  Delete all of this for now.
724 				 */
725 				(void) sbwait(&so->so_rcv);
726 				goto restart;
727 			}
728 #endif
729 			unp_scan(so->so_rcv.sb_mb, unp_mark);
730 		}
731 	} while (unp_defer);
732 	/*
733 	 * We grab an extra reference to each of the file table entries
734 	 * that are not otherwise accessible and then free the rights
735 	 * that are stored in messages on them.
736 	 *
737 	 * The bug in the orginal code is a little tricky, so I'll describe
738 	 * what's wrong with it here.
739 	 *
740 	 * It is incorrect to simply unp_discard each entry for f_msgcount
741 	 * times -- consider the case of sockets A and B that contain
742 	 * references to each other.  On a last close of some other socket,
743 	 * we trigger a gc since the number of outstanding rights (unp_rights)
744 	 * is non-zero.  If during the sweep phase the gc code un_discards,
745 	 * we end up doing a (full) closef on the descriptor.  A closef on A
746 	 * results in the following chain.  Closef calls soo_close, which
747 	 * calls soclose.   Soclose calls first (through the switch
748 	 * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
749 	 * returns because the previous instance had set unp_gcing, and
750 	 * we return all the way back to soclose, which marks the socket
751 	 * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
752 	 * to free up the rights that are queued in messages on the socket A,
753 	 * i.e., the reference on B.  The sorflush calls via the dom_dispose
754 	 * switch unp_dispose, which unp_scans with unp_discard.  This second
755 	 * instance of unp_discard just calls closef on B.
756 	 *
757 	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
758 	 * which results in another closef on A.  Unfortunately, A is already
759 	 * being closed, and the descriptor has already been marked with
760 	 * SS_NOFDREF, and soclose panics at this point.
761 	 *
762 	 * Here, we first take an extra reference to each inaccessible
763 	 * descriptor.  Then, we call sorflush ourself, since we know
764 	 * it is a Unix domain socket anyhow.  After we destroy all the
765 	 * rights carried in messages, we do a last closef to get rid
766 	 * of our extra reference.  This is the last close, and the
767 	 * unp_detach etc will shut down the socket.
768 	 *
769 	 * 91/09/19, bsy@cs.cmu.edu
770 	 */
771 	extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK);
772 	for (nunref = 0, fp = filehead, fpp = extra_ref; fp; fp = nextfp) {
773 		nextfp = fp->f_filef;
774 		if (fp->f_count == 0)
775 			continue;
776 		if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
777 			*fpp++ = fp;
778 			nunref++;
779 			fp->f_count++;
780 		}
781 	}
782 	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
783 		sorflush((struct socket *)(*fpp)->f_data);
784 	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
785 		closef(*fpp);
786 	free((caddr_t)extra_ref, M_FILE);
787 	unp_gcing = 0;
788 }
789 
790 void
791 unp_dispose(m)
792 	struct mbuf *m;
793 {
794 	if (m)
795 		unp_scan(m, unp_discard);
796 }
797 
798 void
799 unp_scan(m0, op)
800 	register struct mbuf *m0;
801 	void (*op)(struct file *);
802 {
803 	register struct mbuf *m;
804 	register struct file **rp;
805 	register struct cmsghdr *cm;
806 	register int i;
807 	int qfds;
808 
809 	while (m0) {
810 		for (m = m0; m; m = m->m_next)
811 			if (m->m_type == MT_CONTROL &&
812 			    m->m_len >= sizeof(*cm)) {
813 				cm = mtod(m, struct cmsghdr *);
814 				if (cm->cmsg_level != SOL_SOCKET ||
815 				    cm->cmsg_type != SCM_RIGHTS)
816 					continue;
817 				qfds = (cm->cmsg_len - sizeof *cm)
818 						/ sizeof (struct file *);
819 				rp = (struct file **)(cm + 1);
820 				for (i = 0; i < qfds; i++)
821 					(*op)(*rp++);
822 				break;		/* XXX, but saves time */
823 			}
824 		m0 = m0->m_act;
825 	}
826 }
827 
828 void
829 unp_mark(fp)
830 	struct file *fp;
831 {
832 
833 	if (fp->f_flag & FMARK)
834 		return;
835 	unp_defer++;
836 	fp->f_flag |= (FMARK|FDEFER);
837 }
838 
839 void
840 unp_discard(fp)
841 	struct file *fp;
842 {
843 
844 	fp->f_msgcount--;
845 	unp_rights--;
846 	(void) closef(fp, (struct proc *)NULL);
847 }
848