xref: /freebsd/sys/kern/uipc_socket.c (revision 807a5caa14df5ff04b331e24b45893f6a2f6bc1b)
1 /*
2  * Copyright (c) 1982, 1986, 1988, 1990, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
34  * $FreeBSD$
35  */
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/fcntl.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/domain.h>
43 #include <sys/kernel.h>
44 #include <sys/malloc.h>
45 #include <sys/poll.h>
46 #include <sys/proc.h>
47 #include <sys/protosw.h>
48 #include <sys/socket.h>
49 #include <sys/socketvar.h>
50 #include <sys/resourcevar.h>
51 #include <sys/signalvar.h>
52 #include <sys/sysctl.h>
53 #include <sys/uio.h>
54 #include <vm/vm_zone.h>
55 
56 #include <machine/limits.h>
57 
struct	vm_zone *socket_zone;	/* zone backing soalloc()/sodealloc() */
so_gen_t	so_gencnt;	/* generation count for sockets */

MALLOC_DEFINE(M_SONAME, "soname", "socket name");
MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");

SYSCTL_DECL(_kern_ipc);

/* Upper bound applied to the listen(2) backlog; tunable at runtime. */
static int somaxconn = SOMAXCONN;
SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW,
    &somaxconn, 0, "Maximum pending socket connection queue size");
69 
70 /*
71  * Socket operation routines.
72  * These routines are called by the routines in
73  * sys_socket.c or from a system process, and
74  * implement the semantics of socket operations by
75  * switching out to the protocol specific routines.
76  */
77 
78 /*
79  * Get a socket structure from our zone, and initialize it.
80  * We don't implement `waitok' yet (see comments in uipc_domain.c).
81  * Note that it would probably be better to allocate socket
82  * and PCB at the same time, but I'm not convinced that all
83  * the protocols can be easily modified to do this.
84  */
struct socket *
soalloc(waitok)
	int waitok;	/* currently ignored; see the comment above */
{
	struct socket *so;

	so = zalloci(socket_zone);
	if (so) {
		/* XXX race condition for reentrant kernel */
		bzero(so, sizeof *so);
		/* Stamp a fresh generation number so monitors can tell
		 * a recycled socket from the one it replaced. */
		so->so_gencnt = ++so_gencnt;
		so->so_zone = socket_zone;
		TAILQ_INIT(&so->so_aiojobq);
	}
	return so;
}
101 
/*
 * Create a socket of the given type in the given domain and attach
 * the requested protocol.  On success the new socket, holding a
 * reference on the creating process's credentials, is returned via
 * "aso"; on failure an errno value is returned and nothing is stored.
 */
int
socreate(dom, aso, type, proto, p)
	int dom;
	struct socket **aso;
	register int type;
	int proto;
	struct proc *p;
{
	register struct protosw *prp;
	register struct socket *so;
	register int error;

	/* An explicit protocol wins; otherwise match on socket type. */
	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	so = soalloc(p != 0);
	if (so == 0)
		return (ENOBUFS);

	TAILQ_INIT(&so->so_incomp);
	TAILQ_INIT(&so->so_comp);
	so->so_type = type;
	/* NOTE(review): p is dereferenced unconditionally here, although
	 * soalloc(p != 0) above suggests a NULL p was anticipated --
	 * confirm all callers pass a valid process. */
	so->so_cred = p->p_ucred;
	crhold(so->so_cred);
	so->so_proto = prp;
	error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
	if (error) {
		/* Mark the socket unreferenced so sofree() will actually
		 * tear it down. */
		so->so_state |= SS_NOFDREF;
		sofree(so);
		return (error);
	}
	*aso = so;
	return (0);
}
141 
142 int
143 sobind(so, nam, p)
144 	struct socket *so;
145 	struct sockaddr *nam;
146 	struct proc *p;
147 {
148 	int s = splnet();
149 	int error;
150 
151 	error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
152 	splx(s);
153 	return (error);
154 }
155 
/*
 * Release a socket previously obtained from soalloc().  Any socket
 * buffer space charged against the owner's uid is credited back
 * before the credential reference is dropped and the memory is
 * returned to the socket zone.
 */
void
sodealloc(so)
	struct socket *so;
{

	so->so_gencnt = ++so_gencnt;
	/* Refund the per-uid socket buffer accounting, if any. */
	if (so->so_rcv.sb_hiwat)
		(void)chgsbsize(so->so_cred->cr_uid,
		    -(rlim_t)so->so_rcv.sb_hiwat);
	if (so->so_snd.sb_hiwat)
		(void)chgsbsize(so->so_cred->cr_uid,
		    -(rlim_t)so->so_snd.sb_hiwat);
	crfree(so->so_cred);
	zfreei(so->so_zone, so);
}
171 
/*
 * Mark the socket as willing to accept connections (SO_ACCEPTCONN)
 * and record the connection queue limit.  A backlog outside the
 * range [0, somaxconn] is clamped to somaxconn.
 */
int
solisten(so, backlog, p)
	register struct socket *so;
	int backlog;
	struct proc *p;
{
	int s, error;

	s = splnet();
	error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
	if (error) {
		splx(s);
		return (error);
	}
	/* Only flip into listening state if no connection is queued yet. */
	if (TAILQ_EMPTY(&so->so_comp))
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	so->so_qlimit = backlog;
	splx(s);
	return (0);
}
194 
/*
 * Tear down and free a socket, but only once it has lost both its
 * file reference (SS_NOFDREF) and its protocol control block.
 * A socket still queued on a listening socket is dequeued first;
 * one sitting on the completed-connection queue is deliberately
 * left alone (see the comment below).
 */
void
sofree(so)
	register struct socket *so;
{
	struct socket *head = so->so_head;

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (head != NULL) {
		if (so->so_state & SS_INCOMP) {
			TAILQ_REMOVE(&head->so_incomp, so, so_list);
			head->so_incqlen--;
		} else if (so->so_state & SS_COMP) {
			/*
			 * We must not decommission a socket that's
			 * on the accept(2) queue.  If we do, then
			 * accept(2) may hang after select(2) indicated
			 * that the listening socket was ready.
			 */
			return;
		} else {
			panic("sofree: not queued");
		}
		head->so_qlen--;
		so->so_state &= ~SS_INCOMP;
		so->so_head = NULL;
	}
	/* Free pending send data, flush the receive side, then the socket. */
	sbrelease(&so->so_snd, so);
	sorflush(so);
	sodealloc(so);
}
226 
227 /*
228  * Close a socket on last file table reference removal.
229  * Initiate disconnect if connected.
230  * Free socket when disconnect complete.
231  */
int
soclose(so)
	register struct socket *so;
{
	int s = splnet();		/* conservative */
	int error = 0;

	/* Drop SIGIO ownership state tied to this socket. */
	funsetown(so->so_sigio);
	if (so->so_options & SO_ACCEPTCONN) {
		struct socket *sp, *sonext;

		/* Abort every connection still queued on this listener. */
		sp = TAILQ_FIRST(&so->so_incomp);
		for (; sp != NULL; sp = sonext) {
			sonext = TAILQ_NEXT(sp, so_list);
			(void) soabort(sp);
		}
		for (sp = TAILQ_FIRST(&so->so_comp); sp != NULL; sp = sonext) {
			sonext = TAILQ_NEXT(sp, so_list);
			/* Dequeue from so_comp since sofree() won't do it */
			TAILQ_REMOVE(&so->so_comp, sp, so_list);
			so->so_qlen--;
			sp->so_state &= ~SS_COMP;
			sp->so_head = NULL;
			(void) soabort(sp);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			/* A non-blocking socket never lingers. */
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			/* Wait up to so_linger for the disconnect to finish. */
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, "soclos", so->so_linger * hz);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		/* Detach the protocol; preserve the first error seen. */
		int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}
292 
293 /*
294  * Must be called at splnet...
295  */
296 int
297 soabort(so)
298 	struct socket *so;
299 {
300 	int error;
301 
302 	error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
303 	if (error) {
304 		sofree(so);
305 		return error;
306 	}
307 	return (0);
308 }
309 
310 int
311 soaccept(so, nam)
312 	register struct socket *so;
313 	struct sockaddr **nam;
314 {
315 	int s = splnet();
316 	int error;
317 
318 	if ((so->so_state & SS_NOFDREF) == 0)
319 		panic("soaccept: !NOFDREF");
320 	so->so_state &= ~SS_NOFDREF;
321  	if ((so->so_state & SS_ISDISCONNECTED) == 0)
322 		error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
323 	else {
324 		if (nam)
325 			*nam = 0;
326 		error = 0;
327 	}
328 	splx(s);
329 	return (error);
330 }
331 
int
soconnect(so, nam, p)
	register struct socket *so;
	struct sockaddr *nam;
	struct proc *p;
{
	int s;
	int error;

	/* A listening socket cannot initiate connections. */
	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 *
	 * NOTE(review): if sodisconnect() fails here its error is
	 * overwritten with EISCONN -- confirm this masking is intended.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
	splx(s);
	return (error);
}
359 
360 int
361 soconnect2(so1, so2)
362 	register struct socket *so1;
363 	struct socket *so2;
364 {
365 	int s = splnet();
366 	int error;
367 
368 	error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
369 	splx(s);
370 	return (error);
371 }
372 
373 int
374 sodisconnect(so)
375 	register struct socket *so;
376 {
377 	int s = splnet();
378 	int error;
379 
380 	if ((so->so_state & SS_ISCONNECTED) == 0) {
381 		error = ENOTCONN;
382 		goto bad;
383 	}
384 	if (so->so_state & SS_ISDISCONNECTING) {
385 		error = EALREADY;
386 		goto bad;
387 	}
388 	error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
389 bad:
390 	splx(s);
391 	return (error);
392 }
393 
394 #define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
395 /*
396  * Send on a socket.
397  * If send must go all at once and message is larger than
398  * send buffering, then hard error.
399  * Lock against other senders.
400  * If must go all at once and not enough room now, then
401  * inform user that this would block and do nothing.
402  * Otherwise, if nonblocking, send as much as possible.
403  * The data to be sent is described by "uio" if nonzero,
404  * otherwise by the mbuf chain "top" (which must be null
405  * if uio is not).  Data provided in mbuf chain must be small
406  * enough to send all at once.
407  *
408  * Returns nonzero on error, timeout or signal; callers
409  * must check for short counts if EINTR/ERESTART are returned.
410  * Data and control buffers are freed on return.
411  */
int
sosend(so, addr, uio, top, control, flags, p)
	register struct socket *so;
	struct sockaddr *addr;
	struct uio *uio;
	struct mbuf *top;
	struct mbuf *control;
	int flags;
	struct proc *p;
{
	struct mbuf **mp;
	register struct mbuf *m;
	register long space, len, resid;
	int clen = 0, error, s, dontroute, mlen;
	int atomic = sosendallatonce(so) || top;

	/* Bytes to send come from the uio, or from a prepackaged chain. */
	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 *
	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
	 * type sockets since that's an error.
	 */
	if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
		error = EINVAL;
		goto out;
	}

	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	if (p)
		p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
/* Error exit used inside the splnet window below. */
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

restart:
	error = sblock(&so->so_snd, SBLOCKWAIT(flags));
	if (error)
		goto out;
	do {
		s = splnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			/*
			 * `sendto' and `sendmsg' is allowed on a connection-
			 * based socket if it supports implied connect.
			 * Return ENOTCONN if not connected and no address is
			 * supplied.
			 */
			if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
			    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
			    snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ?
				   ENOTCONN : EDESTADDRREQ);
		}
		/* Room left in the send buffer; MSG_OOB gets some slack. */
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat)
			snderr(EMSGSIZE);
		/* Not enough room: fail if non-blocking, else sleep/retry. */
		if (space < resid + clen && uio &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;
		/* Fill mbufs from the uio (or use the prepackaged chain)
		 * and hand them to the protocol. */
		do {
		    if (uio == NULL) {
			/*
			 * Data is prepackaged in "top".
			 */
			resid = 0;
			if (flags & MSG_EOR)
				top->m_flags |= M_EOR;
		    } else do {
			if (top == 0) {
				MGETHDR(m, M_WAIT, MT_DATA);
				if (m == NULL) {
					error = ENOBUFS;
					goto release;
				}
				mlen = MHLEN;
				m->m_pkthdr.len = 0;
				m->m_pkthdr.rcvif = (struct ifnet *)0;
			} else {
				MGET(m, M_WAIT, MT_DATA);
				if (m == NULL) {
					error = ENOBUFS;
					goto release;
				}
				mlen = MLEN;
			}
			if (resid >= MINCLSIZE) {
				MCLGET(m, M_WAIT);
				if ((m->m_flags & M_EXT) == 0)
					goto nopages;
				mlen = MCLBYTES;
				len = min(min(mlen, resid), space);
			} else {
nopages:
				len = min(min(mlen, resid), space);
				/*
				 * For datagram protocols, leave room
				 * for protocol headers in first mbuf.
				 */
				if (atomic && top == 0 && len < mlen)
					MH_ALIGN(m, len);
			}
			space -= len;
			error = uiomove(mtod(m, caddr_t), (int)len, uio);
			resid = uio->uio_resid;
			m->m_len = len;
			*mp = m;
			top->m_pkthdr.len += len;
			if (error)
				goto release;
			mp = &m->m_next;
			if (resid <= 0) {
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
				break;
			}
		    } while (space > 0 && atomic);
		    if (dontroute)
			    so->so_options |= SO_DONTROUTE;
		    s = splnet();				/* XXX */
		    /*
		     * XXX all the SS_CANTSENDMORE checks previously
		     * done could be out of date.  We could have received
		     * a reset packet in an interrupt or maybe we slept
		     * while doing page faults in uiomove() etc. We could
		     * probably recheck again inside the splnet() protection
		     * here, but there are probably other places that this
		     * also happens.  We must rethink this.
		     */
		    error = (*so->so_proto->pr_usrreqs->pru_send)(so,
			(flags & MSG_OOB) ? PRUS_OOB :
			/*
			 * If the user set MSG_EOF, the protocol
			 * understands this flag and nothing left to
			 * send then use PRU_SEND_EOF instead of PRU_SEND.
			 */
			((flags & MSG_EOF) &&
			 (so->so_proto->pr_flags & PR_IMPLOPCL) &&
			 (resid <= 0)) ?
				PRUS_EOF :
			/* If there is more to send set PRUS_MORETOCOME */
			(resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
			top, addr, control, p);
		    splx(s);
		    if (dontroute)
			    so->so_options &= ~SO_DONTROUTE;
		    /* Ownership of control and top passed to the protocol. */
		    clen = 0;
		    control = 0;
		    top = 0;
		    mp = &top;
		    if (error)
			goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	sbunlock(&so->so_snd);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}
610 
611 /*
612  * Implement receive operations on a socket.
613  * We depend on the way that records are added to the sockbuf
614  * by sbappend*.  In particular, each record (mbufs linked through m_next)
615  * must begin with an address if the protocol so specifies,
616  * followed by an optional mbuf or mbufs containing ancillary data,
617  * and then zero or more mbufs of data.
618  * In order to avoid blocking network interrupts for the entire time here,
619  * we splx() while doing the actual copy to user space.
620  * Although the sockbuf is locked, new data may still be appended,
621  * and thus we must maintain consistency of the sockbuf during that time.
622  *
623  * The caller may receive the data as a single mbuf chain by supplying
624  * an mbuf **mp0 for use in returning the chain.  The uio is then used
625  * only for the count in uio_resid.
626  */
int
soreceive(so, psa, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct sockaddr **psa;
	struct uio *uio;
	struct mbuf **mp0;
	struct mbuf **controlp;
	int *flagsp;
{
	register struct mbuf *m, **mp;
	register int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	int orig_resid = uio->uio_resid;

	mp = mp0;
	if (psa)
		*psa = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (flags & MSG_OOB) {
		/* Out-of-band data is fetched straight from the protocol. */
		m = m_get(M_WAIT, MT_DATA);
		if (m == NULL)
			return (ENOBUFS);
		error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreqs->pru_rcvd)(so, 0);

restart:
	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
	if (error)
		return (error);
	s = splnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark, or
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat).
	 *   3. MSG_DONTWAIT is not set
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
		KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		/* An OOB or record-terminating mbuf lets us proceed now. */
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA  || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		/* Drop the sockbuf lock while sleeping, then rescan. */
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
		/* The record begins with the sender's address. */
		KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
		orig_resid = 0;
		if (psa)
			*psa = dup_sockaddr(mtod(m, struct sockaddr *),
					    mp0 == 0);
		if (flags & MSG_PEEK) {
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			MFREE(m, so->so_rcv.sb_mb);
			m = so->so_rcv.sb_mb;
		}
	}
	/* Next comes any ancillary data (control mbufs). */
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
				   error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	moff = 0;
	offset = 0;
	/* Main copy loop: move record data out to the uio (or to *mp). */
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
		else
		    KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
			("receive 3"));
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0) {
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splnet();
			if (error)
				goto release;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			/* Consumed the whole mbuf. */
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			/* Partial mbuf: trim the consumed bytes in place. */
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			error = sbwait(&so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				splx(s);
				return (0);
			}
			m = so->so_rcv.sb_mb;
			if (m)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		/* Record-oriented protocol: drop any unread remainder. */
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreqs->pru_rcvd)(so, flags);
	}
	/* Nothing transferred and nothing terminal happened: try again. */
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}
914 
915 int
916 soshutdown(so, how)
917 	register struct socket *so;
918 	register int how;
919 {
920 	register struct protosw *pr = so->so_proto;
921 
922 	how++;
923 	if (how & FREAD)
924 		sorflush(so);
925 	if (how & FWRITE)
926 		return ((*pr->pr_usrreqs->pru_shutdown)(so));
927 	return (0);
928 }
929 
/*
 * Flush the receive side of a socket: mark it unable to receive any
 * more data, then dispose of everything queued and release the buffer
 * space.  The live sockbuf is copied aside and zeroed under splimp();
 * the old contents (including any in-flight descriptor rights, for
 * protocols with PR_RIGHTS) are torn down from the local copy.
 */
void
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	register int s;
	struct sockbuf asb;

	/* Lock uninterruptibly; the flush must not be aborted by signals. */
	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splimp();
	socantrcvmore(so);
	sbunlock(sb);
	asb = *sb;
	bzero((caddr_t)sb, sizeof (*sb));
	splx(s);
	/* Let the domain reclaim passed descriptors before freeing. */
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb, so);
}
951 
952 /*
953  * Perhaps this routine, and sooptcopyout(), below, ought to come in
954  * an additional variant to handle the case where the option value needs
955  * to be some kind of integer, but not a specific size.
956  * In addition to their use here, these functions are also called by the
957  * protocol-level pr_ctloutput() routines.
958  */
959 int
960 sooptcopyin(sopt, buf, len, minlen)
961 	struct	sockopt *sopt;
962 	void	*buf;
963 	size_t	len;
964 	size_t	minlen;
965 {
966 	size_t	valsize;
967 
968 	/*
969 	 * If the user gives us more than we wanted, we ignore it,
970 	 * but if we don't get the minimum length the caller
971 	 * wants, we return EINVAL.  On success, sopt->sopt_valsize
972 	 * is set to however much we actually retrieved.
973 	 */
974 	if ((valsize = sopt->sopt_valsize) < minlen)
975 		return EINVAL;
976 	if (valsize > len)
977 		sopt->sopt_valsize = valsize = len;
978 
979 	if (sopt->sopt_p != 0)
980 		return (copyin(sopt->sopt_val, buf, valsize));
981 
982 	bcopy(sopt->sopt_val, buf, valsize);
983 	return 0;
984 }
985 
/*
 * Set a socket option.  Options at a level other than SOL_SOCKET are
 * passed straight through to the protocol's ctloutput routine.
 * Socket-level options are handled here and, on success, also offered
 * to the protocol so it can take note of them.
 */
int
sosetopt(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	int	error, optval;
	struct	linger l;
	struct	timeval tv;
	u_long  val;

	error = 0;
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
				  (so, sopt));
		error = ENOPROTOOPT;
	} else {
		switch (sopt->sopt_name) {
		case SO_LINGER:
			error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
			if (error)
				goto bad;

			so->so_linger = l.l_linger;
			if (l.l_onoff)
				so->so_options |= SO_LINGER;
			else
				so->so_options &= ~SO_LINGER;
			break;

		/* Simple boolean options map directly onto so_options bits. */
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;
			if (optval)
				so->so_options |= sopt->sopt_name;
			else
				so->so_options &= ~sopt->sopt_name;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (sopt->sopt_name) {
			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
				    &so->so_snd : &so->so_rcv, (u_long)optval,
				    so, curproc) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (optval > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : optval;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (optval > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : optval;
				break;
			}
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			error = sooptcopyin(sopt, &tv, sizeof tv,
					    sizeof tv);
			if (error)
				goto bad;

			/* Reject timeouts that would overflow the tick count.
			 * assert(hz > 0); */
			if (tv.tv_sec < 0 || tv.tv_sec > SHRT_MAX / hz ||
			    tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
				error = EDOM;
				goto bad;
			}
			/* assert(tick > 0); */
			/* assert(ULONG_MAX - SHRT_MAX >= 1000000); */
			val = (u_long)(tv.tv_sec * hz) + tv.tv_usec / tick;
			if (val > SHRT_MAX) {
				error = EDOM;
				goto bad;
			}

			switch (sopt->sopt_name) {
			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		/* Give the protocol a chance to act on the new setting. */
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
				  (so, sopt));
		}
	}
bad:
	return (error);
}
1124 
1125 /* Helper routine for getsockopt */
1126 int
1127 sooptcopyout(sopt, buf, len)
1128 	struct	sockopt *sopt;
1129 	void	*buf;
1130 	size_t	len;
1131 {
1132 	int	error;
1133 	size_t	valsize;
1134 
1135 	error = 0;
1136 
1137 	/*
1138 	 * Documented get behavior is that we always return a value,
1139 	 * possibly truncated to fit in the user's buffer.
1140 	 * Traditional behavior is that we always tell the user
1141 	 * precisely how much we copied, rather than something useful
1142 	 * like the total amount we had available for her.
1143 	 * Note that this interface is not idempotent; the entire answer must
1144 	 * generated ahead of time.
1145 	 */
1146 	valsize = min(len, sopt->sopt_valsize);
1147 	sopt->sopt_valsize = valsize;
1148 	if (sopt->sopt_val != 0) {
1149 		if (sopt->sopt_p != 0)
1150 			error = copyout(buf, sopt->sopt_val, valsize);
1151 		else
1152 			bcopy(buf, sopt->sopt_val, valsize);
1153 	}
1154 	return error;
1155 }
1156 
/*
 * Handle getsockopt(2) at the SOL_SOCKET level; non-SOL_SOCKET levels
 * are passed straight through to the protocol's ctloutput routine.
 * Results are copied back to the caller via sooptcopyout().
 */
int
sogetopt(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	int	error, optval;
	struct	linger l;
	struct	timeval tv;

	error = 0;
	if (sopt->sopt_level != SOL_SOCKET) {
		/* Not a socket-level option: let the protocol handle it. */
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
				  (so, sopt));
		} else
			return (ENOPROTOOPT);
	} else {
		switch (sopt->sopt_name) {
		case SO_LINGER:
			l.l_onoff = so->so_options & SO_LINGER;
			l.l_linger = so->so_linger;
			error = sooptcopyout(sopt, &l, sizeof l);
			break;

		/* Boolean flag options: return the option bit itself. */
		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			optval = so->so_options & sopt->sopt_name;
integer:
			/* Common exit: copy optval out as an int. */
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;

		case SO_TYPE:
			optval = so->so_type;
			goto integer;

		case SO_ERROR:
			/* Reading the pending error clears it. */
			optval = so->so_error;
			so->so_error = 0;
			goto integer;

		case SO_SNDBUF:
			optval = so->so_snd.sb_hiwat;
			goto integer;

		case SO_RCVBUF:
			optval = so->so_rcv.sb_hiwat;
			goto integer;

		case SO_SNDLOWAT:
			optval = so->so_snd.sb_lowat;
			goto integer;

		case SO_RCVLOWAT:
			optval = so->so_rcv.sb_lowat;
			goto integer;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			optval = (sopt->sopt_name == SO_SNDTIMEO ?
				  so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			/* Timeouts are kept in ticks; convert to a timeval. */
			tv.tv_sec = optval / hz;
			tv.tv_usec = (optval % hz) * tick;
			error = sooptcopyout(sopt, &tv, sizeof tv);
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		return (error);
	}
}
1237 
1238 /* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
1239 int
1240 soopt_getm(struct sockopt *sopt, struct mbuf **mp)
1241 {
1242 	struct mbuf *m, *m_prev;
1243 	int sopt_size = sopt->sopt_valsize;
1244 
1245 	MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
1246 	if (m == 0)
1247 		return ENOBUFS;
1248 	if (sopt_size > MLEN) {
1249 		MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
1250 		if ((m->m_flags & M_EXT) == 0) {
1251 			m_free(m);
1252 			return ENOBUFS;
1253 		}
1254 		m->m_len = min(MCLBYTES, sopt_size);
1255 	} else {
1256 		m->m_len = min(MLEN, sopt_size);
1257 	}
1258 	sopt_size -= m->m_len;
1259 	*mp = m;
1260 	m_prev = m;
1261 
1262 	while (sopt_size) {
1263 		MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
1264 		if (m == 0) {
1265 			m_freem(*mp);
1266 			return ENOBUFS;
1267 		}
1268 		if (sopt_size > MLEN) {
1269 			MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
1270 			if ((m->m_flags & M_EXT) == 0) {
1271 				m_freem(*mp);
1272 				return ENOBUFS;
1273 			}
1274 			m->m_len = min(MCLBYTES, sopt_size);
1275 		} else {
1276 			m->m_len = min(MLEN, sopt_size);
1277 		}
1278 		sopt_size -= m->m_len;
1279 		m_prev->m_next = m;
1280 		m_prev = m;
1281 	}
1282 	return 0;
1283 }
1284 
1285 /* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
1286 int
1287 soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
1288 {
1289 	struct mbuf *m0 = m;
1290 
1291 	if (sopt->sopt_val == NULL)
1292 		return 0;
1293 	while (m != NULL && sopt->sopt_valsize >= m->m_len) {
1294 		if (sopt->sopt_p != NULL) {
1295 			int error;
1296 
1297 			error = copyin(sopt->sopt_val, mtod(m, char *),
1298 				       m->m_len);
1299 			if (error != 0) {
1300 				m_freem(m0);
1301 				return(error);
1302 			}
1303 		} else
1304 			bcopy(sopt->sopt_val, mtod(m, char *), m->m_len);
1305 		sopt->sopt_valsize -= m->m_len;
1306 		(caddr_t)sopt->sopt_val += m->m_len;
1307 		m = m->m_next;
1308 	}
1309 	if (m != NULL) /* should be allocated enoughly at ip6_sooptmcopyin() */
1310 		panic("ip6_sooptmcopyin");
1311 	return 0;
1312 }
1313 
1314 /* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
1315 int
1316 soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
1317 {
1318 	struct mbuf *m0 = m;
1319 	size_t valsize = 0;
1320 
1321 	if (sopt->sopt_val == NULL)
1322 		return 0;
1323 	while (m != NULL && sopt->sopt_valsize >= m->m_len) {
1324 		if (sopt->sopt_p != NULL) {
1325 			int error;
1326 
1327 			error = copyout(mtod(m, char *), sopt->sopt_val,
1328 				       m->m_len);
1329 			if (error != 0) {
1330 				m_freem(m0);
1331 				return(error);
1332 			}
1333 		} else
1334 			bcopy(mtod(m, char *), sopt->sopt_val, m->m_len);
1335 	       sopt->sopt_valsize -= m->m_len;
1336 	       (caddr_t)sopt->sopt_val += m->m_len;
1337 	       valsize += m->m_len;
1338 	       m = m->m_next;
1339 	}
1340 	if (m != NULL) {
1341 		/* enough soopt buffer should be given from user-land */
1342 		m_freem(m0);
1343 		return(EINVAL);
1344 	}
1345 	sopt->sopt_valsize = valsize;
1346 	return 0;
1347 }
1348 
1349 void
1350 sohasoutofband(so)
1351 	register struct socket *so;
1352 {
1353 	if (so->so_sigio != NULL)
1354 		pgsigio(so->so_sigio, SIGURG, 0);
1355 	selwakeup(&so->so_rcv.sb_sel);
1356 }
1357 
1358 int
1359 sopoll(struct socket *so, int events, struct ucred *cred, struct proc *p)
1360 {
1361 	int revents = 0;
1362 	int s = splnet();
1363 
1364 	if (events & (POLLIN | POLLRDNORM))
1365 		if (soreadable(so))
1366 			revents |= events & (POLLIN | POLLRDNORM);
1367 
1368 	if (events & (POLLOUT | POLLWRNORM))
1369 		if (sowriteable(so))
1370 			revents |= events & (POLLOUT | POLLWRNORM);
1371 
1372 	if (events & (POLLPRI | POLLRDBAND))
1373 		if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
1374 			revents |= events & (POLLPRI | POLLRDBAND);
1375 
1376 	if (revents == 0) {
1377 		if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
1378 			selrecord(p, &so->so_rcv.sb_sel);
1379 			so->so_rcv.sb_flags |= SB_SEL;
1380 		}
1381 
1382 		if (events & (POLLOUT | POLLWRNORM)) {
1383 			selrecord(p, &so->so_snd.sb_sel);
1384 			so->so_snd.sb_flags |= SB_SEL;
1385 		}
1386 	}
1387 
1388 	splx(s);
1389 	return (revents);
1390 }
1391