xref: /titanic_51/usr/src/uts/common/fs/sockfs/sockstr.c (revision bdfc6d18da790deeec2e0eb09c625902defe2498)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/inttypes.h>
31 #include <sys/t_lock.h>
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/buf.h>
35 #include <sys/conf.h>
36 #include <sys/cred.h>
37 #include <sys/kmem.h>
38 #include <sys/sysmacros.h>
39 #include <sys/vfs.h>
40 #include <sys/vnode.h>
41 #include <sys/debug.h>
42 #include <sys/errno.h>
43 #include <sys/time.h>
44 #include <sys/file.h>
45 #include <sys/user.h>
46 #include <sys/stream.h>
47 #include <sys/strsubr.h>
48 #include <sys/esunddi.h>
49 #include <sys/flock.h>
50 #include <sys/modctl.h>
51 #include <sys/vtrace.h>
52 #include <sys/strsun.h>
53 #include <sys/cmn_err.h>
54 #include <sys/proc.h>
55 #include <sys/ddi.h>
56 #include <sys/kmem_impl.h>
57 
58 #include <sys/suntpi.h>
59 #include <sys/socket.h>
60 #include <sys/sockio.h>
61 #include <sys/socketvar.h>
62 #include <netinet/in.h>
63 
64 #include <sys/tiuser.h>
65 #define	_SUN_TPI_VERSION	2
66 #include <sys/tihdr.h>
67 
68 #include <c2/audit.h>
69 
/*
 * Default socket version. SOV_SOCKSTREAM is also the version that
 * so_stream2sock() restores when sockmod is pushed back on a stream.
 * NOTE(review): where this default is consumed is not visible in this
 * part of the file -- presumably at socket creation; confirm.
 */
int so_default_version = SOV_SOCKSTREAM;

#ifdef DEBUG
/* Set sockdebug to print debug messages when SO_DEBUG is set */
int sockdebug = 0;

/* Set sockprinterr to print error messages when SO_DEBUG is set */
int sockprinterr = 0;

/*
 * Set so_default_options to SO_DEBUG if all sockets should be created
 * with SO_DEBUG set. This is needed to get debug printouts from the
 * socket() call itself. so_strinit() copies this value into so_options.
 */
int so_default_options = 0;
#endif /* DEBUG */

#ifdef SOCK_TEST
/*
 * Set to number of ticks to limit cv_waits for code coverage testing.
 * Set to 1000 when SO_DEBUG is set to 2.
 * sowaitack() substitutes this value when it is called without a timeout.
 */
clock_t sock_test_timelimit = 0;
#endif /* SOCK_TEST */

/*
 * For concurrency testing of e.g. opening /dev/ip which does not
 * handle T_INFO_REQ messages.
 * When nonzero, do_tinfo() and do_tcapability() skip the info request
 * and set so_addr_size to 0 instead.
 */
int so_no_tinfo = 0;

/*
 * Timeout for getting a T_CAPABILITY_ACK - it is possible for a provider
 * to simply ignore the T_CAPABILITY_REQ.
 * do_tcapability() multiplies this by hz to obtain the wait in ticks.
 */
clock_t	sock_capability_timeout	= 2;	/* seconds */

/*
 * Forward declarations for routines that are referenced before they
 * are defined: so_strinit() calls do_tcapability() and so_sock2stream()
 * calls so_removehooks().
 */
static int	do_tcapability(struct sonode *so, t_uscalar_t cap_bits1);
static void	so_removehooks(struct sonode *so);

/*
 * Read-side stream head hooks; so_installhooks() registers both with
 * strsetrputhooks().
 */
static mblk_t *strsock_proto(vnode_t *vp, mblk_t *mp,
		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
		strsigset_t *allmsgsigs, strpollset_t *pollwakeups);
static mblk_t *strsock_misc(vnode_t *vp, mblk_t *mp,
		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
		strsigset_t *allmsgsigs, strpollset_t *pollwakeups);

/*
 * Maps a TLI error code to an errno value; sowaitprim() uses it for
 * T_ERROR_ACKs whose TLI_error is not TSYSERR.
 */
static int tlitosyserr(int terr);
118 
119 /*
120  * Convert a socket to a stream. Invoked when the illusory sockmod
121  * is popped from the stream.
122  * Change the stream head back to default operation without losing
123  * any messages (T_conn_ind's are moved to the stream head queue).
124  */
125 int
126 so_sock2stream(struct sonode *so)
127 {
128 	struct vnode		*vp = SOTOV(so);
129 	queue_t			*rq;
130 	mblk_t			*mp;
131 	int			error = 0;
132 
133 	ASSERT(MUTEX_HELD(&so->so_plumb_lock));
134 
135 	mutex_enter(&so->so_lock);
136 	so_lock_single(so);
137 
138 	ASSERT(so->so_version != SOV_STREAM);
139 
140 	/* tell the transport below that sockmod is being popped */
141 	if ((so->so_state & SS_TCP_FAST_ACCEPT) != 0) {
142 		int	rval;
143 		mblk_t	**mpp;
144 
145 		mutex_exit(&so->so_lock);
146 		error = strioctl(vp, SIOCPOPSOCKFS, NULL, 0, K_TO_K, CRED(),
147 		    &rval);
148 		mutex_enter(&so->so_lock);
149 		if (error != 0) {
150 			dprintso(so, 0,
151 			    ("so_sock2stream(%p): SIOCPOPSOCKFS failed\n", so));
152 			goto exit;
153 		}
154 		so->so_state &= ~SS_TCP_FAST_ACCEPT;
155 
156 		for (mpp = &so->so_conn_ind_head; (mp = *mpp) != NULL;
157 		    mpp = &mp->b_next) {
158 			struct T_conn_ind	*conn_ind;
159 
160 			/*
161 			 * strsock_proto() has already verified the length of
162 			 * this message block.
163 			 */
164 			ASSERT(MBLKL(mp) >= sizeof (struct T_conn_ind));
165 
166 			conn_ind = (struct T_conn_ind *)mp->b_rptr;
167 			if (conn_ind->OPT_length == 0 &&
168 			    conn_ind->OPT_offset == 0)
169 				continue;
170 
171 			if (DB_REF(mp) > 1) {
172 				mblk_t	*newmp;
173 				size_t	length;
174 				cred_t	*cr;
175 
176 				/*
177 				 * Copy the message block because it is used
178 				 * elsewhere, too.
179 				 */
180 				length = MBLKL(mp);
181 				newmp = soallocproto(length, _ALLOC_INTR);
182 				if (newmp == NULL) {
183 					error = EINTR;
184 					goto exit;
185 				}
186 				bcopy(mp->b_rptr, newmp->b_wptr, length);
187 				newmp->b_wptr += length;
188 				newmp->b_next = mp->b_next;
189 				cr = DB_CRED(mp);
190 				if (cr != NULL)
191 					mblk_setcred(newmp, cr);
192 				DB_CPID(newmp) = DB_CPID(mp);
193 
194 				/*
195 				 * Link the new message block into the queue
196 				 * and free the old one.
197 				 */
198 				*mpp = newmp;
199 				mp->b_next = NULL;
200 				freemsg(mp);
201 
202 				mp = newmp;
203 				conn_ind = (struct T_conn_ind *)mp->b_rptr;
204 			}
205 
206 			/*
207 			 * Remove options added by TCP for accept fast-path.
208 			 */
209 			conn_ind->OPT_length = 0;
210 			conn_ind->OPT_offset = 0;
211 		}
212 	}
213 
214 	so->so_version = SOV_STREAM;
215 	so->so_priv = NULL;
216 
217 	/*
218 	 * Remove the hooks in the stream head to avoid queuing more
219 	 * packets in sockfs.
220 	 */
221 	mutex_exit(&so->so_lock);
222 	so_removehooks(so);
223 	mutex_enter(&so->so_lock);
224 
225 	/*
226 	 * Clear any state related to urgent data. Leave any T_EXDATA_IND
227 	 * on the queue - the behavior of urgent data after a switch is
228 	 * left undefined.
229 	 */
230 	so->so_error = so->so_delayed_error = 0;
231 	freemsg(so->so_oobmsg);
232 	so->so_oobmsg = NULL;
233 	so->so_oobsigcnt = so->so_oobcnt = 0;
234 
235 	so->so_state &= ~(SS_RCVATMARK|SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA|
236 	    SS_HASCONNIND|SS_SAVEDEOR);
237 	ASSERT(so_verify_oobstate(so));
238 
239 	freemsg(so->so_ack_mp);
240 	so->so_ack_mp = NULL;
241 
242 	/*
243 	 * Flush the T_DISCON_IND on so_discon_ind_mp.
244 	 */
245 	so_flush_discon_ind(so);
246 
247 	/*
248 	 * Move any queued T_CONN_IND messages to stream head queue.
249 	 */
250 	rq = RD(strvp2wq(vp));
251 	while ((mp = so->so_conn_ind_head) != NULL) {
252 		so->so_conn_ind_head = mp->b_next;
253 		mp->b_next = NULL;
254 		if (so->so_conn_ind_head == NULL) {
255 			ASSERT(so->so_conn_ind_tail == mp);
256 			so->so_conn_ind_tail = NULL;
257 		}
258 		dprintso(so, 0,
259 			("so_sock2stream(%p): moving T_CONN_IND\n",
260 			so));
261 
262 		/* Drop lock across put() */
263 		mutex_exit(&so->so_lock);
264 		put(rq, mp);
265 		mutex_enter(&so->so_lock);
266 	}
267 
268 exit:
269 	ASSERT(MUTEX_HELD(&so->so_lock));
270 	so_unlock_single(so, SOLOCKED);
271 	mutex_exit(&so->so_lock);
272 	return (error);
273 }
274 
275 /*
276  * Covert a stream back to a socket. This is invoked when the illusory
277  * sockmod is pushed on a stream (where the stream was "created" by
278  * popping the illusory sockmod).
279  * This routine can not recreate the socket state (certain aspects of
280  * it like urgent data state and the bound/connected addresses for AF_UNIX
281  * sockets can not be recreated by asking the transport for information).
282  * Thus this routine implicitly assumes that the socket is in an initial
283  * state (as if it was just created). It flushes any messages queued on the
284  * read queue to avoid dealing with e.g. TPI acks or T_exdata_ind messages.
285  */
286 void
287 so_stream2sock(struct sonode *so)
288 {
289 	struct vnode *vp = SOTOV(so);
290 
291 	ASSERT(MUTEX_HELD(&so->so_plumb_lock));
292 
293 	mutex_enter(&so->so_lock);
294 	so_lock_single(so);
295 	ASSERT(so->so_version == SOV_STREAM);
296 	so->so_version = SOV_SOCKSTREAM;
297 	so->so_pushcnt = 0;
298 	mutex_exit(&so->so_lock);
299 
300 	/*
301 	 * Set a permenent error to force any thread in sorecvmsg to
302 	 * return (and drop SOREADLOCKED). Clear the error once
303 	 * we have SOREADLOCKED.
304 	 * This makes a read sleeping during the I_PUSH of sockmod return
305 	 * EIO.
306 	 */
307 	strsetrerror(SOTOV(so), EIO, 1, NULL);
308 
309 	/*
310 	 * Get the read lock before flushing data to avoid
311 	 * problems with the T_EXDATA_IND MSG_PEEK code in sorecvmsg.
312 	 */
313 	mutex_enter(&so->so_lock);
314 	(void) so_lock_read(so, 0);	/* Set SOREADLOCKED */
315 	mutex_exit(&so->so_lock);
316 
317 	strsetrerror(SOTOV(so), 0, 0, NULL);
318 	so_installhooks(so);
319 
320 	/*
321 	 * Flush everything on the read queue.
322 	 * This ensures that no T_CONN_IND remain and that no T_EXDATA_IND
323 	 * remain; those types of messages would confuse sockfs.
324 	 */
325 	strflushrq(vp, FLUSHALL);
326 	mutex_enter(&so->so_lock);
327 
328 	/*
329 	 * Flush the T_DISCON_IND on so_discon_ind_mp.
330 	 */
331 	so_flush_discon_ind(so);
332 	so_unlock_read(so);	/* Clear SOREADLOCKED */
333 
334 	so_unlock_single(so, SOLOCKED);
335 	mutex_exit(&so->so_lock);
336 }
337 
338 /*
339  * Install the hooks in the stream head.
340  */
341 void
342 so_installhooks(struct sonode *so)
343 {
344 	struct vnode *vp = SOTOV(so);
345 
346 	strsetrputhooks(vp, SH_SIGALLDATA | SH_IGN_ZEROLEN | SH_CONSOL_DATA,
347 	    strsock_proto, strsock_misc);
348 	strsetwputhooks(vp, SH_SIGPIPE | SH_RECHECK_ERR, 0);
349 }
350 
351 /*
352  * Remove the hooks in the stream head.
353  */
354 static void
355 so_removehooks(struct sonode *so)
356 {
357 	struct vnode *vp = SOTOV(so);
358 
359 	strsetrputhooks(vp, 0, NULL, NULL);
360 	strsetwputhooks(vp, 0, STRTIMOUT);
361 	/*
362 	 * Leave read behavior as it would have been for a normal
363 	 * stream i.e. a read of an M_PROTO will fail.
364 	 */
365 }
366 
367 /*
368  * Initialize the streams side of a socket including
369  * T_info_req/ack processing. If tso is not NULL its values are used thereby
370  * avoiding the T_INFO_REQ.
371  */
372 int
373 so_strinit(struct sonode *so, struct sonode *tso)
374 {
375 	struct vnode *vp = SOTOV(so);
376 	struct stdata *stp;
377 	mblk_t *mp;
378 	int error;
379 
380 	dprintso(so, 1, ("so_strinit(%p)\n", so));
381 
382 	/* Preallocate an unbind_req message */
383 	mp = soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP);
384 	mutex_enter(&so->so_lock);
385 	so->so_unbind_mp = mp;
386 #ifdef DEBUG
387 	so->so_options = so_default_options;
388 #endif /* DEBUG */
389 	mutex_exit(&so->so_lock);
390 
391 	so_installhooks(so);
392 
393 	/*
394 	 * The T_CAPABILITY_REQ should be the first message sent down because
395 	 * at least TCP has a fast-path for this which avoids timeouts while
396 	 * waiting for the T_CAPABILITY_ACK under high system load.
397 	 */
398 	if (tso == NULL) {
399 		error = do_tcapability(so, TC1_ACCEPTOR_ID | TC1_INFO);
400 		if (error)
401 			return (error);
402 	} else {
403 		mutex_enter(&so->so_lock);
404 		so->so_tsdu_size = tso->so_tsdu_size;
405 		so->so_etsdu_size = tso->so_etsdu_size;
406 		so->so_addr_size = tso->so_addr_size;
407 		so->so_opt_size = tso->so_opt_size;
408 		so->so_tidu_size = tso->so_tidu_size;
409 		so->so_serv_type = tso->so_serv_type;
410 		so->so_mode = tso->so_mode & ~SM_ACCEPTOR_ID;
411 		mutex_exit(&so->so_lock);
412 
413 		/* the following do_tcapability may update so->so_mode */
414 		if ((tso->so_serv_type != T_CLTS) &&
415 		    ((so->so_state & SS_TCP_FAST_ACCEPT) == 0)) {
416 			error = do_tcapability(so, TC1_ACCEPTOR_ID);
417 			if (error)
418 				return (error);
419 		}
420 	}
421 	/*
422 	 * If the addr_size is 0 we treat it as already bound
423 	 * and connected. This is used by the routing socket.
424 	 * We set the addr_size to something to allocate a the address
425 	 * structures.
426 	 */
427 	if (so->so_addr_size == 0) {
428 		so->so_state |= SS_ISBOUND | SS_ISCONNECTED;
429 		/* Address size can vary with address families. */
430 		if (so->so_family == AF_INET6)
431 			so->so_addr_size =
432 			    (t_scalar_t)sizeof (struct sockaddr_in6);
433 		else
434 			so->so_addr_size =
435 			    (t_scalar_t)sizeof (struct sockaddr_in);
436 		ASSERT(so->so_unbind_mp);
437 	}
438 	/*
439 	 * Allocate the addresses.
440 	 */
441 	ASSERT(so->so_laddr_sa == NULL && so->so_faddr_sa == NULL);
442 	ASSERT(so->so_laddr_len == 0 && so->so_faddr_len == 0);
443 	so->so_laddr_maxlen = so->so_faddr_maxlen =
444 		    P2ROUNDUP(so->so_addr_size, KMEM_ALIGN);
445 	so->so_laddr_sa = kmem_alloc(so->so_laddr_maxlen * 2, KM_SLEEP);
446 	so->so_faddr_sa = (struct sockaddr *)((caddr_t)so->so_laddr_sa
447 		    + so->so_laddr_maxlen);
448 
449 	if (so->so_family == AF_UNIX) {
450 		/*
451 		 * Initialize AF_UNIX related fields.
452 		 */
453 		bzero(&so->so_ux_laddr, sizeof (so->so_ux_laddr));
454 		bzero(&so->so_ux_faddr, sizeof (so->so_ux_faddr));
455 	}
456 
457 	stp = vp->v_stream;
458 	/*
459 	 * Have to keep minpsz at zero in order to allow write/send of zero
460 	 * bytes.
461 	 */
462 	mutex_enter(&stp->sd_lock);
463 	if (stp->sd_qn_minpsz == 1)
464 		stp->sd_qn_minpsz = 0;
465 	mutex_exit(&stp->sd_lock);
466 
467 	return (0);
468 }
469 
470 static void
471 copy_tinfo(struct sonode *so, struct T_info_ack *tia)
472 {
473 	so->so_tsdu_size = tia->TSDU_size;
474 	so->so_etsdu_size = tia->ETSDU_size;
475 	so->so_addr_size = tia->ADDR_size;
476 	so->so_opt_size = tia->OPT_size;
477 	so->so_tidu_size = tia->TIDU_size;
478 	so->so_serv_type = tia->SERV_type;
479 	switch (tia->CURRENT_state) {
480 	case TS_UNBND:
481 		break;
482 	case TS_IDLE:
483 		so->so_state |= SS_ISBOUND;
484 		so->so_laddr_len = 0;
485 		so->so_state &= ~SS_LADDR_VALID;
486 		break;
487 	case TS_DATA_XFER:
488 		so->so_state |= SS_ISBOUND|SS_ISCONNECTED;
489 		so->so_laddr_len = 0;
490 		so->so_faddr_len = 0;
491 		so->so_state &= ~(SS_LADDR_VALID | SS_FADDR_VALID);
492 		break;
493 	}
494 
495 	/*
496 	 * Heuristics for determining the socket mode flags
497 	 * (SM_ATOMIC, SM_CONNREQUIRED, SM_ADDR, SM_FDPASSING,
498 	 * and SM_EXDATA, SM_OPTDATA, and SM_BYTESTREAM)
499 	 * from the info ack.
500 	 */
501 	if (so->so_serv_type == T_CLTS) {
502 		so->so_mode |= SM_ATOMIC | SM_ADDR;
503 	} else {
504 		so->so_mode |= SM_CONNREQUIRED;
505 		if (so->so_etsdu_size != 0 && so->so_etsdu_size != -2)
506 			so->so_mode |= SM_EXDATA;
507 	}
508 	if (so->so_type == SOCK_SEQPACKET || so->so_type == SOCK_RAW) {
509 		/* Semantics are to discard tail end of messages */
510 		so->so_mode |= SM_ATOMIC;
511 	}
512 	if (so->so_family == AF_UNIX) {
513 		so->so_mode |= SM_FDPASSING | SM_OPTDATA;
514 		if (so->so_addr_size == -1) {
515 			/* MAXPATHLEN + soun_family + nul termination */
516 			so->so_addr_size = (t_scalar_t)(MAXPATHLEN +
517 				sizeof (short) + 1);
518 		}
519 		if (so->so_type == SOCK_STREAM) {
520 			/*
521 			 * Make it into a byte-stream transport.
522 			 * SOCK_SEQPACKET sockets are unchanged.
523 			 */
524 			so->so_tsdu_size = 0;
525 		}
526 	} else if (so->so_addr_size == -1) {
527 		/*
528 		 * Logic extracted from sockmod - have to pick some max address
529 		 * length in order to preallocate the addresses.
530 		 */
531 		so->so_addr_size = SOA_DEFSIZE;
532 	}
533 	if (so->so_tsdu_size == 0)
534 		so->so_mode |= SM_BYTESTREAM;
535 }
536 
537 static int
538 check_tinfo(struct sonode *so)
539 {
540 	/* Consistency checks */
541 	if (so->so_type == SOCK_DGRAM && so->so_serv_type != T_CLTS) {
542 		eprintso(so, ("service type and socket type mismatch\n"));
543 		eprintsoline(so, EPROTO);
544 		return (EPROTO);
545 	}
546 	if (so->so_type == SOCK_STREAM && so->so_serv_type == T_CLTS) {
547 		eprintso(so, ("service type and socket type mismatch\n"));
548 		eprintsoline(so, EPROTO);
549 		return (EPROTO);
550 	}
551 	if (so->so_type == SOCK_SEQPACKET && so->so_serv_type == T_CLTS) {
552 		eprintso(so, ("service type and socket type mismatch\n"));
553 		eprintsoline(so, EPROTO);
554 		return (EPROTO);
555 	}
556 	if (so->so_family == AF_INET &&
557 	    so->so_addr_size != (t_scalar_t)sizeof (struct sockaddr_in)) {
558 		eprintso(so,
559 		    ("AF_INET must have sockaddr_in address length. Got %d\n",
560 		    so->so_addr_size));
561 		eprintsoline(so, EMSGSIZE);
562 		return (EMSGSIZE);
563 	}
564 	if (so->so_family == AF_INET6 &&
565 	    so->so_addr_size != (t_scalar_t)sizeof (struct sockaddr_in6)) {
566 		eprintso(so,
567 		    ("AF_INET6 must have sockaddr_in6 address length. Got %d\n",
568 		    so->so_addr_size));
569 		eprintsoline(so, EMSGSIZE);
570 		return (EMSGSIZE);
571 	}
572 
573 	dprintso(so, 1, (
574 	    "tinfo: serv %d tsdu %d, etsdu %d, addr %d, opt %d, tidu %d\n",
575 	    so->so_serv_type, so->so_tsdu_size, so->so_etsdu_size,
576 	    so->so_addr_size, so->so_opt_size,
577 	    so->so_tidu_size));
578 	dprintso(so, 1, ("tinfo: so_state %s\n",
579 			pr_state(so->so_state, so->so_mode)));
580 	return (0);
581 }
582 
583 /*
584  * Send down T_info_req and wait for the ack.
585  * Record interesting T_info_ack values in the sonode.
586  */
587 static int
588 do_tinfo(struct sonode *so)
589 {
590 	struct T_info_req tir;
591 	mblk_t *mp;
592 	int error;
593 
594 	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
595 
596 	if (so_no_tinfo) {
597 		so->so_addr_size = 0;
598 		return (0);
599 	}
600 
601 	dprintso(so, 1, ("do_tinfo(%p)\n", so));
602 
603 	/* Send T_INFO_REQ */
604 	tir.PRIM_type = T_INFO_REQ;
605 	mp = soallocproto1(&tir, sizeof (tir),
606 	    sizeof (struct T_info_req) + sizeof (struct T_info_ack),
607 	    _ALLOC_INTR);
608 	if (mp == NULL) {
609 		eprintsoline(so, ENOBUFS);
610 		return (ENOBUFS);
611 	}
612 	/* T_INFO_REQ has to be M_PCPROTO */
613 	DB_TYPE(mp) = M_PCPROTO;
614 
615 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
616 			MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
617 	if (error) {
618 		eprintsoline(so, error);
619 		return (error);
620 	}
621 	mutex_enter(&so->so_lock);
622 	/* Wait for T_INFO_ACK */
623 	if ((error = sowaitprim(so, T_INFO_REQ, T_INFO_ACK,
624 	    (t_uscalar_t)sizeof (struct T_info_ack), &mp, 0))) {
625 		mutex_exit(&so->so_lock);
626 		eprintsoline(so, error);
627 		return (error);
628 	}
629 
630 	ASSERT(mp);
631 	copy_tinfo(so, (struct T_info_ack *)mp->b_rptr);
632 	mutex_exit(&so->so_lock);
633 	freemsg(mp);
634 	return (check_tinfo(so));
635 }
636 
637 /*
638  * Send down T_capability_req and wait for the ack.
639  * Record interesting T_capability_ack values in the sonode.
640  */
641 static int
642 do_tcapability(struct sonode *so, t_uscalar_t cap_bits1)
643 {
644 	struct T_capability_req tcr;
645 	struct T_capability_ack *tca;
646 	mblk_t *mp;
647 	int error;
648 
649 	ASSERT(cap_bits1 != 0);
650 	ASSERT((cap_bits1 & ~(TC1_ACCEPTOR_ID | TC1_INFO)) == 0);
651 	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
652 
653 	if (so->so_provinfo->tpi_capability == PI_NO)
654 		return (do_tinfo(so));
655 
656 	if (so_no_tinfo) {
657 		so->so_addr_size = 0;
658 		if ((cap_bits1 &= ~TC1_INFO) == 0)
659 			return (0);
660 	}
661 
662 	dprintso(so, 1, ("do_tcapability(%p)\n", so));
663 
664 	/* Send T_CAPABILITY_REQ */
665 	tcr.PRIM_type = T_CAPABILITY_REQ;
666 	tcr.CAP_bits1 = cap_bits1;
667 	mp = soallocproto1(&tcr, sizeof (tcr),
668 	    sizeof (struct T_capability_req) + sizeof (struct T_capability_ack),
669 	    _ALLOC_INTR);
670 	if (mp == NULL) {
671 		eprintsoline(so, ENOBUFS);
672 		return (ENOBUFS);
673 	}
674 	/* T_CAPABILITY_REQ should be M_PCPROTO here */
675 	DB_TYPE(mp) = M_PCPROTO;
676 
677 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
678 	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
679 	if (error) {
680 		eprintsoline(so, error);
681 		return (error);
682 	}
683 	mutex_enter(&so->so_lock);
684 	/* Wait for T_CAPABILITY_ACK */
685 	if ((error = sowaitprim(so, T_CAPABILITY_REQ, T_CAPABILITY_ACK,
686 	    (t_uscalar_t)sizeof (*tca), &mp, sock_capability_timeout * hz))) {
687 		mutex_exit(&so->so_lock);
688 		PI_PROVLOCK(so->so_provinfo);
689 		if (so->so_provinfo->tpi_capability == PI_DONTKNOW)
690 			so->so_provinfo->tpi_capability = PI_NO;
691 		PI_PROVUNLOCK(so->so_provinfo);
692 		ASSERT((so->so_mode & SM_ACCEPTOR_ID) == 0);
693 		if (cap_bits1 & TC1_INFO) {
694 			/*
695 			 * If the T_CAPABILITY_REQ timed out and then a
696 			 * T_INFO_REQ gets a protocol error, most likely
697 			 * the capability was slow (vs. unsupported). Return
698 			 * ENOSR for this case as a best guess.
699 			 */
700 			if (error == ETIME) {
701 				return ((error = do_tinfo(so)) == EPROTO ?
702 				    ENOSR : error);
703 			}
704 			return (do_tinfo(so));
705 		}
706 		return (0);
707 	}
708 
709 	if (so->so_provinfo->tpi_capability == PI_DONTKNOW) {
710 		PI_PROVLOCK(so->so_provinfo);
711 		so->so_provinfo->tpi_capability = PI_YES;
712 		PI_PROVUNLOCK(so->so_provinfo);
713 	}
714 
715 	ASSERT(mp);
716 	tca = (struct T_capability_ack *)mp->b_rptr;
717 
718 	ASSERT((cap_bits1 & TC1_INFO) == (tca->CAP_bits1 & TC1_INFO));
719 
720 	cap_bits1 = tca->CAP_bits1;
721 
722 	if (cap_bits1 & TC1_ACCEPTOR_ID) {
723 		so->so_acceptor_id = tca->ACCEPTOR_id;
724 		so->so_mode |= SM_ACCEPTOR_ID;
725 	}
726 
727 	if (cap_bits1 & TC1_INFO)
728 		copy_tinfo(so, &tca->INFO_ack);
729 
730 	mutex_exit(&so->so_lock);
731 	freemsg(mp);
732 
733 	if (cap_bits1 & TC1_INFO)
734 		return (check_tinfo(so));
735 
736 	return (0);
737 }
738 
739 /*
740  * Retrieve and clear the socket error.
741  */
742 int
743 sogeterr(struct sonode *so)
744 {
745 	int error;
746 
747 	ASSERT(MUTEX_HELD(&so->so_lock));
748 
749 	error = so->so_error;
750 	so->so_error = 0;
751 
752 	return (error);
753 }
754 
755 /*
756  * This routine is registered with the stream head to retrieve read
757  * side errors.
758  * It does not clear the socket error for a peeking read side operation.
759  * It the error is to be cleared it sets *clearerr.
760  */
761 int
762 sogetrderr(vnode_t *vp, int ispeek, int *clearerr)
763 {
764 	struct sonode *so = VTOSO(vp);
765 	int error;
766 
767 	mutex_enter(&so->so_lock);
768 	if (ispeek) {
769 		error = so->so_error;
770 		*clearerr = 0;
771 	} else {
772 		error = so->so_error;
773 		so->so_error = 0;
774 		*clearerr = 1;
775 	}
776 	mutex_exit(&so->so_lock);
777 	return (error);
778 }
779 
780 /*
781  * This routine is registered with the stream head to retrieve write
782  * side errors.
783  * It does not clear the socket error for a peeking read side operation.
784  * It the error is to be cleared it sets *clearerr.
785  */
786 int
787 sogetwrerr(vnode_t *vp, int ispeek, int *clearerr)
788 {
789 	struct sonode *so = VTOSO(vp);
790 	int error;
791 
792 	mutex_enter(&so->so_lock);
793 	if (so->so_state & SS_CANTSENDMORE) {
794 		error = EPIPE;
795 		*clearerr = 0;
796 	} else {
797 		error = so->so_error;
798 		if (ispeek) {
799 			*clearerr = 0;
800 		} else {
801 			so->so_error = 0;
802 			*clearerr = 1;
803 		}
804 	}
805 	mutex_exit(&so->so_lock);
806 	return (error);
807 }
808 
809 /*
810  * Set a nonpersistent read and write error on the socket.
811  * Used when there is a T_uderror_ind for a connected socket.
812  * The caller also needs to call strsetrerror and strsetwerror
813  * after dropping the lock.
814  */
815 void
816 soseterror(struct sonode *so, int error)
817 {
818 	ASSERT(error != 0);
819 
820 	ASSERT(MUTEX_HELD(&so->so_lock));
821 	so->so_error = (ushort_t)error;
822 }
823 
824 void
825 soisconnecting(struct sonode *so)
826 {
827 	ASSERT(MUTEX_HELD(&so->so_lock));
828 	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
829 	so->so_state |= SS_ISCONNECTING;
830 	cv_broadcast(&so->so_state_cv);
831 }
832 
833 void
834 soisconnected(struct sonode *so)
835 {
836 	ASSERT(MUTEX_HELD(&so->so_lock));
837 	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING);
838 	so->so_state |= SS_ISCONNECTED;
839 	cv_broadcast(&so->so_state_cv);
840 }
841 
842 /*
843  * The caller also needs to call strsetrerror, strsetwerror and strseteof.
844  */
845 void
846 soisdisconnected(struct sonode *so, int error)
847 {
848 	ASSERT(MUTEX_HELD(&so->so_lock));
849 	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING|
850 	    SS_LADDR_VALID|SS_FADDR_VALID);
851 	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE);
852 	so->so_error = (ushort_t)error;
853 	if (so->so_peercred != NULL) {
854 		crfree(so->so_peercred);
855 		so->so_peercred = NULL;
856 	}
857 	cv_broadcast(&so->so_state_cv);
858 }
859 
860 /*
861  * For connected AF_UNIX SOCK_DGRAM sockets when the peer closes.
862  * Does not affect write side.
863  * The caller also has to call strsetrerror.
864  */
865 static void
866 sobreakconn(struct sonode *so, int error)
867 {
868 	ASSERT(MUTEX_HELD(&so->so_lock));
869 	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
870 	so->so_error = (ushort_t)error;
871 	cv_broadcast(&so->so_state_cv);
872 }
873 
874 /*
875  * Can no longer send.
876  * Caller must also call strsetwerror.
877  *
878  * We mark the peer address as no longer valid for getpeername, but
879  * leave it around for so_unix_close to notify the peer (that
880  * transport has no addressing held at that layer).
881  */
882 void
883 socantsendmore(struct sonode *so)
884 {
885 	ASSERT(MUTEX_HELD(&so->so_lock));
886 	so->so_state = so->so_state & ~SS_FADDR_VALID | SS_CANTSENDMORE;
887 	cv_broadcast(&so->so_state_cv);
888 }
889 
890 /*
891  * The caller must call strseteof(,1) as well as this routine
892  * to change the socket state.
893  */
894 void
895 socantrcvmore(struct sonode *so)
896 {
897 	ASSERT(MUTEX_HELD(&so->so_lock));
898 	so->so_state |= SS_CANTRCVMORE;
899 	cv_broadcast(&so->so_state_cv);
900 }
901 
902 /*
903  * The caller has sent down a "request_prim" primitive and wants to wait for
904  * an ack ("ack_prim") or an T_ERROR_ACK for it.
905  * The specified "ack_prim" can be a T_OK_ACK.
906  *
907  * Assumes that all the TPI acks are M_PCPROTO messages.
908  *
909  * Note that the socket is single-threaded (using so_lock_single)
910  * for all operations that generate TPI ack messages. Since
911  * only TPI ack messages are M_PCPROTO we should never receive
912  * anything except either the ack we are expecting or a T_ERROR_ACK
913  * for the same primitive.
914  */
915 int
916 sowaitprim(struct sonode *so, t_scalar_t request_prim, t_scalar_t ack_prim,
917 	    t_uscalar_t min_size, mblk_t **mpp, clock_t wait)
918 {
919 	mblk_t *mp;
920 	union T_primitives *tpr;
921 	int error;
922 
923 	dprintso(so, 1, ("sowaitprim(%p, %d, %d, %d, %p, %lu)\n",
924 		so, request_prim, ack_prim, min_size, mpp, wait));
925 
926 	ASSERT(MUTEX_HELD(&so->so_lock));
927 
928 	error = sowaitack(so, &mp, wait);
929 	if (error)
930 		return (error);
931 
932 	dprintso(so, 1, ("got msg %p\n", mp));
933 	if (DB_TYPE(mp) != M_PCPROTO ||
934 	    MBLKL(mp) < sizeof (tpr->type)) {
935 		freemsg(mp);
936 		eprintsoline(so, EPROTO);
937 		return (EPROTO);
938 	}
939 	tpr = (union T_primitives *)mp->b_rptr;
940 	/*
941 	 * Did we get the primitive that we were asking for?
942 	 * For T_OK_ACK we also check that it matches the request primitive.
943 	 */
944 	if (tpr->type == ack_prim &&
945 	    (ack_prim != T_OK_ACK ||
946 	    tpr->ok_ack.CORRECT_prim == request_prim)) {
947 		if (MBLKL(mp) >= (ssize_t)min_size) {
948 			/* Found what we are looking for */
949 			*mpp = mp;
950 			return (0);
951 		}
952 		/* Too short */
953 		freemsg(mp);
954 		eprintsoline(so, EPROTO);
955 		return (EPROTO);
956 	}
957 
958 	if (tpr->type == T_ERROR_ACK &&
959 	    tpr->error_ack.ERROR_prim == request_prim) {
960 		/* Error to the primitive we were looking for */
961 		if (tpr->error_ack.TLI_error == TSYSERR) {
962 			error = tpr->error_ack.UNIX_error;
963 		} else {
964 			error = tlitosyserr(tpr->error_ack.TLI_error);
965 		}
966 		dprintso(so, 0, ("error_ack for %d: %d/%d ->%d\n",
967 			tpr->error_ack.ERROR_prim,
968 			tpr->error_ack.TLI_error,
969 			tpr->error_ack.UNIX_error,
970 			error));
971 		freemsg(mp);
972 		return (error);
973 	}
974 	/*
975 	 * Wrong primitive or T_ERROR_ACK for the wrong primitive
976 	 */
977 #ifdef DEBUG
978 	if (tpr->type == T_ERROR_ACK) {
979 		dprintso(so, 0, ("error_ack for %d: %d/%d\n",
980 			tpr->error_ack.ERROR_prim,
981 			tpr->error_ack.TLI_error,
982 			tpr->error_ack.UNIX_error));
983 	} else if (tpr->type == T_OK_ACK) {
984 		dprintso(so, 0, ("ok_ack for %d, expected %d for %d\n",
985 			tpr->ok_ack.CORRECT_prim,
986 			ack_prim, request_prim));
987 	} else {
988 		dprintso(so, 0,
989 			("unexpected primitive %d, expected %d for %d\n",
990 			tpr->type, ack_prim, request_prim));
991 	}
992 #endif /* DEBUG */
993 
994 	freemsg(mp);
995 	eprintsoline(so, EPROTO);
996 	return (EPROTO);
997 }
998 
999 /*
1000  * Wait for a T_OK_ACK for the specified primitive.
1001  */
1002 int
1003 sowaitokack(struct sonode *so, t_scalar_t request_prim)
1004 {
1005 	mblk_t *mp;
1006 	int error;
1007 
1008 	error = sowaitprim(so, request_prim, T_OK_ACK,
1009 	    (t_uscalar_t)sizeof (struct T_ok_ack), &mp, 0);
1010 	if (error)
1011 		return (error);
1012 	freemsg(mp);
1013 	return (0);
1014 }
1015 
1016 /*
1017  * Queue a received TPI ack message on so_ack_mp.
1018  */
1019 void
1020 soqueueack(struct sonode *so, mblk_t *mp)
1021 {
1022 	if (DB_TYPE(mp) != M_PCPROTO) {
1023 		cmn_err(CE_WARN,
1024 		    "sockfs: received unexpected M_PROTO TPI ack. Prim %d\n",
1025 		    *(t_scalar_t *)mp->b_rptr);
1026 		freemsg(mp);
1027 		return;
1028 	}
1029 
1030 	mutex_enter(&so->so_lock);
1031 	if (so->so_ack_mp != NULL) {
1032 		dprintso(so, 1, ("so_ack_mp already set\n"));
1033 		freemsg(so->so_ack_mp);
1034 		so->so_ack_mp = NULL;
1035 	}
1036 	so->so_ack_mp = mp;
1037 	cv_broadcast(&so->so_ack_cv);
1038 	mutex_exit(&so->so_lock);
1039 }
1040 
1041 /*
1042  * Wait for a TPI ack ignoring signals and errors.
1043  */
1044 int
1045 sowaitack(struct sonode *so, mblk_t **mpp, clock_t wait)
1046 {
1047 	ASSERT(MUTEX_HELD(&so->so_lock));
1048 
1049 	while (so->so_ack_mp == NULL) {
1050 #ifdef SOCK_TEST
1051 		if (wait == 0 && sock_test_timelimit != 0)
1052 			wait = sock_test_timelimit;
1053 #endif
1054 		if (wait != 0) {
1055 			/*
1056 			 * Only wait for the time limit.
1057 			 */
1058 			clock_t now;
1059 
1060 			time_to_wait(&now, wait);
1061 			if (cv_timedwait(&so->so_ack_cv, &so->so_lock,
1062 			    now) == -1) {
1063 				eprintsoline(so, ETIME);
1064 				return (ETIME);
1065 			}
1066 		}
1067 		else
1068 			cv_wait(&so->so_ack_cv, &so->so_lock);
1069 	}
1070 	*mpp = so->so_ack_mp;
1071 #ifdef DEBUG
1072 	{
1073 		union T_primitives *tpr;
1074 		mblk_t *mp = *mpp;
1075 
1076 		tpr = (union T_primitives *)mp->b_rptr;
1077 		ASSERT(DB_TYPE(mp) == M_PCPROTO);
1078 		ASSERT(tpr->type == T_OK_ACK ||
1079 			tpr->type == T_ERROR_ACK ||
1080 			tpr->type == T_BIND_ACK ||
1081 			tpr->type == T_CAPABILITY_ACK ||
1082 			tpr->type == T_INFO_ACK ||
1083 			tpr->type == T_OPTMGMT_ACK);
1084 	}
1085 #endif /* DEBUG */
1086 	so->so_ack_mp = NULL;
1087 	return (0);
1088 }
1089 
1090 /*
1091  * Queue a received T_CONN_IND message on so_conn_ind_head/tail.
1092  */
1093 void
1094 soqueueconnind(struct sonode *so, mblk_t *mp)
1095 {
1096 	if (DB_TYPE(mp) != M_PROTO) {
1097 		cmn_err(CE_WARN,
1098 		    "sockfs: received unexpected M_PCPROTO T_CONN_IND\n");
1099 		freemsg(mp);
1100 		return;
1101 	}
1102 
1103 	mutex_enter(&so->so_lock);
1104 	ASSERT(mp->b_next == NULL);
1105 	if (so->so_conn_ind_head == NULL) {
1106 		so->so_conn_ind_head = mp;
1107 		so->so_state |= SS_HASCONNIND;
1108 	} else {
1109 		ASSERT(so->so_state & SS_HASCONNIND);
1110 		ASSERT(so->so_conn_ind_tail->b_next == NULL);
1111 		so->so_conn_ind_tail->b_next = mp;
1112 	}
1113 	so->so_conn_ind_tail = mp;
1114 	/* Wakeup a single consumer of the T_CONN_IND */
1115 	cv_signal(&so->so_connind_cv);
1116 	mutex_exit(&so->so_lock);
1117 }
1118 
1119 /*
1120  * Wait for a T_CONN_IND.
1121  * Don't wait if nonblocking.
1122  * Accept signals and socket errors.
1123  */
1124 int
1125 sowaitconnind(struct sonode *so, int fmode, mblk_t **mpp)
1126 {
1127 	mblk_t *mp;
1128 	int error = 0;
1129 
1130 	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
1131 	mutex_enter(&so->so_lock);
1132 check_error:
1133 	if (so->so_error) {
1134 		error = sogeterr(so);
1135 		if (error) {
1136 			mutex_exit(&so->so_lock);
1137 			return (error);
1138 		}
1139 	}
1140 
1141 	if (so->so_conn_ind_head == NULL) {
1142 		if (fmode & (FNDELAY|FNONBLOCK)) {
1143 			error = EWOULDBLOCK;
1144 			goto done;
1145 		}
1146 		if (!cv_wait_sig_swap(&so->so_connind_cv, &so->so_lock)) {
1147 			error = EINTR;
1148 			goto done;
1149 		}
1150 		goto check_error;
1151 	}
1152 	mp = so->so_conn_ind_head;
1153 	so->so_conn_ind_head = mp->b_next;
1154 	mp->b_next = NULL;
1155 	if (so->so_conn_ind_head == NULL) {
1156 		ASSERT(so->so_conn_ind_tail == mp);
1157 		so->so_conn_ind_tail = NULL;
1158 		so->so_state &= ~SS_HASCONNIND;
1159 	}
1160 	*mpp = mp;
1161 done:
1162 	mutex_exit(&so->so_lock);
1163 	return (error);
1164 }
1165 
1166 /*
1167  * Flush a T_CONN_IND matching the sequence number from the list.
1168  * Return zero if found; non-zero otherwise.
1169  * This is called very infrequently thus it is ok to do a linear search.
1170  */
1171 int
1172 soflushconnind(struct sonode *so, t_scalar_t seqno)
1173 {
1174 	mblk_t *prevmp, *mp;
1175 	struct T_conn_ind *tci;
1176 
1177 	mutex_enter(&so->so_lock);
1178 	for (prevmp = NULL, mp = so->so_conn_ind_head; mp != NULL;
1179 	    prevmp = mp, mp = mp->b_next) {
1180 		tci = (struct T_conn_ind *)mp->b_rptr;
1181 		if (tci->SEQ_number == seqno) {
1182 			dprintso(so, 1,
1183 				("t_discon_ind: found T_CONN_IND %d\n", seqno));
1184 			/* Deleting last? */
1185 			if (so->so_conn_ind_tail == mp) {
1186 				so->so_conn_ind_tail = prevmp;
1187 			}
1188 			if (prevmp == NULL) {
1189 				/* Deleting first */
1190 				so->so_conn_ind_head = mp->b_next;
1191 			} else {
1192 				prevmp->b_next = mp->b_next;
1193 			}
1194 			mp->b_next = NULL;
1195 			if (so->so_conn_ind_head == NULL) {
1196 				ASSERT(so->so_conn_ind_tail == NULL);
1197 				so->so_state &= ~SS_HASCONNIND;
1198 			} else {
1199 				ASSERT(so->so_conn_ind_tail != NULL);
1200 			}
1201 			so->so_error = ECONNABORTED;
1202 			mutex_exit(&so->so_lock);
1203 			freemsg(mp);
1204 			return (0);
1205 		}
1206 	}
1207 	mutex_exit(&so->so_lock);
1208 	dprintso(so, 1,	("t_discon_ind: NOT found T_CONN_IND %d\n", seqno));
1209 	return (-1);
1210 }
1211 
/*
 * Wait for a connection attempt to finish.
 *
 * While the socket is SS_ISCONNECTING (and not SS_ISCONNECTED) with no
 * error pending, the function either returns EINPROGRESS (fmode has
 * FNDELAY or FNONBLOCK set) or sleeps on so_state_cv. With nosig set
 * the sleep cannot be interrupted by a signal; otherwise an
 * interrupted sleep yields EINTR.
 *
 * Once the wait is over the result is:
 *	the value of sogeterr()	if so_error is set,
 *	ECONNREFUSED		if the socket is not SS_ISCONNECTED,
 *	0			otherwise.
 *
 * The caller must hold so_lock.
 */
int
sowaitconnected(struct sonode *so, int fmode, int nosig)
{
	int error;

	ASSERT(MUTEX_HELD(&so->so_lock));

	for (;;) {
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) !=
		    SS_ISCONNECTING)
			break;
		if (so->so_error != 0)
			break;

		dprintso(so, 1, ("waiting for SS_ISCONNECTED on %p\n", so));
		if (fmode & (FNDELAY|FNONBLOCK))
			return (EINPROGRESS);

		if (nosig) {
			cv_wait(&so->so_state_cv, &so->so_lock);
		} else if (!cv_wait_sig_swap(&so->so_state_cv,
		    &so->so_lock)) {
			/*
			 * Interrupted. The application is expected to
			 * find out about completion of the connect by
			 * nonblocking means.
			 */
			return (EINTR);
		}
		dprintso(so, 1, ("awoken on %p\n", so));
	}

	if (so->so_error != 0) {
		error = sogeterr(so);
		ASSERT(error != 0);
		dprintso(so, 1, ("sowaitconnected: error %d\n", error));
		return (error);
	}

	if (so->so_state & SS_ISCONNECTED)
		return (0);

	/*
	 * Not connected yet no error recorded: a T_ORDREL_IND or a
	 * T_DISCON_IND with a zero errno may have arrived, or some
	 * other thread (e.g. one in read) may have consumed so_error.
	 */
	error = ECONNREFUSED;
	dprintso(so, 1, ("sowaitconnected: error %d\n", error));
	return (error);
}
1262 
1263 
1264 /*
1265  * Handle the signal generation aspect of urgent data.
1266  */
1267 static void
1268 so_oob_sig(struct sonode *so, int extrasig,
1269     strsigset_t *signals, strpollset_t *pollwakeups)
1270 {
1271 	ASSERT(MUTEX_HELD(&so->so_lock));
1272 
1273 	ASSERT(so_verify_oobstate(so));
1274 	ASSERT(so->so_oobsigcnt >= so->so_oobcnt);
1275 	if (so->so_oobsigcnt > so->so_oobcnt) {
1276 		/*
1277 		 * Signal has already been generated once for this
1278 		 * urgent "event". However, since TCP can receive updated
1279 		 * urgent pointers we still generate a signal.
1280 		 */
1281 		ASSERT(so->so_state & SS_OOBPEND);
1282 		if (extrasig) {
1283 			*signals |= S_RDBAND;
1284 			*pollwakeups |= POLLRDBAND;
1285 		}
1286 		return;
1287 	}
1288 
1289 	so->so_oobsigcnt++;
1290 	ASSERT(so->so_oobsigcnt > 0);	/* Wraparound */
1291 	ASSERT(so->so_oobsigcnt > so->so_oobcnt);
1292 
1293 	/*
1294 	 * Record (for select/poll) that urgent data is pending.
1295 	 */
1296 	so->so_state |= SS_OOBPEND;
1297 	/*
1298 	 * New urgent data on the way so forget about any old
1299 	 * urgent data.
1300 	 */
1301 	so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);
1302 	if (so->so_oobmsg != NULL) {
1303 		dprintso(so, 1, ("sock: discarding old oob\n"));
1304 		freemsg(so->so_oobmsg);
1305 		so->so_oobmsg = NULL;
1306 	}
1307 	*signals |= S_RDBAND;
1308 	*pollwakeups |= POLLRDBAND;
1309 	ASSERT(so_verify_oobstate(so));
1310 }
1311 
1312 /*
1313  * Handle the processing of the T_EXDATA_IND with urgent data.
1314  * Returns the T_EXDATA_IND if it should be queued on the read queue.
1315  */
1316 /* ARGSUSED2 */
1317 static mblk_t *
1318 so_oob_exdata(struct sonode *so, mblk_t *mp,
1319 	strsigset_t *signals, strpollset_t *pollwakeups)
1320 {
1321 	ASSERT(MUTEX_HELD(&so->so_lock));
1322 
1323 	ASSERT(so_verify_oobstate(so));
1324 
1325 	ASSERT(so->so_oobsigcnt > so->so_oobcnt);
1326 
1327 	so->so_oobcnt++;
1328 	ASSERT(so->so_oobcnt > 0);	/* wraparound? */
1329 	ASSERT(so->so_oobsigcnt >= so->so_oobcnt);
1330 
1331 	/*
1332 	 * Set MSGMARK for SIOCATMARK.
1333 	 */
1334 	mp->b_flag |= MSGMARK;
1335 
1336 	ASSERT(so_verify_oobstate(so));
1337 	return (mp);
1338 }
1339 
1340 /*
1341  * Handle the processing of the actual urgent data.
1342  * Returns the data mblk if it should be queued on the read queue.
1343  */
1344 static mblk_t *
1345 so_oob_data(struct sonode *so, mblk_t *mp,
1346 	strsigset_t *signals, strpollset_t *pollwakeups)
1347 {
1348 	ASSERT(MUTEX_HELD(&so->so_lock));
1349 
1350 	ASSERT(so_verify_oobstate(so));
1351 
1352 	ASSERT(so->so_oobsigcnt >= so->so_oobcnt);
1353 	ASSERT(mp != NULL);
1354 	/*
1355 	 * For OOBINLINE we keep the data in the T_EXDATA_IND.
1356 	 * Otherwise we store it in so_oobmsg.
1357 	 */
1358 	ASSERT(so->so_oobmsg == NULL);
1359 	if (so->so_options & SO_OOBINLINE) {
1360 		*pollwakeups |= POLLIN | POLLRDNORM | POLLRDBAND;
1361 		*signals |= S_INPUT | S_RDNORM;
1362 	} else {
1363 		*pollwakeups |= POLLRDBAND;
1364 		so->so_state |= SS_HAVEOOBDATA;
1365 		so->so_oobmsg = mp;
1366 		mp = NULL;
1367 	}
1368 	ASSERT(so_verify_oobstate(so));
1369 	return (mp);
1370 }
1371 
1372 /*
1373  * Caller must hold the mutex.
1374  * For delayed processing, save the T_DISCON_IND received
1375  * from below on so_discon_ind_mp.
1376  * When the message is processed the framework will call:
1377  *      (*func)(so, mp);
1378  */
1379 static void
1380 so_save_discon_ind(struct sonode *so,
1381 	mblk_t *mp,
1382 	void (*func)(struct sonode *so, mblk_t *))
1383 {
1384 	ASSERT(MUTEX_HELD(&so->so_lock));
1385 
1386 	/*
1387 	 * Discard new T_DISCON_IND if we have already received another.
1388 	 * Currently the earlier message can either be on so_discon_ind_mp
1389 	 * or being processed.
1390 	 */
1391 	if (so->so_discon_ind_mp != NULL || (so->so_flag & SOASYNC_UNBIND)) {
1392 		cmn_err(CE_WARN,
1393 		    "sockfs: received unexpected additional T_DISCON_IND\n");
1394 		freemsg(mp);
1395 		return;
1396 	}
1397 	mp->b_prev = (mblk_t *)func;
1398 	mp->b_next = NULL;
1399 	so->so_discon_ind_mp = mp;
1400 }
1401 
1402 /*
1403  * Caller must hold the mutex and make sure that either SOLOCKED
1404  * or SOASYNC_UNBIND is set. Called from so_unlock_single().
1405  * Perform delayed processing of T_DISCON_IND message on so_discon_ind_mp.
1406  * Need to ensure that strsock_proto() will not end up sleeping for
1407  * SOASYNC_UNBIND, while executing this function.
1408  */
1409 void
1410 so_drain_discon_ind(struct sonode *so)
1411 {
1412 	mblk_t	*bp;
1413 	void (*func)(struct sonode *so, mblk_t *);
1414 
1415 	ASSERT(MUTEX_HELD(&so->so_lock));
1416 	ASSERT(so->so_flag & (SOLOCKED|SOASYNC_UNBIND));
1417 
1418 	/* Process T_DISCON_IND on so_discon_ind_mp */
1419 	if ((bp = so->so_discon_ind_mp) != NULL) {
1420 		so->so_discon_ind_mp = NULL;
1421 		func = (void (*)())bp->b_prev;
1422 		bp->b_prev = NULL;
1423 
1424 		/*
1425 		 * This (*func) is supposed to generate a message downstream
1426 		 * and we need to have a flag set until the corresponding
1427 		 * upstream message reaches stream head.
1428 		 * When processing T_DISCON_IND in strsock_discon_ind
1429 		 * we hold SOASYN_UNBIND when sending T_UNBIND_REQ down and
1430 		 * drop the flag after we get the ACK in strsock_proto.
1431 		 */
1432 		(void) (*func)(so, bp);
1433 	}
1434 }
1435 
1436 /*
1437  * Caller must hold the mutex.
1438  * Remove the T_DISCON_IND on so_discon_ind_mp.
1439  */
1440 void
1441 so_flush_discon_ind(struct sonode *so)
1442 {
1443 	mblk_t	*bp;
1444 
1445 	ASSERT(MUTEX_HELD(&so->so_lock));
1446 
1447 	/*
1448 	 * Remove T_DISCON_IND mblk at so_discon_ind_mp.
1449 	 */
1450 	if ((bp = so->so_discon_ind_mp) != NULL) {
1451 		so->so_discon_ind_mp = NULL;
1452 		bp->b_prev = NULL;
1453 		freemsg(bp);
1454 	}
1455 }
1456 
1457 /*
1458  * Caller must hold the mutex.
1459  *
1460  * This function is used to process the T_DISCON_IND message. It does
1461  * immediate processing when called from strsock_proto and delayed
1462  * processing of discon_ind saved on so_discon_ind_mp when called from
1463  * so_drain_discon_ind. When a T_DISCON_IND message is saved in
1464  * so_discon_ind_mp for delayed processing, this function is registered
1465  * as the callback function to process the message.
1466  *
1467  * SOASYNC_UNBIND should be held in this function, during the non-blocking
1468  * unbind operation, and should be released only after we receive the ACK
1469  * in strsock_proto, for the T_UNBIND_REQ sent here. Since SOLOCKED is not set,
1470  * no TPI messages would be sent down at this time. This is to prevent M_FLUSH
1471  * sent from either this function or tcp_unbind(), flushing away any TPI
1472  * message that is being sent down and stays in a lower module's queue.
1473  *
1474  * This function drops so_lock and grabs it again.
1475  */
1476 static void
1477 strsock_discon_ind(struct sonode *so, mblk_t *discon_mp)
1478 {
1479 	struct vnode *vp;
1480 	struct stdata *stp;
1481 	union T_primitives *tpr;
1482 	struct T_unbind_req *ubr;
1483 	mblk_t *mp;
1484 	int error;
1485 
1486 	ASSERT(MUTEX_HELD(&so->so_lock));
1487 	ASSERT(discon_mp);
1488 	ASSERT(discon_mp->b_rptr);
1489 
1490 	tpr = (union T_primitives *)discon_mp->b_rptr;
1491 	ASSERT(tpr->type == T_DISCON_IND);
1492 
1493 	vp = SOTOV(so);
1494 	stp = vp->v_stream;
1495 	ASSERT(stp);
1496 
1497 	/*
1498 	 * Not a listener
1499 	 */
1500 	ASSERT((so->so_state & SS_ACCEPTCONN) == 0);
1501 
1502 	/*
1503 	 * This assumes that the name space for DISCON_reason
1504 	 * is the errno name space.
1505 	 */
1506 	soisdisconnected(so, tpr->discon_ind.DISCON_reason);
1507 
1508 	/*
1509 	 * Unbind with the transport without blocking.
1510 	 * If we've already received a T_DISCON_IND do not unbind.
1511 	 *
1512 	 * If there is no preallocated unbind message, we have already
1513 	 * unbound with the transport
1514 	 *
1515 	 * If the socket is not bound, no need to unbind.
1516 	 */
1517 	mp = so->so_unbind_mp;
1518 	if (mp == NULL) {
1519 		ASSERT(!(so->so_state & SS_ISBOUND));
1520 		mutex_exit(&so->so_lock);
1521 	} else if (!(so->so_state & SS_ISBOUND))  {
1522 		mutex_exit(&so->so_lock);
1523 	} else {
1524 		so->so_unbind_mp = NULL;
1525 
1526 		/*
1527 		 * Is another T_DISCON_IND being processed.
1528 		 */
1529 		ASSERT((so->so_flag & SOASYNC_UNBIND) == 0);
1530 
1531 		/*
1532 		 * Make strsock_proto ignore T_OK_ACK and T_ERROR_ACK for
1533 		 * this unbind. Set SOASYNC_UNBIND. This should be cleared
1534 		 * only after we receive the ACK in strsock_proto.
1535 		 */
1536 		so->so_flag |= SOASYNC_UNBIND;
1537 		ASSERT(!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)));
1538 		so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN|SS_LADDR_VALID);
1539 		mutex_exit(&so->so_lock);
1540 
1541 		/*
1542 		 * Send down T_UNBIND_REQ ignoring flow control.
1543 		 * XXX Assumes that MSG_IGNFLOW implies that this thread
1544 		 * does not run service procedures.
1545 		 */
1546 		ASSERT(DB_TYPE(mp) == M_PROTO);
1547 		ubr = (struct T_unbind_req *)mp->b_rptr;
1548 		mp->b_wptr += sizeof (*ubr);
1549 		ubr->PRIM_type = T_UNBIND_REQ;
1550 
1551 		/*
1552 		 * Flush the read and write side (except stream head read queue)
1553 		 * and send down T_UNBIND_REQ.
1554 		 */
1555 		(void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW);
1556 		error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
1557 			MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
1558 		/* LINTED - warning: statement has no consequent: if */
1559 		if (error) {
1560 			eprintsoline(so, error);
1561 		}
1562 	}
1563 
1564 	if (tpr->discon_ind.DISCON_reason != 0)
1565 		strsetrerror(SOTOV(so), 0, 0, sogetrderr);
1566 	strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
1567 	strseteof(SOTOV(so), 1);
1568 	/*
1569 	 * strseteof takes care of read side wakeups,
1570 	 * pollwakeups, and signals.
1571 	 */
1572 	dprintso(so, 1, ("T_DISCON_IND: error %d\n", so->so_error));
1573 	freemsg(discon_mp);
1574 
1575 
1576 	pollwakeup(&stp->sd_pollist, POLLOUT);
1577 	mutex_enter(&stp->sd_lock);
1578 
1579 	/*
1580 	 * Wake sleeping write
1581 	 */
1582 	if (stp->sd_flag & WSLEEP) {
1583 		stp->sd_flag &= ~WSLEEP;
1584 		cv_broadcast(&stp->sd_wrq->q_wait);
1585 	}
1586 
1587 	/*
1588 	 * strsendsig can handle multiple signals with a
1589 	 * single call.  Send SIGPOLL for S_OUTPUT event.
1590 	 */
1591 	if (stp->sd_sigflags & S_OUTPUT)
1592 		strsendsig(stp->sd_siglist, S_OUTPUT, 0, 0);
1593 
1594 	mutex_exit(&stp->sd_lock);
1595 	mutex_enter(&so->so_lock);
1596 }
1597 
1598 /*
1599  * This routine is registered with the stream head to receive M_PROTO
1600  * and M_PCPROTO messages.
1601  *
1602  * Returns NULL if the message was consumed.
1603  * Returns an mblk to make that mblk be processed (and queued) by the stream
1604  * head.
1605  *
1606  * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and
1607  * *pollwakeups) for the stream head to take action on. Note that since
1608  * sockets always deliver SIGIO for every new piece of data this routine
1609  * never sets *firstmsgsigs; any signals are returned in *allmsgsigs.
1610  *
1611  * This routine handles all data related TPI messages independent of
1612  * the type of the socket i.e. it doesn't care if T_UNITDATA_IND message
1613  * arrive on a SOCK_STREAM.
1614  */
1615 static mblk_t *
1616 strsock_proto(vnode_t *vp, mblk_t *mp,
1617 		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
1618 		strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
1619 {
1620 	union T_primitives *tpr;
1621 	struct sonode *so;
1622 
1623 	so = VTOSO(vp);
1624 
1625 	dprintso(so, 1, ("strsock_proto(%p, %p)\n", vp, mp));
1626 
1627 	/* Set default return values */
1628 	*firstmsgsigs = *wakeups = *allmsgsigs = *pollwakeups = 0;
1629 
1630 	ASSERT(DB_TYPE(mp) == M_PROTO ||
1631 	    DB_TYPE(mp) == M_PCPROTO);
1632 
1633 	if (MBLKL(mp) < sizeof (tpr->type)) {
1634 		/* The message is too short to even contain the primitive */
1635 		cmn_err(CE_WARN,
1636 		    "sockfs: Too short TPI message received. Len = %ld\n",
1637 		    (ptrdiff_t)(MBLKL(mp)));
1638 		freemsg(mp);
1639 		return (NULL);
1640 	}
1641 	if (!__TPI_PRIM_ISALIGNED(mp->b_rptr)) {
1642 		/* The read pointer is not aligned correctly for TPI */
1643 		cmn_err(CE_WARN,
1644 		    "sockfs: Unaligned TPI message received. rptr = %p\n",
1645 		    (void *)mp->b_rptr);
1646 		freemsg(mp);
1647 		return (NULL);
1648 	}
1649 	tpr = (union T_primitives *)mp->b_rptr;
1650 	dprintso(so, 1, ("strsock_proto: primitive %d\n", tpr->type));
1651 
1652 	switch (tpr->type) {
1653 
1654 	case T_DATA_IND:
1655 		if (MBLKL(mp) < sizeof (struct T_data_ind)) {
1656 			cmn_err(CE_WARN,
1657 			    "sockfs: Too short T_DATA_IND. Len = %ld\n",
1658 			    (ptrdiff_t)(MBLKL(mp)));
1659 			freemsg(mp);
1660 			return (NULL);
1661 		}
1662 		/*
1663 		 * Ignore zero-length T_DATA_IND messages. These might be
1664 		 * generated by some transports.
1665 		 * This is needed to prevent read (which skips the M_PROTO
1666 		 * part) to unexpectedly return 0 (or return EWOULDBLOCK
1667 		 * on a non-blocking socket after select/poll has indicated
1668 		 * that data is available).
1669 		 */
1670 		if (msgdsize(mp->b_cont) == 0) {
1671 			dprintso(so, 0,
1672 			    ("strsock_proto: zero length T_DATA_IND\n"));
1673 			freemsg(mp);
1674 			return (NULL);
1675 		}
1676 		*allmsgsigs = S_INPUT | S_RDNORM;
1677 		*pollwakeups = POLLIN | POLLRDNORM;
1678 		*wakeups = RSLEEP;
1679 		return (mp);
1680 
1681 	case T_UNITDATA_IND: {
1682 		struct T_unitdata_ind	*tudi = &tpr->unitdata_ind;
1683 		void			*addr;
1684 		t_uscalar_t		addrlen;
1685 
1686 		if (MBLKL(mp) < sizeof (struct T_unitdata_ind)) {
1687 			cmn_err(CE_WARN,
1688 			    "sockfs: Too short T_UNITDATA_IND. Len = %ld\n",
1689 			    (ptrdiff_t)(MBLKL(mp)));
1690 			freemsg(mp);
1691 			return (NULL);
1692 		}
1693 
1694 		/* Is this is not a connected datagram socket? */
1695 		if ((so->so_mode & SM_CONNREQUIRED) ||
1696 		    !(so->so_state & SS_ISCONNECTED)) {
1697 			/*
1698 			 * Not a connected datagram socket. Look for
1699 			 * the SO_UNIX_CLOSE option. If such an option is found
1700 			 * discard the message (since it has no meaning
1701 			 * unless connected).
1702 			 */
1703 			if (so->so_family == AF_UNIX && msgdsize(mp) == 0 &&
1704 			    tudi->OPT_length != 0) {
1705 				void *opt;
1706 				t_uscalar_t optlen = tudi->OPT_length;
1707 
1708 				opt = sogetoff(mp, tudi->OPT_offset,
1709 					optlen, __TPI_ALIGN_SIZE);
1710 				if (opt == NULL) {
1711 					/* The len/off falls outside mp */
1712 					freemsg(mp);
1713 					mutex_enter(&so->so_lock);
1714 					soseterror(so, EPROTO);
1715 					mutex_exit(&so->so_lock);
1716 					cmn_err(CE_WARN,
1717 					    "sockfs: T_unidata_ind with "
1718 					    "invalid optlen/offset %u/%d\n",
1719 					    optlen, tudi->OPT_offset);
1720 					return (NULL);
1721 				}
1722 				if (so_getopt_unix_close(opt, optlen)) {
1723 					freemsg(mp);
1724 					return (NULL);
1725 				}
1726 			}
1727 			*allmsgsigs = S_INPUT | S_RDNORM;
1728 			*pollwakeups = POLLIN | POLLRDNORM;
1729 			*wakeups = RSLEEP;
1730 #ifdef C2_AUDIT
1731 			if (audit_active)
1732 				audit_sock(T_UNITDATA_IND, strvp2wq(vp),
1733 					mp, 0);
1734 #endif /* C2_AUDIT */
1735 			return (mp);
1736 		}
1737 
1738 		/*
1739 		 * A connect datagram socket. For AF_INET{,6} we verify that
1740 		 * the source address matches the "connected to" address.
1741 		 * The semantics of AF_UNIX sockets is to not verify
1742 		 * the source address.
1743 		 * Note that this source address verification is transport
1744 		 * specific. Thus the real fix would be to extent TPI
1745 		 * to allow T_CONN_REQ messages to be send to connectionless
1746 		 * transport providers and always let the transport provider
1747 		 * do whatever filtering is needed.
1748 		 *
1749 		 * The verification/filtering semantics for transports
1750 		 * other than AF_INET and AF_UNIX are unknown. The choice
1751 		 * would be to either filter using bcmp or let all messages
1752 		 * get through. This code does not filter other address
1753 		 * families since this at least allows the application to
1754 		 * work around any missing filtering.
1755 		 *
1756 		 * XXX Should we move filtering to UDP/ICMP???
1757 		 * That would require passing e.g. a T_DISCON_REQ to UDP
1758 		 * when the socket becomes unconnected.
1759 		 */
1760 		addrlen = tudi->SRC_length;
1761 		/*
1762 		 * The alignment restriction is really to strict but
1763 		 * we want enough alignment to inspect the fields of
1764 		 * a sockaddr_in.
1765 		 */
1766 		addr = sogetoff(mp, tudi->SRC_offset, addrlen,
1767 				__TPI_ALIGN_SIZE);
1768 		if (addr == NULL) {
1769 			freemsg(mp);
1770 			mutex_enter(&so->so_lock);
1771 			soseterror(so, EPROTO);
1772 			mutex_exit(&so->so_lock);
1773 			cmn_err(CE_WARN,
1774 			    "sockfs: T_unidata_ind with invalid "
1775 			    "addrlen/offset %u/%d\n",
1776 			    addrlen, tudi->SRC_offset);
1777 			return (NULL);
1778 		}
1779 
1780 		if (so->so_family == AF_INET) {
1781 			/*
1782 			 * For AF_INET we allow wildcarding both sin_addr
1783 			 * and sin_port.
1784 			 */
1785 			struct sockaddr_in *faddr, *sin;
1786 
1787 			/* Prevent so_faddr_sa from changing while accessed */
1788 			mutex_enter(&so->so_lock);
1789 			ASSERT(so->so_faddr_len ==
1790 				(socklen_t)sizeof (struct sockaddr_in));
1791 			faddr = (struct sockaddr_in *)so->so_faddr_sa;
1792 			sin = (struct sockaddr_in *)addr;
1793 			if (addrlen !=
1794 				(t_uscalar_t)sizeof (struct sockaddr_in) ||
1795 			    (sin->sin_addr.s_addr != faddr->sin_addr.s_addr &&
1796 			    faddr->sin_addr.s_addr != INADDR_ANY) ||
1797 			    (so->so_type != SOCK_RAW &&
1798 			    sin->sin_port != faddr->sin_port &&
1799 			    faddr->sin_port != 0)) {
1800 #ifdef DEBUG
1801 				dprintso(so, 0,
1802 					("sockfs: T_UNITDATA_IND mismatch: %s",
1803 					pr_addr(so->so_family,
1804 						(struct sockaddr *)addr,
1805 						addrlen)));
1806 				dprintso(so, 0, (" - %s\n",
1807 					pr_addr(so->so_family, so->so_faddr_sa,
1808 					    (t_uscalar_t)so->so_faddr_len)));
1809 #endif /* DEBUG */
1810 				mutex_exit(&so->so_lock);
1811 				freemsg(mp);
1812 				return (NULL);
1813 			}
1814 			mutex_exit(&so->so_lock);
1815 		} else if (so->so_family == AF_INET6) {
1816 			/*
1817 			 * For AF_INET6 we allow wildcarding both sin6_addr
1818 			 * and sin6_port.
1819 			 */
1820 			struct sockaddr_in6 *faddr6, *sin6;
1821 			static struct in6_addr zeroes; /* inits to all zeros */
1822 
1823 			/* Prevent so_faddr_sa from changing while accessed */
1824 			mutex_enter(&so->so_lock);
1825 			ASSERT(so->so_faddr_len ==
1826 			    (socklen_t)sizeof (struct sockaddr_in6));
1827 			faddr6 = (struct sockaddr_in6 *)so->so_faddr_sa;
1828 			sin6 = (struct sockaddr_in6 *)addr;
1829 			/* XXX could we get a mapped address ::ffff:0.0.0.0 ? */
1830 			if (addrlen !=
1831 			    (t_uscalar_t)sizeof (struct sockaddr_in6) ||
1832 			    (!IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
1833 				&faddr6->sin6_addr) &&
1834 			    !IN6_ARE_ADDR_EQUAL(&faddr6->sin6_addr, &zeroes)) ||
1835 			    (so->so_type != SOCK_RAW &&
1836 			    sin6->sin6_port != faddr6->sin6_port &&
1837 			    faddr6->sin6_port != 0)) {
1838 #ifdef DEBUG
1839 				dprintso(so, 0,
1840 				    ("sockfs: T_UNITDATA_IND mismatch: %s",
1841 					pr_addr(so->so_family,
1842 					    (struct sockaddr *)addr,
1843 					    addrlen)));
1844 				dprintso(so, 0, (" - %s\n",
1845 				    pr_addr(so->so_family, so->so_faddr_sa,
1846 					(t_uscalar_t)so->so_faddr_len)));
1847 #endif /* DEBUG */
1848 				mutex_exit(&so->so_lock);
1849 				freemsg(mp);
1850 				return (NULL);
1851 			}
1852 			mutex_exit(&so->so_lock);
1853 		} else if (so->so_family == AF_UNIX &&
1854 		    msgdsize(mp->b_cont) == 0 &&
1855 		    tudi->OPT_length != 0) {
1856 			/*
1857 			 * Attempt to extract AF_UNIX
1858 			 * SO_UNIX_CLOSE indication from options.
1859 			 */
1860 			void *opt;
1861 			t_uscalar_t optlen = tudi->OPT_length;
1862 
1863 			opt = sogetoff(mp, tudi->OPT_offset,
1864 				optlen, __TPI_ALIGN_SIZE);
1865 			if (opt == NULL) {
1866 				/* The len/off falls outside mp */
1867 				freemsg(mp);
1868 				mutex_enter(&so->so_lock);
1869 				soseterror(so, EPROTO);
1870 				mutex_exit(&so->so_lock);
1871 				cmn_err(CE_WARN,
1872 				    "sockfs: T_unidata_ind with invalid "
1873 				    "optlen/offset %u/%d\n",
1874 				    optlen, tudi->OPT_offset);
1875 				return (NULL);
1876 			}
1877 			/*
1878 			 * If we received a unix close indication mark the
1879 			 * socket and discard this message.
1880 			 */
1881 			if (so_getopt_unix_close(opt, optlen)) {
1882 				mutex_enter(&so->so_lock);
1883 				sobreakconn(so, ECONNRESET);
1884 				mutex_exit(&so->so_lock);
1885 				strsetrerror(SOTOV(so), 0, 0, sogetrderr);
1886 				freemsg(mp);
1887 				*pollwakeups = POLLIN | POLLRDNORM;
1888 				*allmsgsigs = S_INPUT | S_RDNORM;
1889 				*wakeups = RSLEEP;
1890 				return (NULL);
1891 			}
1892 		}
1893 		*allmsgsigs = S_INPUT | S_RDNORM;
1894 		*pollwakeups = POLLIN | POLLRDNORM;
1895 		*wakeups = RSLEEP;
1896 		return (mp);
1897 	}
1898 
1899 	case T_OPTDATA_IND: {
1900 		struct T_optdata_ind	*tdi = &tpr->optdata_ind;
1901 
1902 		if (MBLKL(mp) < sizeof (struct T_optdata_ind)) {
1903 			cmn_err(CE_WARN,
1904 			    "sockfs: Too short T_OPTDATA_IND. Len = %ld\n",
1905 			    (ptrdiff_t)(MBLKL(mp)));
1906 			freemsg(mp);
1907 			return (NULL);
1908 		}
1909 		/*
1910 		 * Allow zero-length messages carrying options.
1911 		 * This is used when carrying the SO_UNIX_CLOSE option.
1912 		 */
1913 		if (so->so_family == AF_UNIX && msgdsize(mp->b_cont) == 0 &&
1914 		    tdi->OPT_length != 0) {
1915 			/*
1916 			 * Attempt to extract AF_UNIX close indication
1917 			 * from the options. Ignore any other options -
1918 			 * those are handled once the message is removed
1919 			 * from the queue.
1920 			 * The close indication message should not carry data.
1921 			 */
1922 			void *opt;
1923 			t_uscalar_t optlen = tdi->OPT_length;
1924 
1925 			opt = sogetoff(mp, tdi->OPT_offset,
1926 				optlen, __TPI_ALIGN_SIZE);
1927 			if (opt == NULL) {
1928 				/* The len/off falls outside mp */
1929 				freemsg(mp);
1930 				mutex_enter(&so->so_lock);
1931 				soseterror(so, EPROTO);
1932 				mutex_exit(&so->so_lock);
1933 				cmn_err(CE_WARN,
1934 				    "sockfs: T_optdata_ind with invalid "
1935 				    "optlen/offset %u/%d\n",
1936 				    optlen, tdi->OPT_offset);
1937 				return (NULL);
1938 			}
1939 			/*
1940 			 * If we received a close indication mark the
1941 			 * socket and discard this message.
1942 			 */
1943 			if (so_getopt_unix_close(opt, optlen)) {
1944 				mutex_enter(&so->so_lock);
1945 				socantsendmore(so);
1946 				mutex_exit(&so->so_lock);
1947 				strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
1948 				freemsg(mp);
1949 				return (NULL);
1950 			}
1951 		}
1952 		*allmsgsigs = S_INPUT | S_RDNORM;
1953 		*pollwakeups = POLLIN | POLLRDNORM;
1954 		*wakeups = RSLEEP;
1955 		return (mp);
1956 	}
1957 
1958 	case T_EXDATA_IND: {
1959 		mblk_t		*mctl, *mdata;
1960 
1961 		if (MBLKL(mp) < sizeof (struct T_exdata_ind)) {
1962 			cmn_err(CE_WARN,
1963 			    "sockfs: Too short T_EXDATA_IND. Len = %ld\n",
1964 			    (ptrdiff_t)(MBLKL(mp)));
1965 			freemsg(mp);
1966 			return (NULL);
1967 		}
1968 		/*
1969 		 * Ignore zero-length T_EXDATA_IND messages. These might be
1970 		 * generated by some transports.
1971 		 *
1972 		 * This is needed to prevent read (which skips the M_PROTO
1973 		 * part) to unexpectedly return 0 (or return EWOULDBLOCK
1974 		 * on a non-blocking socket after select/poll has indicated
1975 		 * that data is available).
1976 		 */
1977 		dprintso(so, 1,
1978 			("T_EXDATA_IND(%p): counts %d/%d state %s\n",
1979 			vp, so->so_oobsigcnt, so->so_oobcnt,
1980 			pr_state(so->so_state, so->so_mode)));
1981 
1982 		if (msgdsize(mp->b_cont) == 0) {
1983 			dprintso(so, 0,
1984 				("strsock_proto: zero length T_EXDATA_IND\n"));
1985 			freemsg(mp);
1986 			return (NULL);
1987 		}
1988 
1989 		/*
1990 		 * Split into the T_EXDATA_IND and the M_DATA part.
1991 		 * We process these three pieces separately:
1992 		 *	signal generation
1993 		 *	handling T_EXDATA_IND
1994 		 *	handling M_DATA component
1995 		 */
1996 		mctl = mp;
1997 		mdata = mctl->b_cont;
1998 		mctl->b_cont = NULL;
1999 		mutex_enter(&so->so_lock);
2000 		so_oob_sig(so, 0, allmsgsigs, pollwakeups);
2001 		mctl = so_oob_exdata(so, mctl, allmsgsigs, pollwakeups);
2002 		mdata = so_oob_data(so, mdata, allmsgsigs, pollwakeups);
2003 
2004 		/*
2005 		 * Pass the T_EXDATA_IND and the M_DATA back separately
2006 		 * by using b_next linkage. (The stream head will queue any
2007 		 * b_next linked messages separately.) This is needed
2008 		 * since MSGMARK applies to the last by of the message
2009 		 * hence we can not have any M_DATA component attached
2010 		 * to the marked T_EXDATA_IND. Note that the stream head
2011 		 * will not consolidate M_DATA messages onto an MSGMARK'ed
2012 		 * message in order to preserve the constraint that
2013 		 * the T_EXDATA_IND always is a separate message.
2014 		 */
2015 		ASSERT(mctl != NULL);
2016 		mctl->b_next = mdata;
2017 		mp = mctl;
2018 #ifdef DEBUG
2019 		if (mdata == NULL) {
2020 			dprintso(so, 1,
2021 				("after outofline T_EXDATA_IND(%p): "
2022 				"counts %d/%d  poll 0x%x sig 0x%x state %s\n",
2023 				vp, so->so_oobsigcnt,
2024 				so->so_oobcnt, *pollwakeups, *allmsgsigs,
2025 				pr_state(so->so_state, so->so_mode)));
2026 		} else {
2027 			dprintso(so, 1,
2028 				("after inline T_EXDATA_IND(%p): "
2029 				"counts %d/%d  poll 0x%x sig 0x%x state %s\n",
2030 				vp, so->so_oobsigcnt,
2031 				so->so_oobcnt, *pollwakeups, *allmsgsigs,
2032 				pr_state(so->so_state, so->so_mode)));
2033 		}
2034 #endif /* DEBUG */
2035 		mutex_exit(&so->so_lock);
2036 		*wakeups = RSLEEP;
2037 		return (mp);
2038 	}
2039 
2040 	case T_CONN_CON: {
2041 		struct T_conn_con	*conn_con;
2042 		void			*addr;
2043 		t_uscalar_t		addrlen;
2044 
2045 		/*
2046 		 * Verify the state, update the state to ISCONNECTED,
2047 		 * record the potentially new address in the message,
2048 		 * and drop the message.
2049 		 */
2050 		if (MBLKL(mp) < sizeof (struct T_conn_con)) {
2051 			cmn_err(CE_WARN,
2052 			    "sockfs: Too short T_CONN_CON. Len = %ld\n",
2053 			    (ptrdiff_t)(MBLKL(mp)));
2054 			freemsg(mp);
2055 			return (NULL);
2056 		}
2057 
2058 		mutex_enter(&so->so_lock);
2059 		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) !=
2060 		    SS_ISCONNECTING) {
2061 			mutex_exit(&so->so_lock);
2062 			dprintso(so, 1,
2063 				("T_CONN_CON: state %x\n", so->so_state));
2064 			freemsg(mp);
2065 			return (NULL);
2066 		}
2067 
2068 		conn_con = &tpr->conn_con;
2069 		addrlen = conn_con->RES_length;
2070 		/*
2071 		 * Allow the address to be of different size than sent down
2072 		 * in the T_CONN_REQ as long as it doesn't exceed the maxlen.
2073 		 * For AF_UNIX require the identical length.
2074 		 */
2075 		if (so->so_family == AF_UNIX ?
2076 		    addrlen != (t_uscalar_t)sizeof (so->so_ux_laddr) :
2077 		    addrlen > (t_uscalar_t)so->so_faddr_maxlen) {
2078 			cmn_err(CE_WARN,
2079 			    "sockfs: T_conn_con with different "
2080 			    "length %u/%d\n",
2081 			    addrlen, conn_con->RES_length);
2082 			soisdisconnected(so, EPROTO);
2083 			mutex_exit(&so->so_lock);
2084 			strsetrerror(SOTOV(so), 0, 0, sogetrderr);
2085 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2086 			strseteof(SOTOV(so), 1);
2087 			freemsg(mp);
2088 			/*
2089 			 * strseteof takes care of read side wakeups,
2090 			 * pollwakeups, and signals.
2091 			 */
2092 			*wakeups = WSLEEP;
2093 			*allmsgsigs = S_OUTPUT;
2094 			*pollwakeups = POLLOUT;
2095 			return (NULL);
2096 		}
2097 		addr = sogetoff(mp, conn_con->RES_offset, addrlen, 1);
2098 		if (addr == NULL) {
2099 			cmn_err(CE_WARN,
2100 			    "sockfs: T_conn_con with invalid "
2101 			    "addrlen/offset %u/%d\n",
2102 			    addrlen, conn_con->RES_offset);
2103 			mutex_exit(&so->so_lock);
2104 			strsetrerror(SOTOV(so), 0, 0, sogetrderr);
2105 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2106 			strseteof(SOTOV(so), 1);
2107 			freemsg(mp);
2108 			/*
2109 			 * strseteof takes care of read side wakeups,
2110 			 * pollwakeups, and signals.
2111 			 */
2112 			*wakeups = WSLEEP;
2113 			*allmsgsigs = S_OUTPUT;
2114 			*pollwakeups = POLLOUT;
2115 			return (NULL);
2116 		}
2117 
2118 		/*
2119 		 * Save for getpeername.
2120 		 */
2121 		if (so->so_family != AF_UNIX) {
2122 			so->so_faddr_len = (socklen_t)addrlen;
2123 			ASSERT(so->so_faddr_len <= so->so_faddr_maxlen);
2124 			bcopy(addr, so->so_faddr_sa, addrlen);
2125 			so->so_state |= SS_FADDR_VALID;
2126 		}
2127 
2128 		if (so->so_peercred != NULL)
2129 			crfree(so->so_peercred);
2130 		so->so_peercred = DB_CRED(mp);
2131 		so->so_cpid = DB_CPID(mp);
2132 		if (so->so_peercred != NULL)
2133 			crhold(so->so_peercred);
2134 
2135 		/* Wakeup anybody sleeping in sowaitconnected */
2136 		soisconnected(so);
2137 		mutex_exit(&so->so_lock);
2138 
2139 		/*
2140 		 * The socket is now available for sending data.
2141 		 */
2142 		*wakeups = WSLEEP;
2143 		*allmsgsigs = S_OUTPUT;
2144 		*pollwakeups = POLLOUT;
2145 		freemsg(mp);
2146 		return (NULL);
2147 	}
2148 
2149 	case T_CONN_IND:
2150 		/*
2151 		 * Verify the min size and queue the message on
2152 		 * the so_conn_ind_head/tail list.
2153 		 */
2154 		if (MBLKL(mp) < sizeof (struct T_conn_ind)) {
2155 			cmn_err(CE_WARN,
2156 			    "sockfs: Too short T_CONN_IND. Len = %ld\n",
2157 			    (ptrdiff_t)(MBLKL(mp)));
2158 			freemsg(mp);
2159 			return (NULL);
2160 		}
2161 
2162 #ifdef C2_AUDIT
2163 		if (audit_active)
2164 			audit_sock(T_CONN_IND, strvp2wq(vp), mp, 0);
2165 #endif /* C2_AUDIT */
2166 		if (!(so->so_state & SS_ACCEPTCONN)) {
2167 			cmn_err(CE_WARN,
2168 			    "sockfs: T_conn_ind on non-listening socket\n");
2169 			freemsg(mp);
2170 			return (NULL);
2171 		}
2172 		soqueueconnind(so, mp);
2173 		*allmsgsigs = S_INPUT | S_RDNORM;
2174 		*pollwakeups = POLLIN | POLLRDNORM;
2175 		*wakeups = RSLEEP;
2176 		return (NULL);
2177 
2178 	case T_ORDREL_IND:
2179 		if (MBLKL(mp) < sizeof (struct T_ordrel_ind)) {
2180 			cmn_err(CE_WARN,
2181 			    "sockfs: Too short T_ORDREL_IND. Len = %ld\n",
2182 			    (ptrdiff_t)(MBLKL(mp)));
2183 			freemsg(mp);
2184 			return (NULL);
2185 		}
2186 
2187 		/*
2188 		 * Some providers send this when not fully connected.
2189 		 * SunLink X.25 needs to retrieve disconnect reason after
2190 		 * disconnect for compatibility. It uses T_ORDREL_IND
2191 		 * instead of T_DISCON_IND so that it may use the
2192 		 * endpoint after a connect failure to retrieve the
2193 		 * reason using an ioctl. Thus we explicitly clear
2194 		 * SS_ISCONNECTING here for SunLink X.25.
2195 		 * This is a needed TPI violation.
2196 		 */
2197 		mutex_enter(&so->so_lock);
2198 		so->so_state &= ~SS_ISCONNECTING;
2199 		socantrcvmore(so);
2200 		mutex_exit(&so->so_lock);
2201 		strseteof(SOTOV(so), 1);
2202 		/*
2203 		 * strseteof takes care of read side wakeups,
2204 		 * pollwakeups, and signals.
2205 		 */
2206 		freemsg(mp);
2207 		return (NULL);
2208 
2209 	case T_DISCON_IND:
2210 		if (MBLKL(mp) < sizeof (struct T_discon_ind)) {
2211 			cmn_err(CE_WARN,
2212 			    "sockfs: Too short T_DISCON_IND. Len = %ld\n",
2213 			    (ptrdiff_t)(MBLKL(mp)));
2214 			freemsg(mp);
2215 			return (NULL);
2216 		}
2217 		if (so->so_state & SS_ACCEPTCONN) {
2218 			/*
2219 			 * This is a listener. Look for a queued T_CONN_IND
2220 			 * with a matching sequence number and remove it
2221 			 * from the list.
2222 			 * It is normal to not find the sequence number since
2223 			 * the soaccept might have already dequeued it
2224 			 * (in which case the T_CONN_RES will fail with
2225 			 * TBADSEQ).
2226 			 */
2227 			(void) soflushconnind(so, tpr->discon_ind.SEQ_number);
2228 			freemsg(mp);
2229 			return (0);
2230 		}
2231 
2232 		/*
2233 		 * Not a listener
2234 		 *
2235 		 * If SS_CANTRCVMORE for AF_UNIX ignore the discon_reason.
2236 		 * Such a discon_ind appears when the peer has first done
2237 		 * a shutdown() followed by a close() in which case we just
2238 		 * want to record socantsendmore.
2239 		 * In this case sockfs first receives a T_ORDREL_IND followed
2240 		 * by a T_DISCON_IND.
2241 		 * Note that for other transports (e.g. TCP) we need to handle
2242 		 * the discon_ind in this case since it signals an error.
2243 		 */
2244 		mutex_enter(&so->so_lock);
2245 		if ((so->so_state & SS_CANTRCVMORE) &&
2246 		    (so->so_family == AF_UNIX)) {
2247 			socantsendmore(so);
2248 			mutex_exit(&so->so_lock);
2249 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2250 			dprintso(so, 1,
2251 				("T_DISCON_IND: error %d\n", so->so_error));
2252 			freemsg(mp);
2253 			/*
2254 			 * Set these variables for caller to process them.
2255 			 * For the else part where T_DISCON_IND is processed,
2256 			 * this will be done in the function being called
2257 			 * (strsock_discon_ind())
2258 			 */
2259 			*wakeups = WSLEEP;
2260 			*allmsgsigs = S_OUTPUT;
2261 			*pollwakeups = POLLOUT;
2262 		} else if (so->so_flag & (SOASYNC_UNBIND | SOLOCKED)) {
2263 			/*
2264 			 * Deferred processing of T_DISCON_IND
2265 			 */
2266 			so_save_discon_ind(so, mp, strsock_discon_ind);
2267 			mutex_exit(&so->so_lock);
2268 		} else {
2269 			/*
2270 			 * Process T_DISCON_IND now
2271 			 */
2272 			(void) strsock_discon_ind(so, mp);
2273 			mutex_exit(&so->so_lock);
2274 		}
2275 		return (NULL);
2276 
2277 	case T_UDERROR_IND: {
2278 		struct T_uderror_ind	*tudi = &tpr->uderror_ind;
2279 		void			*addr;
2280 		t_uscalar_t		addrlen;
2281 		int			error;
2282 
2283 		dprintso(so, 0,
2284 			("T_UDERROR_IND: error %d\n", tudi->ERROR_type));
2285 
2286 		if (MBLKL(mp) < sizeof (struct T_uderror_ind)) {
2287 			cmn_err(CE_WARN,
2288 			    "sockfs: Too short T_UDERROR_IND. Len = %ld\n",
2289 			    (ptrdiff_t)(MBLKL(mp)));
2290 			freemsg(mp);
2291 			return (NULL);
2292 		}
2293 		/* Ignore on connection-oriented transports */
2294 		if (so->so_mode & SM_CONNREQUIRED) {
2295 			freemsg(mp);
2296 			eprintsoline(so, 0);
2297 			cmn_err(CE_WARN,
2298 			    "sockfs: T_uderror_ind on connection-oriented "
2299 			    "transport\n");
2300 			return (NULL);
2301 		}
2302 		addrlen = tudi->DEST_length;
2303 		addr = sogetoff(mp, tudi->DEST_offset, addrlen, 1);
2304 		if (addr == NULL) {
2305 			cmn_err(CE_WARN,
2306 			    "sockfs: T_uderror_ind with invalid "
2307 			    "addrlen/offset %u/%d\n",
2308 			    addrlen, tudi->DEST_offset);
2309 			freemsg(mp);
2310 			return (NULL);
2311 		}
2312 
2313 		/* Verify source address for connected socket. */
2314 		mutex_enter(&so->so_lock);
2315 		if (so->so_state & SS_ISCONNECTED) {
2316 			void *faddr;
2317 			t_uscalar_t faddr_len;
2318 			boolean_t match = B_FALSE;
2319 
2320 			switch (so->so_family) {
2321 			case AF_INET: {
2322 				/* Compare just IP address and port */
2323 				struct sockaddr_in *sin1, *sin2;
2324 
2325 				sin1 = (struct sockaddr_in *)so->so_faddr_sa;
2326 				sin2 = (struct sockaddr_in *)addr;
2327 				if (addrlen == sizeof (struct sockaddr_in) &&
2328 				    sin1->sin_port == sin2->sin_port &&
2329 				    sin1->sin_addr.s_addr ==
2330 				    sin2->sin_addr.s_addr)
2331 					match = B_TRUE;
2332 				break;
2333 			}
2334 			case AF_INET6: {
2335 				/* Compare just IP address and port. Not flow */
2336 				struct sockaddr_in6 *sin1, *sin2;
2337 
2338 				sin1 = (struct sockaddr_in6 *)so->so_faddr_sa;
2339 				sin2 = (struct sockaddr_in6 *)addr;
2340 				if (addrlen == sizeof (struct sockaddr_in6) &&
2341 				    sin1->sin6_port == sin2->sin6_port &&
2342 				    IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr,
2343 					&sin2->sin6_addr))
2344 					match = B_TRUE;
2345 				break;
2346 			}
2347 			case AF_UNIX:
2348 				faddr = &so->so_ux_faddr;
2349 				faddr_len =
2350 					(t_uscalar_t)sizeof (so->so_ux_faddr);
2351 				if (faddr_len == addrlen &&
2352 				    bcmp(addr, faddr, addrlen) == 0)
2353 					match = B_TRUE;
2354 				break;
2355 			default:
2356 				faddr = so->so_faddr_sa;
2357 				faddr_len = (t_uscalar_t)so->so_faddr_len;
2358 				if (faddr_len == addrlen &&
2359 				    bcmp(addr, faddr, addrlen) == 0)
2360 					match = B_TRUE;
2361 				break;
2362 			}
2363 
2364 			if (!match) {
2365 #ifdef DEBUG
2366 				dprintso(so, 0,
2367 					("sockfs: T_UDERR_IND mismatch: %s - ",
2368 					pr_addr(so->so_family,
2369 						(struct sockaddr *)addr,
2370 						addrlen)));
2371 				dprintso(so, 0, ("%s\n",
2372 					pr_addr(so->so_family, so->so_faddr_sa,
2373 						so->so_faddr_len)));
2374 #endif /* DEBUG */
2375 				mutex_exit(&so->so_lock);
2376 				freemsg(mp);
2377 				return (NULL);
2378 			}
2379 			/*
2380 			 * Make the write error nonpersistent. If the error
2381 			 * is zero we use ECONNRESET.
2382 			 * This assumes that the name space for ERROR_type
2383 			 * is the errno name space.
2384 			 */
2385 			if (tudi->ERROR_type != 0)
2386 				error = tudi->ERROR_type;
2387 			else
2388 				error = ECONNRESET;
2389 
2390 			soseterror(so, error);
2391 			mutex_exit(&so->so_lock);
2392 			strsetrerror(SOTOV(so), 0, 0, sogetrderr);
2393 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2394 			*wakeups = RSLEEP | WSLEEP;
2395 			*allmsgsigs = S_INPUT | S_RDNORM | S_OUTPUT;
2396 			*pollwakeups = POLLIN | POLLRDNORM | POLLOUT;
2397 			freemsg(mp);
2398 			return (NULL);
2399 		}
2400 		/*
2401 		 * If the application asked for delayed errors
2402 		 * record the T_UDERROR_IND so_eaddr_mp and the reason in
2403 		 * so_delayed_error for delayed error posting. If the reason
2404 		 * is zero use ECONNRESET.
2405 		 * Note that delayed error indications do not make sense for
2406 		 * AF_UNIX sockets since sendto checks that the destination
2407 		 * address is valid at the time of the sendto.
2408 		 */
2409 		if (!(so->so_options & SO_DGRAM_ERRIND)) {
2410 			mutex_exit(&so->so_lock);
2411 			freemsg(mp);
2412 			return (NULL);
2413 		}
2414 		if (so->so_eaddr_mp != NULL)
2415 			freemsg(so->so_eaddr_mp);
2416 
2417 		so->so_eaddr_mp = mp;
2418 		if (tudi->ERROR_type != 0)
2419 			error = tudi->ERROR_type;
2420 		else
2421 			error = ECONNRESET;
2422 		so->so_delayed_error = (ushort_t)error;
2423 		mutex_exit(&so->so_lock);
2424 		return (NULL);
2425 	}
2426 
2427 	case T_ERROR_ACK:
2428 		dprintso(so, 0,
2429 			("strsock_proto: T_ERROR_ACK for %d, error %d/%d\n",
2430 			tpr->error_ack.ERROR_prim,
2431 			tpr->error_ack.TLI_error,
2432 			tpr->error_ack.UNIX_error));
2433 
2434 		if (MBLKL(mp) < sizeof (struct T_error_ack)) {
2435 			cmn_err(CE_WARN,
2436 			    "sockfs: Too short T_ERROR_ACK. Len = %ld\n",
2437 			    (ptrdiff_t)(MBLKL(mp)));
2438 			freemsg(mp);
2439 			return (NULL);
2440 		}
2441 		/*
2442 		 * Check if we were waiting for the async message
2443 		 */
2444 		mutex_enter(&so->so_lock);
2445 		if ((so->so_flag & SOASYNC_UNBIND) &&
2446 		    tpr->error_ack.ERROR_prim == T_UNBIND_REQ) {
2447 			so_unlock_single(so, SOASYNC_UNBIND);
2448 			mutex_exit(&so->so_lock);
2449 			freemsg(mp);
2450 			return (NULL);
2451 		}
2452 		mutex_exit(&so->so_lock);
2453 		soqueueack(so, mp);
2454 		return (NULL);
2455 
2456 	case T_OK_ACK:
2457 		if (MBLKL(mp) < sizeof (struct T_ok_ack)) {
2458 			cmn_err(CE_WARN,
2459 			    "sockfs: Too short T_OK_ACK. Len = %ld\n",
2460 			    (ptrdiff_t)(MBLKL(mp)));
2461 			freemsg(mp);
2462 			return (NULL);
2463 		}
2464 		/*
2465 		 * Check if we were waiting for the async message
2466 		 */
2467 		mutex_enter(&so->so_lock);
2468 		if ((so->so_flag & SOASYNC_UNBIND) &&
2469 		    tpr->ok_ack.CORRECT_prim == T_UNBIND_REQ) {
2470 			dprintso(so, 1,
2471 				("strsock_proto: T_OK_ACK async unbind\n"));
2472 			so_unlock_single(so, SOASYNC_UNBIND);
2473 			mutex_exit(&so->so_lock);
2474 			freemsg(mp);
2475 			return (NULL);
2476 		}
2477 		mutex_exit(&so->so_lock);
2478 		soqueueack(so, mp);
2479 		return (NULL);
2480 
2481 	case T_INFO_ACK:
2482 		if (MBLKL(mp) < sizeof (struct T_info_ack)) {
2483 			cmn_err(CE_WARN,
2484 			    "sockfs: Too short T_INFO_ACK. Len = %ld\n",
2485 			    (ptrdiff_t)(MBLKL(mp)));
2486 			freemsg(mp);
2487 			return (NULL);
2488 		}
2489 		soqueueack(so, mp);
2490 		return (NULL);
2491 
2492 	case T_CAPABILITY_ACK:
2493 		/*
2494 		 * A T_capability_ack need only be large enough to hold
2495 		 * the PRIM_type and CAP_bits1 fields; checking for anything
2496 		 * larger might reject a correct response from an older
2497 		 * provider.
2498 		 */
2499 		if (MBLKL(mp) < 2 * sizeof (t_uscalar_t)) {
2500 			cmn_err(CE_WARN,
2501 			    "sockfs: Too short T_CAPABILITY_ACK. Len = %ld\n",
2502 			    (ptrdiff_t)(MBLKL(mp)));
2503 			freemsg(mp);
2504 			return (NULL);
2505 		}
2506 		soqueueack(so, mp);
2507 		return (NULL);
2508 
2509 	case T_BIND_ACK:
2510 		if (MBLKL(mp) < sizeof (struct T_bind_ack)) {
2511 			cmn_err(CE_WARN,
2512 			    "sockfs: Too short T_BIND_ACK. Len = %ld\n",
2513 			    (ptrdiff_t)(MBLKL(mp)));
2514 			freemsg(mp);
2515 			return (NULL);
2516 		}
2517 		soqueueack(so, mp);
2518 		return (NULL);
2519 
2520 	case T_OPTMGMT_ACK:
2521 		if (MBLKL(mp) < sizeof (struct T_optmgmt_ack)) {
2522 			cmn_err(CE_WARN,
2523 			    "sockfs: Too short T_OPTMGMT_ACK. Len = %ld\n",
2524 			    (ptrdiff_t)(MBLKL(mp)));
2525 			freemsg(mp);
2526 			return (NULL);
2527 		}
2528 		soqueueack(so, mp);
2529 		return (NULL);
2530 	default:
2531 #ifdef DEBUG
2532 		cmn_err(CE_WARN,
2533 			"sockfs: unknown TPI primitive %d received\n",
2534 			tpr->type);
2535 #endif /* DEBUG */
2536 		freemsg(mp);
2537 		return (NULL);
2538 	}
2539 }
2540 
2541 /*
2542  * This routine is registered with the stream head to receive other
2543  * (non-data, and non-proto) messages.
2544  *
2545  * Returns NULL if the message was consumed.
2546  * Returns an mblk to make that mblk be processed by the stream head.
2547  *
2548  * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and
2549  * *pollwakeups) for the stream head to take action on.
2550  */
2551 static mblk_t *
2552 strsock_misc(vnode_t *vp, mblk_t *mp,
2553 		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
2554 		strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
2555 {
2556 	struct sonode *so;
2557 
2558 	so = VTOSO(vp);
2559 
2560 	dprintso(so, 1, ("strsock_misc(%p, %p, 0x%x)\n",
2561 			vp, mp, DB_TYPE(mp)));
2562 
2563 	/* Set default return values */
2564 	*wakeups = *allmsgsigs = *firstmsgsigs = *pollwakeups = 0;
2565 
2566 	switch (DB_TYPE(mp)) {
2567 	case M_PCSIG:
2568 		/*
2569 		 * This assumes that an M_PCSIG for the urgent data arrives
2570 		 * before the corresponding T_EXDATA_IND.
2571 		 *
2572 		 * Note: Just like in SunOS 4.X and 4.4BSD a poll will be
2573 		 * awoken before the urgent data shows up.
2574 		 * For OOBINLINE this can result in select returning
2575 		 * only exceptions as opposed to except|read.
2576 		 */
2577 		if (*mp->b_rptr == SIGURG) {
2578 			mutex_enter(&so->so_lock);
2579 			dprintso(so, 1,
2580 				("SIGURG(%p): counts %d/%d state %s\n",
2581 				vp, so->so_oobsigcnt,
2582 				so->so_oobcnt,
2583 				pr_state(so->so_state, so->so_mode)));
2584 			so_oob_sig(so, 1, allmsgsigs, pollwakeups);
2585 			dprintso(so, 1,
2586 				("after SIGURG(%p): counts %d/%d "
2587 				" poll 0x%x sig 0x%x state %s\n",
2588 				vp, so->so_oobsigcnt,
2589 				so->so_oobcnt, *pollwakeups, *allmsgsigs,
2590 				pr_state(so->so_state, so->so_mode)));
2591 			mutex_exit(&so->so_lock);
2592 		}
2593 		freemsg(mp);
2594 		return (NULL);
2595 
2596 	case M_SIG:
2597 	case M_HANGUP:
2598 	case M_UNHANGUP:
2599 	case M_ERROR:
2600 		/* M_ERRORs etc are ignored */
2601 		freemsg(mp);
2602 		return (NULL);
2603 
2604 	case M_FLUSH:
2605 		/*
2606 		 * Do not flush read queue. If the M_FLUSH
2607 		 * arrives because of an impending T_discon_ind
2608 		 * we still have to keep any queued data - this is part of
2609 		 * socket semantics.
2610 		 */
2611 		if (*mp->b_rptr & FLUSHW) {
2612 			*mp->b_rptr &= ~FLUSHR;
2613 			return (mp);
2614 		}
2615 		freemsg(mp);
2616 		return (NULL);
2617 
2618 	default:
2619 		return (mp);
2620 	}
2621 }
2622 
2623 
2624 /* Register to receive signals for certain events */
2625 int
2626 so_set_asyncsigs(vnode_t *vp, pid_t pgrp, int events, int mode, cred_t *cr)
2627 {
2628 	struct strsigset ss;
2629 	int32_t rval;
2630 
2631 	/*
2632 	 * Note that SOLOCKED will be set except for the call from soaccept().
2633 	 */
2634 	ASSERT(!mutex_owned(&VTOSO(vp)->so_lock));
2635 	ss.ss_pid = pgrp;
2636 	ss.ss_events = events;
2637 	return (strioctl(vp, I_ESETSIG, (intptr_t)&ss, mode, K_TO_K, cr,
2638 	    &rval));
2639 }
2640 
2641 
2642 /* Register for events matching the SS_ASYNC flag */
2643 int
2644 so_set_events(struct sonode *so, vnode_t *vp, cred_t *cr)
2645 {
2646 	int events = so->so_state & SS_ASYNC ?
2647 	    S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT :
2648 	    S_RDBAND | S_BANDURG;
2649 
2650 	return (so_set_asyncsigs(vp, so->so_pgrp, events, 0, cr));
2651 }
2652 
2653 
2654 /* Change the SS_ASYNC flag, and update signal delivery if needed */
2655 int
2656 so_flip_async(struct sonode *so, vnode_t *vp, int mode, cred_t *cr)
2657 {
2658 	ASSERT(mutex_owned(&so->so_lock));
2659 	if (so->so_pgrp != 0) {
2660 		int error;
2661 		int events = so->so_state & SS_ASYNC ?		/* Old flag */
2662 		    S_RDBAND | S_BANDURG :			/* New sigs */
2663 		    S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT;
2664 
2665 		so_lock_single(so);
2666 		mutex_exit(&so->so_lock);
2667 
2668 		error = so_set_asyncsigs(vp, so->so_pgrp, events, mode, cr);
2669 
2670 		mutex_enter(&so->so_lock);
2671 		so_unlock_single(so, SOLOCKED);
2672 		if (error)
2673 			return (error);
2674 	}
2675 	so->so_state ^= SS_ASYNC;
2676 	return (0);
2677 }
2678 
2679 /*
2680  * Set new pid/pgrp for SIGPOLL (or SIGIO for FIOASYNC mode), replacing
2681  * any existing one.  If passed zero, just clear the existing one.
2682  */
2683 int
2684 so_set_siggrp(struct sonode *so, vnode_t *vp, pid_t pgrp, int mode, cred_t *cr)
2685 {
2686 	int events = so->so_state & SS_ASYNC ?
2687 	    S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT :
2688 	    S_RDBAND | S_BANDURG;
2689 	int error;
2690 
2691 	ASSERT(mutex_owned(&so->so_lock));
2692 
2693 	/*
2694 	 * Change socket process (group).
2695 	 *
2696 	 * strioctl (via so_set_asyncsigs) will perform permission check and
2697 	 * also keep a PID_HOLD to prevent the pid from being reused.
2698 	 */
2699 	so_lock_single(so);
2700 	mutex_exit(&so->so_lock);
2701 
2702 	if (pgrp != 0) {
2703 		dprintso(so, 1, ("setown: adding pgrp %d ev 0x%x\n",
2704 		    pgrp, events));
2705 		error = so_set_asyncsigs(vp, pgrp, events, mode, cr);
2706 		if (error != 0) {
2707 			eprintsoline(so, error);
2708 			goto bad;
2709 		}
2710 	}
2711 	/* Remove the previously registered process/group */
2712 	if (so->so_pgrp != 0) {
2713 		dprintso(so, 1, ("setown: removing pgrp %d\n", so->so_pgrp));
2714 		error = so_set_asyncsigs(vp, so->so_pgrp, 0, mode, cr);
2715 		if (error != 0) {
2716 			eprintsoline(so, error);
2717 			error = 0;
2718 		}
2719 	}
2720 	mutex_enter(&so->so_lock);
2721 	so_unlock_single(so, SOLOCKED);
2722 	so->so_pgrp = pgrp;
2723 	return (0);
2724 bad:
2725 	mutex_enter(&so->so_lock);
2726 	so_unlock_single(so, SOLOCKED);
2727 	return (error);
2728 }
2729 
2730 
2731 
/*
 * Best-effort translation of TLI(/XTI) errors into system errors.
 *
 * The table is indexed by the TLI error value (see tlitosyserr()); the
 * leading comment on each row gives the array index and the trailing
 * comment names the TLI error that row stands for.
 */
static const int tli_errs[] = {
	/*  0 */ 0,			/* no error */
	/*  1 */ EADDRNOTAVAIL,		/* TBADADDR */
	/*  2 */ ENOPROTOOPT,		/* TBADOPT */
	/*  3 */ EACCES,		/* TACCES */
	/*  4 */ EBADF,			/* TBADF */
	/*  5 */ EADDRNOTAVAIL,		/* TNOADDR */
	/*  6 */ EPROTO,		/* TOUTSTATE */
	/*  7 */ ECONNABORTED,		/* TBADSEQ */
	/*  8 */ 0,			/* TSYSERR - will never get */
	/*  9 */ EPROTO,		/* TLOOK - should never be sent */
					/* by transport */
	/* 10 */ EMSGSIZE,		/* TBADDATA */
	/* 11 */ EMSGSIZE,		/* TBUFOVFLW */
	/* 12 */ EPROTO,		/* TFLOW */
	/* 13 */ EWOULDBLOCK,		/* TNODATA */
	/* 14 */ EPROTO,		/* TNODIS */
	/* 15 */ EPROTO,		/* TNOUDERR */
	/* 16 */ EINVAL,		/* TBADFLAG */
	/* 17 */ EPROTO,		/* TNOREL */
	/* 18 */ EOPNOTSUPP,		/* TNOTSUPPORT */
	/* 19 */ EPROTO,		/* TSTATECHNG */

	/* The remaining rows are the XTI expansion of the error namespace */
	/* 20 */ EPROTO,		/* TNOSTRUCTYPE - never sent by */
					/* transport */
	/* 21 */ EPROTO,		/* TBADNAME - never sent by transport */
	/* 22 */ EPROTO,		/* TBADQLEN - never sent by transport */
	/* 23 */ EADDRINUSE,		/* TADDRBUSY */
	/* 24 */ EBADF,			/* TINDOUT */
	/* 25 */ EBADF,			/* TPROVMISMATCH */
	/* 26 */ EBADF,			/* TRESQLEN */
	/* 27 */ EBADF,			/* TRESADDR */
	/* 28 */ EPROTO,		/* TQFULL - never sent by transport */
	/* 29 */ EPROTO,		/* TPROTO */
};
2768 
2769 static int
2770 tlitosyserr(int terr)
2771 {
2772 	ASSERT(terr != TSYSERR);
2773 	if (terr >= (sizeof (tli_errs) / sizeof (tli_errs[0])))
2774 		return (EPROTO);
2775 	else
2776 		return (tli_errs[terr]);
2777 }
2778