xref: /titanic_51/usr/src/uts/common/fs/sockfs/sockstr.c (revision 83fcdc8cfa9b16b358b13c5dd920d71bbaf4a8b5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/inttypes.h>
31 #include <sys/t_lock.h>
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/buf.h>
35 #include <sys/conf.h>
36 #include <sys/cred.h>
37 #include <sys/kmem.h>
38 #include <sys/sysmacros.h>
39 #include <sys/vfs.h>
40 #include <sys/vnode.h>
41 #include <sys/debug.h>
42 #include <sys/errno.h>
43 #include <sys/time.h>
44 #include <sys/file.h>
45 #include <sys/user.h>
46 #include <sys/stream.h>
47 #include <sys/strsubr.h>
48 #include <sys/esunddi.h>
49 #include <sys/flock.h>
50 #include <sys/modctl.h>
51 #include <sys/vtrace.h>
52 #include <sys/strsun.h>
53 #include <sys/cmn_err.h>
54 #include <sys/proc.h>
55 #include <sys/ddi.h>
56 #include <sys/kmem_impl.h>
57 
58 #include <sys/suntpi.h>
59 #include <sys/socket.h>
60 #include <sys/sockio.h>
61 #include <sys/socketvar.h>
62 #include <netinet/in.h>
63 
64 #include <sys/tiuser.h>
65 #define	_SUN_TPI_VERSION	2
66 #include <sys/tihdr.h>
67 
68 #include <c2/audit.h>
69 
/* Default socket version; initialised to SOV_SOCKSTREAM. */
int so_default_version = SOV_SOCKSTREAM;

#ifdef DEBUG
/* Set sockdebug to print debug messages when SO_DEBUG is set */
int sockdebug = 0;

/* Set sockprinterr to print error messages when SO_DEBUG is set */
int sockprinterr = 0;

/*
 * Set so_default_options to SO_DEBUG if all sockets should be created
 * with SO_DEBUG set. This is needed to get debug printouts from the
 * socket() call itself.
 * so_strinit() copies this value into so_options on DEBUG kernels.
 */
int so_default_options = 0;
#endif /* DEBUG */

#ifdef SOCK_TEST
/*
 * Set to number of ticks to limit cv_waits for code coverage testing.
 * Set to 1000 when SO_DEBUG is set to 2.
 * sowaitack() substitutes this value when its caller asked for an
 * unbounded wait (wait == 0) and this limit is nonzero.
 */
clock_t sock_test_timelimit = 0;
#endif /* SOCK_TEST */

/*
 * For concurrency testing of e.g. opening /dev/ip which does not
 * handle T_INFO_REQ messages.
 * When nonzero, do_tinfo() and do_tcapability() skip requesting the
 * transport info and set so_addr_size to 0 instead.
 */
int so_no_tinfo = 0;

/*
 * Timeout for getting a T_CAPABILITY_ACK - it is possible for a provider
 * to simply ignore the T_CAPABILITY_REQ.
 * do_tcapability() multiplies this by hz before waiting.
 */
clock_t	sock_capability_timeout	= 2;	/* seconds */
106 
/*
 * Forward declarations of file-local routines that are referenced
 * before their definitions.
 */
static int	do_tcapability(struct sonode *so, t_uscalar_t cap_bits1);
static void	so_removehooks(struct sonode *so);

/* Read-side hooks handed to strsetrputhooks() by so_installhooks(). */
static mblk_t *strsock_proto(vnode_t *vp, mblk_t *mp,
		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
		strsigset_t *allmsgsigs, strpollset_t *pollwakeups);
static mblk_t *strsock_misc(vnode_t *vp, mblk_t *mp,
		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
		strsigset_t *allmsgsigs, strpollset_t *pollwakeups);

/* Maps a TLI error to an errno value; used by sowaitprim(). */
static int tlitosyserr(int terr);
118 
119 /*
120  * Convert a socket to a stream. Invoked when the illusory sockmod
121  * is popped from the stream.
122  * Change the stream head back to default operation without losing
123  * any messages (T_conn_ind's are moved to the stream head queue).
124  */
125 int
126 so_sock2stream(struct sonode *so)
127 {
128 	struct vnode		*vp = SOTOV(so);
129 	queue_t			*rq;
130 	mblk_t			*mp;
131 	int			error = 0;
132 
133 	ASSERT(MUTEX_HELD(&so->so_plumb_lock));
134 
135 	mutex_enter(&so->so_lock);
136 	so_lock_single(so);
137 
138 	ASSERT(so->so_version != SOV_STREAM);
139 
140 	if (so->so_state & SS_DIRECT) {
141 		mblk_t **mpp;
142 		int rval;
143 
144 		/*
145 		 * Tell the transport below that sockmod is being popped
146 		 */
147 		mutex_exit(&so->so_lock);
148 		error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K, CRED(),
149 		    &rval);
150 		mutex_enter(&so->so_lock);
151 		if (error != 0) {
152 			dprintso(so, 0, ("so_sock2stream(%p): "
153 			    "_SIOCSOCKFALLBACK failed\n", so));
154 			goto exit;
155 		}
156 		so->so_state &= ~SS_DIRECT;
157 
158 		for (mpp = &so->so_conn_ind_head; (mp = *mpp) != NULL;
159 		    mpp = &mp->b_next) {
160 			struct T_conn_ind	*conn_ind;
161 
162 			/*
163 			 * strsock_proto() has already verified the length of
164 			 * this message block.
165 			 */
166 			ASSERT(MBLKL(mp) >= sizeof (struct T_conn_ind));
167 
168 			conn_ind = (struct T_conn_ind *)mp->b_rptr;
169 			if (conn_ind->OPT_length == 0 &&
170 			    conn_ind->OPT_offset == 0)
171 				continue;
172 
173 			if (DB_REF(mp) > 1) {
174 				mblk_t	*newmp;
175 				size_t	length;
176 				cred_t	*cr;
177 
178 				/*
179 				 * Copy the message block because it is used
180 				 * elsewhere, too.
181 				 */
182 				length = MBLKL(mp);
183 				newmp = soallocproto(length, _ALLOC_INTR);
184 				if (newmp == NULL) {
185 					error = EINTR;
186 					goto exit;
187 				}
188 				bcopy(mp->b_rptr, newmp->b_wptr, length);
189 				newmp->b_wptr += length;
190 				newmp->b_next = mp->b_next;
191 				cr = DB_CRED(mp);
192 				if (cr != NULL)
193 					mblk_setcred(newmp, cr);
194 				DB_CPID(newmp) = DB_CPID(mp);
195 
196 				/*
197 				 * Link the new message block into the queue
198 				 * and free the old one.
199 				 */
200 				*mpp = newmp;
201 				mp->b_next = NULL;
202 				freemsg(mp);
203 
204 				mp = newmp;
205 				conn_ind = (struct T_conn_ind *)mp->b_rptr;
206 			}
207 
208 			/*
209 			 * Remove options added by TCP for accept fast-path.
210 			 */
211 			conn_ind->OPT_length = 0;
212 			conn_ind->OPT_offset = 0;
213 		}
214 	}
215 
216 	so->so_version = SOV_STREAM;
217 	so->so_priv = NULL;
218 
219 	/*
220 	 * Remove the hooks in the stream head to avoid queuing more
221 	 * packets in sockfs.
222 	 */
223 	mutex_exit(&so->so_lock);
224 	so_removehooks(so);
225 	mutex_enter(&so->so_lock);
226 
227 	/*
228 	 * Clear any state related to urgent data. Leave any T_EXDATA_IND
229 	 * on the queue - the behavior of urgent data after a switch is
230 	 * left undefined.
231 	 */
232 	so->so_error = so->so_delayed_error = 0;
233 	freemsg(so->so_oobmsg);
234 	so->so_oobmsg = NULL;
235 	so->so_oobsigcnt = so->so_oobcnt = 0;
236 
237 	so->so_state &= ~(SS_RCVATMARK|SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA|
238 	    SS_HASCONNIND|SS_SAVEDEOR);
239 	ASSERT(so_verify_oobstate(so));
240 
241 	freemsg(so->so_ack_mp);
242 	so->so_ack_mp = NULL;
243 
244 	/*
245 	 * Flush the T_DISCON_IND on so_discon_ind_mp.
246 	 */
247 	so_flush_discon_ind(so);
248 
249 	/*
250 	 * Move any queued T_CONN_IND messages to stream head queue.
251 	 */
252 	rq = RD(strvp2wq(vp));
253 	while ((mp = so->so_conn_ind_head) != NULL) {
254 		so->so_conn_ind_head = mp->b_next;
255 		mp->b_next = NULL;
256 		if (so->so_conn_ind_head == NULL) {
257 			ASSERT(so->so_conn_ind_tail == mp);
258 			so->so_conn_ind_tail = NULL;
259 		}
260 		dprintso(so, 0,
261 			("so_sock2stream(%p): moving T_CONN_IND\n",
262 			so));
263 
264 		/* Drop lock across put() */
265 		mutex_exit(&so->so_lock);
266 		put(rq, mp);
267 		mutex_enter(&so->so_lock);
268 	}
269 
270 exit:
271 	ASSERT(MUTEX_HELD(&so->so_lock));
272 	so_unlock_single(so, SOLOCKED);
273 	mutex_exit(&so->so_lock);
274 	return (error);
275 }
276 
/*
 * Convert a stream back to a socket. This is invoked when the illusory
 * sockmod is pushed on a stream (where the stream was "created" by
 * popping the illusory sockmod).
 * This routine can not recreate the socket state (certain aspects of
 * it like urgent data state and the bound/connected addresses for AF_UNIX
 * sockets can not be recreated by asking the transport for information).
 * Thus this routine implicitly assumes that the socket is in an initial
 * state (as if it was just created). It flushes any messages queued on the
 * read queue to avoid dealing with e.g. TPI acks or T_exdata_ind messages.
 *
 * The caller must hold so_plumb_lock. so_lock is taken and dropped
 * several times here; the socket stays single-threaded (so_lock_single)
 * from the first acquisition until return.
 */
void
so_stream2sock(struct sonode *so)
{
	struct vnode *vp = SOTOV(so);

	ASSERT(MUTEX_HELD(&so->so_plumb_lock));

	mutex_enter(&so->so_lock);
	so_lock_single(so);
	ASSERT(so->so_version == SOV_STREAM);
	so->so_version = SOV_SOCKSTREAM;
	so->so_pushcnt = 0;
	mutex_exit(&so->so_lock);

	/*
	 * Set a permanent error to force any thread in sorecvmsg to
	 * return (and drop SOREADLOCKED). Clear the error once
	 * we have SOREADLOCKED.
	 * This makes a read sleeping during the I_PUSH of sockmod return
	 * EIO.
	 */
	strsetrerror(SOTOV(so), EIO, 1, NULL);

	/*
	 * Get the read lock before flushing data to avoid
	 * problems with the T_EXDATA_IND MSG_PEEK code in sorecvmsg.
	 */
	mutex_enter(&so->so_lock);
	(void) so_lock_read(so, 0);	/* Set SOREADLOCKED */
	mutex_exit(&so->so_lock);

	/* Read lock obtained: remove the forced EIO and reinstall hooks. */
	strsetrerror(SOTOV(so), 0, 0, NULL);
	so_installhooks(so);

	/*
	 * Flush everything on the read queue.
	 * This ensures that no T_CONN_IND remain and that no T_EXDATA_IND
	 * remain; those types of messages would confuse sockfs.
	 */
	strflushrq(vp, FLUSHALL);
	mutex_enter(&so->so_lock);

	/*
	 * Flush the T_DISCON_IND on so_discon_ind_mp.
	 */
	so_flush_discon_ind(so);
	so_unlock_read(so);	/* Clear SOREADLOCKED */

	so_unlock_single(so, SOLOCKED);
	mutex_exit(&so->so_lock);
}
339 
340 /*
341  * Install the hooks in the stream head.
342  */
343 void
344 so_installhooks(struct sonode *so)
345 {
346 	struct vnode *vp = SOTOV(so);
347 
348 	strsetrputhooks(vp, SH_SIGALLDATA | SH_IGN_ZEROLEN | SH_CONSOL_DATA,
349 	    strsock_proto, strsock_misc);
350 	strsetwputhooks(vp, SH_SIGPIPE | SH_RECHECK_ERR, 0);
351 }
352 
353 /*
354  * Remove the hooks in the stream head.
355  */
356 static void
357 so_removehooks(struct sonode *so)
358 {
359 	struct vnode *vp = SOTOV(so);
360 
361 	strsetrputhooks(vp, 0, NULL, NULL);
362 	strsetwputhooks(vp, 0, STRTIMOUT);
363 	/*
364 	 * Leave read behavior as it would have been for a normal
365 	 * stream i.e. a read of an M_PROTO will fail.
366 	 */
367 }
368 
/*
 * Initialize the streams side of a socket including
 * T_info_req/ack processing. If tso is not NULL its values are used thereby
 * avoiding the T_INFO_REQ.
 *
 * so	- the socket being initialised.
 * tso	- optional template socket whose transport parameters are copied.
 *
 * Returns 0 on success or the error reported by do_tcapability().
 * On an error return the preallocated so_unbind_mp and the installed
 * hooks are left in place.
 */
int
so_strinit(struct sonode *so, struct sonode *tso)
{
	struct vnode *vp = SOTOV(so);
	struct stdata *stp;
	mblk_t *mp;
	int error;

	dprintso(so, 1, ("so_strinit(%p)\n", so));

	/* Preallocate an unbind_req message */
	mp = soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP);
	mutex_enter(&so->so_lock);
	so->so_unbind_mp = mp;
#ifdef DEBUG
	so->so_options = so_default_options;
#endif /* DEBUG */
	mutex_exit(&so->so_lock);

	so_installhooks(so);

	/*
	 * The T_CAPABILITY_REQ should be the first message sent down because
	 * at least TCP has a fast-path for this which avoids timeouts while
	 * waiting for the T_CAPABILITY_ACK under high system load.
	 */
	if (tso == NULL) {
		error = do_tcapability(so, TC1_ACCEPTOR_ID | TC1_INFO);
		if (error)
			return (error);
	} else {
		/* Inherit the transport parameters from the template. */
		mutex_enter(&so->so_lock);
		so->so_tsdu_size = tso->so_tsdu_size;
		so->so_etsdu_size = tso->so_etsdu_size;
		so->so_addr_size = tso->so_addr_size;
		so->so_opt_size = tso->so_opt_size;
		so->so_tidu_size = tso->so_tidu_size;
		so->so_serv_type = tso->so_serv_type;
		/* The acceptor id is per socket and is never inherited. */
		so->so_mode = tso->so_mode & ~SM_ACCEPTOR_ID;
		mutex_exit(&so->so_lock);

		/* the following do_tcapability may update so->so_mode */
		if ((tso->so_serv_type != T_CLTS) &&
		    !(so->so_state & SS_DIRECT)) {
			error = do_tcapability(so, TC1_ACCEPTOR_ID);
			if (error)
				return (error);
		}
	}
	/*
	 * If the addr_size is 0 we treat it as already bound
	 * and connected. This is used by the routing socket.
	 * We set the addr_size to something to allocate the address
	 * structures.
	 */
	if (so->so_addr_size == 0) {
		so->so_state |= SS_ISBOUND | SS_ISCONNECTED;
		/* Address size can vary with address families. */
		if (so->so_family == AF_INET6)
			so->so_addr_size =
			    (t_scalar_t)sizeof (struct sockaddr_in6);
		else
			so->so_addr_size =
			    (t_scalar_t)sizeof (struct sockaddr_in);
		ASSERT(so->so_unbind_mp);
	}
	/*
	 * Allocate the addresses.
	 * A single allocation holds both: the local address occupies the
	 * first so_laddr_maxlen bytes and the foreign address the rest.
	 */
	ASSERT(so->so_laddr_sa == NULL && so->so_faddr_sa == NULL);
	ASSERT(so->so_laddr_len == 0 && so->so_faddr_len == 0);
	so->so_laddr_maxlen = so->so_faddr_maxlen =
		    P2ROUNDUP(so->so_addr_size, KMEM_ALIGN);
	so->so_laddr_sa = kmem_alloc(so->so_laddr_maxlen * 2, KM_SLEEP);
	so->so_faddr_sa = (struct sockaddr *)((caddr_t)so->so_laddr_sa
		    + so->so_laddr_maxlen);

	if (so->so_family == AF_UNIX) {
		/*
		 * Initialize AF_UNIX related fields.
		 */
		bzero(&so->so_ux_laddr, sizeof (so->so_ux_laddr));
		bzero(&so->so_ux_faddr, sizeof (so->so_ux_faddr));
	}

	stp = vp->v_stream;
	/*
	 * Have to keep minpsz at zero in order to allow write/send of zero
	 * bytes.
	 */
	mutex_enter(&stp->sd_lock);
	if (stp->sd_qn_minpsz == 1)
		stp->sd_qn_minpsz = 0;
	mutex_exit(&stp->sd_lock);

	return (0);
}
471 
472 static void
473 copy_tinfo(struct sonode *so, struct T_info_ack *tia)
474 {
475 	so->so_tsdu_size = tia->TSDU_size;
476 	so->so_etsdu_size = tia->ETSDU_size;
477 	so->so_addr_size = tia->ADDR_size;
478 	so->so_opt_size = tia->OPT_size;
479 	so->so_tidu_size = tia->TIDU_size;
480 	so->so_serv_type = tia->SERV_type;
481 	switch (tia->CURRENT_state) {
482 	case TS_UNBND:
483 		break;
484 	case TS_IDLE:
485 		so->so_state |= SS_ISBOUND;
486 		so->so_laddr_len = 0;
487 		so->so_state &= ~SS_LADDR_VALID;
488 		break;
489 	case TS_DATA_XFER:
490 		so->so_state |= SS_ISBOUND|SS_ISCONNECTED;
491 		so->so_laddr_len = 0;
492 		so->so_faddr_len = 0;
493 		so->so_state &= ~(SS_LADDR_VALID | SS_FADDR_VALID);
494 		break;
495 	}
496 
497 	/*
498 	 * Heuristics for determining the socket mode flags
499 	 * (SM_ATOMIC, SM_CONNREQUIRED, SM_ADDR, SM_FDPASSING,
500 	 * and SM_EXDATA, SM_OPTDATA, and SM_BYTESTREAM)
501 	 * from the info ack.
502 	 */
503 	if (so->so_serv_type == T_CLTS) {
504 		so->so_mode |= SM_ATOMIC | SM_ADDR;
505 	} else {
506 		so->so_mode |= SM_CONNREQUIRED;
507 		if (so->so_etsdu_size != 0 && so->so_etsdu_size != -2)
508 			so->so_mode |= SM_EXDATA;
509 	}
510 	if (so->so_type == SOCK_SEQPACKET || so->so_type == SOCK_RAW) {
511 		/* Semantics are to discard tail end of messages */
512 		so->so_mode |= SM_ATOMIC;
513 	}
514 	if (so->so_family == AF_UNIX) {
515 		so->so_mode |= SM_FDPASSING | SM_OPTDATA;
516 		if (so->so_addr_size == -1) {
517 			/* MAXPATHLEN + soun_family + nul termination */
518 			so->so_addr_size = (t_scalar_t)(MAXPATHLEN +
519 				sizeof (short) + 1);
520 		}
521 		if (so->so_type == SOCK_STREAM) {
522 			/*
523 			 * Make it into a byte-stream transport.
524 			 * SOCK_SEQPACKET sockets are unchanged.
525 			 */
526 			so->so_tsdu_size = 0;
527 		}
528 	} else if (so->so_addr_size == -1) {
529 		/*
530 		 * Logic extracted from sockmod - have to pick some max address
531 		 * length in order to preallocate the addresses.
532 		 */
533 		so->so_addr_size = SOA_DEFSIZE;
534 	}
535 	if (so->so_tsdu_size == 0)
536 		so->so_mode |= SM_BYTESTREAM;
537 }
538 
539 static int
540 check_tinfo(struct sonode *so)
541 {
542 	/* Consistency checks */
543 	if (so->so_type == SOCK_DGRAM && so->so_serv_type != T_CLTS) {
544 		eprintso(so, ("service type and socket type mismatch\n"));
545 		eprintsoline(so, EPROTO);
546 		return (EPROTO);
547 	}
548 	if (so->so_type == SOCK_STREAM && so->so_serv_type == T_CLTS) {
549 		eprintso(so, ("service type and socket type mismatch\n"));
550 		eprintsoline(so, EPROTO);
551 		return (EPROTO);
552 	}
553 	if (so->so_type == SOCK_SEQPACKET && so->so_serv_type == T_CLTS) {
554 		eprintso(so, ("service type and socket type mismatch\n"));
555 		eprintsoline(so, EPROTO);
556 		return (EPROTO);
557 	}
558 	if (so->so_family == AF_INET &&
559 	    so->so_addr_size != (t_scalar_t)sizeof (struct sockaddr_in)) {
560 		eprintso(so,
561 		    ("AF_INET must have sockaddr_in address length. Got %d\n",
562 		    so->so_addr_size));
563 		eprintsoline(so, EMSGSIZE);
564 		return (EMSGSIZE);
565 	}
566 	if (so->so_family == AF_INET6 &&
567 	    so->so_addr_size != (t_scalar_t)sizeof (struct sockaddr_in6)) {
568 		eprintso(so,
569 		    ("AF_INET6 must have sockaddr_in6 address length. Got %d\n",
570 		    so->so_addr_size));
571 		eprintsoline(so, EMSGSIZE);
572 		return (EMSGSIZE);
573 	}
574 
575 	dprintso(so, 1, (
576 	    "tinfo: serv %d tsdu %d, etsdu %d, addr %d, opt %d, tidu %d\n",
577 	    so->so_serv_type, so->so_tsdu_size, so->so_etsdu_size,
578 	    so->so_addr_size, so->so_opt_size,
579 	    so->so_tidu_size));
580 	dprintso(so, 1, ("tinfo: so_state %s\n",
581 			pr_state(so->so_state, so->so_mode)));
582 	return (0);
583 }
584 
/*
 * Send down T_info_req and wait for the ack.
 * Record interesting T_info_ack values in the sonode.
 *
 * Must be called without so_lock held.
 *
 * Returns 0 on success (or when so_no_tinfo is set, in which case only
 * so_addr_size is zeroed), ENOBUFS if the request could not be
 * allocated, the error from kstrputmsg() or sowaitprim(), or the result
 * of check_tinfo().
 */
static int
do_tinfo(struct sonode *so)
{
	struct T_info_req tir;
	mblk_t *mp;
	int error;

	ASSERT(MUTEX_NOT_HELD(&so->so_lock));

	if (so_no_tinfo) {
		so->so_addr_size = 0;
		return (0);
	}

	dprintso(so, 1, ("do_tinfo(%p)\n", so));

	/* Send T_INFO_REQ */
	tir.PRIM_type = T_INFO_REQ;
	mp = soallocproto1(&tir, sizeof (tir),
	    sizeof (struct T_info_req) + sizeof (struct T_info_ack),
	    _ALLOC_INTR);
	if (mp == NULL) {
		eprintsoline(so, ENOBUFS);
		return (ENOBUFS);
	}
	/* T_INFO_REQ has to be M_PCPROTO */
	DB_TYPE(mp) = M_PCPROTO;

	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
			MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
	if (error) {
		eprintsoline(so, error);
		return (error);
	}
	mutex_enter(&so->so_lock);
	/* Wait for T_INFO_ACK */
	if ((error = sowaitprim(so, T_INFO_REQ, T_INFO_ACK,
	    (t_uscalar_t)sizeof (struct T_info_ack), &mp, 0))) {
		mutex_exit(&so->so_lock);
		eprintsoline(so, error);
		return (error);
	}

	ASSERT(mp);
	copy_tinfo(so, (struct T_info_ack *)mp->b_rptr);
	mutex_exit(&so->so_lock);
	freemsg(mp);
	/* The consistency check runs after so_lock has been dropped. */
	return (check_tinfo(so));
}
638 
/*
 * Send down T_capability_req and wait for the ack.
 * Record interesting T_capability_ack values in the sonode.
 *
 * cap_bits1 selects what is requested; it must be a nonempty subset of
 * TC1_ACCEPTOR_ID | TC1_INFO. Must be called without so_lock held.
 *
 * If the provider is already known not to support T_CAPABILITY_REQ
 * (tpi_capability == PI_NO) this simply calls do_tinfo(). If waiting
 * for the ack fails, a provider in state PI_DONTKNOW is marked PI_NO
 * and, when TC1_INFO was requested, do_tinfo() is used as a fallback;
 * otherwise the failure is not reported and 0 is returned.
 *
 * Returns 0, ENOBUFS, the error from kstrputmsg(), or the result of
 * do_tinfo()/check_tinfo(). An EPROTO from do_tinfo() following a
 * capability timeout is reported as ENOSR.
 */
static int
do_tcapability(struct sonode *so, t_uscalar_t cap_bits1)
{
	struct T_capability_req tcr;
	struct T_capability_ack *tca;
	mblk_t *mp;
	int error;

	ASSERT(cap_bits1 != 0);
	ASSERT((cap_bits1 & ~(TC1_ACCEPTOR_ID | TC1_INFO)) == 0);
	ASSERT(MUTEX_NOT_HELD(&so->so_lock));

	if (so->so_provinfo->tpi_capability == PI_NO)
		return (do_tinfo(so));

	if (so_no_tinfo) {
		so->so_addr_size = 0;
		/* Nothing left to ask for once TC1_INFO is suppressed? */
		if ((cap_bits1 &= ~TC1_INFO) == 0)
			return (0);
	}

	dprintso(so, 1, ("do_tcapability(%p)\n", so));

	/* Send T_CAPABILITY_REQ */
	tcr.PRIM_type = T_CAPABILITY_REQ;
	tcr.CAP_bits1 = cap_bits1;
	mp = soallocproto1(&tcr, sizeof (tcr),
	    sizeof (struct T_capability_req) + sizeof (struct T_capability_ack),
	    _ALLOC_INTR);
	if (mp == NULL) {
		eprintsoline(so, ENOBUFS);
		return (ENOBUFS);
	}
	/* T_CAPABILITY_REQ should be M_PCPROTO here */
	DB_TYPE(mp) = M_PCPROTO;

	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
	if (error) {
		eprintsoline(so, error);
		return (error);
	}
	mutex_enter(&so->so_lock);
	/* Wait for T_CAPABILITY_ACK */
	if ((error = sowaitprim(so, T_CAPABILITY_REQ, T_CAPABILITY_ACK,
	    (t_uscalar_t)sizeof (*tca), &mp, sock_capability_timeout * hz))) {
		mutex_exit(&so->so_lock);
		/* Remember that this provider did not answer the request. */
		PI_PROVLOCK(so->so_provinfo);
		if (so->so_provinfo->tpi_capability == PI_DONTKNOW)
			so->so_provinfo->tpi_capability = PI_NO;
		PI_PROVUNLOCK(so->so_provinfo);
		ASSERT((so->so_mode & SM_ACCEPTOR_ID) == 0);
		if (cap_bits1 & TC1_INFO) {
			/*
			 * If the T_CAPABILITY_REQ timed out and then a
			 * T_INFO_REQ gets a protocol error, most likely
			 * the capability was slow (vs. unsupported). Return
			 * ENOSR for this case as a best guess.
			 */
			if (error == ETIME) {
				return ((error = do_tinfo(so)) == EPROTO ?
				    ENOSR : error);
			}
			return (do_tinfo(so));
		}
		return (0);
	}

	/* so_lock is still held from here until after copy_tinfo(). */
	if (so->so_provinfo->tpi_capability == PI_DONTKNOW) {
		PI_PROVLOCK(so->so_provinfo);
		so->so_provinfo->tpi_capability = PI_YES;
		PI_PROVUNLOCK(so->so_provinfo);
	}

	ASSERT(mp);
	tca = (struct T_capability_ack *)mp->b_rptr;

	ASSERT((cap_bits1 & TC1_INFO) == (tca->CAP_bits1 & TC1_INFO));

	/* From here on act on what the provider actually returned. */
	cap_bits1 = tca->CAP_bits1;

	if (cap_bits1 & TC1_ACCEPTOR_ID) {
		so->so_acceptor_id = tca->ACCEPTOR_id;
		so->so_mode |= SM_ACCEPTOR_ID;
	}

	if (cap_bits1 & TC1_INFO)
		copy_tinfo(so, &tca->INFO_ack);

	mutex_exit(&so->so_lock);
	freemsg(mp);

	if (cap_bits1 & TC1_INFO)
		return (check_tinfo(so));

	return (0);
}
740 
741 /*
742  * Retrieve and clear the socket error.
743  */
744 int
745 sogeterr(struct sonode *so)
746 {
747 	int error;
748 
749 	ASSERT(MUTEX_HELD(&so->so_lock));
750 
751 	error = so->so_error;
752 	so->so_error = 0;
753 
754 	return (error);
755 }
756 
757 /*
758  * This routine is registered with the stream head to retrieve read
759  * side errors.
760  * It does not clear the socket error for a peeking read side operation.
761  * It the error is to be cleared it sets *clearerr.
762  */
763 int
764 sogetrderr(vnode_t *vp, int ispeek, int *clearerr)
765 {
766 	struct sonode *so = VTOSO(vp);
767 	int error;
768 
769 	mutex_enter(&so->so_lock);
770 	if (ispeek) {
771 		error = so->so_error;
772 		*clearerr = 0;
773 	} else {
774 		error = so->so_error;
775 		so->so_error = 0;
776 		*clearerr = 1;
777 	}
778 	mutex_exit(&so->so_lock);
779 	return (error);
780 }
781 
782 /*
783  * This routine is registered with the stream head to retrieve write
784  * side errors.
785  * It does not clear the socket error for a peeking read side operation.
786  * It the error is to be cleared it sets *clearerr.
787  */
788 int
789 sogetwrerr(vnode_t *vp, int ispeek, int *clearerr)
790 {
791 	struct sonode *so = VTOSO(vp);
792 	int error;
793 
794 	mutex_enter(&so->so_lock);
795 	if (so->so_state & SS_CANTSENDMORE) {
796 		error = EPIPE;
797 		*clearerr = 0;
798 	} else {
799 		error = so->so_error;
800 		if (ispeek) {
801 			*clearerr = 0;
802 		} else {
803 			so->so_error = 0;
804 			*clearerr = 1;
805 		}
806 	}
807 	mutex_exit(&so->so_lock);
808 	return (error);
809 }
810 
811 /*
812  * Set a nonpersistent read and write error on the socket.
813  * Used when there is a T_uderror_ind for a connected socket.
814  * The caller also needs to call strsetrerror and strsetwerror
815  * after dropping the lock.
816  */
817 void
818 soseterror(struct sonode *so, int error)
819 {
820 	ASSERT(error != 0);
821 
822 	ASSERT(MUTEX_HELD(&so->so_lock));
823 	so->so_error = (ushort_t)error;
824 }
825 
826 void
827 soisconnecting(struct sonode *so)
828 {
829 	ASSERT(MUTEX_HELD(&so->so_lock));
830 	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
831 	so->so_state |= SS_ISCONNECTING;
832 	cv_broadcast(&so->so_state_cv);
833 }
834 
835 void
836 soisconnected(struct sonode *so)
837 {
838 	ASSERT(MUTEX_HELD(&so->so_lock));
839 	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING);
840 	so->so_state |= SS_ISCONNECTED;
841 	cv_broadcast(&so->so_state_cv);
842 }
843 
844 /*
845  * The caller also needs to call strsetrerror, strsetwerror and strseteof.
846  */
847 void
848 soisdisconnected(struct sonode *so, int error)
849 {
850 	ASSERT(MUTEX_HELD(&so->so_lock));
851 	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING|
852 	    SS_LADDR_VALID|SS_FADDR_VALID);
853 	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE);
854 	so->so_error = (ushort_t)error;
855 	if (so->so_peercred != NULL) {
856 		crfree(so->so_peercred);
857 		so->so_peercred = NULL;
858 	}
859 	cv_broadcast(&so->so_state_cv);
860 }
861 
862 /*
863  * For connected AF_UNIX SOCK_DGRAM sockets when the peer closes.
864  * Does not affect write side.
865  * The caller also has to call strsetrerror.
866  */
867 static void
868 sobreakconn(struct sonode *so, int error)
869 {
870 	ASSERT(MUTEX_HELD(&so->so_lock));
871 	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
872 	so->so_error = (ushort_t)error;
873 	cv_broadcast(&so->so_state_cv);
874 }
875 
876 /*
877  * Can no longer send.
878  * Caller must also call strsetwerror.
879  *
880  * We mark the peer address as no longer valid for getpeername, but
881  * leave it around for so_unix_close to notify the peer (that
882  * transport has no addressing held at that layer).
883  */
884 void
885 socantsendmore(struct sonode *so)
886 {
887 	ASSERT(MUTEX_HELD(&so->so_lock));
888 	so->so_state = so->so_state & ~SS_FADDR_VALID | SS_CANTSENDMORE;
889 	cv_broadcast(&so->so_state_cv);
890 }
891 
892 /*
893  * The caller must call strseteof(,1) as well as this routine
894  * to change the socket state.
895  */
896 void
897 socantrcvmore(struct sonode *so)
898 {
899 	ASSERT(MUTEX_HELD(&so->so_lock));
900 	so->so_state |= SS_CANTRCVMORE;
901 	cv_broadcast(&so->so_state_cv);
902 }
903 
904 /*
905  * The caller has sent down a "request_prim" primitive and wants to wait for
906  * an ack ("ack_prim") or an T_ERROR_ACK for it.
907  * The specified "ack_prim" can be a T_OK_ACK.
908  *
909  * Assumes that all the TPI acks are M_PCPROTO messages.
910  *
911  * Note that the socket is single-threaded (using so_lock_single)
912  * for all operations that generate TPI ack messages. Since
913  * only TPI ack messages are M_PCPROTO we should never receive
914  * anything except either the ack we are expecting or a T_ERROR_ACK
915  * for the same primitive.
916  */
917 int
918 sowaitprim(struct sonode *so, t_scalar_t request_prim, t_scalar_t ack_prim,
919 	    t_uscalar_t min_size, mblk_t **mpp, clock_t wait)
920 {
921 	mblk_t *mp;
922 	union T_primitives *tpr;
923 	int error;
924 
925 	dprintso(so, 1, ("sowaitprim(%p, %d, %d, %d, %p, %lu)\n",
926 		so, request_prim, ack_prim, min_size, mpp, wait));
927 
928 	ASSERT(MUTEX_HELD(&so->so_lock));
929 
930 	error = sowaitack(so, &mp, wait);
931 	if (error)
932 		return (error);
933 
934 	dprintso(so, 1, ("got msg %p\n", mp));
935 	if (DB_TYPE(mp) != M_PCPROTO ||
936 	    MBLKL(mp) < sizeof (tpr->type)) {
937 		freemsg(mp);
938 		eprintsoline(so, EPROTO);
939 		return (EPROTO);
940 	}
941 	tpr = (union T_primitives *)mp->b_rptr;
942 	/*
943 	 * Did we get the primitive that we were asking for?
944 	 * For T_OK_ACK we also check that it matches the request primitive.
945 	 */
946 	if (tpr->type == ack_prim &&
947 	    (ack_prim != T_OK_ACK ||
948 	    tpr->ok_ack.CORRECT_prim == request_prim)) {
949 		if (MBLKL(mp) >= (ssize_t)min_size) {
950 			/* Found what we are looking for */
951 			*mpp = mp;
952 			return (0);
953 		}
954 		/* Too short */
955 		freemsg(mp);
956 		eprintsoline(so, EPROTO);
957 		return (EPROTO);
958 	}
959 
960 	if (tpr->type == T_ERROR_ACK &&
961 	    tpr->error_ack.ERROR_prim == request_prim) {
962 		/* Error to the primitive we were looking for */
963 		if (tpr->error_ack.TLI_error == TSYSERR) {
964 			error = tpr->error_ack.UNIX_error;
965 		} else {
966 			error = tlitosyserr(tpr->error_ack.TLI_error);
967 		}
968 		dprintso(so, 0, ("error_ack for %d: %d/%d ->%d\n",
969 			tpr->error_ack.ERROR_prim,
970 			tpr->error_ack.TLI_error,
971 			tpr->error_ack.UNIX_error,
972 			error));
973 		freemsg(mp);
974 		return (error);
975 	}
976 	/*
977 	 * Wrong primitive or T_ERROR_ACK for the wrong primitive
978 	 */
979 #ifdef DEBUG
980 	if (tpr->type == T_ERROR_ACK) {
981 		dprintso(so, 0, ("error_ack for %d: %d/%d\n",
982 			tpr->error_ack.ERROR_prim,
983 			tpr->error_ack.TLI_error,
984 			tpr->error_ack.UNIX_error));
985 	} else if (tpr->type == T_OK_ACK) {
986 		dprintso(so, 0, ("ok_ack for %d, expected %d for %d\n",
987 			tpr->ok_ack.CORRECT_prim,
988 			ack_prim, request_prim));
989 	} else {
990 		dprintso(so, 0,
991 			("unexpected primitive %d, expected %d for %d\n",
992 			tpr->type, ack_prim, request_prim));
993 	}
994 #endif /* DEBUG */
995 
996 	freemsg(mp);
997 	eprintsoline(so, EPROTO);
998 	return (EPROTO);
999 }
1000 
1001 /*
1002  * Wait for a T_OK_ACK for the specified primitive.
1003  */
1004 int
1005 sowaitokack(struct sonode *so, t_scalar_t request_prim)
1006 {
1007 	mblk_t *mp;
1008 	int error;
1009 
1010 	error = sowaitprim(so, request_prim, T_OK_ACK,
1011 	    (t_uscalar_t)sizeof (struct T_ok_ack), &mp, 0);
1012 	if (error)
1013 		return (error);
1014 	freemsg(mp);
1015 	return (0);
1016 }
1017 
1018 /*
1019  * Queue a received TPI ack message on so_ack_mp.
1020  */
1021 void
1022 soqueueack(struct sonode *so, mblk_t *mp)
1023 {
1024 	if (DB_TYPE(mp) != M_PCPROTO) {
1025 		cmn_err(CE_WARN,
1026 		    "sockfs: received unexpected M_PROTO TPI ack. Prim %d\n",
1027 		    *(t_scalar_t *)mp->b_rptr);
1028 		freemsg(mp);
1029 		return;
1030 	}
1031 
1032 	mutex_enter(&so->so_lock);
1033 	if (so->so_ack_mp != NULL) {
1034 		dprintso(so, 1, ("so_ack_mp already set\n"));
1035 		freemsg(so->so_ack_mp);
1036 		so->so_ack_mp = NULL;
1037 	}
1038 	so->so_ack_mp = mp;
1039 	cv_broadcast(&so->so_ack_cv);
1040 	mutex_exit(&so->so_lock);
1041 }
1042 
/*
 * Wait for a TPI ack ignoring signals and errors.
 *
 * so	- socket to wait on; so_lock must be held.
 * mpp	- on success receives the message taken from so_ack_mp.
 * wait	- time limit handed to time_to_wait(); 0 means wait without a
 *	  limit (unless SOCK_TEST supplies sock_test_timelimit).
 *
 * Returns 0 with *mpp set and so_ack_mp cleared, or ETIME if
 * cv_timedwait() reported a timeout before an ack was queued.
 */
int
sowaitack(struct sonode *so, mblk_t **mpp, clock_t wait)
{
	ASSERT(MUTEX_HELD(&so->so_lock));

	while (so->so_ack_mp == NULL) {
#ifdef SOCK_TEST
		if (wait == 0 && sock_test_timelimit != 0)
			wait = sock_test_timelimit;
#endif
		if (wait != 0) {
			/*
			 * Only wait for the time limit.
			 * NOTE(review): the timeout is recomputed on every
			 * pass through the loop, so a wakeup that finds no
			 * ack queued appears to restart the interval -
			 * confirm against time_to_wait().
			 */
			clock_t now;

			time_to_wait(&now, wait);
			if (cv_timedwait(&so->so_ack_cv, &so->so_lock,
			    now) == -1) {
				eprintsoline(so, ETIME);
				return (ETIME);
			}
		}
		else
			cv_wait(&so->so_ack_cv, &so->so_lock);
	}
	*mpp = so->so_ack_mp;
#ifdef DEBUG
	{
		/* Verify that what was queued really is a TPI ack. */
		union T_primitives *tpr;
		mblk_t *mp = *mpp;

		tpr = (union T_primitives *)mp->b_rptr;
		ASSERT(DB_TYPE(mp) == M_PCPROTO);
		ASSERT(tpr->type == T_OK_ACK ||
			tpr->type == T_ERROR_ACK ||
			tpr->type == T_BIND_ACK ||
			tpr->type == T_CAPABILITY_ACK ||
			tpr->type == T_INFO_ACK ||
			tpr->type == T_OPTMGMT_ACK);
	}
#endif /* DEBUG */
	/* Ownership of the message passes to the caller. */
	so->so_ack_mp = NULL;
	return (0);
}
1091 
1092 /*
1093  * Queue a received T_CONN_IND message on so_conn_ind_head/tail.
1094  */
1095 void
1096 soqueueconnind(struct sonode *so, mblk_t *mp)
1097 {
1098 	if (DB_TYPE(mp) != M_PROTO) {
1099 		cmn_err(CE_WARN,
1100 		    "sockfs: received unexpected M_PCPROTO T_CONN_IND\n");
1101 		freemsg(mp);
1102 		return;
1103 	}
1104 
1105 	mutex_enter(&so->so_lock);
1106 	ASSERT(mp->b_next == NULL);
1107 	if (so->so_conn_ind_head == NULL) {
1108 		so->so_conn_ind_head = mp;
1109 		so->so_state |= SS_HASCONNIND;
1110 	} else {
1111 		ASSERT(so->so_state & SS_HASCONNIND);
1112 		ASSERT(so->so_conn_ind_tail->b_next == NULL);
1113 		so->so_conn_ind_tail->b_next = mp;
1114 	}
1115 	so->so_conn_ind_tail = mp;
1116 	/* Wakeup a single consumer of the T_CONN_IND */
1117 	cv_signal(&so->so_connind_cv);
1118 	mutex_exit(&so->so_lock);
1119 }
1120 
1121 /*
1122  * Wait for a T_CONN_IND.
1123  * Don't wait if nonblocking.
1124  * Accept signals and socket errors.
1125  */
1126 int
1127 sowaitconnind(struct sonode *so, int fmode, mblk_t **mpp)
1128 {
1129 	mblk_t *mp;
1130 	int error = 0;
1131 
1132 	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
1133 	mutex_enter(&so->so_lock);
1134 check_error:
1135 	if (so->so_error) {
1136 		error = sogeterr(so);
1137 		if (error) {
1138 			mutex_exit(&so->so_lock);
1139 			return (error);
1140 		}
1141 	}
1142 
1143 	if (so->so_conn_ind_head == NULL) {
1144 		if (fmode & (FNDELAY|FNONBLOCK)) {
1145 			error = EWOULDBLOCK;
1146 			goto done;
1147 		}
1148 		if (!cv_wait_sig_swap(&so->so_connind_cv, &so->so_lock)) {
1149 			error = EINTR;
1150 			goto done;
1151 		}
1152 		goto check_error;
1153 	}
1154 	mp = so->so_conn_ind_head;
1155 	so->so_conn_ind_head = mp->b_next;
1156 	mp->b_next = NULL;
1157 	if (so->so_conn_ind_head == NULL) {
1158 		ASSERT(so->so_conn_ind_tail == mp);
1159 		so->so_conn_ind_tail = NULL;
1160 		so->so_state &= ~SS_HASCONNIND;
1161 	}
1162 	*mpp = mp;
1163 done:
1164 	mutex_exit(&so->so_lock);
1165 	return (error);
1166 }
1167 
1168 /*
1169  * Flush a T_CONN_IND matching the sequence number from the list.
1170  * Return zero if found; non-zero otherwise.
1171  * This is called very infrequently thus it is ok to do a linear search.
1172  */
1173 int
1174 soflushconnind(struct sonode *so, t_scalar_t seqno)
1175 {
1176 	mblk_t *prevmp, *mp;
1177 	struct T_conn_ind *tci;
1178 
1179 	mutex_enter(&so->so_lock);
1180 	for (prevmp = NULL, mp = so->so_conn_ind_head; mp != NULL;
1181 	    prevmp = mp, mp = mp->b_next) {
1182 		tci = (struct T_conn_ind *)mp->b_rptr;
1183 		if (tci->SEQ_number == seqno) {
1184 			dprintso(so, 1,
1185 				("t_discon_ind: found T_CONN_IND %d\n", seqno));
1186 			/* Deleting last? */
1187 			if (so->so_conn_ind_tail == mp) {
1188 				so->so_conn_ind_tail = prevmp;
1189 			}
1190 			if (prevmp == NULL) {
1191 				/* Deleting first */
1192 				so->so_conn_ind_head = mp->b_next;
1193 			} else {
1194 				prevmp->b_next = mp->b_next;
1195 			}
1196 			mp->b_next = NULL;
1197 			if (so->so_conn_ind_head == NULL) {
1198 				ASSERT(so->so_conn_ind_tail == NULL);
1199 				so->so_state &= ~SS_HASCONNIND;
1200 			} else {
1201 				ASSERT(so->so_conn_ind_tail != NULL);
1202 			}
1203 			so->so_error = ECONNABORTED;
1204 			mutex_exit(&so->so_lock);
1205 			freemsg(mp);
1206 			return (0);
1207 		}
1208 	}
1209 	mutex_exit(&so->so_lock);
1210 	dprintso(so, 1,	("t_discon_ind: NOT found T_CONN_IND %d\n", seqno));
1211 	return (-1);
1212 }
1213 
/*
 * Sleep on so_state_cv until the socket leaves the "connecting only"
 * state or a socket error is posted. The caller must hold so_lock.
 *
 * fmode: if FNDELAY or FNONBLOCK is set, EINPROGRESS is returned instead
 *	of sleeping.
 * nosig: non-zero makes the sleep uninterruptible; otherwise a signal
 *	ends the wait with EINTR.
 *
 * Returns 0 once SS_ISCONNECTED is set, the error fetched by sogeterr()
 * if so_error is set, or ECONNREFUSED if the socket ended up neither
 * connected nor carrying an error.
 */
int
sowaitconnected(struct sonode *so, int fmode, int nosig)
{
	int error;

	ASSERT(MUTEX_HELD(&so->so_lock));

	for (;;) {
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) !=
		    SS_ISCONNECTING)
			break;
		if (so->so_error != 0)
			break;

		dprintso(so, 1, ("waiting for SS_ISCONNECTED on %p\n", so));
		if (fmode & (FNDELAY|FNONBLOCK))
			return (EINPROGRESS);

		if (nosig) {
			cv_wait(&so->so_state_cv, &so->so_lock);
		} else if (!cv_wait_sig_swap(&so->so_state_cv, &so->so_lock)) {
			/*
			 * Interrupted. Report EINTR; the application can
			 * fall back on nonblocking techniques to learn
			 * when the connection has been established.
			 */
			return (EINTR);
		}
		dprintso(so, 1, ("awoken on %p\n", so));
	}

	if (so->so_error != 0) {
		error = sogeterr(so);
		ASSERT(error != 0);
		dprintso(so, 1, ("sowaitconnected: error %d\n", error));
		return (error);
	}
	if (so->so_state & SS_ISCONNECTED)
		return (0);

	/*
	 * Not connected and no error recorded. Could have received a
	 * T_ORDREL_IND or a T_DISCON_IND with zero errno, or another
	 * thread could have consumed so_error e.g. by calling read.
	 */
	error = ECONNREFUSED;
	dprintso(so, 1, ("sowaitconnected: error %d\n", error));
	return (error);
}
1264 
1265 
1266 /*
1267  * Handle the signal generation aspect of urgent data.
1268  */
1269 static void
1270 so_oob_sig(struct sonode *so, int extrasig,
1271     strsigset_t *signals, strpollset_t *pollwakeups)
1272 {
1273 	ASSERT(MUTEX_HELD(&so->so_lock));
1274 
1275 	ASSERT(so_verify_oobstate(so));
1276 	ASSERT(so->so_oobsigcnt >= so->so_oobcnt);
1277 	if (so->so_oobsigcnt > so->so_oobcnt) {
1278 		/*
1279 		 * Signal has already been generated once for this
1280 		 * urgent "event". However, since TCP can receive updated
1281 		 * urgent pointers we still generate a signal.
1282 		 */
1283 		ASSERT(so->so_state & SS_OOBPEND);
1284 		if (extrasig) {
1285 			*signals |= S_RDBAND;
1286 			*pollwakeups |= POLLRDBAND;
1287 		}
1288 		return;
1289 	}
1290 
1291 	so->so_oobsigcnt++;
1292 	ASSERT(so->so_oobsigcnt > 0);	/* Wraparound */
1293 	ASSERT(so->so_oobsigcnt > so->so_oobcnt);
1294 
1295 	/*
1296 	 * Record (for select/poll) that urgent data is pending.
1297 	 */
1298 	so->so_state |= SS_OOBPEND;
1299 	/*
1300 	 * New urgent data on the way so forget about any old
1301 	 * urgent data.
1302 	 */
1303 	so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);
1304 	if (so->so_oobmsg != NULL) {
1305 		dprintso(so, 1, ("sock: discarding old oob\n"));
1306 		freemsg(so->so_oobmsg);
1307 		so->so_oobmsg = NULL;
1308 	}
1309 	*signals |= S_RDBAND;
1310 	*pollwakeups |= POLLRDBAND;
1311 	ASSERT(so_verify_oobstate(so));
1312 }
1313 
1314 /*
1315  * Handle the processing of the T_EXDATA_IND with urgent data.
1316  * Returns the T_EXDATA_IND if it should be queued on the read queue.
1317  */
1318 /* ARGSUSED2 */
1319 static mblk_t *
1320 so_oob_exdata(struct sonode *so, mblk_t *mp,
1321 	strsigset_t *signals, strpollset_t *pollwakeups)
1322 {
1323 	ASSERT(MUTEX_HELD(&so->so_lock));
1324 
1325 	ASSERT(so_verify_oobstate(so));
1326 
1327 	ASSERT(so->so_oobsigcnt > so->so_oobcnt);
1328 
1329 	so->so_oobcnt++;
1330 	ASSERT(so->so_oobcnt > 0);	/* wraparound? */
1331 	ASSERT(so->so_oobsigcnt >= so->so_oobcnt);
1332 
1333 	/*
1334 	 * Set MSGMARK for SIOCATMARK.
1335 	 */
1336 	mp->b_flag |= MSGMARK;
1337 
1338 	ASSERT(so_verify_oobstate(so));
1339 	return (mp);
1340 }
1341 
1342 /*
1343  * Handle the processing of the actual urgent data.
1344  * Returns the data mblk if it should be queued on the read queue.
1345  */
1346 static mblk_t *
1347 so_oob_data(struct sonode *so, mblk_t *mp,
1348 	strsigset_t *signals, strpollset_t *pollwakeups)
1349 {
1350 	ASSERT(MUTEX_HELD(&so->so_lock));
1351 
1352 	ASSERT(so_verify_oobstate(so));
1353 
1354 	ASSERT(so->so_oobsigcnt >= so->so_oobcnt);
1355 	ASSERT(mp != NULL);
1356 	/*
1357 	 * For OOBINLINE we keep the data in the T_EXDATA_IND.
1358 	 * Otherwise we store it in so_oobmsg.
1359 	 */
1360 	ASSERT(so->so_oobmsg == NULL);
1361 	if (so->so_options & SO_OOBINLINE) {
1362 		*pollwakeups |= POLLIN | POLLRDNORM | POLLRDBAND;
1363 		*signals |= S_INPUT | S_RDNORM;
1364 	} else {
1365 		*pollwakeups |= POLLRDBAND;
1366 		so->so_state |= SS_HAVEOOBDATA;
1367 		so->so_oobmsg = mp;
1368 		mp = NULL;
1369 	}
1370 	ASSERT(so_verify_oobstate(so));
1371 	return (mp);
1372 }
1373 
1374 /*
1375  * Caller must hold the mutex.
1376  * For delayed processing, save the T_DISCON_IND received
1377  * from below on so_discon_ind_mp.
1378  * When the message is processed the framework will call:
1379  *      (*func)(so, mp);
1380  */
1381 static void
1382 so_save_discon_ind(struct sonode *so,
1383 	mblk_t *mp,
1384 	void (*func)(struct sonode *so, mblk_t *))
1385 {
1386 	ASSERT(MUTEX_HELD(&so->so_lock));
1387 
1388 	/*
1389 	 * Discard new T_DISCON_IND if we have already received another.
1390 	 * Currently the earlier message can either be on so_discon_ind_mp
1391 	 * or being processed.
1392 	 */
1393 	if (so->so_discon_ind_mp != NULL || (so->so_flag & SOASYNC_UNBIND)) {
1394 		cmn_err(CE_WARN,
1395 		    "sockfs: received unexpected additional T_DISCON_IND\n");
1396 		freemsg(mp);
1397 		return;
1398 	}
1399 	mp->b_prev = (mblk_t *)func;
1400 	mp->b_next = NULL;
1401 	so->so_discon_ind_mp = mp;
1402 }
1403 
1404 /*
1405  * Caller must hold the mutex and make sure that either SOLOCKED
1406  * or SOASYNC_UNBIND is set. Called from so_unlock_single().
1407  * Perform delayed processing of T_DISCON_IND message on so_discon_ind_mp.
1408  * Need to ensure that strsock_proto() will not end up sleeping for
1409  * SOASYNC_UNBIND, while executing this function.
1410  */
1411 void
1412 so_drain_discon_ind(struct sonode *so)
1413 {
1414 	mblk_t	*bp;
1415 	void (*func)(struct sonode *so, mblk_t *);
1416 
1417 	ASSERT(MUTEX_HELD(&so->so_lock));
1418 	ASSERT(so->so_flag & (SOLOCKED|SOASYNC_UNBIND));
1419 
1420 	/* Process T_DISCON_IND on so_discon_ind_mp */
1421 	if ((bp = so->so_discon_ind_mp) != NULL) {
1422 		so->so_discon_ind_mp = NULL;
1423 		func = (void (*)())bp->b_prev;
1424 		bp->b_prev = NULL;
1425 
1426 		/*
1427 		 * This (*func) is supposed to generate a message downstream
1428 		 * and we need to have a flag set until the corresponding
1429 		 * upstream message reaches stream head.
1430 		 * When processing T_DISCON_IND in strsock_discon_ind
1431 		 * we hold SOASYN_UNBIND when sending T_UNBIND_REQ down and
1432 		 * drop the flag after we get the ACK in strsock_proto.
1433 		 */
1434 		(void) (*func)(so, bp);
1435 	}
1436 }
1437 
1438 /*
1439  * Caller must hold the mutex.
1440  * Remove the T_DISCON_IND on so_discon_ind_mp.
1441  */
1442 void
1443 so_flush_discon_ind(struct sonode *so)
1444 {
1445 	mblk_t	*bp;
1446 
1447 	ASSERT(MUTEX_HELD(&so->so_lock));
1448 
1449 	/*
1450 	 * Remove T_DISCON_IND mblk at so_discon_ind_mp.
1451 	 */
1452 	if ((bp = so->so_discon_ind_mp) != NULL) {
1453 		so->so_discon_ind_mp = NULL;
1454 		bp->b_prev = NULL;
1455 		freemsg(bp);
1456 	}
1457 }
1458 
/*
 * Process a T_DISCON_IND message.
 *
 * Caller must hold the mutex (so_lock). This function drops so_lock and
 * grabs it again before returning, so any state the caller examined
 * under the lock may have changed on return.
 *
 * It does immediate processing when called from strsock_proto and delayed
 * processing of a discon_ind saved on so_discon_ind_mp when called from
 * so_drain_discon_ind. When a T_DISCON_IND message is saved in
 * so_discon_ind_mp for delayed processing, this function is registered
 * as the callback function to process the message.
 *
 * SOASYNC_UNBIND should be held in this function, during the non-blocking
 * unbind operation, and should be released only after we receive the ACK
 * in strsock_proto, for the T_UNBIND_REQ sent here. Since SOLOCKED is not set,
 * no TPI messages would be sent down at this time. This is to prevent M_FLUSH
 * sent from either this function or tcp_unbind(), flushing away any TPI
 * message that is being sent down and stays in a lower module's queue.
 *
 * discon_mp is consumed (freed) by this function.
 */
static void
strsock_discon_ind(struct sonode *so, mblk_t *discon_mp)
{
	struct vnode *vp;
	struct stdata *stp;
	union T_primitives *tpr;
	struct T_unbind_req *ubr;
	mblk_t *mp;
	int error;

	ASSERT(MUTEX_HELD(&so->so_lock));
	ASSERT(discon_mp);
	ASSERT(discon_mp->b_rptr);

	/*
	 * tpr points into discon_mp, so discon_mp must stay allocated
	 * until the last read of DISCON_reason below.
	 */
	tpr = (union T_primitives *)discon_mp->b_rptr;
	ASSERT(tpr->type == T_DISCON_IND);

	vp = SOTOV(so);
	stp = vp->v_stream;
	ASSERT(stp);

	/*
	 * Not a listener
	 */
	ASSERT((so->so_state & SS_ACCEPTCONN) == 0);

	/*
	 * This assumes that the name space for DISCON_reason
	 * is the errno name space.
	 */
	soisdisconnected(so, tpr->discon_ind.DISCON_reason);

	/*
	 * Unbind with the transport without blocking.
	 * If we've already received a T_DISCON_IND do not unbind.
	 *
	 * If there is no preallocated unbind message, we have already
	 * unbound with the transport.
	 *
	 * If the socket is not bound, no need to unbind.
	 *
	 * Every branch below releases so_lock.
	 */
	mp = so->so_unbind_mp;
	if (mp == NULL) {
		ASSERT(!(so->so_state & SS_ISBOUND));
		mutex_exit(&so->so_lock);
	} else if (!(so->so_state & SS_ISBOUND))  {
		mutex_exit(&so->so_lock);
	} else {
		/* Take ownership of the preallocated unbind message. */
		so->so_unbind_mp = NULL;

		/*
		 * No other T_DISCON_IND may be in the middle of being
		 * processed at this point.
		 */
		ASSERT((so->so_flag & SOASYNC_UNBIND) == 0);

		/*
		 * Make strsock_proto ignore T_OK_ACK and T_ERROR_ACK for
		 * this unbind. Set SOASYNC_UNBIND. This should be cleared
		 * only after we receive the ACK in strsock_proto.
		 */
		so->so_flag |= SOASYNC_UNBIND;
		ASSERT(!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)));
		so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN|SS_LADDR_VALID);
		mutex_exit(&so->so_lock);

		/*
		 * Send down T_UNBIND_REQ ignoring flow control.
		 * XXX Assumes that MSG_IGNFLOW implies that this thread
		 * does not run service procedures.
		 */
		ASSERT(DB_TYPE(mp) == M_PROTO);
		ubr = (struct T_unbind_req *)mp->b_rptr;
		mp->b_wptr += sizeof (*ubr);
		ubr->PRIM_type = T_UNBIND_REQ;

		/*
		 * Flush the read and write side (except stream head read queue)
		 * and send down T_UNBIND_REQ.
		 */
		(void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW);
		error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
			MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
		/* LINTED - warning: statement has no consequent: if */
		if (error) {
			/* Failure is only reported, not acted upon. */
			eprintsoline(so, error);
		}
	}

	/*
	 * so_lock is not held from here until it is re-taken at the end.
	 * The read-side error is only set when the disconnect carried a
	 * non-zero reason; the write-side error and EOF are always set.
	 */
	if (tpr->discon_ind.DISCON_reason != 0)
		strsetrerror(SOTOV(so), 0, 0, sogetrderr);
	strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
	strseteof(SOTOV(so), 1);
	/*
	 * strseteof takes care of read side wakeups,
	 * pollwakeups, and signals.
	 */
	dprintso(so, 1, ("T_DISCON_IND: error %d\n", so->so_error));
	/* Last use of tpr was above; the message can go now. */
	freemsg(discon_mp);


	/* Write-side notifications are done by hand below. */
	pollwakeup(&stp->sd_pollist, POLLOUT);
	mutex_enter(&stp->sd_lock);

	/*
	 * Wake sleeping write
	 */
	if (stp->sd_flag & WSLEEP) {
		stp->sd_flag &= ~WSLEEP;
		cv_broadcast(&stp->sd_wrq->q_wait);
	}

	/*
	 * strsendsig can handle multiple signals with a
	 * single call.  Send SIGPOLL for S_OUTPUT event.
	 */
	if (stp->sd_sigflags & S_OUTPUT)
		strsendsig(stp->sd_siglist, S_OUTPUT, 0, 0);

	mutex_exit(&stp->sd_lock);
	/* Re-acquire so_lock: the caller expects it held on return. */
	mutex_enter(&so->so_lock);
}
1599 
1600 /*
1601  * This routine is registered with the stream head to receive M_PROTO
1602  * and M_PCPROTO messages.
1603  *
1604  * Returns NULL if the message was consumed.
1605  * Returns an mblk to make that mblk be processed (and queued) by the stream
1606  * head.
1607  *
1608  * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and
1609  * *pollwakeups) for the stream head to take action on. Note that since
1610  * sockets always deliver SIGIO for every new piece of data this routine
1611  * never sets *firstmsgsigs; any signals are returned in *allmsgsigs.
1612  *
1613  * This routine handles all data related TPI messages independent of
1614  * the type of the socket i.e. it doesn't care if T_UNITDATA_IND message
1615  * arrive on a SOCK_STREAM.
1616  */
1617 static mblk_t *
1618 strsock_proto(vnode_t *vp, mblk_t *mp,
1619 		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
1620 		strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
1621 {
1622 	union T_primitives *tpr;
1623 	struct sonode *so;
1624 
1625 	so = VTOSO(vp);
1626 
1627 	dprintso(so, 1, ("strsock_proto(%p, %p)\n", vp, mp));
1628 
1629 	/* Set default return values */
1630 	*firstmsgsigs = *wakeups = *allmsgsigs = *pollwakeups = 0;
1631 
1632 	ASSERT(DB_TYPE(mp) == M_PROTO ||
1633 	    DB_TYPE(mp) == M_PCPROTO);
1634 
1635 	if (MBLKL(mp) < sizeof (tpr->type)) {
1636 		/* The message is too short to even contain the primitive */
1637 		cmn_err(CE_WARN,
1638 		    "sockfs: Too short TPI message received. Len = %ld\n",
1639 		    (ptrdiff_t)(MBLKL(mp)));
1640 		freemsg(mp);
1641 		return (NULL);
1642 	}
1643 	if (!__TPI_PRIM_ISALIGNED(mp->b_rptr)) {
1644 		/* The read pointer is not aligned correctly for TPI */
1645 		cmn_err(CE_WARN,
1646 		    "sockfs: Unaligned TPI message received. rptr = %p\n",
1647 		    (void *)mp->b_rptr);
1648 		freemsg(mp);
1649 		return (NULL);
1650 	}
1651 	tpr = (union T_primitives *)mp->b_rptr;
1652 	dprintso(so, 1, ("strsock_proto: primitive %d\n", tpr->type));
1653 
1654 	switch (tpr->type) {
1655 
1656 	case T_DATA_IND:
1657 		if (MBLKL(mp) < sizeof (struct T_data_ind)) {
1658 			cmn_err(CE_WARN,
1659 			    "sockfs: Too short T_DATA_IND. Len = %ld\n",
1660 			    (ptrdiff_t)(MBLKL(mp)));
1661 			freemsg(mp);
1662 			return (NULL);
1663 		}
1664 		/*
1665 		 * Ignore zero-length T_DATA_IND messages. These might be
1666 		 * generated by some transports.
1667 		 * This is needed to prevent read (which skips the M_PROTO
1668 		 * part) to unexpectedly return 0 (or return EWOULDBLOCK
1669 		 * on a non-blocking socket after select/poll has indicated
1670 		 * that data is available).
1671 		 */
1672 		if (msgdsize(mp->b_cont) == 0) {
1673 			dprintso(so, 0,
1674 			    ("strsock_proto: zero length T_DATA_IND\n"));
1675 			freemsg(mp);
1676 			return (NULL);
1677 		}
1678 		*allmsgsigs = S_INPUT | S_RDNORM;
1679 		*pollwakeups = POLLIN | POLLRDNORM;
1680 		*wakeups = RSLEEP;
1681 		return (mp);
1682 
1683 	case T_UNITDATA_IND: {
1684 		struct T_unitdata_ind	*tudi = &tpr->unitdata_ind;
1685 		void			*addr;
1686 		t_uscalar_t		addrlen;
1687 
1688 		if (MBLKL(mp) < sizeof (struct T_unitdata_ind)) {
1689 			cmn_err(CE_WARN,
1690 			    "sockfs: Too short T_UNITDATA_IND. Len = %ld\n",
1691 			    (ptrdiff_t)(MBLKL(mp)));
1692 			freemsg(mp);
1693 			return (NULL);
1694 		}
1695 
1696 		/* Is this is not a connected datagram socket? */
1697 		if ((so->so_mode & SM_CONNREQUIRED) ||
1698 		    !(so->so_state & SS_ISCONNECTED)) {
1699 			/*
1700 			 * Not a connected datagram socket. Look for
1701 			 * the SO_UNIX_CLOSE option. If such an option is found
1702 			 * discard the message (since it has no meaning
1703 			 * unless connected).
1704 			 */
1705 			if (so->so_family == AF_UNIX && msgdsize(mp) == 0 &&
1706 			    tudi->OPT_length != 0) {
1707 				void *opt;
1708 				t_uscalar_t optlen = tudi->OPT_length;
1709 
1710 				opt = sogetoff(mp, tudi->OPT_offset,
1711 					optlen, __TPI_ALIGN_SIZE);
1712 				if (opt == NULL) {
1713 					/* The len/off falls outside mp */
1714 					freemsg(mp);
1715 					mutex_enter(&so->so_lock);
1716 					soseterror(so, EPROTO);
1717 					mutex_exit(&so->so_lock);
1718 					cmn_err(CE_WARN,
1719 					    "sockfs: T_unidata_ind with "
1720 					    "invalid optlen/offset %u/%d\n",
1721 					    optlen, tudi->OPT_offset);
1722 					return (NULL);
1723 				}
1724 				if (so_getopt_unix_close(opt, optlen)) {
1725 					freemsg(mp);
1726 					return (NULL);
1727 				}
1728 			}
1729 			*allmsgsigs = S_INPUT | S_RDNORM;
1730 			*pollwakeups = POLLIN | POLLRDNORM;
1731 			*wakeups = RSLEEP;
1732 #ifdef C2_AUDIT
1733 			if (audit_active)
1734 				audit_sock(T_UNITDATA_IND, strvp2wq(vp),
1735 					mp, 0);
1736 #endif /* C2_AUDIT */
1737 			return (mp);
1738 		}
1739 
1740 		/*
1741 		 * A connect datagram socket. For AF_INET{,6} we verify that
1742 		 * the source address matches the "connected to" address.
1743 		 * The semantics of AF_UNIX sockets is to not verify
1744 		 * the source address.
1745 		 * Note that this source address verification is transport
1746 		 * specific. Thus the real fix would be to extent TPI
1747 		 * to allow T_CONN_REQ messages to be send to connectionless
1748 		 * transport providers and always let the transport provider
1749 		 * do whatever filtering is needed.
1750 		 *
1751 		 * The verification/filtering semantics for transports
1752 		 * other than AF_INET and AF_UNIX are unknown. The choice
1753 		 * would be to either filter using bcmp or let all messages
1754 		 * get through. This code does not filter other address
1755 		 * families since this at least allows the application to
1756 		 * work around any missing filtering.
1757 		 *
1758 		 * XXX Should we move filtering to UDP/ICMP???
1759 		 * That would require passing e.g. a T_DISCON_REQ to UDP
1760 		 * when the socket becomes unconnected.
1761 		 */
1762 		addrlen = tudi->SRC_length;
1763 		/*
1764 		 * The alignment restriction is really to strict but
1765 		 * we want enough alignment to inspect the fields of
1766 		 * a sockaddr_in.
1767 		 */
1768 		addr = sogetoff(mp, tudi->SRC_offset, addrlen,
1769 				__TPI_ALIGN_SIZE);
1770 		if (addr == NULL) {
1771 			freemsg(mp);
1772 			mutex_enter(&so->so_lock);
1773 			soseterror(so, EPROTO);
1774 			mutex_exit(&so->so_lock);
1775 			cmn_err(CE_WARN,
1776 			    "sockfs: T_unidata_ind with invalid "
1777 			    "addrlen/offset %u/%d\n",
1778 			    addrlen, tudi->SRC_offset);
1779 			return (NULL);
1780 		}
1781 
1782 		if (so->so_family == AF_INET) {
1783 			/*
1784 			 * For AF_INET we allow wildcarding both sin_addr
1785 			 * and sin_port.
1786 			 */
1787 			struct sockaddr_in *faddr, *sin;
1788 
1789 			/* Prevent so_faddr_sa from changing while accessed */
1790 			mutex_enter(&so->so_lock);
1791 			ASSERT(so->so_faddr_len ==
1792 				(socklen_t)sizeof (struct sockaddr_in));
1793 			faddr = (struct sockaddr_in *)so->so_faddr_sa;
1794 			sin = (struct sockaddr_in *)addr;
1795 			if (addrlen !=
1796 				(t_uscalar_t)sizeof (struct sockaddr_in) ||
1797 			    (sin->sin_addr.s_addr != faddr->sin_addr.s_addr &&
1798 			    faddr->sin_addr.s_addr != INADDR_ANY) ||
1799 			    (so->so_type != SOCK_RAW &&
1800 			    sin->sin_port != faddr->sin_port &&
1801 			    faddr->sin_port != 0)) {
1802 #ifdef DEBUG
1803 				dprintso(so, 0,
1804 					("sockfs: T_UNITDATA_IND mismatch: %s",
1805 					pr_addr(so->so_family,
1806 						(struct sockaddr *)addr,
1807 						addrlen)));
1808 				dprintso(so, 0, (" - %s\n",
1809 					pr_addr(so->so_family, so->so_faddr_sa,
1810 					    (t_uscalar_t)so->so_faddr_len)));
1811 #endif /* DEBUG */
1812 				mutex_exit(&so->so_lock);
1813 				freemsg(mp);
1814 				return (NULL);
1815 			}
1816 			mutex_exit(&so->so_lock);
1817 		} else if (so->so_family == AF_INET6) {
1818 			/*
1819 			 * For AF_INET6 we allow wildcarding both sin6_addr
1820 			 * and sin6_port.
1821 			 */
1822 			struct sockaddr_in6 *faddr6, *sin6;
1823 			static struct in6_addr zeroes; /* inits to all zeros */
1824 
1825 			/* Prevent so_faddr_sa from changing while accessed */
1826 			mutex_enter(&so->so_lock);
1827 			ASSERT(so->so_faddr_len ==
1828 			    (socklen_t)sizeof (struct sockaddr_in6));
1829 			faddr6 = (struct sockaddr_in6 *)so->so_faddr_sa;
1830 			sin6 = (struct sockaddr_in6 *)addr;
1831 			/* XXX could we get a mapped address ::ffff:0.0.0.0 ? */
1832 			if (addrlen !=
1833 			    (t_uscalar_t)sizeof (struct sockaddr_in6) ||
1834 			    (!IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
1835 				&faddr6->sin6_addr) &&
1836 			    !IN6_ARE_ADDR_EQUAL(&faddr6->sin6_addr, &zeroes)) ||
1837 			    (so->so_type != SOCK_RAW &&
1838 			    sin6->sin6_port != faddr6->sin6_port &&
1839 			    faddr6->sin6_port != 0)) {
1840 #ifdef DEBUG
1841 				dprintso(so, 0,
1842 				    ("sockfs: T_UNITDATA_IND mismatch: %s",
1843 					pr_addr(so->so_family,
1844 					    (struct sockaddr *)addr,
1845 					    addrlen)));
1846 				dprintso(so, 0, (" - %s\n",
1847 				    pr_addr(so->so_family, so->so_faddr_sa,
1848 					(t_uscalar_t)so->so_faddr_len)));
1849 #endif /* DEBUG */
1850 				mutex_exit(&so->so_lock);
1851 				freemsg(mp);
1852 				return (NULL);
1853 			}
1854 			mutex_exit(&so->so_lock);
1855 		} else if (so->so_family == AF_UNIX &&
1856 		    msgdsize(mp->b_cont) == 0 &&
1857 		    tudi->OPT_length != 0) {
1858 			/*
1859 			 * Attempt to extract AF_UNIX
1860 			 * SO_UNIX_CLOSE indication from options.
1861 			 */
1862 			void *opt;
1863 			t_uscalar_t optlen = tudi->OPT_length;
1864 
1865 			opt = sogetoff(mp, tudi->OPT_offset,
1866 				optlen, __TPI_ALIGN_SIZE);
1867 			if (opt == NULL) {
1868 				/* The len/off falls outside mp */
1869 				freemsg(mp);
1870 				mutex_enter(&so->so_lock);
1871 				soseterror(so, EPROTO);
1872 				mutex_exit(&so->so_lock);
1873 				cmn_err(CE_WARN,
1874 				    "sockfs: T_unidata_ind with invalid "
1875 				    "optlen/offset %u/%d\n",
1876 				    optlen, tudi->OPT_offset);
1877 				return (NULL);
1878 			}
1879 			/*
1880 			 * If we received a unix close indication mark the
1881 			 * socket and discard this message.
1882 			 */
1883 			if (so_getopt_unix_close(opt, optlen)) {
1884 				mutex_enter(&so->so_lock);
1885 				sobreakconn(so, ECONNRESET);
1886 				mutex_exit(&so->so_lock);
1887 				strsetrerror(SOTOV(so), 0, 0, sogetrderr);
1888 				freemsg(mp);
1889 				*pollwakeups = POLLIN | POLLRDNORM;
1890 				*allmsgsigs = S_INPUT | S_RDNORM;
1891 				*wakeups = RSLEEP;
1892 				return (NULL);
1893 			}
1894 		}
1895 		*allmsgsigs = S_INPUT | S_RDNORM;
1896 		*pollwakeups = POLLIN | POLLRDNORM;
1897 		*wakeups = RSLEEP;
1898 		return (mp);
1899 	}
1900 
1901 	case T_OPTDATA_IND: {
1902 		struct T_optdata_ind	*tdi = &tpr->optdata_ind;
1903 
1904 		if (MBLKL(mp) < sizeof (struct T_optdata_ind)) {
1905 			cmn_err(CE_WARN,
1906 			    "sockfs: Too short T_OPTDATA_IND. Len = %ld\n",
1907 			    (ptrdiff_t)(MBLKL(mp)));
1908 			freemsg(mp);
1909 			return (NULL);
1910 		}
1911 		/*
1912 		 * Allow zero-length messages carrying options.
1913 		 * This is used when carrying the SO_UNIX_CLOSE option.
1914 		 */
1915 		if (so->so_family == AF_UNIX && msgdsize(mp->b_cont) == 0 &&
1916 		    tdi->OPT_length != 0) {
1917 			/*
1918 			 * Attempt to extract AF_UNIX close indication
1919 			 * from the options. Ignore any other options -
1920 			 * those are handled once the message is removed
1921 			 * from the queue.
1922 			 * The close indication message should not carry data.
1923 			 */
1924 			void *opt;
1925 			t_uscalar_t optlen = tdi->OPT_length;
1926 
1927 			opt = sogetoff(mp, tdi->OPT_offset,
1928 				optlen, __TPI_ALIGN_SIZE);
1929 			if (opt == NULL) {
1930 				/* The len/off falls outside mp */
1931 				freemsg(mp);
1932 				mutex_enter(&so->so_lock);
1933 				soseterror(so, EPROTO);
1934 				mutex_exit(&so->so_lock);
1935 				cmn_err(CE_WARN,
1936 				    "sockfs: T_optdata_ind with invalid "
1937 				    "optlen/offset %u/%d\n",
1938 				    optlen, tdi->OPT_offset);
1939 				return (NULL);
1940 			}
1941 			/*
1942 			 * If we received a close indication mark the
1943 			 * socket and discard this message.
1944 			 */
1945 			if (so_getopt_unix_close(opt, optlen)) {
1946 				mutex_enter(&so->so_lock);
1947 				socantsendmore(so);
1948 				mutex_exit(&so->so_lock);
1949 				strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
1950 				freemsg(mp);
1951 				return (NULL);
1952 			}
1953 		}
1954 		*allmsgsigs = S_INPUT | S_RDNORM;
1955 		*pollwakeups = POLLIN | POLLRDNORM;
1956 		*wakeups = RSLEEP;
1957 		return (mp);
1958 	}
1959 
1960 	case T_EXDATA_IND: {
1961 		mblk_t		*mctl, *mdata;
1962 
1963 		if (MBLKL(mp) < sizeof (struct T_exdata_ind)) {
1964 			cmn_err(CE_WARN,
1965 			    "sockfs: Too short T_EXDATA_IND. Len = %ld\n",
1966 			    (ptrdiff_t)(MBLKL(mp)));
1967 			freemsg(mp);
1968 			return (NULL);
1969 		}
1970 		/*
1971 		 * Ignore zero-length T_EXDATA_IND messages. These might be
1972 		 * generated by some transports.
1973 		 *
1974 		 * This is needed to prevent read (which skips the M_PROTO
1975 		 * part) to unexpectedly return 0 (or return EWOULDBLOCK
1976 		 * on a non-blocking socket after select/poll has indicated
1977 		 * that data is available).
1978 		 */
1979 		dprintso(so, 1,
1980 			("T_EXDATA_IND(%p): counts %d/%d state %s\n",
1981 			vp, so->so_oobsigcnt, so->so_oobcnt,
1982 			pr_state(so->so_state, so->so_mode)));
1983 
1984 		if (msgdsize(mp->b_cont) == 0) {
1985 			dprintso(so, 0,
1986 				("strsock_proto: zero length T_EXDATA_IND\n"));
1987 			freemsg(mp);
1988 			return (NULL);
1989 		}
1990 
1991 		/*
1992 		 * Split into the T_EXDATA_IND and the M_DATA part.
1993 		 * We process these three pieces separately:
1994 		 *	signal generation
1995 		 *	handling T_EXDATA_IND
1996 		 *	handling M_DATA component
1997 		 */
1998 		mctl = mp;
1999 		mdata = mctl->b_cont;
2000 		mctl->b_cont = NULL;
2001 		mutex_enter(&so->so_lock);
2002 		so_oob_sig(so, 0, allmsgsigs, pollwakeups);
2003 		mctl = so_oob_exdata(so, mctl, allmsgsigs, pollwakeups);
2004 		mdata = so_oob_data(so, mdata, allmsgsigs, pollwakeups);
2005 
2006 		/*
2007 		 * Pass the T_EXDATA_IND and the M_DATA back separately
2008 		 * by using b_next linkage. (The stream head will queue any
2009 		 * b_next linked messages separately.) This is needed
2010 		 * since MSGMARK applies to the last by of the message
2011 		 * hence we can not have any M_DATA component attached
2012 		 * to the marked T_EXDATA_IND. Note that the stream head
2013 		 * will not consolidate M_DATA messages onto an MSGMARK'ed
2014 		 * message in order to preserve the constraint that
2015 		 * the T_EXDATA_IND always is a separate message.
2016 		 */
2017 		ASSERT(mctl != NULL);
2018 		mctl->b_next = mdata;
2019 		mp = mctl;
2020 #ifdef DEBUG
2021 		if (mdata == NULL) {
2022 			dprintso(so, 1,
2023 				("after outofline T_EXDATA_IND(%p): "
2024 				"counts %d/%d  poll 0x%x sig 0x%x state %s\n",
2025 				vp, so->so_oobsigcnt,
2026 				so->so_oobcnt, *pollwakeups, *allmsgsigs,
2027 				pr_state(so->so_state, so->so_mode)));
2028 		} else {
2029 			dprintso(so, 1,
2030 				("after inline T_EXDATA_IND(%p): "
2031 				"counts %d/%d  poll 0x%x sig 0x%x state %s\n",
2032 				vp, so->so_oobsigcnt,
2033 				so->so_oobcnt, *pollwakeups, *allmsgsigs,
2034 				pr_state(so->so_state, so->so_mode)));
2035 		}
2036 #endif /* DEBUG */
2037 		mutex_exit(&so->so_lock);
2038 		*wakeups = RSLEEP;
2039 		return (mp);
2040 	}
2041 
2042 	case T_CONN_CON: {
2043 		struct T_conn_con	*conn_con;
2044 		void			*addr;
2045 		t_uscalar_t		addrlen;
2046 
2047 		/*
2048 		 * Verify the state, update the state to ISCONNECTED,
2049 		 * record the potentially new address in the message,
2050 		 * and drop the message.
2051 		 */
2052 		if (MBLKL(mp) < sizeof (struct T_conn_con)) {
2053 			cmn_err(CE_WARN,
2054 			    "sockfs: Too short T_CONN_CON. Len = %ld\n",
2055 			    (ptrdiff_t)(MBLKL(mp)));
2056 			freemsg(mp);
2057 			return (NULL);
2058 		}
2059 
2060 		mutex_enter(&so->so_lock);
2061 		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) !=
2062 		    SS_ISCONNECTING) {
2063 			mutex_exit(&so->so_lock);
2064 			dprintso(so, 1,
2065 				("T_CONN_CON: state %x\n", so->so_state));
2066 			freemsg(mp);
2067 			return (NULL);
2068 		}
2069 
2070 		conn_con = &tpr->conn_con;
2071 		addrlen = conn_con->RES_length;
2072 		/*
2073 		 * Allow the address to be of different size than sent down
2074 		 * in the T_CONN_REQ as long as it doesn't exceed the maxlen.
2075 		 * For AF_UNIX require the identical length.
2076 		 */
2077 		if (so->so_family == AF_UNIX ?
2078 		    addrlen != (t_uscalar_t)sizeof (so->so_ux_laddr) :
2079 		    addrlen > (t_uscalar_t)so->so_faddr_maxlen) {
2080 			cmn_err(CE_WARN,
2081 			    "sockfs: T_conn_con with different "
2082 			    "length %u/%d\n",
2083 			    addrlen, conn_con->RES_length);
2084 			soisdisconnected(so, EPROTO);
2085 			mutex_exit(&so->so_lock);
2086 			strsetrerror(SOTOV(so), 0, 0, sogetrderr);
2087 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2088 			strseteof(SOTOV(so), 1);
2089 			freemsg(mp);
2090 			/*
2091 			 * strseteof takes care of read side wakeups,
2092 			 * pollwakeups, and signals.
2093 			 */
2094 			*wakeups = WSLEEP;
2095 			*allmsgsigs = S_OUTPUT;
2096 			*pollwakeups = POLLOUT;
2097 			return (NULL);
2098 		}
2099 		addr = sogetoff(mp, conn_con->RES_offset, addrlen, 1);
2100 		if (addr == NULL) {
2101 			cmn_err(CE_WARN,
2102 			    "sockfs: T_conn_con with invalid "
2103 			    "addrlen/offset %u/%d\n",
2104 			    addrlen, conn_con->RES_offset);
2105 			mutex_exit(&so->so_lock);
2106 			strsetrerror(SOTOV(so), 0, 0, sogetrderr);
2107 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2108 			strseteof(SOTOV(so), 1);
2109 			freemsg(mp);
2110 			/*
2111 			 * strseteof takes care of read side wakeups,
2112 			 * pollwakeups, and signals.
2113 			 */
2114 			*wakeups = WSLEEP;
2115 			*allmsgsigs = S_OUTPUT;
2116 			*pollwakeups = POLLOUT;
2117 			return (NULL);
2118 		}
2119 
2120 		/*
2121 		 * Save for getpeername.
2122 		 */
2123 		if (so->so_family != AF_UNIX) {
2124 			so->so_faddr_len = (socklen_t)addrlen;
2125 			ASSERT(so->so_faddr_len <= so->so_faddr_maxlen);
2126 			bcopy(addr, so->so_faddr_sa, addrlen);
2127 			so->so_state |= SS_FADDR_VALID;
2128 		}
2129 
2130 		if (so->so_peercred != NULL)
2131 			crfree(so->so_peercred);
2132 		so->so_peercred = DB_CRED(mp);
2133 		so->so_cpid = DB_CPID(mp);
2134 		if (so->so_peercred != NULL)
2135 			crhold(so->so_peercred);
2136 
2137 		/* Wakeup anybody sleeping in sowaitconnected */
2138 		soisconnected(so);
2139 		mutex_exit(&so->so_lock);
2140 
2141 		/*
2142 		 * The socket is now available for sending data.
2143 		 */
2144 		*wakeups = WSLEEP;
2145 		*allmsgsigs = S_OUTPUT;
2146 		*pollwakeups = POLLOUT;
2147 		freemsg(mp);
2148 		return (NULL);
2149 	}
2150 
2151 	case T_CONN_IND:
2152 		/*
2153 		 * Verify the min size and queue the message on
2154 		 * the so_conn_ind_head/tail list.
2155 		 */
2156 		if (MBLKL(mp) < sizeof (struct T_conn_ind)) {
2157 			cmn_err(CE_WARN,
2158 			    "sockfs: Too short T_CONN_IND. Len = %ld\n",
2159 			    (ptrdiff_t)(MBLKL(mp)));
2160 			freemsg(mp);
2161 			return (NULL);
2162 		}
2163 
2164 #ifdef C2_AUDIT
2165 		if (audit_active)
2166 			audit_sock(T_CONN_IND, strvp2wq(vp), mp, 0);
2167 #endif /* C2_AUDIT */
2168 		if (!(so->so_state & SS_ACCEPTCONN)) {
2169 			cmn_err(CE_WARN,
2170 			    "sockfs: T_conn_ind on non-listening socket\n");
2171 			freemsg(mp);
2172 			return (NULL);
2173 		}
2174 		soqueueconnind(so, mp);
2175 		*allmsgsigs = S_INPUT | S_RDNORM;
2176 		*pollwakeups = POLLIN | POLLRDNORM;
2177 		*wakeups = RSLEEP;
2178 		return (NULL);
2179 
2180 	case T_ORDREL_IND:
2181 		if (MBLKL(mp) < sizeof (struct T_ordrel_ind)) {
2182 			cmn_err(CE_WARN,
2183 			    "sockfs: Too short T_ORDREL_IND. Len = %ld\n",
2184 			    (ptrdiff_t)(MBLKL(mp)));
2185 			freemsg(mp);
2186 			return (NULL);
2187 		}
2188 
2189 		/*
2190 		 * Some providers send this when not fully connected.
2191 		 * SunLink X.25 needs to retrieve disconnect reason after
2192 		 * disconnect for compatibility. It uses T_ORDREL_IND
2193 		 * instead of T_DISCON_IND so that it may use the
2194 		 * endpoint after a connect failure to retrieve the
2195 		 * reason using an ioctl. Thus we explicitly clear
2196 		 * SS_ISCONNECTING here for SunLink X.25.
2197 		 * This is a needed TPI violation.
2198 		 */
2199 		mutex_enter(&so->so_lock);
2200 		so->so_state &= ~SS_ISCONNECTING;
2201 		socantrcvmore(so);
2202 		mutex_exit(&so->so_lock);
2203 		strseteof(SOTOV(so), 1);
2204 		/*
2205 		 * strseteof takes care of read side wakeups,
2206 		 * pollwakeups, and signals.
2207 		 */
2208 		freemsg(mp);
2209 		return (NULL);
2210 
2211 	case T_DISCON_IND:
2212 		if (MBLKL(mp) < sizeof (struct T_discon_ind)) {
2213 			cmn_err(CE_WARN,
2214 			    "sockfs: Too short T_DISCON_IND. Len = %ld\n",
2215 			    (ptrdiff_t)(MBLKL(mp)));
2216 			freemsg(mp);
2217 			return (NULL);
2218 		}
2219 		if (so->so_state & SS_ACCEPTCONN) {
2220 			/*
2221 			 * This is a listener. Look for a queued T_CONN_IND
2222 			 * with a matching sequence number and remove it
2223 			 * from the list.
2224 			 * It is normal to not find the sequence number since
2225 			 * the soaccept might have already dequeued it
2226 			 * (in which case the T_CONN_RES will fail with
2227 			 * TBADSEQ).
2228 			 */
2229 			(void) soflushconnind(so, tpr->discon_ind.SEQ_number);
2230 			freemsg(mp);
2231 			return (0);
2232 		}
2233 
2234 		/*
2235 		 * Not a listener
2236 		 *
2237 		 * If SS_CANTRCVMORE for AF_UNIX ignore the discon_reason.
2238 		 * Such a discon_ind appears when the peer has first done
2239 		 * a shutdown() followed by a close() in which case we just
2240 		 * want to record socantsendmore.
2241 		 * In this case sockfs first receives a T_ORDREL_IND followed
2242 		 * by a T_DISCON_IND.
2243 		 * Note that for other transports (e.g. TCP) we need to handle
2244 		 * the discon_ind in this case since it signals an error.
2245 		 */
2246 		mutex_enter(&so->so_lock);
2247 		if ((so->so_state & SS_CANTRCVMORE) &&
2248 		    (so->so_family == AF_UNIX)) {
2249 			socantsendmore(so);
2250 			mutex_exit(&so->so_lock);
2251 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2252 			dprintso(so, 1,
2253 				("T_DISCON_IND: error %d\n", so->so_error));
2254 			freemsg(mp);
2255 			/*
2256 			 * Set these variables for caller to process them.
2257 			 * For the else part where T_DISCON_IND is processed,
2258 			 * this will be done in the function being called
2259 			 * (strsock_discon_ind())
2260 			 */
2261 			*wakeups = WSLEEP;
2262 			*allmsgsigs = S_OUTPUT;
2263 			*pollwakeups = POLLOUT;
2264 		} else if (so->so_flag & (SOASYNC_UNBIND | SOLOCKED)) {
2265 			/*
2266 			 * Deferred processing of T_DISCON_IND
2267 			 */
2268 			so_save_discon_ind(so, mp, strsock_discon_ind);
2269 			mutex_exit(&so->so_lock);
2270 		} else {
2271 			/*
2272 			 * Process T_DISCON_IND now
2273 			 */
2274 			(void) strsock_discon_ind(so, mp);
2275 			mutex_exit(&so->so_lock);
2276 		}
2277 		return (NULL);
2278 
2279 	case T_UDERROR_IND: {
2280 		struct T_uderror_ind	*tudi = &tpr->uderror_ind;
2281 		void			*addr;
2282 		t_uscalar_t		addrlen;
2283 		int			error;
2284 
2285 		dprintso(so, 0,
2286 			("T_UDERROR_IND: error %d\n", tudi->ERROR_type));
2287 
2288 		if (MBLKL(mp) < sizeof (struct T_uderror_ind)) {
2289 			cmn_err(CE_WARN,
2290 			    "sockfs: Too short T_UDERROR_IND. Len = %ld\n",
2291 			    (ptrdiff_t)(MBLKL(mp)));
2292 			freemsg(mp);
2293 			return (NULL);
2294 		}
2295 		/* Ignore on connection-oriented transports */
2296 		if (so->so_mode & SM_CONNREQUIRED) {
2297 			freemsg(mp);
2298 			eprintsoline(so, 0);
2299 			cmn_err(CE_WARN,
2300 			    "sockfs: T_uderror_ind on connection-oriented "
2301 			    "transport\n");
2302 			return (NULL);
2303 		}
2304 		addrlen = tudi->DEST_length;
2305 		addr = sogetoff(mp, tudi->DEST_offset, addrlen, 1);
2306 		if (addr == NULL) {
2307 			cmn_err(CE_WARN,
2308 			    "sockfs: T_uderror_ind with invalid "
2309 			    "addrlen/offset %u/%d\n",
2310 			    addrlen, tudi->DEST_offset);
2311 			freemsg(mp);
2312 			return (NULL);
2313 		}
2314 
2315 		/* Verify source address for connected socket. */
2316 		mutex_enter(&so->so_lock);
2317 		if (so->so_state & SS_ISCONNECTED) {
2318 			void *faddr;
2319 			t_uscalar_t faddr_len;
2320 			boolean_t match = B_FALSE;
2321 
2322 			switch (so->so_family) {
2323 			case AF_INET: {
2324 				/* Compare just IP address and port */
2325 				struct sockaddr_in *sin1, *sin2;
2326 
2327 				sin1 = (struct sockaddr_in *)so->so_faddr_sa;
2328 				sin2 = (struct sockaddr_in *)addr;
2329 				if (addrlen == sizeof (struct sockaddr_in) &&
2330 				    sin1->sin_port == sin2->sin_port &&
2331 				    sin1->sin_addr.s_addr ==
2332 				    sin2->sin_addr.s_addr)
2333 					match = B_TRUE;
2334 				break;
2335 			}
2336 			case AF_INET6: {
2337 				/* Compare just IP address and port. Not flow */
2338 				struct sockaddr_in6 *sin1, *sin2;
2339 
2340 				sin1 = (struct sockaddr_in6 *)so->so_faddr_sa;
2341 				sin2 = (struct sockaddr_in6 *)addr;
2342 				if (addrlen == sizeof (struct sockaddr_in6) &&
2343 				    sin1->sin6_port == sin2->sin6_port &&
2344 				    IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr,
2345 					&sin2->sin6_addr))
2346 					match = B_TRUE;
2347 				break;
2348 			}
2349 			case AF_UNIX:
2350 				faddr = &so->so_ux_faddr;
2351 				faddr_len =
2352 					(t_uscalar_t)sizeof (so->so_ux_faddr);
2353 				if (faddr_len == addrlen &&
2354 				    bcmp(addr, faddr, addrlen) == 0)
2355 					match = B_TRUE;
2356 				break;
2357 			default:
2358 				faddr = so->so_faddr_sa;
2359 				faddr_len = (t_uscalar_t)so->so_faddr_len;
2360 				if (faddr_len == addrlen &&
2361 				    bcmp(addr, faddr, addrlen) == 0)
2362 					match = B_TRUE;
2363 				break;
2364 			}
2365 
2366 			if (!match) {
2367 #ifdef DEBUG
2368 				dprintso(so, 0,
2369 					("sockfs: T_UDERR_IND mismatch: %s - ",
2370 					pr_addr(so->so_family,
2371 						(struct sockaddr *)addr,
2372 						addrlen)));
2373 				dprintso(so, 0, ("%s\n",
2374 					pr_addr(so->so_family, so->so_faddr_sa,
2375 						so->so_faddr_len)));
2376 #endif /* DEBUG */
2377 				mutex_exit(&so->so_lock);
2378 				freemsg(mp);
2379 				return (NULL);
2380 			}
2381 			/*
2382 			 * Make the write error nonpersistent. If the error
2383 			 * is zero we use ECONNRESET.
2384 			 * This assumes that the name space for ERROR_type
2385 			 * is the errno name space.
2386 			 */
2387 			if (tudi->ERROR_type != 0)
2388 				error = tudi->ERROR_type;
2389 			else
2390 				error = ECONNRESET;
2391 
2392 			soseterror(so, error);
2393 			mutex_exit(&so->so_lock);
2394 			strsetrerror(SOTOV(so), 0, 0, sogetrderr);
2395 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2396 			*wakeups = RSLEEP | WSLEEP;
2397 			*allmsgsigs = S_INPUT | S_RDNORM | S_OUTPUT;
2398 			*pollwakeups = POLLIN | POLLRDNORM | POLLOUT;
2399 			freemsg(mp);
2400 			return (NULL);
2401 		}
2402 		/*
2403 		 * If the application asked for delayed errors
2404 		 * record the T_UDERROR_IND in so_eaddr_mp and the reason in
2405 		 * so_delayed_error for delayed error posting. If the reason
2406 		 * is zero use ECONNRESET.
2407 		 * Note that delayed error indications do not make sense for
2408 		 * AF_UNIX sockets since sendto checks that the destination
2409 		 * address is valid at the time of the sendto.
2410 		 */
2411 		if (!(so->so_options & SO_DGRAM_ERRIND)) {
2412 			mutex_exit(&so->so_lock);
2413 			freemsg(mp);
2414 			return (NULL);
2415 		}
2416 		if (so->so_eaddr_mp != NULL)
2417 			freemsg(so->so_eaddr_mp);
2418 
2419 		so->so_eaddr_mp = mp;
2420 		if (tudi->ERROR_type != 0)
2421 			error = tudi->ERROR_type;
2422 		else
2423 			error = ECONNRESET;
2424 		so->so_delayed_error = (ushort_t)error;
2425 		mutex_exit(&so->so_lock);
2426 		return (NULL);
2427 	}
2428 
2429 	case T_ERROR_ACK:
2430 		dprintso(so, 0,
2431 			("strsock_proto: T_ERROR_ACK for %d, error %d/%d\n",
2432 			tpr->error_ack.ERROR_prim,
2433 			tpr->error_ack.TLI_error,
2434 			tpr->error_ack.UNIX_error));
2435 
2436 		if (MBLKL(mp) < sizeof (struct T_error_ack)) {
2437 			cmn_err(CE_WARN,
2438 			    "sockfs: Too short T_ERROR_ACK. Len = %ld\n",
2439 			    (ptrdiff_t)(MBLKL(mp)));
2440 			freemsg(mp);
2441 			return (NULL);
2442 		}
2443 		/*
2444 		 * Check if we were waiting for the async message
2445 		 */
2446 		mutex_enter(&so->so_lock);
2447 		if ((so->so_flag & SOASYNC_UNBIND) &&
2448 		    tpr->error_ack.ERROR_prim == T_UNBIND_REQ) {
2449 			so_unlock_single(so, SOASYNC_UNBIND);
2450 			mutex_exit(&so->so_lock);
2451 			freemsg(mp);
2452 			return (NULL);
2453 		}
2454 		mutex_exit(&so->so_lock);
2455 		soqueueack(so, mp);
2456 		return (NULL);
2457 
2458 	case T_OK_ACK:
2459 		if (MBLKL(mp) < sizeof (struct T_ok_ack)) {
2460 			cmn_err(CE_WARN,
2461 			    "sockfs: Too short T_OK_ACK. Len = %ld\n",
2462 			    (ptrdiff_t)(MBLKL(mp)));
2463 			freemsg(mp);
2464 			return (NULL);
2465 		}
2466 		/*
2467 		 * Check if we were waiting for the async message
2468 		 */
2469 		mutex_enter(&so->so_lock);
2470 		if ((so->so_flag & SOASYNC_UNBIND) &&
2471 		    tpr->ok_ack.CORRECT_prim == T_UNBIND_REQ) {
2472 			dprintso(so, 1,
2473 				("strsock_proto: T_OK_ACK async unbind\n"));
2474 			so_unlock_single(so, SOASYNC_UNBIND);
2475 			mutex_exit(&so->so_lock);
2476 			freemsg(mp);
2477 			return (NULL);
2478 		}
2479 		mutex_exit(&so->so_lock);
2480 		soqueueack(so, mp);
2481 		return (NULL);
2482 
2483 	case T_INFO_ACK:
2484 		if (MBLKL(mp) < sizeof (struct T_info_ack)) {
2485 			cmn_err(CE_WARN,
2486 			    "sockfs: Too short T_INFO_ACK. Len = %ld\n",
2487 			    (ptrdiff_t)(MBLKL(mp)));
2488 			freemsg(mp);
2489 			return (NULL);
2490 		}
2491 		soqueueack(so, mp);
2492 		return (NULL);
2493 
2494 	case T_CAPABILITY_ACK:
2495 		/*
2496 		 * A T_capability_ack need only be large enough to hold
2497 		 * the PRIM_type and CAP_bits1 fields; checking for anything
2498 		 * larger might reject a correct response from an older
2499 		 * provider.
2500 		 */
2501 		if (MBLKL(mp) < 2 * sizeof (t_uscalar_t)) {
2502 			cmn_err(CE_WARN,
2503 			    "sockfs: Too short T_CAPABILITY_ACK. Len = %ld\n",
2504 			    (ptrdiff_t)(MBLKL(mp)));
2505 			freemsg(mp);
2506 			return (NULL);
2507 		}
2508 		soqueueack(so, mp);
2509 		return (NULL);
2510 
2511 	case T_BIND_ACK:
2512 		if (MBLKL(mp) < sizeof (struct T_bind_ack)) {
2513 			cmn_err(CE_WARN,
2514 			    "sockfs: Too short T_BIND_ACK. Len = %ld\n",
2515 			    (ptrdiff_t)(MBLKL(mp)));
2516 			freemsg(mp);
2517 			return (NULL);
2518 		}
2519 		soqueueack(so, mp);
2520 		return (NULL);
2521 
2522 	case T_OPTMGMT_ACK:
2523 		if (MBLKL(mp) < sizeof (struct T_optmgmt_ack)) {
2524 			cmn_err(CE_WARN,
2525 			    "sockfs: Too short T_OPTMGMT_ACK. Len = %ld\n",
2526 			    (ptrdiff_t)(MBLKL(mp)));
2527 			freemsg(mp);
2528 			return (NULL);
2529 		}
2530 		soqueueack(so, mp);
2531 		return (NULL);
2532 	default:
2533 #ifdef DEBUG
2534 		cmn_err(CE_WARN,
2535 			"sockfs: unknown TPI primitive %d received\n",
2536 			tpr->type);
2537 #endif /* DEBUG */
2538 		freemsg(mp);
2539 		return (NULL);
2540 	}
2541 }
2542 
2543 /*
2544  * This routine is registered with the stream head to receive other
2545  * (non-data, and non-proto) messages.
2546  *
2547  * Returns NULL if the message was consumed.
2548  * Returns an mblk to make that mblk be processed by the stream head.
2549  *
2550  * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and
2551  * *pollwakeups) for the stream head to take action on.
2552  */
2553 static mblk_t *
2554 strsock_misc(vnode_t *vp, mblk_t *mp,
2555 		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
2556 		strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
2557 {
2558 	struct sonode *so;
2559 
2560 	so = VTOSO(vp);
2561 
2562 	dprintso(so, 1, ("strsock_misc(%p, %p, 0x%x)\n",
2563 			vp, mp, DB_TYPE(mp)));
2564 
2565 	/* Set default return values */
2566 	*wakeups = *allmsgsigs = *firstmsgsigs = *pollwakeups = 0;
2567 
2568 	switch (DB_TYPE(mp)) {
2569 	case M_PCSIG:
2570 		/*
2571 		 * This assumes that an M_PCSIG for the urgent data arrives
2572 		 * before the corresponding T_EXDATA_IND.
2573 		 *
2574 		 * Note: Just like in SunOS 4.X and 4.4BSD a poll will be
2575 		 * awoken before the urgent data shows up.
2576 		 * For OOBINLINE this can result in select returning
2577 		 * only exceptions as opposed to except|read.
2578 		 */
2579 		if (*mp->b_rptr == SIGURG) {
2580 			mutex_enter(&so->so_lock);
2581 			dprintso(so, 1,
2582 				("SIGURG(%p): counts %d/%d state %s\n",
2583 				vp, so->so_oobsigcnt,
2584 				so->so_oobcnt,
2585 				pr_state(so->so_state, so->so_mode)));
2586 			so_oob_sig(so, 1, allmsgsigs, pollwakeups);
2587 			dprintso(so, 1,
2588 				("after SIGURG(%p): counts %d/%d "
2589 				" poll 0x%x sig 0x%x state %s\n",
2590 				vp, so->so_oobsigcnt,
2591 				so->so_oobcnt, *pollwakeups, *allmsgsigs,
2592 				pr_state(so->so_state, so->so_mode)));
2593 			mutex_exit(&so->so_lock);
2594 		}
2595 		freemsg(mp);
2596 		return (NULL);
2597 
2598 	case M_SIG:
2599 	case M_HANGUP:
2600 	case M_UNHANGUP:
2601 	case M_ERROR:
2602 		/* M_ERRORs etc are ignored */
2603 		freemsg(mp);
2604 		return (NULL);
2605 
2606 	case M_FLUSH:
2607 		/*
2608 		 * Do not flush read queue. If the M_FLUSH
2609 		 * arrives because of an impending T_discon_ind
2610 		 * we still have to keep any queued data - this is part of
2611 		 * socket semantics.
2612 		 */
2613 		if (*mp->b_rptr & FLUSHW) {
2614 			*mp->b_rptr &= ~FLUSHR;
2615 			return (mp);
2616 		}
2617 		freemsg(mp);
2618 		return (NULL);
2619 
2620 	default:
2621 		return (mp);
2622 	}
2623 }
2624 
2625 
2626 /* Register to receive signals for certain events */
2627 int
2628 so_set_asyncsigs(vnode_t *vp, pid_t pgrp, int events, int mode, cred_t *cr)
2629 {
2630 	struct strsigset ss;
2631 	int32_t rval;
2632 
2633 	/*
2634 	 * Note that SOLOCKED will be set except for the call from soaccept().
2635 	 */
2636 	ASSERT(!mutex_owned(&VTOSO(vp)->so_lock));
2637 	ss.ss_pid = pgrp;
2638 	ss.ss_events = events;
2639 	return (strioctl(vp, I_ESETSIG, (intptr_t)&ss, mode, K_TO_K, cr,
2640 	    &rval));
2641 }
2642 
2643 
2644 /* Register for events matching the SS_ASYNC flag */
2645 int
2646 so_set_events(struct sonode *so, vnode_t *vp, cred_t *cr)
2647 {
2648 	int events = so->so_state & SS_ASYNC ?
2649 	    S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT :
2650 	    S_RDBAND | S_BANDURG;
2651 
2652 	return (so_set_asyncsigs(vp, so->so_pgrp, events, 0, cr));
2653 }
2654 
2655 
2656 /* Change the SS_ASYNC flag, and update signal delivery if needed */
2657 int
2658 so_flip_async(struct sonode *so, vnode_t *vp, int mode, cred_t *cr)
2659 {
2660 	ASSERT(mutex_owned(&so->so_lock));
2661 	if (so->so_pgrp != 0) {
2662 		int error;
2663 		int events = so->so_state & SS_ASYNC ?		/* Old flag */
2664 		    S_RDBAND | S_BANDURG :			/* New sigs */
2665 		    S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT;
2666 
2667 		so_lock_single(so);
2668 		mutex_exit(&so->so_lock);
2669 
2670 		error = so_set_asyncsigs(vp, so->so_pgrp, events, mode, cr);
2671 
2672 		mutex_enter(&so->so_lock);
2673 		so_unlock_single(so, SOLOCKED);
2674 		if (error)
2675 			return (error);
2676 	}
2677 	so->so_state ^= SS_ASYNC;
2678 	return (0);
2679 }
2680 
2681 /*
2682  * Set new pid/pgrp for SIGPOLL (or SIGIO for FIOASYNC mode), replacing
2683  * any existing one.  If passed zero, just clear the existing one.
2684  */
2685 int
2686 so_set_siggrp(struct sonode *so, vnode_t *vp, pid_t pgrp, int mode, cred_t *cr)
2687 {
2688 	int events = so->so_state & SS_ASYNC ?
2689 	    S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT :
2690 	    S_RDBAND | S_BANDURG;
2691 	int error;
2692 
2693 	ASSERT(mutex_owned(&so->so_lock));
2694 
2695 	/*
2696 	 * Change socket process (group).
2697 	 *
2698 	 * strioctl (via so_set_asyncsigs) will perform permission check and
2699 	 * also keep a PID_HOLD to prevent the pid from being reused.
2700 	 */
2701 	so_lock_single(so);
2702 	mutex_exit(&so->so_lock);
2703 
2704 	if (pgrp != 0) {
2705 		dprintso(so, 1, ("setown: adding pgrp %d ev 0x%x\n",
2706 		    pgrp, events));
2707 		error = so_set_asyncsigs(vp, pgrp, events, mode, cr);
2708 		if (error != 0) {
2709 			eprintsoline(so, error);
2710 			goto bad;
2711 		}
2712 	}
2713 	/* Remove the previously registered process/group */
2714 	if (so->so_pgrp != 0) {
2715 		dprintso(so, 1, ("setown: removing pgrp %d\n", so->so_pgrp));
2716 		error = so_set_asyncsigs(vp, so->so_pgrp, 0, mode, cr);
2717 		if (error != 0) {
2718 			eprintsoline(so, error);
2719 			error = 0;
2720 		}
2721 	}
2722 	mutex_enter(&so->so_lock);
2723 	so_unlock_single(so, SOLOCKED);
2724 	so->so_pgrp = pgrp;
2725 	return (0);
2726 bad:
2727 	mutex_enter(&so->so_lock);
2728 	so_unlock_single(so, SOLOCKED);
2729 	return (error);
2730 }
2731 
2732 
2733 
2734 /*
2735  * Translate a TLI(/XTI) error into a system error as best we can.
2736  */
2737 static const int tli_errs[] = {
2738 		0,		/* no error	*/
2739 		EADDRNOTAVAIL,  /* TBADADDR	*/
2740 		ENOPROTOOPT,	/* TBADOPT	*/
2741 		EACCES,		/* TACCES	*/
2742 		EBADF,		/* TBADF	*/
2743 		EADDRNOTAVAIL,	/* TNOADDR	*/
2744 		EPROTO,		/* TOUTSTATE	*/
2745 		ECONNABORTED,	/* TBADSEQ	*/
2746 		0,		/* TSYSERR - will never get	*/
2747 		EPROTO,		/* TLOOK - should never be sent by transport */
2748 		EMSGSIZE,	/* TBADDATA	*/
2749 		EMSGSIZE,	/* TBUFOVFLW	*/
2750 		EPROTO,		/* TFLOW	*/
2751 		EWOULDBLOCK,	/* TNODATA	*/
2752 		EPROTO,		/* TNODIS	*/
2753 		EPROTO,		/* TNOUDERR	*/
2754 		EINVAL,		/* TBADFLAG	*/
2755 		EPROTO,		/* TNOREL	*/
2756 		EOPNOTSUPP,	/* TNOTSUPPORT	*/
2757 		EPROTO,		/* TSTATECHNG	*/
2758 		/* following represent error namespace expansion with XTI */
2759 		EPROTO,		/* TNOSTRUCTYPE - never sent by transport */
2760 		EPROTO,		/* TBADNAME - never sent by transport */
2761 		EPROTO,		/* TBADQLEN - never sent by transport */
2762 		EADDRINUSE,	/* TADDRBUSY	*/
2763 		EBADF,		/* TINDOUT	*/
2764 		EBADF,		/* TPROVMISMATCH */
2765 		EBADF,		/* TRESQLEN	*/
2766 		EBADF,		/* TRESADDR	*/
2767 		EPROTO,		/* TQFULL - never sent by transport */
2768 		EPROTO,		/* TPROTO	*/
2769 };
2770 
2771 static int
2772 tlitosyserr(int terr)
2773 {
2774 	ASSERT(terr != TSYSERR);
2775 	if (terr >= (sizeof (tli_errs) / sizeof (tli_errs[0])))
2776 		return (EPROTO);
2777 	else
2778 		return (tli_errs[terr]);
2779 }
2780