xref: /titanic_50/usr/src/uts/common/fs/sockfs/sockstr.c (revision a74f7440e9d4ba2cf59e6cbfc445479a28170f2a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/inttypes.h>
31 #include <sys/t_lock.h>
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/buf.h>
35 #include <sys/conf.h>
36 #include <sys/cred.h>
37 #include <sys/kmem.h>
38 #include <sys/sysmacros.h>
39 #include <sys/vfs.h>
40 #include <sys/vnode.h>
41 #include <sys/debug.h>
42 #include <sys/errno.h>
43 #include <sys/time.h>
44 #include <sys/file.h>
45 #include <sys/user.h>
46 #include <sys/stream.h>
47 #include <sys/strsubr.h>
48 #include <sys/esunddi.h>
49 #include <sys/flock.h>
50 #include <sys/modctl.h>
51 #include <sys/vtrace.h>
52 #include <sys/strsun.h>
53 #include <sys/cmn_err.h>
54 #include <sys/proc.h>
55 #include <sys/ddi.h>
56 #include <sys/kmem_impl.h>
57 
58 #include <sys/suntpi.h>
59 #include <sys/socket.h>
60 #include <sys/sockio.h>
61 #include <sys/socketvar.h>
62 #include <netinet/in.h>
63 
64 #include <sys/tiuser.h>
65 #define	_SUN_TPI_VERSION	2
66 #include <sys/tihdr.h>
67 
68 #include <inet/kssl/ksslapi.h>
69 
70 #include <c2/audit.h>
71 
72 #include <sys/dcopy.h>
73 
/* Default socket version; initialized to SOV_SOCKSTREAM. */
int so_default_version = SOV_SOCKSTREAM;

#ifdef DEBUG
/* Set sockdebug to print debug messages when SO_DEBUG is set */
int sockdebug = 0;

/* Set sockprinterr to print error messages when SO_DEBUG is set */
int sockprinterr = 0;

/*
 * Set so_default_options to SO_DEBUG if all sockets should be created
 * with SO_DEBUG set. This is needed to get debug printouts from the
 * socket() call itself.
 */
int so_default_options = 0;
#endif /* DEBUG */

#ifdef SOCK_TEST
/*
 * Set to number of ticks to limit cv_waits for code coverage testing.
 * Set to 1000 when SO_DEBUG is set to 2.
 */
clock_t sock_test_timelimit = 0;
#endif /* SOCK_TEST */

/*
 * For concurrency testing of e.g. opening /dev/ip which does not
 * handle T_INFO_REQ messages. When non-zero, do_tinfo() and
 * do_tcapability() skip the info request and set so_addr_size to 0.
 */
int so_no_tinfo = 0;

/*
 * Timeout for getting a T_CAPABILITY_ACK - it is possible for a provider
 * to simply ignore the T_CAPABILITY_REQ. The value is multiplied by hz
 * before being handed to sowaitprim().
 */
clock_t	sock_capability_timeout	= 2;	/* seconds */

/* Forward declarations of file-local helpers. */
static int	do_tcapability(struct sonode *so, t_uscalar_t cap_bits1);
static void	so_removehooks(struct sonode *so);

/* Read-side hooks installed in the stream head by so_installhooks(). */
static mblk_t *strsock_proto(vnode_t *vp, mblk_t *mp,
		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
		strsigset_t *allmsgsigs, strpollset_t *pollwakeups);
static mblk_t *strsock_misc(vnode_t *vp, mblk_t *mp,
		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
		strsigset_t *allmsgsigs, strpollset_t *pollwakeups);

/* Maps a TLI error code to an errno value (used by sowaitprim()). */
static int tlitosyserr(int terr);

/*
 * Sodirect kmem_cache and put/wakeup functions.
 * The cache is created in sostr_init(); the two functions are stored in
 * each sodirect_t by so_strinit().
 */
struct kmem_cache *socktpi_sod_cache;
static int sodput(sodirect_t *, mblk_t *);
static void sodwakeup(sodirect_t *);
129 
130 /*
131  * Called by sockinit() when sockfs is loaded.
132  */
133 int
134 sostr_init()
135 {
136 	/* Allocate sodirect_t kmem_cache */
137 	socktpi_sod_cache = kmem_cache_create("socktpi_sod_cache",
138 	    sizeof (sodirect_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
139 
140 	return (0);
141 }
142 
143 /*
144  * Convert a socket to a stream. Invoked when the illusory sockmod
145  * is popped from the stream.
146  * Change the stream head back to default operation without losing
147  * any messages (T_conn_ind's are moved to the stream head queue).
148  */
149 int
150 so_sock2stream(struct sonode *so)
151 {
152 	struct vnode		*vp = SOTOV(so);
153 	queue_t			*rq;
154 	mblk_t			*mp;
155 	int			error = 0;
156 
157 	ASSERT(MUTEX_HELD(&so->so_plumb_lock));
158 
159 	mutex_enter(&so->so_lock);
160 	so_lock_single(so);
161 
162 	ASSERT(so->so_version != SOV_STREAM);
163 
164 	if (so->so_state & SS_DIRECT) {
165 		mblk_t **mpp;
166 		int rval;
167 
168 		/*
169 		 * Tell the transport below that sockmod is being popped
170 		 */
171 		mutex_exit(&so->so_lock);
172 		error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K, CRED(),
173 		    &rval);
174 		mutex_enter(&so->so_lock);
175 		if (error != 0) {
176 			dprintso(so, 0, ("so_sock2stream(%p): "
177 			    "_SIOCSOCKFALLBACK failed\n", so));
178 			goto exit;
179 		}
180 		so->so_state &= ~SS_DIRECT;
181 
182 		for (mpp = &so->so_conn_ind_head; (mp = *mpp) != NULL;
183 		    mpp = &mp->b_next) {
184 			struct T_conn_ind	*conn_ind;
185 
186 			/*
187 			 * strsock_proto() has already verified the length of
188 			 * this message block.
189 			 */
190 			ASSERT(MBLKL(mp) >= sizeof (struct T_conn_ind));
191 
192 			conn_ind = (struct T_conn_ind *)mp->b_rptr;
193 			if (conn_ind->OPT_length == 0 &&
194 			    conn_ind->OPT_offset == 0)
195 				continue;
196 
197 			if (DB_REF(mp) > 1) {
198 				mblk_t	*newmp;
199 				size_t	length;
200 				cred_t	*cr;
201 
202 				/*
203 				 * Copy the message block because it is used
204 				 * elsewhere, too.
205 				 */
206 				length = MBLKL(mp);
207 				newmp = soallocproto(length, _ALLOC_INTR);
208 				if (newmp == NULL) {
209 					error = EINTR;
210 					goto exit;
211 				}
212 				bcopy(mp->b_rptr, newmp->b_wptr, length);
213 				newmp->b_wptr += length;
214 				newmp->b_next = mp->b_next;
215 				cr = DB_CRED(mp);
216 				if (cr != NULL)
217 					mblk_setcred(newmp, cr);
218 				DB_CPID(newmp) = DB_CPID(mp);
219 
220 				/*
221 				 * Link the new message block into the queue
222 				 * and free the old one.
223 				 */
224 				*mpp = newmp;
225 				mp->b_next = NULL;
226 				freemsg(mp);
227 
228 				mp = newmp;
229 				conn_ind = (struct T_conn_ind *)mp->b_rptr;
230 			}
231 
232 			/*
233 			 * Remove options added by TCP for accept fast-path.
234 			 */
235 			conn_ind->OPT_length = 0;
236 			conn_ind->OPT_offset = 0;
237 		}
238 	}
239 
240 	so->so_version = SOV_STREAM;
241 	so->so_priv = NULL;
242 
243 	/*
244 	 * Remove the hooks in the stream head to avoid queuing more
245 	 * packets in sockfs.
246 	 */
247 	mutex_exit(&so->so_lock);
248 	so_removehooks(so);
249 	mutex_enter(&so->so_lock);
250 
251 	/*
252 	 * Clear any state related to urgent data. Leave any T_EXDATA_IND
253 	 * on the queue - the behavior of urgent data after a switch is
254 	 * left undefined.
255 	 */
256 	so->so_error = so->so_delayed_error = 0;
257 	freemsg(so->so_oobmsg);
258 	so->so_oobmsg = NULL;
259 	so->so_oobsigcnt = so->so_oobcnt = 0;
260 
261 	so->so_state &= ~(SS_RCVATMARK|SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA|
262 	    SS_HASCONNIND|SS_SAVEDEOR);
263 	ASSERT(so_verify_oobstate(so));
264 
265 	freemsg(so->so_ack_mp);
266 	so->so_ack_mp = NULL;
267 
268 	/*
269 	 * Flush the T_DISCON_IND on so_discon_ind_mp.
270 	 */
271 	so_flush_discon_ind(so);
272 
273 	/*
274 	 * Move any queued T_CONN_IND messages to stream head queue.
275 	 */
276 	rq = RD(strvp2wq(vp));
277 	while ((mp = so->so_conn_ind_head) != NULL) {
278 		so->so_conn_ind_head = mp->b_next;
279 		mp->b_next = NULL;
280 		if (so->so_conn_ind_head == NULL) {
281 			ASSERT(so->so_conn_ind_tail == mp);
282 			so->so_conn_ind_tail = NULL;
283 		}
284 		dprintso(so, 0,
285 		    ("so_sock2stream(%p): moving T_CONN_IND\n",
286 		    so));
287 
288 		/* Drop lock across put() */
289 		mutex_exit(&so->so_lock);
290 		put(rq, mp);
291 		mutex_enter(&so->so_lock);
292 	}
293 
294 exit:
295 	ASSERT(MUTEX_HELD(&so->so_lock));
296 	so_unlock_single(so, SOLOCKED);
297 	mutex_exit(&so->so_lock);
298 	return (error);
299 }
300 
301 /*
302  * Covert a stream back to a socket. This is invoked when the illusory
303  * sockmod is pushed on a stream (where the stream was "created" by
304  * popping the illusory sockmod).
305  * This routine can not recreate the socket state (certain aspects of
306  * it like urgent data state and the bound/connected addresses for AF_UNIX
307  * sockets can not be recreated by asking the transport for information).
308  * Thus this routine implicitly assumes that the socket is in an initial
309  * state (as if it was just created). It flushes any messages queued on the
310  * read queue to avoid dealing with e.g. TPI acks or T_exdata_ind messages.
311  */
312 void
313 so_stream2sock(struct sonode *so)
314 {
315 	struct vnode *vp = SOTOV(so);
316 
317 	ASSERT(MUTEX_HELD(&so->so_plumb_lock));
318 
319 	mutex_enter(&so->so_lock);
320 	so_lock_single(so);
321 	ASSERT(so->so_version == SOV_STREAM);
322 	so->so_version = SOV_SOCKSTREAM;
323 	so->so_pushcnt = 0;
324 	mutex_exit(&so->so_lock);
325 
326 	/*
327 	 * Set a permenent error to force any thread in sorecvmsg to
328 	 * return (and drop SOREADLOCKED). Clear the error once
329 	 * we have SOREADLOCKED.
330 	 * This makes a read sleeping during the I_PUSH of sockmod return
331 	 * EIO.
332 	 */
333 	strsetrerror(SOTOV(so), EIO, 1, NULL);
334 
335 	/*
336 	 * Get the read lock before flushing data to avoid
337 	 * problems with the T_EXDATA_IND MSG_PEEK code in sorecvmsg.
338 	 */
339 	mutex_enter(&so->so_lock);
340 	(void) so_lock_read(so, 0);	/* Set SOREADLOCKED */
341 	mutex_exit(&so->so_lock);
342 
343 	strsetrerror(SOTOV(so), 0, 0, NULL);
344 	so_installhooks(so);
345 
346 	/*
347 	 * Flush everything on the read queue.
348 	 * This ensures that no T_CONN_IND remain and that no T_EXDATA_IND
349 	 * remain; those types of messages would confuse sockfs.
350 	 */
351 	strflushrq(vp, FLUSHALL);
352 	mutex_enter(&so->so_lock);
353 
354 	/*
355 	 * Flush the T_DISCON_IND on so_discon_ind_mp.
356 	 */
357 	so_flush_discon_ind(so);
358 	so_unlock_read(so);	/* Clear SOREADLOCKED */
359 
360 	so_unlock_single(so, SOLOCKED);
361 	mutex_exit(&so->so_lock);
362 }
363 
364 /*
365  * Install the hooks in the stream head.
366  */
367 void
368 so_installhooks(struct sonode *so)
369 {
370 	struct vnode *vp = SOTOV(so);
371 
372 	strsetrputhooks(vp, SH_SIGALLDATA | SH_IGN_ZEROLEN | SH_CONSOL_DATA,
373 	    strsock_proto, strsock_misc);
374 	strsetwputhooks(vp, SH_SIGPIPE | SH_RECHECK_ERR, 0);
375 }
376 
377 /*
378  * Remove the hooks in the stream head.
379  */
380 static void
381 so_removehooks(struct sonode *so)
382 {
383 	struct vnode *vp = SOTOV(so);
384 
385 	strsetrputhooks(vp, 0, NULL, NULL);
386 	strsetwputhooks(vp, 0, STRTIMOUT);
387 	/*
388 	 * Leave read behavior as it would have been for a normal
389 	 * stream i.e. a read of an M_PROTO will fail.
390 	 */
391 }
392 
393 /*
394  * Initialize the streams side of a socket including
395  * T_info_req/ack processing. If tso is not NULL its values are used thereby
396  * avoiding the T_INFO_REQ.
397  */
398 int
399 so_strinit(struct sonode *so, struct sonode *tso)
400 {
401 	struct vnode *vp = SOTOV(so);
402 	struct stdata *stp;
403 	mblk_t *mp;
404 	int error;
405 
406 	dprintso(so, 1, ("so_strinit(%p)\n", so));
407 
408 	/* Preallocate an unbind_req message */
409 	mp = soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP);
410 	mutex_enter(&so->so_lock);
411 	so->so_unbind_mp = mp;
412 #ifdef DEBUG
413 	so->so_options = so_default_options;
414 #endif /* DEBUG */
415 	mutex_exit(&so->so_lock);
416 
417 	so_installhooks(so);
418 
419 	/*
420 	 * The T_CAPABILITY_REQ should be the first message sent down because
421 	 * at least TCP has a fast-path for this which avoids timeouts while
422 	 * waiting for the T_CAPABILITY_ACK under high system load.
423 	 */
424 	if (tso == NULL) {
425 		error = do_tcapability(so, TC1_ACCEPTOR_ID | TC1_INFO);
426 		if (error)
427 			return (error);
428 	} else {
429 		mutex_enter(&so->so_lock);
430 		so->so_tsdu_size = tso->so_tsdu_size;
431 		so->so_etsdu_size = tso->so_etsdu_size;
432 		so->so_addr_size = tso->so_addr_size;
433 		so->so_opt_size = tso->so_opt_size;
434 		so->so_tidu_size = tso->so_tidu_size;
435 		so->so_serv_type = tso->so_serv_type;
436 		so->so_mode = tso->so_mode & ~SM_ACCEPTOR_ID;
437 		mutex_exit(&so->so_lock);
438 
439 		/* the following do_tcapability may update so->so_mode */
440 		if ((tso->so_serv_type != T_CLTS) &&
441 		    !(tso->so_state & SS_DIRECT)) {
442 			error = do_tcapability(so, TC1_ACCEPTOR_ID);
443 			if (error)
444 				return (error);
445 		}
446 	}
447 	/*
448 	 * If the addr_size is 0 we treat it as already bound
449 	 * and connected. This is used by the routing socket.
450 	 * We set the addr_size to something to allocate a the address
451 	 * structures.
452 	 */
453 	if (so->so_addr_size == 0) {
454 		so->so_state |= SS_ISBOUND | SS_ISCONNECTED;
455 		/* Address size can vary with address families. */
456 		if (so->so_family == AF_INET6)
457 			so->so_addr_size =
458 			    (t_scalar_t)sizeof (struct sockaddr_in6);
459 		else
460 			so->so_addr_size =
461 			    (t_scalar_t)sizeof (struct sockaddr_in);
462 		ASSERT(so->so_unbind_mp);
463 	}
464 	/*
465 	 * Allocate the addresses.
466 	 */
467 	ASSERT(so->so_laddr_sa == NULL && so->so_faddr_sa == NULL);
468 	ASSERT(so->so_laddr_len == 0 && so->so_faddr_len == 0);
469 	so->so_laddr_maxlen = so->so_faddr_maxlen =
470 	    P2ROUNDUP(so->so_addr_size, KMEM_ALIGN);
471 	so->so_laddr_sa = kmem_alloc(so->so_laddr_maxlen * 2, KM_SLEEP);
472 	so->so_faddr_sa = (struct sockaddr *)((caddr_t)so->so_laddr_sa
473 	    + so->so_laddr_maxlen);
474 
475 	if (so->so_family == AF_UNIX) {
476 		/*
477 		 * Initialize AF_UNIX related fields.
478 		 */
479 		bzero(&so->so_ux_laddr, sizeof (so->so_ux_laddr));
480 		bzero(&so->so_ux_faddr, sizeof (so->so_ux_faddr));
481 	}
482 
483 	stp = vp->v_stream;
484 	/*
485 	 * Have to keep minpsz at zero in order to allow write/send of zero
486 	 * bytes.
487 	 */
488 	mutex_enter(&stp->sd_lock);
489 	if (stp->sd_qn_minpsz == 1)
490 		stp->sd_qn_minpsz = 0;
491 	mutex_exit(&stp->sd_lock);
492 
493 	/*
494 	 * If sodirect capable allocate and initialize sodirect_t.
495 	 * Note, SS_SODIRECT is set in socktpi_open().
496 	 */
497 	if (so->so_state & SS_SODIRECT) {
498 		sodirect_t	*sodp;
499 
500 		ASSERT(so->so_direct == NULL);
501 
502 		sodp = kmem_cache_alloc(socktpi_sod_cache, KM_SLEEP);
503 		sodp->sod_state = SOD_ENABLED | SOD_WAKE_NOT;
504 		sodp->sod_want = 0;
505 		sodp->sod_q = RD(stp->sd_wrq);
506 		sodp->sod_enqueue = sodput;
507 		sodp->sod_wakeup = sodwakeup;
508 		sodp->sod_uioafh = NULL;
509 		sodp->sod_uioaft = NULL;
510 		sodp->sod_lock = &stp->sd_lock;
511 		/*
512 		 * Remainder of the sod_uioa members are left uninitialized
513 		 * but will be initialized later by uioainit() before uioa
514 		 * is enabled.
515 		 */
516 		sodp->sod_uioa.uioa_state = UIOA_ALLOC;
517 		so->so_direct = sodp;
518 		stp->sd_sodirect = sodp;
519 	}
520 
521 	return (0);
522 }
523 
524 static void
525 copy_tinfo(struct sonode *so, struct T_info_ack *tia)
526 {
527 	so->so_tsdu_size = tia->TSDU_size;
528 	so->so_etsdu_size = tia->ETSDU_size;
529 	so->so_addr_size = tia->ADDR_size;
530 	so->so_opt_size = tia->OPT_size;
531 	so->so_tidu_size = tia->TIDU_size;
532 	so->so_serv_type = tia->SERV_type;
533 	switch (tia->CURRENT_state) {
534 	case TS_UNBND:
535 		break;
536 	case TS_IDLE:
537 		so->so_state |= SS_ISBOUND;
538 		so->so_laddr_len = 0;
539 		so->so_state &= ~SS_LADDR_VALID;
540 		break;
541 	case TS_DATA_XFER:
542 		so->so_state |= SS_ISBOUND|SS_ISCONNECTED;
543 		so->so_laddr_len = 0;
544 		so->so_faddr_len = 0;
545 		so->so_state &= ~(SS_LADDR_VALID | SS_FADDR_VALID);
546 		break;
547 	}
548 
549 	/*
550 	 * Heuristics for determining the socket mode flags
551 	 * (SM_ATOMIC, SM_CONNREQUIRED, SM_ADDR, SM_FDPASSING,
552 	 * and SM_EXDATA, SM_OPTDATA, and SM_BYTESTREAM)
553 	 * from the info ack.
554 	 */
555 	if (so->so_serv_type == T_CLTS) {
556 		so->so_mode |= SM_ATOMIC | SM_ADDR;
557 	} else {
558 		so->so_mode |= SM_CONNREQUIRED;
559 		if (so->so_etsdu_size != 0 && so->so_etsdu_size != -2)
560 			so->so_mode |= SM_EXDATA;
561 	}
562 	if (so->so_type == SOCK_SEQPACKET || so->so_type == SOCK_RAW) {
563 		/* Semantics are to discard tail end of messages */
564 		so->so_mode |= SM_ATOMIC;
565 	}
566 	if (so->so_family == AF_UNIX) {
567 		so->so_mode |= SM_FDPASSING | SM_OPTDATA;
568 		if (so->so_addr_size == -1) {
569 			/* MAXPATHLEN + soun_family + nul termination */
570 			so->so_addr_size = (t_scalar_t)(MAXPATHLEN +
571 			    sizeof (short) + 1);
572 		}
573 		if (so->so_type == SOCK_STREAM) {
574 			/*
575 			 * Make it into a byte-stream transport.
576 			 * SOCK_SEQPACKET sockets are unchanged.
577 			 */
578 			so->so_tsdu_size = 0;
579 		}
580 	} else if (so->so_addr_size == -1) {
581 		/*
582 		 * Logic extracted from sockmod - have to pick some max address
583 		 * length in order to preallocate the addresses.
584 		 */
585 		so->so_addr_size = SOA_DEFSIZE;
586 	}
587 	if (so->so_tsdu_size == 0)
588 		so->so_mode |= SM_BYTESTREAM;
589 }
590 
591 static int
592 check_tinfo(struct sonode *so)
593 {
594 	/* Consistency checks */
595 	if (so->so_type == SOCK_DGRAM && so->so_serv_type != T_CLTS) {
596 		eprintso(so, ("service type and socket type mismatch\n"));
597 		eprintsoline(so, EPROTO);
598 		return (EPROTO);
599 	}
600 	if (so->so_type == SOCK_STREAM && so->so_serv_type == T_CLTS) {
601 		eprintso(so, ("service type and socket type mismatch\n"));
602 		eprintsoline(so, EPROTO);
603 		return (EPROTO);
604 	}
605 	if (so->so_type == SOCK_SEQPACKET && so->so_serv_type == T_CLTS) {
606 		eprintso(so, ("service type and socket type mismatch\n"));
607 		eprintsoline(so, EPROTO);
608 		return (EPROTO);
609 	}
610 	if (so->so_family == AF_INET &&
611 	    so->so_addr_size != (t_scalar_t)sizeof (struct sockaddr_in)) {
612 		eprintso(so,
613 		    ("AF_INET must have sockaddr_in address length. Got %d\n",
614 		    so->so_addr_size));
615 		eprintsoline(so, EMSGSIZE);
616 		return (EMSGSIZE);
617 	}
618 	if (so->so_family == AF_INET6 &&
619 	    so->so_addr_size != (t_scalar_t)sizeof (struct sockaddr_in6)) {
620 		eprintso(so,
621 		    ("AF_INET6 must have sockaddr_in6 address length. Got %d\n",
622 		    so->so_addr_size));
623 		eprintsoline(so, EMSGSIZE);
624 		return (EMSGSIZE);
625 	}
626 
627 	dprintso(so, 1, (
628 	    "tinfo: serv %d tsdu %d, etsdu %d, addr %d, opt %d, tidu %d\n",
629 	    so->so_serv_type, so->so_tsdu_size, so->so_etsdu_size,
630 	    so->so_addr_size, so->so_opt_size,
631 	    so->so_tidu_size));
632 	dprintso(so, 1, ("tinfo: so_state %s\n",
633 	    pr_state(so->so_state, so->so_mode)));
634 	return (0);
635 }
636 
637 /*
638  * Send down T_info_req and wait for the ack.
639  * Record interesting T_info_ack values in the sonode.
640  */
641 static int
642 do_tinfo(struct sonode *so)
643 {
644 	struct T_info_req tir;
645 	mblk_t *mp;
646 	int error;
647 
648 	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
649 
650 	if (so_no_tinfo) {
651 		so->so_addr_size = 0;
652 		return (0);
653 	}
654 
655 	dprintso(so, 1, ("do_tinfo(%p)\n", so));
656 
657 	/* Send T_INFO_REQ */
658 	tir.PRIM_type = T_INFO_REQ;
659 	mp = soallocproto1(&tir, sizeof (tir),
660 	    sizeof (struct T_info_req) + sizeof (struct T_info_ack),
661 	    _ALLOC_INTR);
662 	if (mp == NULL) {
663 		eprintsoline(so, ENOBUFS);
664 		return (ENOBUFS);
665 	}
666 	/* T_INFO_REQ has to be M_PCPROTO */
667 	DB_TYPE(mp) = M_PCPROTO;
668 
669 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
670 	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
671 	if (error) {
672 		eprintsoline(so, error);
673 		return (error);
674 	}
675 	mutex_enter(&so->so_lock);
676 	/* Wait for T_INFO_ACK */
677 	if ((error = sowaitprim(so, T_INFO_REQ, T_INFO_ACK,
678 	    (t_uscalar_t)sizeof (struct T_info_ack), &mp, 0))) {
679 		mutex_exit(&so->so_lock);
680 		eprintsoline(so, error);
681 		return (error);
682 	}
683 
684 	ASSERT(mp);
685 	copy_tinfo(so, (struct T_info_ack *)mp->b_rptr);
686 	mutex_exit(&so->so_lock);
687 	freemsg(mp);
688 	return (check_tinfo(so));
689 }
690 
691 /*
692  * Send down T_capability_req and wait for the ack.
693  * Record interesting T_capability_ack values in the sonode.
694  */
695 static int
696 do_tcapability(struct sonode *so, t_uscalar_t cap_bits1)
697 {
698 	struct T_capability_req tcr;
699 	struct T_capability_ack *tca;
700 	mblk_t *mp;
701 	int error;
702 
703 	ASSERT(cap_bits1 != 0);
704 	ASSERT((cap_bits1 & ~(TC1_ACCEPTOR_ID | TC1_INFO)) == 0);
705 	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
706 
707 	if (so->so_provinfo->tpi_capability == PI_NO)
708 		return (do_tinfo(so));
709 
710 	if (so_no_tinfo) {
711 		so->so_addr_size = 0;
712 		if ((cap_bits1 &= ~TC1_INFO) == 0)
713 			return (0);
714 	}
715 
716 	dprintso(so, 1, ("do_tcapability(%p)\n", so));
717 
718 	/* Send T_CAPABILITY_REQ */
719 	tcr.PRIM_type = T_CAPABILITY_REQ;
720 	tcr.CAP_bits1 = cap_bits1;
721 	mp = soallocproto1(&tcr, sizeof (tcr),
722 	    sizeof (struct T_capability_req) + sizeof (struct T_capability_ack),
723 	    _ALLOC_INTR);
724 	if (mp == NULL) {
725 		eprintsoline(so, ENOBUFS);
726 		return (ENOBUFS);
727 	}
728 	/* T_CAPABILITY_REQ should be M_PCPROTO here */
729 	DB_TYPE(mp) = M_PCPROTO;
730 
731 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
732 	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
733 	if (error) {
734 		eprintsoline(so, error);
735 		return (error);
736 	}
737 	mutex_enter(&so->so_lock);
738 	/* Wait for T_CAPABILITY_ACK */
739 	if ((error = sowaitprim(so, T_CAPABILITY_REQ, T_CAPABILITY_ACK,
740 	    (t_uscalar_t)sizeof (*tca), &mp, sock_capability_timeout * hz))) {
741 		mutex_exit(&so->so_lock);
742 		PI_PROVLOCK(so->so_provinfo);
743 		if (so->so_provinfo->tpi_capability == PI_DONTKNOW)
744 			so->so_provinfo->tpi_capability = PI_NO;
745 		PI_PROVUNLOCK(so->so_provinfo);
746 		ASSERT((so->so_mode & SM_ACCEPTOR_ID) == 0);
747 		if (cap_bits1 & TC1_INFO) {
748 			/*
749 			 * If the T_CAPABILITY_REQ timed out and then a
750 			 * T_INFO_REQ gets a protocol error, most likely
751 			 * the capability was slow (vs. unsupported). Return
752 			 * ENOSR for this case as a best guess.
753 			 */
754 			if (error == ETIME) {
755 				return ((error = do_tinfo(so)) == EPROTO ?
756 				    ENOSR : error);
757 			}
758 			return (do_tinfo(so));
759 		}
760 		return (0);
761 	}
762 
763 	if (so->so_provinfo->tpi_capability == PI_DONTKNOW) {
764 		PI_PROVLOCK(so->so_provinfo);
765 		so->so_provinfo->tpi_capability = PI_YES;
766 		PI_PROVUNLOCK(so->so_provinfo);
767 	}
768 
769 	ASSERT(mp);
770 	tca = (struct T_capability_ack *)mp->b_rptr;
771 
772 	ASSERT((cap_bits1 & TC1_INFO) == (tca->CAP_bits1 & TC1_INFO));
773 
774 	cap_bits1 = tca->CAP_bits1;
775 
776 	if (cap_bits1 & TC1_ACCEPTOR_ID) {
777 		so->so_acceptor_id = tca->ACCEPTOR_id;
778 		so->so_mode |= SM_ACCEPTOR_ID;
779 	}
780 
781 	if (cap_bits1 & TC1_INFO)
782 		copy_tinfo(so, &tca->INFO_ack);
783 
784 	mutex_exit(&so->so_lock);
785 	freemsg(mp);
786 
787 	if (cap_bits1 & TC1_INFO)
788 		return (check_tinfo(so));
789 
790 	return (0);
791 }
792 
793 /*
794  * Retrieve and clear the socket error.
795  */
796 int
797 sogeterr(struct sonode *so)
798 {
799 	int error;
800 
801 	ASSERT(MUTEX_HELD(&so->so_lock));
802 
803 	error = so->so_error;
804 	so->so_error = 0;
805 
806 	return (error);
807 }
808 
809 /*
810  * This routine is registered with the stream head to retrieve read
811  * side errors.
812  * It does not clear the socket error for a peeking read side operation.
813  * It the error is to be cleared it sets *clearerr.
814  */
815 int
816 sogetrderr(vnode_t *vp, int ispeek, int *clearerr)
817 {
818 	struct sonode *so = VTOSO(vp);
819 	int error;
820 
821 	mutex_enter(&so->so_lock);
822 	if (ispeek) {
823 		error = so->so_error;
824 		*clearerr = 0;
825 	} else {
826 		error = so->so_error;
827 		so->so_error = 0;
828 		*clearerr = 1;
829 	}
830 	mutex_exit(&so->so_lock);
831 	return (error);
832 }
833 
834 /*
835  * This routine is registered with the stream head to retrieve write
836  * side errors.
837  * It does not clear the socket error for a peeking read side operation.
838  * It the error is to be cleared it sets *clearerr.
839  */
840 int
841 sogetwrerr(vnode_t *vp, int ispeek, int *clearerr)
842 {
843 	struct sonode *so = VTOSO(vp);
844 	int error;
845 
846 	mutex_enter(&so->so_lock);
847 	if (so->so_state & SS_CANTSENDMORE) {
848 		error = EPIPE;
849 		*clearerr = 0;
850 	} else {
851 		error = so->so_error;
852 		if (ispeek) {
853 			*clearerr = 0;
854 		} else {
855 			so->so_error = 0;
856 			*clearerr = 1;
857 		}
858 	}
859 	mutex_exit(&so->so_lock);
860 	return (error);
861 }
862 
863 /*
864  * Set a nonpersistent read and write error on the socket.
865  * Used when there is a T_uderror_ind for a connected socket.
866  * The caller also needs to call strsetrerror and strsetwerror
867  * after dropping the lock.
868  */
869 void
870 soseterror(struct sonode *so, int error)
871 {
872 	ASSERT(error != 0);
873 
874 	ASSERT(MUTEX_HELD(&so->so_lock));
875 	so->so_error = (ushort_t)error;
876 }
877 
878 void
879 soisconnecting(struct sonode *so)
880 {
881 	ASSERT(MUTEX_HELD(&so->so_lock));
882 	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
883 	so->so_state |= SS_ISCONNECTING;
884 	cv_broadcast(&so->so_state_cv);
885 }
886 
887 void
888 soisconnected(struct sonode *so)
889 {
890 	ASSERT(MUTEX_HELD(&so->so_lock));
891 	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING);
892 	so->so_state |= SS_ISCONNECTED;
893 	cv_broadcast(&so->so_state_cv);
894 }
895 
896 /*
897  * The caller also needs to call strsetrerror, strsetwerror and strseteof.
898  */
899 void
900 soisdisconnected(struct sonode *so, int error)
901 {
902 	ASSERT(MUTEX_HELD(&so->so_lock));
903 	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING|
904 	    SS_LADDR_VALID|SS_FADDR_VALID);
905 	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE);
906 	so->so_error = (ushort_t)error;
907 	if (so->so_peercred != NULL) {
908 		crfree(so->so_peercred);
909 		so->so_peercred = NULL;
910 	}
911 	cv_broadcast(&so->so_state_cv);
912 }
913 
914 /*
915  * For connected AF_UNIX SOCK_DGRAM sockets when the peer closes.
916  * Does not affect write side.
917  * The caller also has to call strsetrerror.
918  */
919 static void
920 sobreakconn(struct sonode *so, int error)
921 {
922 	ASSERT(MUTEX_HELD(&so->so_lock));
923 	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
924 	so->so_error = (ushort_t)error;
925 	cv_broadcast(&so->so_state_cv);
926 }
927 
928 /*
929  * Can no longer send.
930  * Caller must also call strsetwerror.
931  *
932  * We mark the peer address as no longer valid for getpeername, but
933  * leave it around for so_unix_close to notify the peer (that
934  * transport has no addressing held at that layer).
935  */
936 void
937 socantsendmore(struct sonode *so)
938 {
939 	ASSERT(MUTEX_HELD(&so->so_lock));
940 	so->so_state = so->so_state & ~SS_FADDR_VALID | SS_CANTSENDMORE;
941 	cv_broadcast(&so->so_state_cv);
942 }
943 
944 /*
945  * The caller must call strseteof(,1) as well as this routine
946  * to change the socket state.
947  */
948 void
949 socantrcvmore(struct sonode *so)
950 {
951 	ASSERT(MUTEX_HELD(&so->so_lock));
952 	so->so_state |= SS_CANTRCVMORE;
953 	cv_broadcast(&so->so_state_cv);
954 }
955 
956 /*
957  * The caller has sent down a "request_prim" primitive and wants to wait for
958  * an ack ("ack_prim") or an T_ERROR_ACK for it.
959  * The specified "ack_prim" can be a T_OK_ACK.
960  *
961  * Assumes that all the TPI acks are M_PCPROTO messages.
962  *
963  * Note that the socket is single-threaded (using so_lock_single)
964  * for all operations that generate TPI ack messages. Since
965  * only TPI ack messages are M_PCPROTO we should never receive
966  * anything except either the ack we are expecting or a T_ERROR_ACK
967  * for the same primitive.
968  */
969 int
970 sowaitprim(struct sonode *so, t_scalar_t request_prim, t_scalar_t ack_prim,
971 	    t_uscalar_t min_size, mblk_t **mpp, clock_t wait)
972 {
973 	mblk_t *mp;
974 	union T_primitives *tpr;
975 	int error;
976 
977 	dprintso(so, 1, ("sowaitprim(%p, %d, %d, %d, %p, %lu)\n",
978 	    so, request_prim, ack_prim, min_size, mpp, wait));
979 
980 	ASSERT(MUTEX_HELD(&so->so_lock));
981 
982 	error = sowaitack(so, &mp, wait);
983 	if (error)
984 		return (error);
985 
986 	dprintso(so, 1, ("got msg %p\n", mp));
987 	if (DB_TYPE(mp) != M_PCPROTO ||
988 	    MBLKL(mp) < sizeof (tpr->type)) {
989 		freemsg(mp);
990 		eprintsoline(so, EPROTO);
991 		return (EPROTO);
992 	}
993 	tpr = (union T_primitives *)mp->b_rptr;
994 	/*
995 	 * Did we get the primitive that we were asking for?
996 	 * For T_OK_ACK we also check that it matches the request primitive.
997 	 */
998 	if (tpr->type == ack_prim &&
999 	    (ack_prim != T_OK_ACK ||
1000 	    tpr->ok_ack.CORRECT_prim == request_prim)) {
1001 		if (MBLKL(mp) >= (ssize_t)min_size) {
1002 			/* Found what we are looking for */
1003 			*mpp = mp;
1004 			return (0);
1005 		}
1006 		/* Too short */
1007 		freemsg(mp);
1008 		eprintsoline(so, EPROTO);
1009 		return (EPROTO);
1010 	}
1011 
1012 	if (tpr->type == T_ERROR_ACK &&
1013 	    tpr->error_ack.ERROR_prim == request_prim) {
1014 		/* Error to the primitive we were looking for */
1015 		if (tpr->error_ack.TLI_error == TSYSERR) {
1016 			error = tpr->error_ack.UNIX_error;
1017 		} else {
1018 			error = tlitosyserr(tpr->error_ack.TLI_error);
1019 		}
1020 		dprintso(so, 0, ("error_ack for %d: %d/%d ->%d\n",
1021 		    tpr->error_ack.ERROR_prim,
1022 		    tpr->error_ack.TLI_error,
1023 		    tpr->error_ack.UNIX_error,
1024 		    error));
1025 		freemsg(mp);
1026 		return (error);
1027 	}
1028 	/*
1029 	 * Wrong primitive or T_ERROR_ACK for the wrong primitive
1030 	 */
1031 #ifdef DEBUG
1032 	if (tpr->type == T_ERROR_ACK) {
1033 		dprintso(so, 0, ("error_ack for %d: %d/%d\n",
1034 		    tpr->error_ack.ERROR_prim,
1035 		    tpr->error_ack.TLI_error,
1036 		    tpr->error_ack.UNIX_error));
1037 	} else if (tpr->type == T_OK_ACK) {
1038 		dprintso(so, 0, ("ok_ack for %d, expected %d for %d\n",
1039 		    tpr->ok_ack.CORRECT_prim,
1040 		    ack_prim, request_prim));
1041 	} else {
1042 		dprintso(so, 0,
1043 		    ("unexpected primitive %d, expected %d for %d\n",
1044 		    tpr->type, ack_prim, request_prim));
1045 	}
1046 #endif /* DEBUG */
1047 
1048 	freemsg(mp);
1049 	eprintsoline(so, EPROTO);
1050 	return (EPROTO);
1051 }
1052 
1053 /*
1054  * Wait for a T_OK_ACK for the specified primitive.
1055  */
1056 int
1057 sowaitokack(struct sonode *so, t_scalar_t request_prim)
1058 {
1059 	mblk_t *mp;
1060 	int error;
1061 
1062 	error = sowaitprim(so, request_prim, T_OK_ACK,
1063 	    (t_uscalar_t)sizeof (struct T_ok_ack), &mp, 0);
1064 	if (error)
1065 		return (error);
1066 	freemsg(mp);
1067 	return (0);
1068 }
1069 
1070 /*
1071  * Queue a received TPI ack message on so_ack_mp.
1072  */
1073 void
1074 soqueueack(struct sonode *so, mblk_t *mp)
1075 {
1076 	if (DB_TYPE(mp) != M_PCPROTO) {
1077 		zcmn_err(getzoneid(), CE_WARN,
1078 		    "sockfs: received unexpected M_PROTO TPI ack. Prim %d\n",
1079 		    *(t_scalar_t *)mp->b_rptr);
1080 		freemsg(mp);
1081 		return;
1082 	}
1083 
1084 	mutex_enter(&so->so_lock);
1085 	if (so->so_ack_mp != NULL) {
1086 		dprintso(so, 1, ("so_ack_mp already set\n"));
1087 		freemsg(so->so_ack_mp);
1088 		so->so_ack_mp = NULL;
1089 	}
1090 	so->so_ack_mp = mp;
1091 	cv_broadcast(&so->so_ack_cv);
1092 	mutex_exit(&so->so_lock);
1093 }
1094 
1095 /*
1096  * Wait for a TPI ack ignoring signals and errors.
1097  */
1098 int
1099 sowaitack(struct sonode *so, mblk_t **mpp, clock_t wait)
1100 {
1101 	ASSERT(MUTEX_HELD(&so->so_lock));
1102 
1103 	while (so->so_ack_mp == NULL) {
1104 #ifdef SOCK_TEST
1105 		if (wait == 0 && sock_test_timelimit != 0)
1106 			wait = sock_test_timelimit;
1107 #endif
1108 		if (wait != 0) {
1109 			/*
1110 			 * Only wait for the time limit.
1111 			 */
1112 			clock_t now;
1113 
1114 			time_to_wait(&now, wait);
1115 			if (cv_timedwait(&so->so_ack_cv, &so->so_lock,
1116 			    now) == -1) {
1117 				eprintsoline(so, ETIME);
1118 				return (ETIME);
1119 			}
1120 		}
1121 		else
1122 			cv_wait(&so->so_ack_cv, &so->so_lock);
1123 	}
1124 	*mpp = so->so_ack_mp;
1125 #ifdef DEBUG
1126 	{
1127 		union T_primitives *tpr;
1128 		mblk_t *mp = *mpp;
1129 
1130 		tpr = (union T_primitives *)mp->b_rptr;
1131 		ASSERT(DB_TYPE(mp) == M_PCPROTO);
1132 		ASSERT(tpr->type == T_OK_ACK ||
1133 		    tpr->type == T_ERROR_ACK ||
1134 		    tpr->type == T_BIND_ACK ||
1135 		    tpr->type == T_CAPABILITY_ACK ||
1136 		    tpr->type == T_INFO_ACK ||
1137 		    tpr->type == T_OPTMGMT_ACK);
1138 	}
1139 #endif /* DEBUG */
1140 	so->so_ack_mp = NULL;
1141 	return (0);
1142 }
1143 
1144 /*
1145  * Queue a received T_CONN_IND message on so_conn_ind_head/tail.
1146  */
1147 void
1148 soqueueconnind(struct sonode *so, mblk_t *mp)
1149 {
1150 	if (DB_TYPE(mp) != M_PROTO) {
1151 		zcmn_err(getzoneid(), CE_WARN,
1152 		    "sockfs: received unexpected M_PCPROTO T_CONN_IND\n");
1153 		freemsg(mp);
1154 		return;
1155 	}
1156 
1157 	mutex_enter(&so->so_lock);
1158 	ASSERT(mp->b_next == NULL);
1159 	if (so->so_conn_ind_head == NULL) {
1160 		so->so_conn_ind_head = mp;
1161 		so->so_state |= SS_HASCONNIND;
1162 	} else {
1163 		ASSERT(so->so_state & SS_HASCONNIND);
1164 		ASSERT(so->so_conn_ind_tail->b_next == NULL);
1165 		so->so_conn_ind_tail->b_next = mp;
1166 	}
1167 	so->so_conn_ind_tail = mp;
1168 	/* Wakeup a single consumer of the T_CONN_IND */
1169 	cv_signal(&so->so_connind_cv);
1170 	mutex_exit(&so->so_lock);
1171 }
1172 
1173 /*
1174  * Wait for a T_CONN_IND.
1175  * Don't wait if nonblocking.
1176  * Accept signals and socket errors.
1177  */
1178 int
1179 sowaitconnind(struct sonode *so, int fmode, mblk_t **mpp)
1180 {
1181 	mblk_t *mp;
1182 	int error = 0;
1183 
1184 	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
1185 	mutex_enter(&so->so_lock);
1186 check_error:
1187 	if (so->so_error) {
1188 		error = sogeterr(so);
1189 		if (error) {
1190 			mutex_exit(&so->so_lock);
1191 			return (error);
1192 		}
1193 	}
1194 
1195 	if (so->so_conn_ind_head == NULL) {
1196 		if (fmode & (FNDELAY|FNONBLOCK)) {
1197 			error = EWOULDBLOCK;
1198 			goto done;
1199 		}
1200 		if (!cv_wait_sig_swap(&so->so_connind_cv, &so->so_lock)) {
1201 			error = EINTR;
1202 			goto done;
1203 		}
1204 		goto check_error;
1205 	}
1206 	mp = so->so_conn_ind_head;
1207 	so->so_conn_ind_head = mp->b_next;
1208 	mp->b_next = NULL;
1209 	if (so->so_conn_ind_head == NULL) {
1210 		ASSERT(so->so_conn_ind_tail == mp);
1211 		so->so_conn_ind_tail = NULL;
1212 		so->so_state &= ~SS_HASCONNIND;
1213 	}
1214 	*mpp = mp;
1215 done:
1216 	mutex_exit(&so->so_lock);
1217 	return (error);
1218 }
1219 
1220 /*
1221  * Flush a T_CONN_IND matching the sequence number from the list.
1222  * Return zero if found; non-zero otherwise.
1223  * This is called very infrequently thus it is ok to do a linear search.
1224  */
1225 int
1226 soflushconnind(struct sonode *so, t_scalar_t seqno)
1227 {
1228 	mblk_t *prevmp, *mp;
1229 	struct T_conn_ind *tci;
1230 
1231 	mutex_enter(&so->so_lock);
1232 	for (prevmp = NULL, mp = so->so_conn_ind_head; mp != NULL;
1233 	    prevmp = mp, mp = mp->b_next) {
1234 		tci = (struct T_conn_ind *)mp->b_rptr;
1235 		if (tci->SEQ_number == seqno) {
1236 			dprintso(so, 1,
1237 			    ("t_discon_ind: found T_CONN_IND %d\n", seqno));
1238 			/* Deleting last? */
1239 			if (so->so_conn_ind_tail == mp) {
1240 				so->so_conn_ind_tail = prevmp;
1241 			}
1242 			if (prevmp == NULL) {
1243 				/* Deleting first */
1244 				so->so_conn_ind_head = mp->b_next;
1245 			} else {
1246 				prevmp->b_next = mp->b_next;
1247 			}
1248 			mp->b_next = NULL;
1249 			if (so->so_conn_ind_head == NULL) {
1250 				ASSERT(so->so_conn_ind_tail == NULL);
1251 				so->so_state &= ~SS_HASCONNIND;
1252 			} else {
1253 				ASSERT(so->so_conn_ind_tail != NULL);
1254 			}
1255 			so->so_error = ECONNABORTED;
1256 			mutex_exit(&so->so_lock);
1257 
1258 			/*
1259 			 * T_KSSL_PROXY_CONN_IND may carry a handle for
1260 			 * an SSL context, and needs to be released.
1261 			 */
1262 			if ((tci->PRIM_type == T_SSL_PROXY_CONN_IND) &&
1263 			    (mp->b_cont != NULL)) {
1264 				kssl_ctx_t kssl_ctx;
1265 
1266 				ASSERT(MBLKL(mp->b_cont) ==
1267 				    sizeof (kssl_ctx_t));
1268 				kssl_ctx = *((kssl_ctx_t *)mp->b_cont->b_rptr);
1269 				kssl_release_ctx(kssl_ctx);
1270 			}
1271 			freemsg(mp);
1272 			return (0);
1273 		}
1274 	}
1275 	mutex_exit(&so->so_lock);
1276 	dprintso(so, 1,	("t_discon_ind: NOT found T_CONN_IND %d\n", seqno));
1277 	return (-1);
1278 }
1279 
/*
 * Sleep on so_state_cv while the socket is still connecting (that is,
 * SS_ISCONNECTING set, SS_ISCONNECTED clear) and no socket error is posted.
 *
 * fmode: if FNDELAY or FNONBLOCK is set, EINPROGRESS is returned instead
 *	of sleeping.
 * nosig: non-zero makes the sleep uninterruptible; otherwise a signal
 *	ends the wait with EINTR.
 *
 * The caller must hold so_lock. Returns zero once connected, the value
 * from sogeterr() if an error is pending, or ECONNREFUSED when the socket
 * is neither connected nor has an error left to report.
 */
int
sowaitconnected(struct sonode *so, int fmode, int nosig)
{
	int error;

	ASSERT(MUTEX_HELD(&so->so_lock));

	for (;;) {
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) !=
		    SS_ISCONNECTING)
			break;
		if (so->so_error != 0)
			break;

		dprintso(so, 1, ("waiting for SS_ISCONNECTED on %p\n", so));
		if (fmode & (FNDELAY|FNONBLOCK))
			return (EINPROGRESS);

		if (nosig) {
			cv_wait(&so->so_state_cv, &so->so_lock);
		} else if (!cv_wait_sig_swap(&so->so_state_cv,
		    &so->so_lock)) {
			/*
			 * Interrupted. The application can fall back on
			 * nonblocking techniques to learn when the
			 * connection has been established.
			 */
			return (EINTR);
		}
		dprintso(so, 1, ("awoken on %p\n", so));
	}

	if (so->so_error != 0) {
		error = sogeterr(so);
		ASSERT(error != 0);
		dprintso(so, 1, ("sowaitconnected: error %d\n", error));
		return (error);
	}
	if (so->so_state & SS_ISCONNECTED)
		return (0);

	/*
	 * Not connected and no error to report: a T_ORDREL_IND or a
	 * T_DISCON_IND with a zero errno could have arrived, or another
	 * thread (e.g. one calling read) could have consumed so_error.
	 */
	error = ECONNREFUSED;
	dprintso(so, 1, ("sowaitconnected: error %d\n", error));
	return (error);
}
1330 
1331 
1332 /*
1333  * Handle the signal generation aspect of urgent data.
1334  */
1335 static void
1336 so_oob_sig(struct sonode *so, int extrasig,
1337     strsigset_t *signals, strpollset_t *pollwakeups)
1338 {
1339 	ASSERT(MUTEX_HELD(&so->so_lock));
1340 
1341 	ASSERT(so_verify_oobstate(so));
1342 	ASSERT(so->so_oobsigcnt >= so->so_oobcnt);
1343 	if (so->so_oobsigcnt > so->so_oobcnt) {
1344 		/*
1345 		 * Signal has already been generated once for this
1346 		 * urgent "event". However, since TCP can receive updated
1347 		 * urgent pointers we still generate a signal.
1348 		 */
1349 		ASSERT(so->so_state & SS_OOBPEND);
1350 		if (extrasig) {
1351 			*signals |= S_RDBAND;
1352 			*pollwakeups |= POLLRDBAND;
1353 		}
1354 		return;
1355 	}
1356 
1357 	so->so_oobsigcnt++;
1358 	ASSERT(so->so_oobsigcnt > 0);	/* Wraparound */
1359 	ASSERT(so->so_oobsigcnt > so->so_oobcnt);
1360 
1361 	/*
1362 	 * Record (for select/poll) that urgent data is pending.
1363 	 */
1364 	so->so_state |= SS_OOBPEND;
1365 	/*
1366 	 * New urgent data on the way so forget about any old
1367 	 * urgent data.
1368 	 */
1369 	so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);
1370 	if (so->so_oobmsg != NULL) {
1371 		dprintso(so, 1, ("sock: discarding old oob\n"));
1372 		freemsg(so->so_oobmsg);
1373 		so->so_oobmsg = NULL;
1374 	}
1375 	*signals |= S_RDBAND;
1376 	*pollwakeups |= POLLRDBAND;
1377 	ASSERT(so_verify_oobstate(so));
1378 }
1379 
1380 /*
1381  * Handle the processing of the T_EXDATA_IND with urgent data.
1382  * Returns the T_EXDATA_IND if it should be queued on the read queue.
1383  */
1384 /* ARGSUSED2 */
1385 static mblk_t *
1386 so_oob_exdata(struct sonode *so, mblk_t *mp,
1387 	strsigset_t *signals, strpollset_t *pollwakeups)
1388 {
1389 	ASSERT(MUTEX_HELD(&so->so_lock));
1390 
1391 	ASSERT(so_verify_oobstate(so));
1392 
1393 	ASSERT(so->so_oobsigcnt > so->so_oobcnt);
1394 
1395 	so->so_oobcnt++;
1396 	ASSERT(so->so_oobcnt > 0);	/* wraparound? */
1397 	ASSERT(so->so_oobsigcnt >= so->so_oobcnt);
1398 
1399 	/*
1400 	 * Set MSGMARK for SIOCATMARK.
1401 	 */
1402 	mp->b_flag |= MSGMARK;
1403 
1404 	ASSERT(so_verify_oobstate(so));
1405 	return (mp);
1406 }
1407 
1408 /*
1409  * Handle the processing of the actual urgent data.
1410  * Returns the data mblk if it should be queued on the read queue.
1411  */
1412 static mblk_t *
1413 so_oob_data(struct sonode *so, mblk_t *mp,
1414 	strsigset_t *signals, strpollset_t *pollwakeups)
1415 {
1416 	ASSERT(MUTEX_HELD(&so->so_lock));
1417 
1418 	ASSERT(so_verify_oobstate(so));
1419 
1420 	ASSERT(so->so_oobsigcnt >= so->so_oobcnt);
1421 	ASSERT(mp != NULL);
1422 	/*
1423 	 * For OOBINLINE we keep the data in the T_EXDATA_IND.
1424 	 * Otherwise we store it in so_oobmsg.
1425 	 */
1426 	ASSERT(so->so_oobmsg == NULL);
1427 	if (so->so_options & SO_OOBINLINE) {
1428 		*pollwakeups |= POLLIN | POLLRDNORM | POLLRDBAND;
1429 		*signals |= S_INPUT | S_RDNORM;
1430 	} else {
1431 		*pollwakeups |= POLLRDBAND;
1432 		so->so_state |= SS_HAVEOOBDATA;
1433 		so->so_oobmsg = mp;
1434 		mp = NULL;
1435 	}
1436 	ASSERT(so_verify_oobstate(so));
1437 	return (mp);
1438 }
1439 
1440 /*
1441  * Caller must hold the mutex.
1442  * For delayed processing, save the T_DISCON_IND received
1443  * from below on so_discon_ind_mp.
1444  * When the message is processed the framework will call:
1445  *      (*func)(so, mp);
1446  */
1447 static void
1448 so_save_discon_ind(struct sonode *so,
1449 	mblk_t *mp,
1450 	void (*func)(struct sonode *so, mblk_t *))
1451 {
1452 	ASSERT(MUTEX_HELD(&so->so_lock));
1453 
1454 	/*
1455 	 * Discard new T_DISCON_IND if we have already received another.
1456 	 * Currently the earlier message can either be on so_discon_ind_mp
1457 	 * or being processed.
1458 	 */
1459 	if (so->so_discon_ind_mp != NULL || (so->so_flag & SOASYNC_UNBIND)) {
1460 		zcmn_err(getzoneid(), CE_WARN,
1461 		    "sockfs: received unexpected additional T_DISCON_IND\n");
1462 		freemsg(mp);
1463 		return;
1464 	}
1465 	mp->b_prev = (mblk_t *)func;
1466 	mp->b_next = NULL;
1467 	so->so_discon_ind_mp = mp;
1468 }
1469 
1470 /*
1471  * Caller must hold the mutex and make sure that either SOLOCKED
1472  * or SOASYNC_UNBIND is set. Called from so_unlock_single().
1473  * Perform delayed processing of T_DISCON_IND message on so_discon_ind_mp.
1474  * Need to ensure that strsock_proto() will not end up sleeping for
1475  * SOASYNC_UNBIND, while executing this function.
1476  */
1477 void
1478 so_drain_discon_ind(struct sonode *so)
1479 {
1480 	mblk_t	*bp;
1481 	void (*func)(struct sonode *so, mblk_t *);
1482 
1483 	ASSERT(MUTEX_HELD(&so->so_lock));
1484 	ASSERT(so->so_flag & (SOLOCKED|SOASYNC_UNBIND));
1485 
1486 	/* Process T_DISCON_IND on so_discon_ind_mp */
1487 	if ((bp = so->so_discon_ind_mp) != NULL) {
1488 		so->so_discon_ind_mp = NULL;
1489 		func = (void (*)())bp->b_prev;
1490 		bp->b_prev = NULL;
1491 
1492 		/*
1493 		 * This (*func) is supposed to generate a message downstream
1494 		 * and we need to have a flag set until the corresponding
1495 		 * upstream message reaches stream head.
1496 		 * When processing T_DISCON_IND in strsock_discon_ind
1497 		 * we hold SOASYN_UNBIND when sending T_UNBIND_REQ down and
1498 		 * drop the flag after we get the ACK in strsock_proto.
1499 		 */
1500 		(void) (*func)(so, bp);
1501 	}
1502 }
1503 
1504 /*
1505  * Caller must hold the mutex.
1506  * Remove the T_DISCON_IND on so_discon_ind_mp.
1507  */
1508 void
1509 so_flush_discon_ind(struct sonode *so)
1510 {
1511 	mblk_t	*bp;
1512 
1513 	ASSERT(MUTEX_HELD(&so->so_lock));
1514 
1515 	/*
1516 	 * Remove T_DISCON_IND mblk at so_discon_ind_mp.
1517 	 */
1518 	if ((bp = so->so_discon_ind_mp) != NULL) {
1519 		so->so_discon_ind_mp = NULL;
1520 		bp->b_prev = NULL;
1521 		freemsg(bp);
1522 	}
1523 }
1524 
1525 /*
1526  * Caller must hold the mutex.
1527  *
1528  * This function is used to process the T_DISCON_IND message. It does
1529  * immediate processing when called from strsock_proto and delayed
1530  * processing of discon_ind saved on so_discon_ind_mp when called from
1531  * so_drain_discon_ind. When a T_DISCON_IND message is saved in
1532  * so_discon_ind_mp for delayed processing, this function is registered
1533  * as the callback function to process the message.
1534  *
1535  * SOASYNC_UNBIND should be held in this function, during the non-blocking
1536  * unbind operation, and should be released only after we receive the ACK
1537  * in strsock_proto, for the T_UNBIND_REQ sent here. Since SOLOCKED is not set,
1538  * no TPI messages would be sent down at this time. This is to prevent M_FLUSH
1539  * sent from either this function or tcp_unbind(), flushing away any TPI
1540  * message that is being sent down and stays in a lower module's queue.
1541  *
1542  * This function drops so_lock and grabs it again.
1543  */
1544 static void
1545 strsock_discon_ind(struct sonode *so, mblk_t *discon_mp)
1546 {
1547 	struct vnode *vp;
1548 	struct stdata *stp;
1549 	union T_primitives *tpr;
1550 	struct T_unbind_req *ubr;
1551 	mblk_t *mp;
1552 	int error;
1553 
1554 	ASSERT(MUTEX_HELD(&so->so_lock));
1555 	ASSERT(discon_mp);
1556 	ASSERT(discon_mp->b_rptr);
1557 
1558 	tpr = (union T_primitives *)discon_mp->b_rptr;
1559 	ASSERT(tpr->type == T_DISCON_IND);
1560 
1561 	vp = SOTOV(so);
1562 	stp = vp->v_stream;
1563 	ASSERT(stp);
1564 
1565 	/*
1566 	 * Not a listener
1567 	 */
1568 	ASSERT((so->so_state & SS_ACCEPTCONN) == 0);
1569 
1570 	/*
1571 	 * This assumes that the name space for DISCON_reason
1572 	 * is the errno name space.
1573 	 */
1574 	soisdisconnected(so, tpr->discon_ind.DISCON_reason);
1575 
1576 	/*
1577 	 * Unbind with the transport without blocking.
1578 	 * If we've already received a T_DISCON_IND do not unbind.
1579 	 *
1580 	 * If there is no preallocated unbind message, we have already
1581 	 * unbound with the transport
1582 	 *
1583 	 * If the socket is not bound, no need to unbind.
1584 	 */
1585 	mp = so->so_unbind_mp;
1586 	if (mp == NULL) {
1587 		ASSERT(!(so->so_state & SS_ISBOUND));
1588 		mutex_exit(&so->so_lock);
1589 	} else if (!(so->so_state & SS_ISBOUND))  {
1590 		mutex_exit(&so->so_lock);
1591 	} else {
1592 		so->so_unbind_mp = NULL;
1593 
1594 		/*
1595 		 * Is another T_DISCON_IND being processed.
1596 		 */
1597 		ASSERT((so->so_flag & SOASYNC_UNBIND) == 0);
1598 
1599 		/*
1600 		 * Make strsock_proto ignore T_OK_ACK and T_ERROR_ACK for
1601 		 * this unbind. Set SOASYNC_UNBIND. This should be cleared
1602 		 * only after we receive the ACK in strsock_proto.
1603 		 */
1604 		so->so_flag |= SOASYNC_UNBIND;
1605 		ASSERT(!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)));
1606 		so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN|SS_LADDR_VALID);
1607 		mutex_exit(&so->so_lock);
1608 
1609 		/*
1610 		 * Send down T_UNBIND_REQ ignoring flow control.
1611 		 * XXX Assumes that MSG_IGNFLOW implies that this thread
1612 		 * does not run service procedures.
1613 		 */
1614 		ASSERT(DB_TYPE(mp) == M_PROTO);
1615 		ubr = (struct T_unbind_req *)mp->b_rptr;
1616 		mp->b_wptr += sizeof (*ubr);
1617 		ubr->PRIM_type = T_UNBIND_REQ;
1618 
1619 		/*
1620 		 * Flush the read and write side (except stream head read queue)
1621 		 * and send down T_UNBIND_REQ.
1622 		 */
1623 		(void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW);
1624 		error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
1625 		    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
1626 		/* LINTED - warning: statement has no consequent: if */
1627 		if (error) {
1628 			eprintsoline(so, error);
1629 		}
1630 	}
1631 
1632 	if (tpr->discon_ind.DISCON_reason != 0)
1633 		strsetrerror(SOTOV(so), 0, 0, sogetrderr);
1634 	strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
1635 	strseteof(SOTOV(so), 1);
1636 	/*
1637 	 * strseteof takes care of read side wakeups,
1638 	 * pollwakeups, and signals.
1639 	 */
1640 	dprintso(so, 1, ("T_DISCON_IND: error %d\n", so->so_error));
1641 	freemsg(discon_mp);
1642 
1643 
1644 	pollwakeup(&stp->sd_pollist, POLLOUT);
1645 	mutex_enter(&stp->sd_lock);
1646 
1647 	/*
1648 	 * Wake sleeping write
1649 	 */
1650 	if (stp->sd_flag & WSLEEP) {
1651 		stp->sd_flag &= ~WSLEEP;
1652 		cv_broadcast(&stp->sd_wrq->q_wait);
1653 	}
1654 
1655 	/*
1656 	 * strsendsig can handle multiple signals with a
1657 	 * single call.  Send SIGPOLL for S_OUTPUT event.
1658 	 */
1659 	if (stp->sd_sigflags & S_OUTPUT)
1660 		strsendsig(stp->sd_siglist, S_OUTPUT, 0, 0);
1661 
1662 	mutex_exit(&stp->sd_lock);
1663 	mutex_enter(&so->so_lock);
1664 }
1665 
1666 /*
1667  * This routine is registered with the stream head to receive M_PROTO
1668  * and M_PCPROTO messages.
1669  *
1670  * Returns NULL if the message was consumed.
1671  * Returns an mblk to make that mblk be processed (and queued) by the stream
1672  * head.
1673  *
1674  * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and
1675  * *pollwakeups) for the stream head to take action on. Note that since
1676  * sockets always deliver SIGIO for every new piece of data this routine
1677  * never sets *firstmsgsigs; any signals are returned in *allmsgsigs.
1678  *
1679  * This routine handles all data related TPI messages independent of
1680  * the type of the socket i.e. it doesn't care if T_UNITDATA_IND message
1681  * arrive on a SOCK_STREAM.
1682  */
1683 static mblk_t *
1684 strsock_proto(vnode_t *vp, mblk_t *mp,
1685 		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
1686 		strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
1687 {
1688 	union T_primitives *tpr;
1689 	struct sonode *so;
1690 
1691 	so = VTOSO(vp);
1692 
1693 	dprintso(so, 1, ("strsock_proto(%p, %p)\n", vp, mp));
1694 
1695 	/* Set default return values */
1696 	*firstmsgsigs = *wakeups = *allmsgsigs = *pollwakeups = 0;
1697 
1698 	ASSERT(DB_TYPE(mp) == M_PROTO ||
1699 	    DB_TYPE(mp) == M_PCPROTO);
1700 
1701 	if (MBLKL(mp) < sizeof (tpr->type)) {
1702 		/* The message is too short to even contain the primitive */
1703 		zcmn_err(getzoneid(), CE_WARN,
1704 		    "sockfs: Too short TPI message received. Len = %ld\n",
1705 		    (ptrdiff_t)(MBLKL(mp)));
1706 		freemsg(mp);
1707 		return (NULL);
1708 	}
1709 	if (!__TPI_PRIM_ISALIGNED(mp->b_rptr)) {
1710 		/* The read pointer is not aligned correctly for TPI */
1711 		zcmn_err(getzoneid(), CE_WARN,
1712 		    "sockfs: Unaligned TPI message received. rptr = %p\n",
1713 		    (void *)mp->b_rptr);
1714 		freemsg(mp);
1715 		return (NULL);
1716 	}
1717 	tpr = (union T_primitives *)mp->b_rptr;
1718 	dprintso(so, 1, ("strsock_proto: primitive %d\n", tpr->type));
1719 
1720 	switch (tpr->type) {
1721 
1722 	case T_DATA_IND:
1723 		if (MBLKL(mp) < sizeof (struct T_data_ind)) {
1724 			zcmn_err(getzoneid(), CE_WARN,
1725 			    "sockfs: Too short T_DATA_IND. Len = %ld\n",
1726 			    (ptrdiff_t)(MBLKL(mp)));
1727 			freemsg(mp);
1728 			return (NULL);
1729 		}
1730 		/*
1731 		 * Ignore zero-length T_DATA_IND messages. These might be
1732 		 * generated by some transports.
1733 		 * This is needed to prevent read (which skips the M_PROTO
1734 		 * part) to unexpectedly return 0 (or return EWOULDBLOCK
1735 		 * on a non-blocking socket after select/poll has indicated
1736 		 * that data is available).
1737 		 */
1738 		if (msgdsize(mp->b_cont) == 0) {
1739 			dprintso(so, 0,
1740 			    ("strsock_proto: zero length T_DATA_IND\n"));
1741 			freemsg(mp);
1742 			return (NULL);
1743 		}
1744 		*allmsgsigs = S_INPUT | S_RDNORM;
1745 		*pollwakeups = POLLIN | POLLRDNORM;
1746 		*wakeups = RSLEEP;
1747 		return (mp);
1748 
1749 	case T_UNITDATA_IND: {
1750 		struct T_unitdata_ind	*tudi = &tpr->unitdata_ind;
1751 		void			*addr;
1752 		t_uscalar_t		addrlen;
1753 
1754 		if (MBLKL(mp) < sizeof (struct T_unitdata_ind)) {
1755 			zcmn_err(getzoneid(), CE_WARN,
1756 			    "sockfs: Too short T_UNITDATA_IND. Len = %ld\n",
1757 			    (ptrdiff_t)(MBLKL(mp)));
1758 			freemsg(mp);
1759 			return (NULL);
1760 		}
1761 
1762 		/* Is this is not a connected datagram socket? */
1763 		if ((so->so_mode & SM_CONNREQUIRED) ||
1764 		    !(so->so_state & SS_ISCONNECTED)) {
1765 			/*
1766 			 * Not a connected datagram socket. Look for
1767 			 * the SO_UNIX_CLOSE option. If such an option is found
1768 			 * discard the message (since it has no meaning
1769 			 * unless connected).
1770 			 */
1771 			if (so->so_family == AF_UNIX && msgdsize(mp) == 0 &&
1772 			    tudi->OPT_length != 0) {
1773 				void *opt;
1774 				t_uscalar_t optlen = tudi->OPT_length;
1775 
1776 				opt = sogetoff(mp, tudi->OPT_offset,
1777 				    optlen, __TPI_ALIGN_SIZE);
1778 				if (opt == NULL) {
1779 					/* The len/off falls outside mp */
1780 					freemsg(mp);
1781 					mutex_enter(&so->so_lock);
1782 					soseterror(so, EPROTO);
1783 					mutex_exit(&so->so_lock);
1784 					zcmn_err(getzoneid(), CE_WARN,
1785 					    "sockfs: T_unidata_ind with "
1786 					    "invalid optlen/offset %u/%d\n",
1787 					    optlen, tudi->OPT_offset);
1788 					return (NULL);
1789 				}
1790 				if (so_getopt_unix_close(opt, optlen)) {
1791 					freemsg(mp);
1792 					return (NULL);
1793 				}
1794 			}
1795 			*allmsgsigs = S_INPUT | S_RDNORM;
1796 			*pollwakeups = POLLIN | POLLRDNORM;
1797 			*wakeups = RSLEEP;
1798 			if (audit_active)
1799 				audit_sock(T_UNITDATA_IND, strvp2wq(vp),
1800 				    mp, 0);
1801 			return (mp);
1802 		}
1803 
1804 		/*
1805 		 * A connect datagram socket. For AF_INET{,6} we verify that
1806 		 * the source address matches the "connected to" address.
1807 		 * The semantics of AF_UNIX sockets is to not verify
1808 		 * the source address.
1809 		 * Note that this source address verification is transport
1810 		 * specific. Thus the real fix would be to extent TPI
1811 		 * to allow T_CONN_REQ messages to be send to connectionless
1812 		 * transport providers and always let the transport provider
1813 		 * do whatever filtering is needed.
1814 		 *
1815 		 * The verification/filtering semantics for transports
1816 		 * other than AF_INET and AF_UNIX are unknown. The choice
1817 		 * would be to either filter using bcmp or let all messages
1818 		 * get through. This code does not filter other address
1819 		 * families since this at least allows the application to
1820 		 * work around any missing filtering.
1821 		 *
1822 		 * XXX Should we move filtering to UDP/ICMP???
1823 		 * That would require passing e.g. a T_DISCON_REQ to UDP
1824 		 * when the socket becomes unconnected.
1825 		 */
1826 		addrlen = tudi->SRC_length;
1827 		/*
1828 		 * The alignment restriction is really to strict but
1829 		 * we want enough alignment to inspect the fields of
1830 		 * a sockaddr_in.
1831 		 */
1832 		addr = sogetoff(mp, tudi->SRC_offset, addrlen,
1833 		    __TPI_ALIGN_SIZE);
1834 		if (addr == NULL) {
1835 			freemsg(mp);
1836 			mutex_enter(&so->so_lock);
1837 			soseterror(so, EPROTO);
1838 			mutex_exit(&so->so_lock);
1839 			zcmn_err(getzoneid(), CE_WARN,
1840 			    "sockfs: T_unidata_ind with invalid "
1841 			    "addrlen/offset %u/%d\n",
1842 			    addrlen, tudi->SRC_offset);
1843 			return (NULL);
1844 		}
1845 
1846 		if (so->so_family == AF_INET) {
1847 			/*
1848 			 * For AF_INET we allow wildcarding both sin_addr
1849 			 * and sin_port.
1850 			 */
1851 			struct sockaddr_in *faddr, *sin;
1852 
1853 			/* Prevent so_faddr_sa from changing while accessed */
1854 			mutex_enter(&so->so_lock);
1855 			ASSERT(so->so_faddr_len ==
1856 			    (socklen_t)sizeof (struct sockaddr_in));
1857 			faddr = (struct sockaddr_in *)so->so_faddr_sa;
1858 			sin = (struct sockaddr_in *)addr;
1859 			if (addrlen !=
1860 			    (t_uscalar_t)sizeof (struct sockaddr_in) ||
1861 			    (sin->sin_addr.s_addr != faddr->sin_addr.s_addr &&
1862 			    faddr->sin_addr.s_addr != INADDR_ANY) ||
1863 			    (so->so_type != SOCK_RAW &&
1864 			    sin->sin_port != faddr->sin_port &&
1865 			    faddr->sin_port != 0)) {
1866 #ifdef DEBUG
1867 				dprintso(so, 0,
1868 				    ("sockfs: T_UNITDATA_IND mismatch: %s",
1869 				    pr_addr(so->so_family,
1870 				    (struct sockaddr *)addr,
1871 				    addrlen)));
1872 				dprintso(so, 0, (" - %s\n",
1873 				    pr_addr(so->so_family, so->so_faddr_sa,
1874 				    (t_uscalar_t)so->so_faddr_len)));
1875 #endif /* DEBUG */
1876 				mutex_exit(&so->so_lock);
1877 				freemsg(mp);
1878 				return (NULL);
1879 			}
1880 			mutex_exit(&so->so_lock);
1881 		} else if (so->so_family == AF_INET6) {
1882 			/*
1883 			 * For AF_INET6 we allow wildcarding both sin6_addr
1884 			 * and sin6_port.
1885 			 */
1886 			struct sockaddr_in6 *faddr6, *sin6;
1887 			static struct in6_addr zeroes; /* inits to all zeros */
1888 
1889 			/* Prevent so_faddr_sa from changing while accessed */
1890 			mutex_enter(&so->so_lock);
1891 			ASSERT(so->so_faddr_len ==
1892 			    (socklen_t)sizeof (struct sockaddr_in6));
1893 			faddr6 = (struct sockaddr_in6 *)so->so_faddr_sa;
1894 			sin6 = (struct sockaddr_in6 *)addr;
1895 			/* XXX could we get a mapped address ::ffff:0.0.0.0 ? */
1896 			if (addrlen !=
1897 			    (t_uscalar_t)sizeof (struct sockaddr_in6) ||
1898 			    (!IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
1899 			    &faddr6->sin6_addr) &&
1900 			    !IN6_ARE_ADDR_EQUAL(&faddr6->sin6_addr, &zeroes)) ||
1901 			    (so->so_type != SOCK_RAW &&
1902 			    sin6->sin6_port != faddr6->sin6_port &&
1903 			    faddr6->sin6_port != 0)) {
1904 #ifdef DEBUG
1905 				dprintso(so, 0,
1906 				    ("sockfs: T_UNITDATA_IND mismatch: %s",
1907 				    pr_addr(so->so_family,
1908 				    (struct sockaddr *)addr,
1909 				    addrlen)));
1910 				dprintso(so, 0, (" - %s\n",
1911 				    pr_addr(so->so_family, so->so_faddr_sa,
1912 				    (t_uscalar_t)so->so_faddr_len)));
1913 #endif /* DEBUG */
1914 				mutex_exit(&so->so_lock);
1915 				freemsg(mp);
1916 				return (NULL);
1917 			}
1918 			mutex_exit(&so->so_lock);
1919 		} else if (so->so_family == AF_UNIX &&
1920 		    msgdsize(mp->b_cont) == 0 &&
1921 		    tudi->OPT_length != 0) {
1922 			/*
1923 			 * Attempt to extract AF_UNIX
1924 			 * SO_UNIX_CLOSE indication from options.
1925 			 */
1926 			void *opt;
1927 			t_uscalar_t optlen = tudi->OPT_length;
1928 
1929 			opt = sogetoff(mp, tudi->OPT_offset,
1930 			    optlen, __TPI_ALIGN_SIZE);
1931 			if (opt == NULL) {
1932 				/* The len/off falls outside mp */
1933 				freemsg(mp);
1934 				mutex_enter(&so->so_lock);
1935 				soseterror(so, EPROTO);
1936 				mutex_exit(&so->so_lock);
1937 				zcmn_err(getzoneid(), CE_WARN,
1938 				    "sockfs: T_unidata_ind with invalid "
1939 				    "optlen/offset %u/%d\n",
1940 				    optlen, tudi->OPT_offset);
1941 				return (NULL);
1942 			}
1943 			/*
1944 			 * If we received a unix close indication mark the
1945 			 * socket and discard this message.
1946 			 */
1947 			if (so_getopt_unix_close(opt, optlen)) {
1948 				mutex_enter(&so->so_lock);
1949 				sobreakconn(so, ECONNRESET);
1950 				mutex_exit(&so->so_lock);
1951 				strsetrerror(SOTOV(so), 0, 0, sogetrderr);
1952 				freemsg(mp);
1953 				*pollwakeups = POLLIN | POLLRDNORM;
1954 				*allmsgsigs = S_INPUT | S_RDNORM;
1955 				*wakeups = RSLEEP;
1956 				return (NULL);
1957 			}
1958 		}
1959 		*allmsgsigs = S_INPUT | S_RDNORM;
1960 		*pollwakeups = POLLIN | POLLRDNORM;
1961 		*wakeups = RSLEEP;
1962 		return (mp);
1963 	}
1964 
1965 	case T_OPTDATA_IND: {
1966 		struct T_optdata_ind	*tdi = &tpr->optdata_ind;
1967 
1968 		if (MBLKL(mp) < sizeof (struct T_optdata_ind)) {
1969 			zcmn_err(getzoneid(), CE_WARN,
1970 			    "sockfs: Too short T_OPTDATA_IND. Len = %ld\n",
1971 			    (ptrdiff_t)(MBLKL(mp)));
1972 			freemsg(mp);
1973 			return (NULL);
1974 		}
1975 		/*
1976 		 * Allow zero-length messages carrying options.
1977 		 * This is used when carrying the SO_UNIX_CLOSE option.
1978 		 */
1979 		if (so->so_family == AF_UNIX && msgdsize(mp->b_cont) == 0 &&
1980 		    tdi->OPT_length != 0) {
1981 			/*
1982 			 * Attempt to extract AF_UNIX close indication
1983 			 * from the options. Ignore any other options -
1984 			 * those are handled once the message is removed
1985 			 * from the queue.
1986 			 * The close indication message should not carry data.
1987 			 */
1988 			void *opt;
1989 			t_uscalar_t optlen = tdi->OPT_length;
1990 
1991 			opt = sogetoff(mp, tdi->OPT_offset,
1992 			    optlen, __TPI_ALIGN_SIZE);
1993 			if (opt == NULL) {
1994 				/* The len/off falls outside mp */
1995 				freemsg(mp);
1996 				mutex_enter(&so->so_lock);
1997 				soseterror(so, EPROTO);
1998 				mutex_exit(&so->so_lock);
1999 				zcmn_err(getzoneid(), CE_WARN,
2000 				    "sockfs: T_optdata_ind with invalid "
2001 				    "optlen/offset %u/%d\n",
2002 				    optlen, tdi->OPT_offset);
2003 				return (NULL);
2004 			}
2005 			/*
2006 			 * If we received a close indication mark the
2007 			 * socket and discard this message.
2008 			 */
2009 			if (so_getopt_unix_close(opt, optlen)) {
2010 				mutex_enter(&so->so_lock);
2011 				socantsendmore(so);
2012 				mutex_exit(&so->so_lock);
2013 				strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2014 				freemsg(mp);
2015 				return (NULL);
2016 			}
2017 		}
2018 		*allmsgsigs = S_INPUT | S_RDNORM;
2019 		*pollwakeups = POLLIN | POLLRDNORM;
2020 		*wakeups = RSLEEP;
2021 		return (mp);
2022 	}
2023 
2024 	case T_EXDATA_IND: {
2025 		mblk_t		*mctl, *mdata;
2026 		mblk_t *lbp;
2027 		union T_primitives *tprp;
2028 		struct stdata   *stp;
2029 		queue_t *qp;
2030 
2031 		if (MBLKL(mp) < sizeof (struct T_exdata_ind)) {
2032 			zcmn_err(getzoneid(), CE_WARN,
2033 			    "sockfs: Too short T_EXDATA_IND. Len = %ld\n",
2034 			    (ptrdiff_t)(MBLKL(mp)));
2035 			freemsg(mp);
2036 			return (NULL);
2037 		}
2038 		/*
2039 		 * Ignore zero-length T_EXDATA_IND messages. These might be
2040 		 * generated by some transports.
2041 		 *
2042 		 * This is needed to prevent read (which skips the M_PROTO
2043 		 * part) to unexpectedly return 0 (or return EWOULDBLOCK
2044 		 * on a non-blocking socket after select/poll has indicated
2045 		 * that data is available).
2046 		 */
2047 		dprintso(so, 1,
2048 		    ("T_EXDATA_IND(%p): counts %d/%d state %s\n",
2049 		    vp, so->so_oobsigcnt, so->so_oobcnt,
2050 		    pr_state(so->so_state, so->so_mode)));
2051 
2052 		if (msgdsize(mp->b_cont) == 0) {
2053 			dprintso(so, 0,
2054 			    ("strsock_proto: zero length T_EXDATA_IND\n"));
2055 			freemsg(mp);
2056 			return (NULL);
2057 		}
2058 
2059 		/*
2060 		 * Split into the T_EXDATA_IND and the M_DATA part.
2061 		 * We process these three pieces separately:
2062 		 *	signal generation
2063 		 *	handling T_EXDATA_IND
2064 		 *	handling M_DATA component
2065 		 */
2066 		mctl = mp;
2067 		mdata = mctl->b_cont;
2068 		mctl->b_cont = NULL;
2069 		mutex_enter(&so->so_lock);
2070 		so_oob_sig(so, 0, allmsgsigs, pollwakeups);
2071 		mctl = so_oob_exdata(so, mctl, allmsgsigs, pollwakeups);
2072 		mdata = so_oob_data(so, mdata, allmsgsigs, pollwakeups);
2073 
2074 		stp = vp->v_stream;
2075 		ASSERT(stp != NULL);
2076 		qp = _RD(stp->sd_wrq);
2077 
2078 		mutex_enter(QLOCK(qp));
2079 		lbp = qp->q_last;
2080 
2081 		/*
2082 		 * We want to avoid queueing up a string of T_EXDATA_IND
2083 		 * messages with no intervening data messages at the stream
2084 		 * head. These messages contribute to the total message
2085 		 * count. Eventually this can lead to STREAMS flow contol
2086 		 * count. Eventually this can lead to STREAMS flow control
2087 		 * to the peer. This can happen in the degenerate case where
2088 		 * the sender and receiver exchange only OOB data. The sender
2089 		 * only sends messages with MSG_OOB flag and the receiver
2090 		 * receives only MSG_OOB messages and does not use SO_OOBINLINE.
2091 		 * An example of this scenario has been reported in applications
2092 		 * that use OOB data to exchange heart beats. Flow control
2093 		 * relief will never happen if the application only reads OOB
2094 		 * data which is done directly by sorecvoob() and the
2095 		 * T_EXDATA_IND messages at the streamhead won't be consumed.
2096 		 * Note that there is no correctness issue in compressing the
2097 		 * string of T_EXDATA_IND messages into a single T_EXDATA_IND
2098 		 * message. A single read that does not specify MSG_OOB will
2099 		 * read across all the marks in a loop in sotpi_recvmsg().
2100 		 * Each mark is individually distinguishable only if the
2101 		 * T_EXDATA_IND messages are separated by data messages.
2102 		 */
2103 		if ((qp->q_first != NULL) && (DB_TYPE(lbp) == M_PROTO)) {
2104 			tprp = (union T_primitives *)lbp->b_rptr;
2105 			if ((tprp->type == T_EXDATA_IND) &&
2106 			    !(so->so_options & SO_OOBINLINE)) {
2107 
2108 				/*
2109 				 * free the new M_PROTO message
2110 				 */
2111 				freemsg(mctl);
2112 
2113 				/*
2114 				 * adjust the OOB count and OOB	signal count
2115 				 * just incremented for the new OOB data.
2116 				 */
2117 				so->so_oobcnt--;
2118 				so->so_oobsigcnt--;
2119 				mutex_exit(QLOCK(qp));
2120 				mutex_exit(&so->so_lock);
2121 				return (NULL);
2122 			}
2123 		}
2124 		mutex_exit(QLOCK(qp));
2125 
2126 		/*
2127 		 * Pass the T_EXDATA_IND and the M_DATA back separately
2128 		 * by using b_next linkage. (The stream head will queue any
2129 		 * b_next linked messages separately.) This is needed
2130 		 * since MSGMARK applies to the last byte of the message
2131 		 * hence we can not have any M_DATA component attached
2132 		 * to the marked T_EXDATA_IND. Note that the stream head
2133 		 * will not consolidate M_DATA messages onto an MSGMARK'ed
2134 		 * message in order to preserve the constraint that
2135 		 * the T_EXDATA_IND always is a separate message.
2136 		 */
2137 		ASSERT(mctl != NULL);
2138 		mctl->b_next = mdata;
2139 		mp = mctl;
2140 #ifdef DEBUG
2141 		if (mdata == NULL) {
2142 			dprintso(so, 1,
2143 			    ("after outofline T_EXDATA_IND(%p): "
2144 			    "counts %d/%d  poll 0x%x sig 0x%x state %s\n",
2145 			    vp, so->so_oobsigcnt,
2146 			    so->so_oobcnt, *pollwakeups, *allmsgsigs,
2147 			    pr_state(so->so_state, so->so_mode)));
2148 		} else {
2149 			dprintso(so, 1,
2150 			    ("after inline T_EXDATA_IND(%p): "
2151 			    "counts %d/%d  poll 0x%x sig 0x%x state %s\n",
2152 			    vp, so->so_oobsigcnt,
2153 			    so->so_oobcnt, *pollwakeups, *allmsgsigs,
2154 			    pr_state(so->so_state, so->so_mode)));
2155 		}
2156 #endif /* DEBUG */
2157 		mutex_exit(&so->so_lock);
2158 		*wakeups = RSLEEP;
2159 		return (mp);
2160 	}
2161 
2162 	case T_CONN_CON: {
2163 		struct T_conn_con	*conn_con;
2164 		void			*addr;
2165 		t_uscalar_t		addrlen;
2166 
2167 		/*
2168 		 * Verify the state, update the state to ISCONNECTED,
2169 		 * record the potentially new address in the message,
2170 		 * and drop the message.
2171 		 */
2172 		if (MBLKL(mp) < sizeof (struct T_conn_con)) {
2173 			zcmn_err(getzoneid(), CE_WARN,
2174 			    "sockfs: Too short T_CONN_CON. Len = %ld\n",
2175 			    (ptrdiff_t)(MBLKL(mp)));
2176 			freemsg(mp);
2177 			return (NULL);
2178 		}
2179 
2180 		mutex_enter(&so->so_lock);
2181 		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) !=
2182 		    SS_ISCONNECTING) {
2183 			mutex_exit(&so->so_lock);
2184 			dprintso(so, 1,
2185 			    ("T_CONN_CON: state %x\n", so->so_state));
2186 			freemsg(mp);
2187 			return (NULL);
2188 		}
2189 
2190 		conn_con = &tpr->conn_con;
2191 		addrlen = conn_con->RES_length;
2192 		/*
2193 		 * Allow the address to be of different size than sent down
2194 		 * in the T_CONN_REQ as long as it doesn't exceed the maxlen.
2195 		 * For AF_UNIX require the identical length.
2196 		 */
2197 		if (so->so_family == AF_UNIX ?
2198 		    addrlen != (t_uscalar_t)sizeof (so->so_ux_laddr) :
2199 		    addrlen > (t_uscalar_t)so->so_faddr_maxlen) {
2200 			zcmn_err(getzoneid(), CE_WARN,
2201 			    "sockfs: T_conn_con with different "
2202 			    "length %u/%d\n",
2203 			    addrlen, conn_con->RES_length);
2204 			soisdisconnected(so, EPROTO);
2205 			mutex_exit(&so->so_lock);
2206 			strsetrerror(SOTOV(so), 0, 0, sogetrderr);
2207 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2208 			strseteof(SOTOV(so), 1);
2209 			freemsg(mp);
2210 			/*
2211 			 * strseteof takes care of read side wakeups,
2212 			 * pollwakeups, and signals.
2213 			 */
2214 			*wakeups = WSLEEP;
2215 			*allmsgsigs = S_OUTPUT;
2216 			*pollwakeups = POLLOUT;
2217 			return (NULL);
2218 		}
2219 		addr = sogetoff(mp, conn_con->RES_offset, addrlen, 1);
2220 		if (addr == NULL) {
2221 			zcmn_err(getzoneid(), CE_WARN,
2222 			    "sockfs: T_conn_con with invalid "
2223 			    "addrlen/offset %u/%d\n",
2224 			    addrlen, conn_con->RES_offset);
2225 			mutex_exit(&so->so_lock);
2226 			strsetrerror(SOTOV(so), 0, 0, sogetrderr);
2227 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2228 			strseteof(SOTOV(so), 1);
2229 			freemsg(mp);
2230 			/*
2231 			 * strseteof takes care of read side wakeups,
2232 			 * pollwakeups, and signals.
2233 			 */
2234 			*wakeups = WSLEEP;
2235 			*allmsgsigs = S_OUTPUT;
2236 			*pollwakeups = POLLOUT;
2237 			return (NULL);
2238 		}
2239 
2240 		/*
2241 		 * Save for getpeername.
2242 		 */
2243 		if (so->so_family != AF_UNIX) {
2244 			so->so_faddr_len = (socklen_t)addrlen;
2245 			ASSERT(so->so_faddr_len <= so->so_faddr_maxlen);
2246 			bcopy(addr, so->so_faddr_sa, addrlen);
2247 			so->so_state |= SS_FADDR_VALID;
2248 		}
2249 
2250 		if (so->so_peercred != NULL)
2251 			crfree(so->so_peercred);
2252 		so->so_peercred = DB_CRED(mp);
2253 		so->so_cpid = DB_CPID(mp);
2254 		if (so->so_peercred != NULL)
2255 			crhold(so->so_peercred);
2256 
2257 		/* Wakeup anybody sleeping in sowaitconnected */
2258 		soisconnected(so);
2259 		mutex_exit(&so->so_lock);
2260 
2261 		/*
2262 		 * The socket is now available for sending data.
2263 		 */
2264 		*wakeups = WSLEEP;
2265 		*allmsgsigs = S_OUTPUT;
2266 		*pollwakeups = POLLOUT;
2267 		freemsg(mp);
2268 		return (NULL);
2269 	}
2270 
2271 	/*
2272 	 * Extra processing in case of an SSL proxy, before queuing or
2273 	 * forwarding to the fallback endpoint
2274 	 */
2275 	case T_SSL_PROXY_CONN_IND:
2276 	case T_CONN_IND:
2277 		/*
2278 		 * Verify the min size and queue the message on
2279 		 * the so_conn_ind_head/tail list.
2280 		 */
2281 		if (MBLKL(mp) < sizeof (struct T_conn_ind)) {
2282 			zcmn_err(getzoneid(), CE_WARN,
2283 			    "sockfs: Too short T_CONN_IND. Len = %ld\n",
2284 			    (ptrdiff_t)(MBLKL(mp)));
2285 			freemsg(mp);
2286 			return (NULL);
2287 		}
2288 
2289 		if (audit_active)
2290 			audit_sock(T_CONN_IND, strvp2wq(vp), mp, 0);
2291 		if (!(so->so_state & SS_ACCEPTCONN)) {
2292 			zcmn_err(getzoneid(), CE_WARN,
2293 			    "sockfs: T_conn_ind on non-listening socket\n");
2294 			freemsg(mp);
2295 			return (NULL);
2296 		}
2297 
2298 		if (tpr->type == T_SSL_PROXY_CONN_IND && mp->b_cont == NULL) {
2299 			/* No context: need to fall back */
2300 			struct sonode *fbso;
2301 			stdata_t *fbstp;
2302 
2303 			tpr->type = T_CONN_IND;
2304 
2305 			fbso = kssl_find_fallback(so->so_kssl_ent);
2306 
2307 			/*
2308 			 * No fallback: the remote will timeout and
2309 			 * disconnect.
2310 			 */
2311 			if (fbso == NULL) {
2312 				freemsg(mp);
2313 				return (NULL);
2314 			}
2315 			fbstp = SOTOV(fbso)->v_stream;
2316 			qreply(fbstp->sd_wrq->q_next, mp);
2317 			return (NULL);
2318 		}
2319 		soqueueconnind(so, mp);
2320 		*allmsgsigs = S_INPUT | S_RDNORM;
2321 		*pollwakeups = POLLIN | POLLRDNORM;
2322 		*wakeups = RSLEEP;
2323 		return (NULL);
2324 
2325 	case T_ORDREL_IND:
2326 		if (MBLKL(mp) < sizeof (struct T_ordrel_ind)) {
2327 			zcmn_err(getzoneid(), CE_WARN,
2328 			    "sockfs: Too short T_ORDREL_IND. Len = %ld\n",
2329 			    (ptrdiff_t)(MBLKL(mp)));
2330 			freemsg(mp);
2331 			return (NULL);
2332 		}
2333 
2334 		/*
2335 		 * Some providers send this when not fully connected.
2336 		 * SunLink X.25 needs to retrieve disconnect reason after
2337 		 * disconnect for compatibility. It uses T_ORDREL_IND
2338 		 * instead of T_DISCON_IND so that it may use the
2339 		 * endpoint after a connect failure to retrieve the
2340 		 * reason using an ioctl. Thus we explicitly clear
2341 		 * SS_ISCONNECTING here for SunLink X.25.
2342 		 * This is a needed TPI violation.
2343 		 */
2344 		mutex_enter(&so->so_lock);
2345 		so->so_state &= ~SS_ISCONNECTING;
2346 		socantrcvmore(so);
2347 		mutex_exit(&so->so_lock);
2348 		strseteof(SOTOV(so), 1);
2349 		/*
2350 		 * strseteof takes care of read side wakeups,
2351 		 * pollwakeups, and signals.
2352 		 */
2353 		freemsg(mp);
2354 		return (NULL);
2355 
2356 	case T_DISCON_IND:
2357 		if (MBLKL(mp) < sizeof (struct T_discon_ind)) {
2358 			zcmn_err(getzoneid(), CE_WARN,
2359 			    "sockfs: Too short T_DISCON_IND. Len = %ld\n",
2360 			    (ptrdiff_t)(MBLKL(mp)));
2361 			freemsg(mp);
2362 			return (NULL);
2363 		}
2364 		if (so->so_state & SS_ACCEPTCONN) {
2365 			/*
2366 			 * This is a listener. Look for a queued T_CONN_IND
2367 			 * with a matching sequence number and remove it
2368 			 * from the list.
2369 			 * It is normal to not find the sequence number since
2370 			 * the soaccept might have already dequeued it
2371 			 * (in which case the T_CONN_RES will fail with
2372 			 * TBADSEQ).
2373 			 */
2374 			(void) soflushconnind(so, tpr->discon_ind.SEQ_number);
2375 			freemsg(mp);
2376 			return (0);
2377 		}
2378 
2379 		/*
2380 		 * Not a listener
2381 		 *
2382 		 * If SS_CANTRCVMORE for AF_UNIX ignore the discon_reason.
2383 		 * Such a discon_ind appears when the peer has first done
2384 		 * a shutdown() followed by a close() in which case we just
2385 		 * want to record socantsendmore.
2386 		 * In this case sockfs first receives a T_ORDREL_IND followed
2387 		 * by a T_DISCON_IND.
2388 		 * Note that for other transports (e.g. TCP) we need to handle
2389 		 * the discon_ind in this case since it signals an error.
2390 		 */
2391 		mutex_enter(&so->so_lock);
2392 		if ((so->so_state & SS_CANTRCVMORE) &&
2393 		    (so->so_family == AF_UNIX)) {
2394 			socantsendmore(so);
2395 			mutex_exit(&so->so_lock);
2396 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2397 			dprintso(so, 1,
2398 			    ("T_DISCON_IND: error %d\n", so->so_error));
2399 			freemsg(mp);
2400 			/*
2401 			 * Set these variables for caller to process them.
2402 			 * For the else part where T_DISCON_IND is processed,
2403 			 * this will be done in the function being called
2404 			 * (strsock_discon_ind())
2405 			 */
2406 			*wakeups = WSLEEP;
2407 			*allmsgsigs = S_OUTPUT;
2408 			*pollwakeups = POLLOUT;
2409 		} else if (so->so_flag & (SOASYNC_UNBIND | SOLOCKED)) {
2410 			/*
2411 			 * Deferred processing of T_DISCON_IND
2412 			 */
2413 			so_save_discon_ind(so, mp, strsock_discon_ind);
2414 			mutex_exit(&so->so_lock);
2415 		} else {
2416 			/*
2417 			 * Process T_DISCON_IND now
2418 			 */
2419 			(void) strsock_discon_ind(so, mp);
2420 			mutex_exit(&so->so_lock);
2421 		}
2422 		return (NULL);
2423 
2424 	case T_UDERROR_IND: {
2425 		struct T_uderror_ind	*tudi = &tpr->uderror_ind;
2426 		void			*addr;
2427 		t_uscalar_t		addrlen;
2428 		int			error;
2429 
2430 		dprintso(so, 0,
2431 		    ("T_UDERROR_IND: error %d\n", tudi->ERROR_type));
2432 
2433 		if (MBLKL(mp) < sizeof (struct T_uderror_ind)) {
2434 			zcmn_err(getzoneid(), CE_WARN,
2435 			    "sockfs: Too short T_UDERROR_IND. Len = %ld\n",
2436 			    (ptrdiff_t)(MBLKL(mp)));
2437 			freemsg(mp);
2438 			return (NULL);
2439 		}
2440 		/* Ignore on connection-oriented transports */
2441 		if (so->so_mode & SM_CONNREQUIRED) {
2442 			freemsg(mp);
2443 			eprintsoline(so, 0);
2444 			zcmn_err(getzoneid(), CE_WARN,
2445 			    "sockfs: T_uderror_ind on connection-oriented "
2446 			    "transport\n");
2447 			return (NULL);
2448 		}
2449 		addrlen = tudi->DEST_length;
2450 		addr = sogetoff(mp, tudi->DEST_offset, addrlen, 1);
2451 		if (addr == NULL) {
2452 			zcmn_err(getzoneid(), CE_WARN,
2453 			    "sockfs: T_uderror_ind with invalid "
2454 			    "addrlen/offset %u/%d\n",
2455 			    addrlen, tudi->DEST_offset);
2456 			freemsg(mp);
2457 			return (NULL);
2458 		}
2459 
2460 		/* Verify source address for connected socket. */
2461 		mutex_enter(&so->so_lock);
2462 		if (so->so_state & SS_ISCONNECTED) {
2463 			void *faddr;
2464 			t_uscalar_t faddr_len;
2465 			boolean_t match = B_FALSE;
2466 
2467 			switch (so->so_family) {
2468 			case AF_INET: {
2469 				/* Compare just IP address and port */
2470 				struct sockaddr_in *sin1, *sin2;
2471 
2472 				sin1 = (struct sockaddr_in *)so->so_faddr_sa;
2473 				sin2 = (struct sockaddr_in *)addr;
2474 				if (addrlen == sizeof (struct sockaddr_in) &&
2475 				    sin1->sin_port == sin2->sin_port &&
2476 				    sin1->sin_addr.s_addr ==
2477 				    sin2->sin_addr.s_addr)
2478 					match = B_TRUE;
2479 				break;
2480 			}
2481 			case AF_INET6: {
2482 				/* Compare just IP address and port. Not flow */
2483 				struct sockaddr_in6 *sin1, *sin2;
2484 
2485 				sin1 = (struct sockaddr_in6 *)so->so_faddr_sa;
2486 				sin2 = (struct sockaddr_in6 *)addr;
2487 				if (addrlen == sizeof (struct sockaddr_in6) &&
2488 				    sin1->sin6_port == sin2->sin6_port &&
2489 				    IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr,
2490 				    &sin2->sin6_addr))
2491 					match = B_TRUE;
2492 				break;
2493 			}
2494 			case AF_UNIX:
2495 				faddr = &so->so_ux_faddr;
2496 				faddr_len =
2497 				    (t_uscalar_t)sizeof (so->so_ux_faddr);
2498 				if (faddr_len == addrlen &&
2499 				    bcmp(addr, faddr, addrlen) == 0)
2500 					match = B_TRUE;
2501 				break;
2502 			default:
2503 				faddr = so->so_faddr_sa;
2504 				faddr_len = (t_uscalar_t)so->so_faddr_len;
2505 				if (faddr_len == addrlen &&
2506 				    bcmp(addr, faddr, addrlen) == 0)
2507 					match = B_TRUE;
2508 				break;
2509 			}
2510 
2511 			if (!match) {
2512 #ifdef DEBUG
2513 				dprintso(so, 0,
2514 				    ("sockfs: T_UDERR_IND mismatch: %s - ",
2515 				    pr_addr(so->so_family,
2516 				    (struct sockaddr *)addr,
2517 				    addrlen)));
2518 				dprintso(so, 0, ("%s\n",
2519 				    pr_addr(so->so_family, so->so_faddr_sa,
2520 				    so->so_faddr_len)));
2521 #endif /* DEBUG */
2522 				mutex_exit(&so->so_lock);
2523 				freemsg(mp);
2524 				return (NULL);
2525 			}
2526 			/*
2527 			 * Make the write error nonpersistent. If the error
2528 			 * is zero we use ECONNRESET.
2529 			 * This assumes that the name space for ERROR_type
2530 			 * is the errno name space.
2531 			 */
2532 			if (tudi->ERROR_type != 0)
2533 				error = tudi->ERROR_type;
2534 			else
2535 				error = ECONNRESET;
2536 
2537 			soseterror(so, error);
2538 			mutex_exit(&so->so_lock);
2539 			strsetrerror(SOTOV(so), 0, 0, sogetrderr);
2540 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2541 			*wakeups = RSLEEP | WSLEEP;
2542 			*allmsgsigs = S_INPUT | S_RDNORM | S_OUTPUT;
2543 			*pollwakeups = POLLIN | POLLRDNORM | POLLOUT;
2544 			freemsg(mp);
2545 			return (NULL);
2546 		}
2547 		/*
2548 		 * If the application asked for delayed errors
2549 		 * record the T_UDERROR_IND so_eaddr_mp and the reason in
2550 		 * so_delayed_error for delayed error posting. If the reason
2551 		 * is zero use ECONNRESET.
2552 		 * Note that delayed error indications do not make sense for
2553 		 * AF_UNIX sockets since sendto checks that the destination
2554 		 * address is valid at the time of the sendto.
2555 		 */
2556 		if (!(so->so_options & SO_DGRAM_ERRIND)) {
2557 			mutex_exit(&so->so_lock);
2558 			freemsg(mp);
2559 			return (NULL);
2560 		}
2561 		if (so->so_eaddr_mp != NULL)
2562 			freemsg(so->so_eaddr_mp);
2563 
2564 		so->so_eaddr_mp = mp;
2565 		if (tudi->ERROR_type != 0)
2566 			error = tudi->ERROR_type;
2567 		else
2568 			error = ECONNRESET;
2569 		so->so_delayed_error = (ushort_t)error;
2570 		mutex_exit(&so->so_lock);
2571 		return (NULL);
2572 	}
2573 
2574 	case T_ERROR_ACK:
2575 		dprintso(so, 0,
2576 		    ("strsock_proto: T_ERROR_ACK for %d, error %d/%d\n",
2577 		    tpr->error_ack.ERROR_prim,
2578 		    tpr->error_ack.TLI_error,
2579 		    tpr->error_ack.UNIX_error));
2580 
2581 		if (MBLKL(mp) < sizeof (struct T_error_ack)) {
2582 			zcmn_err(getzoneid(), CE_WARN,
2583 			    "sockfs: Too short T_ERROR_ACK. Len = %ld\n",
2584 			    (ptrdiff_t)(MBLKL(mp)));
2585 			freemsg(mp);
2586 			return (NULL);
2587 		}
2588 		/*
2589 		 * Check if we were waiting for the async message
2590 		 */
2591 		mutex_enter(&so->so_lock);
2592 		if ((so->so_flag & SOASYNC_UNBIND) &&
2593 		    tpr->error_ack.ERROR_prim == T_UNBIND_REQ) {
2594 			so_unlock_single(so, SOASYNC_UNBIND);
2595 			mutex_exit(&so->so_lock);
2596 			freemsg(mp);
2597 			return (NULL);
2598 		}
2599 		mutex_exit(&so->so_lock);
2600 		soqueueack(so, mp);
2601 		return (NULL);
2602 
2603 	case T_OK_ACK:
2604 		if (MBLKL(mp) < sizeof (struct T_ok_ack)) {
2605 			zcmn_err(getzoneid(), CE_WARN,
2606 			    "sockfs: Too short T_OK_ACK. Len = %ld\n",
2607 			    (ptrdiff_t)(MBLKL(mp)));
2608 			freemsg(mp);
2609 			return (NULL);
2610 		}
2611 		/*
2612 		 * Check if we were waiting for the async message
2613 		 */
2614 		mutex_enter(&so->so_lock);
2615 		if ((so->so_flag & SOASYNC_UNBIND) &&
2616 		    tpr->ok_ack.CORRECT_prim == T_UNBIND_REQ) {
2617 			dprintso(so, 1,
2618 			    ("strsock_proto: T_OK_ACK async unbind\n"));
2619 			so_unlock_single(so, SOASYNC_UNBIND);
2620 			mutex_exit(&so->so_lock);
2621 			freemsg(mp);
2622 			return (NULL);
2623 		}
2624 		mutex_exit(&so->so_lock);
2625 		soqueueack(so, mp);
2626 		return (NULL);
2627 
2628 	case T_INFO_ACK:
2629 		if (MBLKL(mp) < sizeof (struct T_info_ack)) {
2630 			zcmn_err(getzoneid(), CE_WARN,
2631 			    "sockfs: Too short T_INFO_ACK. Len = %ld\n",
2632 			    (ptrdiff_t)(MBLKL(mp)));
2633 			freemsg(mp);
2634 			return (NULL);
2635 		}
2636 		soqueueack(so, mp);
2637 		return (NULL);
2638 
2639 	case T_CAPABILITY_ACK:
2640 		/*
2641 		 * A T_capability_ack need only be large enough to hold
2642 		 * the PRIM_type and CAP_bits1 fields; checking for anything
2643 		 * larger might reject a correct response from an older
2644 		 * provider.
2645 		 */
2646 		if (MBLKL(mp) < 2 * sizeof (t_uscalar_t)) {
2647 			zcmn_err(getzoneid(), CE_WARN,
2648 			    "sockfs: Too short T_CAPABILITY_ACK. Len = %ld\n",
2649 			    (ptrdiff_t)(MBLKL(mp)));
2650 			freemsg(mp);
2651 			return (NULL);
2652 		}
2653 		soqueueack(so, mp);
2654 		return (NULL);
2655 
2656 	case T_BIND_ACK:
2657 		if (MBLKL(mp) < sizeof (struct T_bind_ack)) {
2658 			zcmn_err(getzoneid(), CE_WARN,
2659 			    "sockfs: Too short T_BIND_ACK. Len = %ld\n",
2660 			    (ptrdiff_t)(MBLKL(mp)));
2661 			freemsg(mp);
2662 			return (NULL);
2663 		}
2664 		soqueueack(so, mp);
2665 		return (NULL);
2666 
2667 	case T_OPTMGMT_ACK:
2668 		if (MBLKL(mp) < sizeof (struct T_optmgmt_ack)) {
2669 			zcmn_err(getzoneid(), CE_WARN,
2670 			    "sockfs: Too short T_OPTMGMT_ACK. Len = %ld\n",
2671 			    (ptrdiff_t)(MBLKL(mp)));
2672 			freemsg(mp);
2673 			return (NULL);
2674 		}
2675 		soqueueack(so, mp);
2676 		return (NULL);
2677 	default:
2678 #ifdef DEBUG
2679 		zcmn_err(getzoneid(), CE_WARN,
2680 		    "sockfs: unknown TPI primitive %d received\n",
2681 		    tpr->type);
2682 #endif /* DEBUG */
2683 		freemsg(mp);
2684 		return (NULL);
2685 	}
2686 }
2687 
2688 /*
2689  * This routine is registered with the stream head to receive other
2690  * (non-data, and non-proto) messages.
2691  *
2692  * Returns NULL if the message was consumed.
2693  * Returns an mblk to make that mblk be processed by the stream head.
2694  *
2695  * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and
2696  * *pollwakeups) for the stream head to take action on.
2697  */
2698 static mblk_t *
2699 strsock_misc(vnode_t *vp, mblk_t *mp,
2700 		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
2701 		strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
2702 {
2703 	struct sonode *so;
2704 
2705 	so = VTOSO(vp);
2706 
2707 	dprintso(so, 1, ("strsock_misc(%p, %p, 0x%x)\n",
2708 	    vp, mp, DB_TYPE(mp)));
2709 
2710 	/* Set default return values */
2711 	*wakeups = *allmsgsigs = *firstmsgsigs = *pollwakeups = 0;
2712 
2713 	switch (DB_TYPE(mp)) {
2714 	case M_PCSIG:
2715 		/*
2716 		 * This assumes that an M_PCSIG for the urgent data arrives
2717 		 * before the corresponding T_EXDATA_IND.
2718 		 *
2719 		 * Note: Just like in SunOS 4.X and 4.4BSD a poll will be
2720 		 * awoken before the urgent data shows up.
2721 		 * For OOBINLINE this can result in select returning
2722 		 * only exceptions as opposed to except|read.
2723 		 */
2724 		if (*mp->b_rptr == SIGURG) {
2725 			mutex_enter(&so->so_lock);
2726 			dprintso(so, 1,
2727 			    ("SIGURG(%p): counts %d/%d state %s\n",
2728 			    vp, so->so_oobsigcnt,
2729 			    so->so_oobcnt,
2730 			    pr_state(so->so_state, so->so_mode)));
2731 			so_oob_sig(so, 1, allmsgsigs, pollwakeups);
2732 			dprintso(so, 1,
2733 			    ("after SIGURG(%p): counts %d/%d "
2734 			    " poll 0x%x sig 0x%x state %s\n",
2735 			    vp, so->so_oobsigcnt,
2736 			    so->so_oobcnt, *pollwakeups, *allmsgsigs,
2737 			    pr_state(so->so_state, so->so_mode)));
2738 			mutex_exit(&so->so_lock);
2739 		}
2740 		freemsg(mp);
2741 		return (NULL);
2742 
2743 	case M_SIG:
2744 	case M_HANGUP:
2745 	case M_UNHANGUP:
2746 	case M_ERROR:
2747 		/* M_ERRORs etc are ignored */
2748 		freemsg(mp);
2749 		return (NULL);
2750 
2751 	case M_FLUSH:
2752 		/*
2753 		 * Do not flush read queue. If the M_FLUSH
2754 		 * arrives because of an impending T_discon_ind
2755 		 * we still have to keep any queued data - this is part of
2756 		 * socket semantics.
2757 		 */
2758 		if (*mp->b_rptr & FLUSHW) {
2759 			*mp->b_rptr &= ~FLUSHR;
2760 			return (mp);
2761 		}
2762 		freemsg(mp);
2763 		return (NULL);
2764 
2765 	default:
2766 		return (mp);
2767 	}
2768 }
2769 
2770 
2771 /* Register to receive signals for certain events */
2772 int
2773 so_set_asyncsigs(vnode_t *vp, pid_t pgrp, int events, int mode, cred_t *cr)
2774 {
2775 	struct strsigset ss;
2776 	int32_t rval;
2777 
2778 	/*
2779 	 * Note that SOLOCKED will be set except for the call from soaccept().
2780 	 */
2781 	ASSERT(!mutex_owned(&VTOSO(vp)->so_lock));
2782 	ss.ss_pid = pgrp;
2783 	ss.ss_events = events;
2784 	return (strioctl(vp, I_ESETSIG, (intptr_t)&ss, mode, K_TO_K, cr,
2785 	    &rval));
2786 }
2787 
2788 
2789 /* Register for events matching the SS_ASYNC flag */
2790 int
2791 so_set_events(struct sonode *so, vnode_t *vp, cred_t *cr)
2792 {
2793 	int events = so->so_state & SS_ASYNC ?
2794 	    S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT :
2795 	    S_RDBAND | S_BANDURG;
2796 
2797 	return (so_set_asyncsigs(vp, so->so_pgrp, events, 0, cr));
2798 }
2799 
2800 
2801 /* Change the SS_ASYNC flag, and update signal delivery if needed */
2802 int
2803 so_flip_async(struct sonode *so, vnode_t *vp, int mode, cred_t *cr)
2804 {
2805 	ASSERT(mutex_owned(&so->so_lock));
2806 	if (so->so_pgrp != 0) {
2807 		int error;
2808 		int events = so->so_state & SS_ASYNC ?		/* Old flag */
2809 		    S_RDBAND | S_BANDURG :			/* New sigs */
2810 		    S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT;
2811 
2812 		so_lock_single(so);
2813 		mutex_exit(&so->so_lock);
2814 
2815 		error = so_set_asyncsigs(vp, so->so_pgrp, events, mode, cr);
2816 
2817 		mutex_enter(&so->so_lock);
2818 		so_unlock_single(so, SOLOCKED);
2819 		if (error)
2820 			return (error);
2821 	}
2822 	so->so_state ^= SS_ASYNC;
2823 	return (0);
2824 }
2825 
2826 /*
2827  * Set new pid/pgrp for SIGPOLL (or SIGIO for FIOASYNC mode), replacing
2828  * any existing one.  If passed zero, just clear the existing one.
2829  */
2830 int
2831 so_set_siggrp(struct sonode *so, vnode_t *vp, pid_t pgrp, int mode, cred_t *cr)
2832 {
2833 	int events = so->so_state & SS_ASYNC ?
2834 	    S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT :
2835 	    S_RDBAND | S_BANDURG;
2836 	int error;
2837 
2838 	ASSERT(mutex_owned(&so->so_lock));
2839 
2840 	/*
2841 	 * Change socket process (group).
2842 	 *
2843 	 * strioctl (via so_set_asyncsigs) will perform permission check and
2844 	 * also keep a PID_HOLD to prevent the pid from being reused.
2845 	 */
2846 	so_lock_single(so);
2847 	mutex_exit(&so->so_lock);
2848 
2849 	if (pgrp != 0) {
2850 		dprintso(so, 1, ("setown: adding pgrp %d ev 0x%x\n",
2851 		    pgrp, events));
2852 		error = so_set_asyncsigs(vp, pgrp, events, mode, cr);
2853 		if (error != 0) {
2854 			eprintsoline(so, error);
2855 			goto bad;
2856 		}
2857 	}
2858 	/* Remove the previously registered process/group */
2859 	if (so->so_pgrp != 0) {
2860 		dprintso(so, 1, ("setown: removing pgrp %d\n", so->so_pgrp));
2861 		error = so_set_asyncsigs(vp, so->so_pgrp, 0, mode, cr);
2862 		if (error != 0) {
2863 			eprintsoline(so, error);
2864 			error = 0;
2865 		}
2866 	}
2867 	mutex_enter(&so->so_lock);
2868 	so_unlock_single(so, SOLOCKED);
2869 	so->so_pgrp = pgrp;
2870 	return (0);
2871 bad:
2872 	mutex_enter(&so->so_lock);
2873 	so_unlock_single(so, SOLOCKED);
2874 	return (error);
2875 }
2876 
2877 
2878 
/*
 * TLI(/XTI) error to system errno translation table, as good a match as
 * we can make. The table is indexed by the TLI error code; the number in
 * each comment is the slot's position in the array.
 */
static const int tli_errs[] = {
		0,		/*  0: no error */
		EADDRNOTAVAIL,	/*  1: TBADADDR */
		ENOPROTOOPT,	/*  2: TBADOPT */
		EACCES,		/*  3: TACCES */
		EBADF,		/*  4: TBADF */
		EADDRNOTAVAIL,	/*  5: TNOADDR */
		EPROTO,		/*  6: TOUTSTATE */
		ECONNABORTED,	/*  7: TBADSEQ */
		0,		/*  8: TSYSERR - will never get */
		EPROTO,		/*  9: TLOOK - should never be sent by transport */
		EMSGSIZE,	/* 10: TBADDATA */
		EMSGSIZE,	/* 11: TBUFOVFLW */
		EPROTO,		/* 12: TFLOW */
		EWOULDBLOCK,	/* 13: TNODATA */
		EPROTO,		/* 14: TNODIS */
		EPROTO,		/* 15: TNOUDERR */
		EINVAL,		/* 16: TBADFLAG */
		EPROTO,		/* 17: TNOREL */
		EOPNOTSUPP,	/* 18: TNOTSUPPORT */
		EPROTO,		/* 19: TSTATECHNG */
		/* the entries below are the XTI expansion of the error space */
		EPROTO,		/* 20: TNOSTRUCTYPE - never sent by transport */
		EPROTO,		/* 21: TBADNAME - never sent by transport */
		EPROTO,		/* 22: TBADQLEN - never sent by transport */
		EADDRINUSE,	/* 23: TADDRBUSY */
		EBADF,		/* 24: TINDOUT */
		EBADF,		/* 25: TPROVMISMATCH */
		EBADF,		/* 26: TRESQLEN */
		EBADF,		/* 27: TRESADDR */
		EPROTO,		/* 28: TQFULL - never sent by transport */
		EPROTO,		/* 29: TPROTO */
};
2915 
2916 static int
2917 tlitosyserr(int terr)
2918 {
2919 	ASSERT(terr != TSYSERR);
2920 	if (terr >= (sizeof (tli_errs) / sizeof (tli_errs[0])))
2921 		return (EPROTO);
2922 	else
2923 		return (tli_errs[terr]);
2924 }
2925 
2926 /*
2927  * Sockfs sodirect STREAMS read put procedure. Called from sodirect enable
2928  * transport driver/module with an mblk_t chain.
2929  *
2930  * Note, we in-line putq() for the fast-path cases of q is empty, q_last and
2931  * bp are of type M_DATA. All other cases we call putq().
2932  *
2933  * On success a zero will be return, else an errno will be returned.
2934  */
2935 int
2936 sodput(sodirect_t *sodp, mblk_t *bp)
2937 {
2938 	queue_t		*q = sodp->sod_q;
2939 	struct stdata	*stp = (struct stdata *)q->q_ptr;
2940 	mblk_t		*nbp;
2941 	int		ret;
2942 	mblk_t		*last = q->q_last;
2943 	int		bytecnt = 0;
2944 	int		mblkcnt = 0;
2945 
2946 
2947 	ASSERT(MUTEX_HELD(sodp->sod_lock));
2948 
2949 	if (stp->sd_flag == STREOF) {
2950 		ret = 0;
2951 		goto error;
2952 	}
2953 
2954 	if (q->q_first == NULL) {
2955 		/* Q empty, really fast fast-path */
2956 		bp->b_prev = NULL;
2957 		bp->b_next = NULL;
2958 		q->q_first = bp;
2959 		q->q_last = bp;
2960 
2961 	} else if (last->b_datap->db_type == M_DATA &&
2962 	    bp->b_datap->db_type == M_DATA) {
2963 		/*
2964 		 * Last mblk_t chain and bp are both type M_DATA so
2965 		 * in-line putq() here, if the DBLK_UIOA state match
2966 		 * add bp to the end of the current last chain, else
2967 		 * start a new last chain with bp.
2968 		 */
2969 		if ((last->b_datap->db_flags & DBLK_UIOA) ==
2970 		    (bp->b_datap->db_flags & DBLK_UIOA)) {
2971 			/* Added to end */
2972 			while ((nbp = last->b_cont) != NULL)
2973 				last = nbp;
2974 			last->b_cont = bp;
2975 		} else {
2976 			/* New last */
2977 			last->b_next = bp;
2978 			bp->b_next = NULL;
2979 			bp->b_prev = last;
2980 			q->q_last = bp;
2981 		}
2982 	} else {
2983 		/*
2984 		 * Can't use q_last so just call putq().
2985 		 */
2986 		(void) putq(q, bp);
2987 		return (0);
2988 	}
2989 
2990 	/* Count bytes and mblk_t's */
2991 	do {
2992 		bytecnt += MBLKL(bp);
2993 		mblkcnt++;
2994 	} while ((bp = bp->b_cont) != NULL);
2995 	q->q_count += bytecnt;
2996 	q->q_mblkcnt += mblkcnt;
2997 
2998 	/* Check for QFULL */
2999 	if (q->q_count >= q->q_hiwat + sodp->sod_want ||
3000 	    q->q_mblkcnt >= q->q_hiwat) {
3001 		q->q_flag |= QFULL;
3002 	}
3003 
3004 	return (0);
3005 
3006 error:
3007 	do {
3008 		if ((nbp = bp->b_next) != NULL)
3009 			bp->b_next = NULL;
3010 		freemsg(bp);
3011 	} while ((bp = nbp) != NULL);
3012 
3013 	return (ret);
3014 }
3015 
3016 /*
3017  * Sockfs sodirect read wakeup. Called from a sodirect enabled transport
3018  * driver/module to indicate that read-side data is available.
3019  *
3020  * On return the sodirect_t.lock mutex will be exited so this must be the
3021  * last sodirect_t call to guarantee atomic access of *sodp.
3022  */
3023 void
3024 sodwakeup(sodirect_t *sodp)
3025 {
3026 	queue_t		*q = sodp->sod_q;
3027 	struct stdata	*stp = (struct stdata *)q->q_ptr;
3028 
3029 	ASSERT(MUTEX_HELD(sodp->sod_lock));
3030 
3031 	if (stp->sd_flag & RSLEEP) {
3032 		stp->sd_flag &= ~RSLEEP;
3033 		cv_broadcast(&q->q_wait);
3034 	}
3035 
3036 	if (stp->sd_rput_opt & SR_POLLIN) {
3037 		stp->sd_rput_opt &= ~SR_POLLIN;
3038 		mutex_exit(sodp->sod_lock);
3039 		pollwakeup(&stp->sd_pollist, POLLIN | POLLRDNORM);
3040 	} else
3041 		mutex_exit(sodp->sod_lock);
3042 }
3043