xref: /titanic_51/usr/src/uts/common/fs/sockfs/sockstr.c (revision 7eea693d6b672899726e75993fddc4e95b52647f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/inttypes.h>
29 #include <sys/t_lock.h>
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/buf.h>
33 #include <sys/conf.h>
34 #include <sys/cred.h>
35 #include <sys/kmem.h>
36 #include <sys/sysmacros.h>
37 #include <sys/vfs.h>
38 #include <sys/vnode.h>
39 #include <sys/debug.h>
40 #include <sys/errno.h>
41 #include <sys/time.h>
42 #include <sys/file.h>
43 #include <sys/user.h>
44 #include <sys/stream.h>
45 #include <sys/strsubr.h>
46 #include <sys/esunddi.h>
47 #include <sys/flock.h>
48 #include <sys/modctl.h>
49 #include <sys/vtrace.h>
50 #include <sys/strsun.h>
51 #include <sys/cmn_err.h>
52 #include <sys/proc.h>
53 #include <sys/ddi.h>
54 #include <sys/kmem_impl.h>
55 
56 #include <sys/suntpi.h>
57 #include <sys/socket.h>
58 #include <sys/sockio.h>
59 #include <sys/socketvar.h>
60 #include <netinet/in.h>
61 
62 #include <sys/tiuser.h>
63 #define	_SUN_TPI_VERSION	2
64 #include <sys/tihdr.h>
65 
66 #include <inet/kssl/ksslapi.h>
67 
68 #include <c2/audit.h>
69 
70 #include <sys/dcopy.h>
71 
/*
 * Default socket version; initialized to SOV_SOCKSTREAM.
 * NOTE(review): the consumers of this variable are outside this part of
 * the file - confirm where it is applied.
 */
int so_default_version = SOV_SOCKSTREAM;

#ifdef DEBUG
/* Set sockdebug to print debug messages when SO_DEBUG is set */
int sockdebug = 0;

/* Set sockprinterr to print error messages when SO_DEBUG is set */
int sockprinterr = 0;

/*
 * Set so_default_options to SO_DEBUG if all sockets should be created
 * with SO_DEBUG set. This is needed to get debug printouts from the
 * socket() call itself.
 * so_strinit() copies this value into so_options of every socket it
 * initializes.
 */
int so_default_options = 0;
#endif /* DEBUG */

#ifdef SOCK_TEST
/*
 * Set to number of ticks to limit cv_waits for code coverage testing.
 * Set to 1000 when SO_DEBUG is set to 2.
 * When nonzero, sowaitack() uses it as the time limit for callers that
 * asked for an unbounded wait.
 */
clock_t sock_test_timelimit = 0;
#endif /* SOCK_TEST */

/*
 * For concurrency testing of e.g. opening /dev/ip which does not
 * handle T_INFO_REQ messages.
 * When nonzero, do_tinfo() and do_tcapability() set so_addr_size to 0
 * instead of requesting the transport information.
 */
int so_no_tinfo = 0;

/*
 * Timeout for getting a T_CAPABILITY_ACK - it is possible for a provider
 * to simply ignore the T_CAPABILITY_REQ.
 * do_tcapability() multiplies this by hz to obtain the wait in ticks.
 */
clock_t	sock_capability_timeout	= 2;	/* seconds */

/* Forward declarations of helpers defined later in this file. */
static int	do_tcapability(struct sonode *so, t_uscalar_t cap_bits1);
static void	so_removehooks(struct sonode *so);

/*
 * Stream head read-side hooks; so_installhooks() registers both with
 * strsetrputhooks().
 */
static mblk_t *strsock_proto(vnode_t *vp, mblk_t *mp,
		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
		strsigset_t *allmsgsigs, strpollset_t *pollwakeups);
static mblk_t *strsock_misc(vnode_t *vp, mblk_t *mp,
		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
		strsigset_t *allmsgsigs, strpollset_t *pollwakeups);

/* Maps a TLI error to an errno value; used by sowaitprim(). */
static int tlitosyserr(int terr);

/*
 * Sodirect kmem_cache and put/wakeup functions.
 * The cache is created in sostr_init(); so_strinit() allocates from it and
 * installs sodput/sodwakeup as the sod_enqueue/sod_wakeup callbacks.
 */
struct kmem_cache *socktpi_sod_cache;
static int sodput(sodirect_t *, mblk_t *);
static void sodwakeup(sodirect_t *);
127 
128 /*
129  * Called by sockinit() when sockfs is loaded.
130  */
131 int
132 sostr_init()
133 {
134 	/* Allocate sodirect_t kmem_cache */
135 	socktpi_sod_cache = kmem_cache_create("socktpi_sod_cache",
136 	    sizeof (sodirect_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
137 
138 	return (0);
139 }
140 
141 /*
142  * Convert a socket to a stream. Invoked when the illusory sockmod
143  * is popped from the stream.
144  * Change the stream head back to default operation without losing
145  * any messages (T_conn_ind's are moved to the stream head queue).
146  */
147 int
148 so_sock2stream(struct sonode *so)
149 {
150 	struct vnode		*vp = SOTOV(so);
151 	queue_t			*rq;
152 	mblk_t			*mp;
153 	int			error = 0;
154 
155 	ASSERT(MUTEX_HELD(&so->so_plumb_lock));
156 
157 	mutex_enter(&so->so_lock);
158 	so_lock_single(so);
159 
160 	ASSERT(so->so_version != SOV_STREAM);
161 
162 	if (so->so_state & SS_DIRECT) {
163 		mblk_t **mpp;
164 		int rval;
165 
166 		/*
167 		 * Tell the transport below that sockmod is being popped
168 		 */
169 		mutex_exit(&so->so_lock);
170 		error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K, CRED(),
171 		    &rval);
172 		mutex_enter(&so->so_lock);
173 		if (error != 0) {
174 			dprintso(so, 0, ("so_sock2stream(%p): "
175 			    "_SIOCSOCKFALLBACK failed\n", (void *)so));
176 			goto exit;
177 		}
178 		so->so_state &= ~SS_DIRECT;
179 
180 		for (mpp = &so->so_conn_ind_head; (mp = *mpp) != NULL;
181 		    mpp = &mp->b_next) {
182 			struct T_conn_ind	*conn_ind;
183 
184 			/*
185 			 * strsock_proto() has already verified the length of
186 			 * this message block.
187 			 */
188 			ASSERT(MBLKL(mp) >= sizeof (struct T_conn_ind));
189 
190 			conn_ind = (struct T_conn_ind *)mp->b_rptr;
191 			if (conn_ind->OPT_length == 0 &&
192 			    conn_ind->OPT_offset == 0)
193 				continue;
194 
195 			if (DB_REF(mp) > 1) {
196 				mblk_t	*newmp;
197 				size_t	length;
198 				cred_t	*cr;
199 
200 				/*
201 				 * Copy the message block because it is used
202 				 * elsewhere, too.
203 				 */
204 				length = MBLKL(mp);
205 				newmp = soallocproto(length, _ALLOC_INTR);
206 				if (newmp == NULL) {
207 					error = EINTR;
208 					goto exit;
209 				}
210 				bcopy(mp->b_rptr, newmp->b_wptr, length);
211 				newmp->b_wptr += length;
212 				newmp->b_next = mp->b_next;
213 				cr = DB_CRED(mp);
214 				if (cr != NULL)
215 					mblk_setcred(newmp, cr);
216 				DB_CPID(newmp) = DB_CPID(mp);
217 
218 				/*
219 				 * Link the new message block into the queue
220 				 * and free the old one.
221 				 */
222 				*mpp = newmp;
223 				mp->b_next = NULL;
224 				freemsg(mp);
225 
226 				mp = newmp;
227 				conn_ind = (struct T_conn_ind *)mp->b_rptr;
228 			}
229 
230 			/*
231 			 * Remove options added by TCP for accept fast-path.
232 			 */
233 			conn_ind->OPT_length = 0;
234 			conn_ind->OPT_offset = 0;
235 		}
236 	}
237 
238 	so->so_version = SOV_STREAM;
239 	so->so_priv = NULL;
240 
241 	/*
242 	 * Remove the hooks in the stream head to avoid queuing more
243 	 * packets in sockfs.
244 	 */
245 	mutex_exit(&so->so_lock);
246 	so_removehooks(so);
247 	mutex_enter(&so->so_lock);
248 
249 	/*
250 	 * Clear any state related to urgent data. Leave any T_EXDATA_IND
251 	 * on the queue - the behavior of urgent data after a switch is
252 	 * left undefined.
253 	 */
254 	so->so_error = so->so_delayed_error = 0;
255 	freemsg(so->so_oobmsg);
256 	so->so_oobmsg = NULL;
257 	so->so_oobsigcnt = so->so_oobcnt = 0;
258 
259 	so->so_state &= ~(SS_RCVATMARK|SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA|
260 	    SS_HASCONNIND|SS_SAVEDEOR);
261 	ASSERT(so_verify_oobstate(so));
262 
263 	freemsg(so->so_ack_mp);
264 	so->so_ack_mp = NULL;
265 
266 	/*
267 	 * Flush the T_DISCON_IND on so_discon_ind_mp.
268 	 */
269 	so_flush_discon_ind(so);
270 
271 	/*
272 	 * Move any queued T_CONN_IND messages to stream head queue.
273 	 */
274 	rq = RD(strvp2wq(vp));
275 	while ((mp = so->so_conn_ind_head) != NULL) {
276 		so->so_conn_ind_head = mp->b_next;
277 		mp->b_next = NULL;
278 		if (so->so_conn_ind_head == NULL) {
279 			ASSERT(so->so_conn_ind_tail == mp);
280 			so->so_conn_ind_tail = NULL;
281 		}
282 		dprintso(so, 0,
283 		    ("so_sock2stream(%p): moving T_CONN_IND\n",
284 		    (void *)so));
285 
286 		/* Drop lock across put() */
287 		mutex_exit(&so->so_lock);
288 		put(rq, mp);
289 		mutex_enter(&so->so_lock);
290 	}
291 
292 exit:
293 	ASSERT(MUTEX_HELD(&so->so_lock));
294 	so_unlock_single(so, SOLOCKED);
295 	mutex_exit(&so->so_lock);
296 	return (error);
297 }
298 
299 /*
300  * Covert a stream back to a socket. This is invoked when the illusory
301  * sockmod is pushed on a stream (where the stream was "created" by
302  * popping the illusory sockmod).
303  * This routine can not recreate the socket state (certain aspects of
304  * it like urgent data state and the bound/connected addresses for AF_UNIX
305  * sockets can not be recreated by asking the transport for information).
306  * Thus this routine implicitly assumes that the socket is in an initial
307  * state (as if it was just created). It flushes any messages queued on the
308  * read queue to avoid dealing with e.g. TPI acks or T_exdata_ind messages.
309  */
310 void
311 so_stream2sock(struct sonode *so)
312 {
313 	struct vnode *vp = SOTOV(so);
314 
315 	ASSERT(MUTEX_HELD(&so->so_plumb_lock));
316 
317 	mutex_enter(&so->so_lock);
318 	so_lock_single(so);
319 	ASSERT(so->so_version == SOV_STREAM);
320 	so->so_version = SOV_SOCKSTREAM;
321 	so->so_pushcnt = 0;
322 	mutex_exit(&so->so_lock);
323 
324 	/*
325 	 * Set a permenent error to force any thread in sorecvmsg to
326 	 * return (and drop SOREADLOCKED). Clear the error once
327 	 * we have SOREADLOCKED.
328 	 * This makes a read sleeping during the I_PUSH of sockmod return
329 	 * EIO.
330 	 */
331 	strsetrerror(SOTOV(so), EIO, 1, NULL);
332 
333 	/*
334 	 * Get the read lock before flushing data to avoid
335 	 * problems with the T_EXDATA_IND MSG_PEEK code in sorecvmsg.
336 	 */
337 	mutex_enter(&so->so_lock);
338 	(void) so_lock_read(so, 0);	/* Set SOREADLOCKED */
339 	mutex_exit(&so->so_lock);
340 
341 	strsetrerror(SOTOV(so), 0, 0, NULL);
342 	so_installhooks(so);
343 
344 	/*
345 	 * Flush everything on the read queue.
346 	 * This ensures that no T_CONN_IND remain and that no T_EXDATA_IND
347 	 * remain; those types of messages would confuse sockfs.
348 	 */
349 	strflushrq(vp, FLUSHALL);
350 	mutex_enter(&so->so_lock);
351 
352 	/*
353 	 * Flush the T_DISCON_IND on so_discon_ind_mp.
354 	 */
355 	so_flush_discon_ind(so);
356 	so_unlock_read(so);	/* Clear SOREADLOCKED */
357 
358 	so_unlock_single(so, SOLOCKED);
359 	mutex_exit(&so->so_lock);
360 }
361 
362 /*
363  * Install the hooks in the stream head.
364  */
365 void
366 so_installhooks(struct sonode *so)
367 {
368 	struct vnode *vp = SOTOV(so);
369 
370 	strsetrputhooks(vp, SH_SIGALLDATA | SH_IGN_ZEROLEN | SH_CONSOL_DATA,
371 	    strsock_proto, strsock_misc);
372 	strsetwputhooks(vp, SH_SIGPIPE | SH_RECHECK_ERR, 0);
373 }
374 
375 /*
376  * Remove the hooks in the stream head.
377  */
378 static void
379 so_removehooks(struct sonode *so)
380 {
381 	struct vnode *vp = SOTOV(so);
382 
383 	strsetrputhooks(vp, 0, NULL, NULL);
384 	strsetwputhooks(vp, 0, STRTIMOUT);
385 	/*
386 	 * Leave read behavior as it would have been for a normal
387 	 * stream i.e. a read of an M_PROTO will fail.
388 	 */
389 }
390 
391 /*
392  * Initialize the streams side of a socket including
393  * T_info_req/ack processing. If tso is not NULL its values are used thereby
394  * avoiding the T_INFO_REQ.
395  */
396 int
397 so_strinit(struct sonode *so, struct sonode *tso)
398 {
399 	struct vnode *vp = SOTOV(so);
400 	struct stdata *stp;
401 	mblk_t *mp;
402 	int error;
403 
404 	dprintso(so, 1, ("so_strinit(%p)\n", (void *)so));
405 
406 	/* Preallocate an unbind_req message */
407 	mp = soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP);
408 	mutex_enter(&so->so_lock);
409 	so->so_unbind_mp = mp;
410 #ifdef DEBUG
411 	so->so_options = so_default_options;
412 #endif /* DEBUG */
413 	mutex_exit(&so->so_lock);
414 
415 	so_installhooks(so);
416 
417 	/*
418 	 * The T_CAPABILITY_REQ should be the first message sent down because
419 	 * at least TCP has a fast-path for this which avoids timeouts while
420 	 * waiting for the T_CAPABILITY_ACK under high system load.
421 	 */
422 	if (tso == NULL) {
423 		error = do_tcapability(so, TC1_ACCEPTOR_ID | TC1_INFO);
424 		if (error)
425 			return (error);
426 	} else {
427 		mutex_enter(&so->so_lock);
428 		so->so_tsdu_size = tso->so_tsdu_size;
429 		so->so_etsdu_size = tso->so_etsdu_size;
430 		so->so_addr_size = tso->so_addr_size;
431 		so->so_opt_size = tso->so_opt_size;
432 		so->so_tidu_size = tso->so_tidu_size;
433 		so->so_serv_type = tso->so_serv_type;
434 		so->so_mode = tso->so_mode & ~SM_ACCEPTOR_ID;
435 		mutex_exit(&so->so_lock);
436 
437 		/* the following do_tcapability may update so->so_mode */
438 		if ((tso->so_serv_type != T_CLTS) &&
439 		    !(tso->so_state & SS_DIRECT)) {
440 			error = do_tcapability(so, TC1_ACCEPTOR_ID);
441 			if (error)
442 				return (error);
443 		}
444 	}
445 	/*
446 	 * If the addr_size is 0 we treat it as already bound
447 	 * and connected. This is used by the routing socket.
448 	 * We set the addr_size to something to allocate a the address
449 	 * structures.
450 	 */
451 	if (so->so_addr_size == 0) {
452 		so->so_state |= SS_ISBOUND | SS_ISCONNECTED;
453 		/* Address size can vary with address families. */
454 		if (so->so_family == AF_INET6)
455 			so->so_addr_size =
456 			    (t_scalar_t)sizeof (struct sockaddr_in6);
457 		else
458 			so->so_addr_size =
459 			    (t_scalar_t)sizeof (struct sockaddr_in);
460 		ASSERT(so->so_unbind_mp);
461 	}
462 	/*
463 	 * Allocate the addresses.
464 	 */
465 	ASSERT(so->so_laddr_sa == NULL && so->so_faddr_sa == NULL);
466 	ASSERT(so->so_laddr_len == 0 && so->so_faddr_len == 0);
467 	so->so_laddr_maxlen = so->so_faddr_maxlen =
468 	    P2ROUNDUP(so->so_addr_size, KMEM_ALIGN);
469 	so->so_laddr_sa = kmem_alloc(so->so_laddr_maxlen * 2, KM_SLEEP);
470 	so->so_faddr_sa = (struct sockaddr *)((caddr_t)so->so_laddr_sa
471 	    + so->so_laddr_maxlen);
472 
473 	if (so->so_family == AF_UNIX) {
474 		/*
475 		 * Initialize AF_UNIX related fields.
476 		 */
477 		bzero(&so->so_ux_laddr, sizeof (so->so_ux_laddr));
478 		bzero(&so->so_ux_faddr, sizeof (so->so_ux_faddr));
479 	}
480 
481 	stp = vp->v_stream;
482 	/*
483 	 * Have to keep minpsz at zero in order to allow write/send of zero
484 	 * bytes.
485 	 */
486 	mutex_enter(&stp->sd_lock);
487 	if (stp->sd_qn_minpsz == 1)
488 		stp->sd_qn_minpsz = 0;
489 	mutex_exit(&stp->sd_lock);
490 
491 	/*
492 	 * If sodirect capable allocate and initialize sodirect_t.
493 	 * Note, SS_SODIRECT is set in socktpi_open().
494 	 */
495 	if (so->so_state & SS_SODIRECT) {
496 		sodirect_t	*sodp;
497 
498 		ASSERT(so->so_direct == NULL);
499 
500 		sodp = kmem_cache_alloc(socktpi_sod_cache, KM_SLEEP);
501 		sodp->sod_state = SOD_ENABLED | SOD_WAKE_NOT;
502 		sodp->sod_want = 0;
503 		sodp->sod_q = RD(stp->sd_wrq);
504 		sodp->sod_enqueue = sodput;
505 		sodp->sod_wakeup = sodwakeup;
506 		sodp->sod_uioafh = NULL;
507 		sodp->sod_uioaft = NULL;
508 		sodp->sod_lockp = &stp->sd_lock;
509 		/*
510 		 * Remainder of the sod_uioa members are left uninitialized
511 		 * but will be initialized later by uioainit() before uioa
512 		 * is enabled.
513 		 */
514 		sodp->sod_uioa.uioa_state = UIOA_ALLOC;
515 		so->so_direct = sodp;
516 		stp->sd_sodirect = sodp;
517 	}
518 
519 	return (0);
520 }
521 
522 static void
523 copy_tinfo(struct sonode *so, struct T_info_ack *tia)
524 {
525 	so->so_tsdu_size = tia->TSDU_size;
526 	so->so_etsdu_size = tia->ETSDU_size;
527 	so->so_addr_size = tia->ADDR_size;
528 	so->so_opt_size = tia->OPT_size;
529 	so->so_tidu_size = tia->TIDU_size;
530 	so->so_serv_type = tia->SERV_type;
531 	switch (tia->CURRENT_state) {
532 	case TS_UNBND:
533 		break;
534 	case TS_IDLE:
535 		so->so_state |= SS_ISBOUND;
536 		so->so_laddr_len = 0;
537 		so->so_state &= ~SS_LADDR_VALID;
538 		break;
539 	case TS_DATA_XFER:
540 		so->so_state |= SS_ISBOUND|SS_ISCONNECTED;
541 		so->so_laddr_len = 0;
542 		so->so_faddr_len = 0;
543 		so->so_state &= ~(SS_LADDR_VALID | SS_FADDR_VALID);
544 		break;
545 	}
546 
547 	/*
548 	 * Heuristics for determining the socket mode flags
549 	 * (SM_ATOMIC, SM_CONNREQUIRED, SM_ADDR, SM_FDPASSING,
550 	 * and SM_EXDATA, SM_OPTDATA, and SM_BYTESTREAM)
551 	 * from the info ack.
552 	 */
553 	if (so->so_serv_type == T_CLTS) {
554 		so->so_mode |= SM_ATOMIC | SM_ADDR;
555 	} else {
556 		so->so_mode |= SM_CONNREQUIRED;
557 		if (so->so_etsdu_size != 0 && so->so_etsdu_size != -2)
558 			so->so_mode |= SM_EXDATA;
559 	}
560 	if (so->so_type == SOCK_SEQPACKET || so->so_type == SOCK_RAW) {
561 		/* Semantics are to discard tail end of messages */
562 		so->so_mode |= SM_ATOMIC;
563 	}
564 	if (so->so_family == AF_UNIX) {
565 		so->so_mode |= SM_FDPASSING | SM_OPTDATA;
566 		if (so->so_addr_size == -1) {
567 			/* MAXPATHLEN + soun_family + nul termination */
568 			so->so_addr_size = (t_scalar_t)(MAXPATHLEN +
569 			    sizeof (short) + 1);
570 		}
571 		if (so->so_type == SOCK_STREAM) {
572 			/*
573 			 * Make it into a byte-stream transport.
574 			 * SOCK_SEQPACKET sockets are unchanged.
575 			 */
576 			so->so_tsdu_size = 0;
577 		}
578 	} else if (so->so_addr_size == -1) {
579 		/*
580 		 * Logic extracted from sockmod - have to pick some max address
581 		 * length in order to preallocate the addresses.
582 		 */
583 		so->so_addr_size = SOA_DEFSIZE;
584 	}
585 	if (so->so_tsdu_size == 0)
586 		so->so_mode |= SM_BYTESTREAM;
587 }
588 
589 static int
590 check_tinfo(struct sonode *so)
591 {
592 	/* Consistency checks */
593 	if (so->so_type == SOCK_DGRAM && so->so_serv_type != T_CLTS) {
594 		eprintso(so, ("service type and socket type mismatch\n"));
595 		eprintsoline(so, EPROTO);
596 		return (EPROTO);
597 	}
598 	if (so->so_type == SOCK_STREAM && so->so_serv_type == T_CLTS) {
599 		eprintso(so, ("service type and socket type mismatch\n"));
600 		eprintsoline(so, EPROTO);
601 		return (EPROTO);
602 	}
603 	if (so->so_type == SOCK_SEQPACKET && so->so_serv_type == T_CLTS) {
604 		eprintso(so, ("service type and socket type mismatch\n"));
605 		eprintsoline(so, EPROTO);
606 		return (EPROTO);
607 	}
608 	if (so->so_family == AF_INET &&
609 	    so->so_addr_size != (t_scalar_t)sizeof (struct sockaddr_in)) {
610 		eprintso(so,
611 		    ("AF_INET must have sockaddr_in address length. Got %d\n",
612 		    so->so_addr_size));
613 		eprintsoline(so, EMSGSIZE);
614 		return (EMSGSIZE);
615 	}
616 	if (so->so_family == AF_INET6 &&
617 	    so->so_addr_size != (t_scalar_t)sizeof (struct sockaddr_in6)) {
618 		eprintso(so,
619 		    ("AF_INET6 must have sockaddr_in6 address length. Got %d\n",
620 		    so->so_addr_size));
621 		eprintsoline(so, EMSGSIZE);
622 		return (EMSGSIZE);
623 	}
624 
625 	dprintso(so, 1, (
626 	    "tinfo: serv %d tsdu %d, etsdu %d, addr %d, opt %d, tidu %d\n",
627 	    so->so_serv_type, so->so_tsdu_size, so->so_etsdu_size,
628 	    so->so_addr_size, so->so_opt_size,
629 	    so->so_tidu_size));
630 	dprintso(so, 1, ("tinfo: so_state %s\n",
631 	    pr_state(so->so_state, so->so_mode)));
632 	return (0);
633 }
634 
635 /*
636  * Send down T_info_req and wait for the ack.
637  * Record interesting T_info_ack values in the sonode.
638  */
639 static int
640 do_tinfo(struct sonode *so)
641 {
642 	struct T_info_req tir;
643 	mblk_t *mp;
644 	int error;
645 
646 	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
647 
648 	if (so_no_tinfo) {
649 		so->so_addr_size = 0;
650 		return (0);
651 	}
652 
653 	dprintso(so, 1, ("do_tinfo(%p)\n", (void *)so));
654 
655 	/* Send T_INFO_REQ */
656 	tir.PRIM_type = T_INFO_REQ;
657 	mp = soallocproto1(&tir, sizeof (tir),
658 	    sizeof (struct T_info_req) + sizeof (struct T_info_ack),
659 	    _ALLOC_INTR);
660 	if (mp == NULL) {
661 		eprintsoline(so, ENOBUFS);
662 		return (ENOBUFS);
663 	}
664 	/* T_INFO_REQ has to be M_PCPROTO */
665 	DB_TYPE(mp) = M_PCPROTO;
666 
667 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
668 	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
669 	if (error) {
670 		eprintsoline(so, error);
671 		return (error);
672 	}
673 	mutex_enter(&so->so_lock);
674 	/* Wait for T_INFO_ACK */
675 	if ((error = sowaitprim(so, T_INFO_REQ, T_INFO_ACK,
676 	    (t_uscalar_t)sizeof (struct T_info_ack), &mp, 0))) {
677 		mutex_exit(&so->so_lock);
678 		eprintsoline(so, error);
679 		return (error);
680 	}
681 
682 	ASSERT(mp);
683 	copy_tinfo(so, (struct T_info_ack *)mp->b_rptr);
684 	mutex_exit(&so->so_lock);
685 	freemsg(mp);
686 	return (check_tinfo(so));
687 }
688 
689 /*
690  * Send down T_capability_req and wait for the ack.
691  * Record interesting T_capability_ack values in the sonode.
692  */
693 static int
694 do_tcapability(struct sonode *so, t_uscalar_t cap_bits1)
695 {
696 	struct T_capability_req tcr;
697 	struct T_capability_ack *tca;
698 	mblk_t *mp;
699 	int error;
700 
701 	ASSERT(cap_bits1 != 0);
702 	ASSERT((cap_bits1 & ~(TC1_ACCEPTOR_ID | TC1_INFO)) == 0);
703 	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
704 
705 	if (so->so_provinfo->tpi_capability == PI_NO)
706 		return (do_tinfo(so));
707 
708 	if (so_no_tinfo) {
709 		so->so_addr_size = 0;
710 		if ((cap_bits1 &= ~TC1_INFO) == 0)
711 			return (0);
712 	}
713 
714 	dprintso(so, 1, ("do_tcapability(%p)\n", (void *)so));
715 
716 	/* Send T_CAPABILITY_REQ */
717 	tcr.PRIM_type = T_CAPABILITY_REQ;
718 	tcr.CAP_bits1 = cap_bits1;
719 	mp = soallocproto1(&tcr, sizeof (tcr),
720 	    sizeof (struct T_capability_req) + sizeof (struct T_capability_ack),
721 	    _ALLOC_INTR);
722 	if (mp == NULL) {
723 		eprintsoline(so, ENOBUFS);
724 		return (ENOBUFS);
725 	}
726 	/* T_CAPABILITY_REQ should be M_PCPROTO here */
727 	DB_TYPE(mp) = M_PCPROTO;
728 
729 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
730 	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
731 	if (error) {
732 		eprintsoline(so, error);
733 		return (error);
734 	}
735 	mutex_enter(&so->so_lock);
736 	/* Wait for T_CAPABILITY_ACK */
737 	if ((error = sowaitprim(so, T_CAPABILITY_REQ, T_CAPABILITY_ACK,
738 	    (t_uscalar_t)sizeof (*tca), &mp, sock_capability_timeout * hz))) {
739 		mutex_exit(&so->so_lock);
740 		PI_PROVLOCK(so->so_provinfo);
741 		if (so->so_provinfo->tpi_capability == PI_DONTKNOW)
742 			so->so_provinfo->tpi_capability = PI_NO;
743 		PI_PROVUNLOCK(so->so_provinfo);
744 		ASSERT((so->so_mode & SM_ACCEPTOR_ID) == 0);
745 		if (cap_bits1 & TC1_INFO) {
746 			/*
747 			 * If the T_CAPABILITY_REQ timed out and then a
748 			 * T_INFO_REQ gets a protocol error, most likely
749 			 * the capability was slow (vs. unsupported). Return
750 			 * ENOSR for this case as a best guess.
751 			 */
752 			if (error == ETIME) {
753 				return ((error = do_tinfo(so)) == EPROTO ?
754 				    ENOSR : error);
755 			}
756 			return (do_tinfo(so));
757 		}
758 		return (0);
759 	}
760 
761 	if (so->so_provinfo->tpi_capability == PI_DONTKNOW) {
762 		PI_PROVLOCK(so->so_provinfo);
763 		so->so_provinfo->tpi_capability = PI_YES;
764 		PI_PROVUNLOCK(so->so_provinfo);
765 	}
766 
767 	ASSERT(mp);
768 	tca = (struct T_capability_ack *)mp->b_rptr;
769 
770 	ASSERT((cap_bits1 & TC1_INFO) == (tca->CAP_bits1 & TC1_INFO));
771 
772 	cap_bits1 = tca->CAP_bits1;
773 
774 	if (cap_bits1 & TC1_ACCEPTOR_ID) {
775 		so->so_acceptor_id = tca->ACCEPTOR_id;
776 		so->so_mode |= SM_ACCEPTOR_ID;
777 	}
778 
779 	if (cap_bits1 & TC1_INFO)
780 		copy_tinfo(so, &tca->INFO_ack);
781 
782 	mutex_exit(&so->so_lock);
783 	freemsg(mp);
784 
785 	if (cap_bits1 & TC1_INFO)
786 		return (check_tinfo(so));
787 
788 	return (0);
789 }
790 
791 /*
792  * Retrieve and clear the socket error.
793  */
794 int
795 sogeterr(struct sonode *so)
796 {
797 	int error;
798 
799 	ASSERT(MUTEX_HELD(&so->so_lock));
800 
801 	error = so->so_error;
802 	so->so_error = 0;
803 
804 	return (error);
805 }
806 
807 /*
808  * This routine is registered with the stream head to retrieve read
809  * side errors.
810  * It does not clear the socket error for a peeking read side operation.
811  * It the error is to be cleared it sets *clearerr.
812  */
813 int
814 sogetrderr(vnode_t *vp, int ispeek, int *clearerr)
815 {
816 	struct sonode *so = VTOSO(vp);
817 	int error;
818 
819 	mutex_enter(&so->so_lock);
820 	if (ispeek) {
821 		error = so->so_error;
822 		*clearerr = 0;
823 	} else {
824 		error = so->so_error;
825 		so->so_error = 0;
826 		*clearerr = 1;
827 	}
828 	mutex_exit(&so->so_lock);
829 	return (error);
830 }
831 
832 /*
833  * This routine is registered with the stream head to retrieve write
834  * side errors.
835  * It does not clear the socket error for a peeking read side operation.
836  * It the error is to be cleared it sets *clearerr.
837  */
838 int
839 sogetwrerr(vnode_t *vp, int ispeek, int *clearerr)
840 {
841 	struct sonode *so = VTOSO(vp);
842 	int error;
843 
844 	mutex_enter(&so->so_lock);
845 	if (so->so_state & SS_CANTSENDMORE) {
846 		error = EPIPE;
847 		*clearerr = 0;
848 	} else {
849 		error = so->so_error;
850 		if (ispeek) {
851 			*clearerr = 0;
852 		} else {
853 			so->so_error = 0;
854 			*clearerr = 1;
855 		}
856 	}
857 	mutex_exit(&so->so_lock);
858 	return (error);
859 }
860 
861 /*
862  * Set a nonpersistent read and write error on the socket.
863  * Used when there is a T_uderror_ind for a connected socket.
864  * The caller also needs to call strsetrerror and strsetwerror
865  * after dropping the lock.
866  */
867 void
868 soseterror(struct sonode *so, int error)
869 {
870 	ASSERT(error != 0);
871 
872 	ASSERT(MUTEX_HELD(&so->so_lock));
873 	so->so_error = (ushort_t)error;
874 }
875 
876 void
877 soisconnecting(struct sonode *so)
878 {
879 	ASSERT(MUTEX_HELD(&so->so_lock));
880 	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
881 	so->so_state |= SS_ISCONNECTING;
882 	cv_broadcast(&so->so_state_cv);
883 }
884 
885 void
886 soisconnected(struct sonode *so)
887 {
888 	ASSERT(MUTEX_HELD(&so->so_lock));
889 	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING);
890 	so->so_state |= SS_ISCONNECTED;
891 	cv_broadcast(&so->so_state_cv);
892 }
893 
894 /*
895  * The caller also needs to call strsetrerror, strsetwerror and strseteof.
896  */
897 void
898 soisdisconnected(struct sonode *so, int error)
899 {
900 	ASSERT(MUTEX_HELD(&so->so_lock));
901 	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING|
902 	    SS_LADDR_VALID|SS_FADDR_VALID);
903 	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE);
904 	so->so_error = (ushort_t)error;
905 	if (so->so_peercred != NULL) {
906 		crfree(so->so_peercred);
907 		so->so_peercred = NULL;
908 	}
909 	cv_broadcast(&so->so_state_cv);
910 }
911 
912 /*
913  * For connected AF_UNIX SOCK_DGRAM sockets when the peer closes.
914  * Does not affect write side.
915  * The caller also has to call strsetrerror.
916  */
917 static void
918 sobreakconn(struct sonode *so, int error)
919 {
920 	ASSERT(MUTEX_HELD(&so->so_lock));
921 	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
922 	so->so_error = (ushort_t)error;
923 	cv_broadcast(&so->so_state_cv);
924 }
925 
926 /*
927  * Can no longer send.
928  * Caller must also call strsetwerror.
929  *
930  * We mark the peer address as no longer valid for getpeername, but
931  * leave it around for so_unix_close to notify the peer (that
932  * transport has no addressing held at that layer).
933  */
934 void
935 socantsendmore(struct sonode *so)
936 {
937 	ASSERT(MUTEX_HELD(&so->so_lock));
938 	so->so_state = so->so_state & ~SS_FADDR_VALID | SS_CANTSENDMORE;
939 	cv_broadcast(&so->so_state_cv);
940 }
941 
942 /*
943  * The caller must call strseteof(,1) as well as this routine
944  * to change the socket state.
945  */
946 void
947 socantrcvmore(struct sonode *so)
948 {
949 	ASSERT(MUTEX_HELD(&so->so_lock));
950 	so->so_state |= SS_CANTRCVMORE;
951 	cv_broadcast(&so->so_state_cv);
952 }
953 
954 /*
955  * The caller has sent down a "request_prim" primitive and wants to wait for
956  * an ack ("ack_prim") or an T_ERROR_ACK for it.
957  * The specified "ack_prim" can be a T_OK_ACK.
958  *
959  * Assumes that all the TPI acks are M_PCPROTO messages.
960  *
961  * Note that the socket is single-threaded (using so_lock_single)
962  * for all operations that generate TPI ack messages. Since
963  * only TPI ack messages are M_PCPROTO we should never receive
964  * anything except either the ack we are expecting or a T_ERROR_ACK
965  * for the same primitive.
966  */
967 int
968 sowaitprim(struct sonode *so, t_scalar_t request_prim, t_scalar_t ack_prim,
969 	    t_uscalar_t min_size, mblk_t **mpp, clock_t wait)
970 {
971 	mblk_t *mp;
972 	union T_primitives *tpr;
973 	int error;
974 
975 	dprintso(so, 1, ("sowaitprim(%p, %d, %d, %d, %p, %lu)\n",
976 	    (void *)so, request_prim, ack_prim, min_size, (void *)mpp, wait));
977 
978 	ASSERT(MUTEX_HELD(&so->so_lock));
979 
980 	error = sowaitack(so, &mp, wait);
981 	if (error)
982 		return (error);
983 
984 	dprintso(so, 1, ("got msg %p\n", (void *)mp));
985 	if (DB_TYPE(mp) != M_PCPROTO ||
986 	    MBLKL(mp) < sizeof (tpr->type)) {
987 		freemsg(mp);
988 		eprintsoline(so, EPROTO);
989 		return (EPROTO);
990 	}
991 	tpr = (union T_primitives *)mp->b_rptr;
992 	/*
993 	 * Did we get the primitive that we were asking for?
994 	 * For T_OK_ACK we also check that it matches the request primitive.
995 	 */
996 	if (tpr->type == ack_prim &&
997 	    (ack_prim != T_OK_ACK ||
998 	    tpr->ok_ack.CORRECT_prim == request_prim)) {
999 		if (MBLKL(mp) >= (ssize_t)min_size) {
1000 			/* Found what we are looking for */
1001 			*mpp = mp;
1002 			return (0);
1003 		}
1004 		/* Too short */
1005 		freemsg(mp);
1006 		eprintsoline(so, EPROTO);
1007 		return (EPROTO);
1008 	}
1009 
1010 	if (tpr->type == T_ERROR_ACK &&
1011 	    tpr->error_ack.ERROR_prim == request_prim) {
1012 		/* Error to the primitive we were looking for */
1013 		if (tpr->error_ack.TLI_error == TSYSERR) {
1014 			error = tpr->error_ack.UNIX_error;
1015 		} else {
1016 			error = tlitosyserr(tpr->error_ack.TLI_error);
1017 		}
1018 		dprintso(so, 0, ("error_ack for %d: %d/%d ->%d\n",
1019 		    tpr->error_ack.ERROR_prim,
1020 		    tpr->error_ack.TLI_error,
1021 		    tpr->error_ack.UNIX_error,
1022 		    error));
1023 		freemsg(mp);
1024 		return (error);
1025 	}
1026 	/*
1027 	 * Wrong primitive or T_ERROR_ACK for the wrong primitive
1028 	 */
1029 #ifdef DEBUG
1030 	if (tpr->type == T_ERROR_ACK) {
1031 		dprintso(so, 0, ("error_ack for %d: %d/%d\n",
1032 		    tpr->error_ack.ERROR_prim,
1033 		    tpr->error_ack.TLI_error,
1034 		    tpr->error_ack.UNIX_error));
1035 	} else if (tpr->type == T_OK_ACK) {
1036 		dprintso(so, 0, ("ok_ack for %d, expected %d for %d\n",
1037 		    tpr->ok_ack.CORRECT_prim,
1038 		    ack_prim, request_prim));
1039 	} else {
1040 		dprintso(so, 0,
1041 		    ("unexpected primitive %d, expected %d for %d\n",
1042 		    tpr->type, ack_prim, request_prim));
1043 	}
1044 #endif /* DEBUG */
1045 
1046 	freemsg(mp);
1047 	eprintsoline(so, EPROTO);
1048 	return (EPROTO);
1049 }
1050 
1051 /*
1052  * Wait for a T_OK_ACK for the specified primitive.
1053  */
1054 int
1055 sowaitokack(struct sonode *so, t_scalar_t request_prim)
1056 {
1057 	mblk_t *mp;
1058 	int error;
1059 
1060 	error = sowaitprim(so, request_prim, T_OK_ACK,
1061 	    (t_uscalar_t)sizeof (struct T_ok_ack), &mp, 0);
1062 	if (error)
1063 		return (error);
1064 	freemsg(mp);
1065 	return (0);
1066 }
1067 
1068 /*
1069  * Park a TPI ack received from the transport on so_ack_mp and wake every
      * thread sleeping on so_ack_cv. The ack must be M_PCPROTO; anything else
      * is logged and freed. An ack that is still parked is freed and replaced
      * by the new one.
1070  */
1071 void
1072 soqueueack(struct sonode *so, mblk_t *mp)
1073 {
1074 	mblk_t *stale;

1075 	if (DB_TYPE(mp) != M_PCPROTO) {
1076 		zcmn_err(getzoneid(), CE_WARN,
1077 		    "sockfs: received unexpected M_PROTO TPI ack. Prim %d\n",
1078 		    *(t_scalar_t *)mp->b_rptr);
1079 		freemsg(mp);
1080 		return;
1081 	}

1082 	mutex_enter(&so->so_lock);
1083 	stale = so->so_ack_mp;
1084 	so->so_ack_mp = mp;
1085 	if (stale != NULL) {
1086 		dprintso(so, 1, ("so_ack_mp already set\n"));
1087 		freemsg(stale);
1088 	}
1089 	cv_broadcast(&so->so_ack_cv);
1090 	mutex_exit(&so->so_lock);
1091 }
1092 
1093 /*
1094  * Wait for a TPI ack ignoring signals and errors.
      *
      * Caller must hold so_lock. Sleeps on so_ack_cv until so_ack_mp is
      * non-NULL, then returns that message through *mpp and clears so_ack_mp.
      * A nonzero 'wait' bounds the sleep: ETIME is returned (and *mpp is left
      * untouched) when cv_timedwait() returns -1. With wait == 0 the sleep is
      * unbounded, unless SOCK_TEST is defined and sock_test_timelimit is set.
      * Returns 0 when an ack was obtained.
      *
      * NOTE(review): the deadline is recomputed by time_to_wait() on every
      * pass through the loop, so a wakeup that still finds so_ack_mp NULL
      * appears to restart the full interval -- confirm this is intended.
1095  */
1096 int
1097 sowaitack(struct sonode *so, mblk_t **mpp, clock_t wait)
1098 {
1099 	ASSERT(MUTEX_HELD(&so->so_lock));
1100 
1101 	while (so->so_ack_mp == NULL) {
1102 #ifdef SOCK_TEST
     		/* Test hook: turn an unbounded wait into a bounded one. */
1103 		if (wait == 0 && sock_test_timelimit != 0)
1104 			wait = sock_test_timelimit;
1105 #endif
1106 		if (wait != 0) {
1107 			/*
1108 			 * Only wait for the time limit.
1109 			 */
1110 			clock_t now;
1111 
1112 			time_to_wait(&now, wait);
1113 			if (cv_timedwait(&so->so_ack_cv, &so->so_lock,
1114 			    now) == -1) {
1115 				eprintsoline(so, ETIME);
1116 				return (ETIME);
1117 			}
1118 		}
1119 		else
1120 			cv_wait(&so->so_ack_cv, &so->so_lock);
1121 	}
     	/* Hand the parked ack to the caller; the slot is emptied below. */
1122 	*mpp = so->so_ack_mp;
1123 #ifdef DEBUG
     	/* Check that the parked message is an M_PCPROTO ack of a known type. */
1124 	{
1125 		union T_primitives *tpr;
1126 		mblk_t *mp = *mpp;
1127 
1128 		tpr = (union T_primitives *)mp->b_rptr;
1129 		ASSERT(DB_TYPE(mp) == M_PCPROTO);
1130 		ASSERT(tpr->type == T_OK_ACK ||
1131 		    tpr->type == T_ERROR_ACK ||
1132 		    tpr->type == T_BIND_ACK ||
1133 		    tpr->type == T_CAPABILITY_ACK ||
1134 		    tpr->type == T_INFO_ACK ||
1135 		    tpr->type == T_OPTMGMT_ACK);
1136 	}
1137 #endif /* DEBUG */
1138 	so->so_ack_mp = NULL;
1139 	return (0);
1140 }
1141 
1142 /*
1143  * Append a T_CONN_IND received from the transport to the pending list
      * kept on so_conn_ind_head/so_conn_ind_tail and wake one thread waiting
      * on so_connind_cv. The message must be M_PROTO; anything else is logged
      * and freed. SS_HASCONNIND is set when the list goes from empty to
      * non-empty.
1144  */
1145 void
1146 soqueueconnind(struct sonode *so, mblk_t *mp)
1147 {
1148 	if (DB_TYPE(mp) != M_PROTO) {
1149 		zcmn_err(getzoneid(), CE_WARN,
1150 		    "sockfs: received unexpected M_PCPROTO T_CONN_IND\n");
1151 		freemsg(mp);
1152 		return;
1153 	}
1154 
1155 	mutex_enter(&so->so_lock);
1156 	ASSERT(mp->b_next == NULL);
1157 	if (so->so_conn_ind_head != NULL) {
     		/* Non-empty list: link behind the current tail. */
1158 		ASSERT(so->so_state & SS_HASCONNIND);
1159 		ASSERT(so->so_conn_ind_tail->b_next == NULL);
1160 		so->so_conn_ind_tail->b_next = mp;
1161 	} else {
     		/* First entry: it becomes the head and the list is flagged. */
1162 		so->so_state |= SS_HASCONNIND;
1163 		so->so_conn_ind_head = mp;
1164 	}
1165 	so->so_conn_ind_tail = mp;
1166 	/* A single new indication needs only a single consumer. */
1167 	cv_signal(&so->so_connind_cv);
1168 	mutex_exit(&so->so_lock);
1169 }
1170 
1171 /*
1172  * Dequeue the oldest pending T_CONN_IND and return it through *mpp.
1173  * Takes so_lock itself. A pending socket error is returned first.
1174  * With an empty list the call fails with EWOULDBLOCK when fmode has
      * FNDELAY or FNONBLOCK set; otherwise it sleeps on so_connind_cv and
      * returns EINTR when cv_wait_sig_swap() reports an interrupted sleep.
      * *mpp is written only on success (return value 0).
1175  */
1176 int
1177 sowaitconnind(struct sonode *so, int fmode, mblk_t **mpp)
1178 {
1179 	mblk_t *first;
1180 	int error = 0;
1181 
1182 	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
1183 	mutex_enter(&so->so_lock);
1184 	for (;;) {
1185 		/* Re-checked after every wakeup. */
1186 		if (so->so_error) {
1187 			error = sogeterr(so);
1188 			if (error)
1189 				goto done;
1190 		}
1191 		first = so->so_conn_ind_head;
1192 		if (first != NULL)
1193 			break;
1194 		if (fmode & (FNDELAY|FNONBLOCK)) {
1195 			error = EWOULDBLOCK;
1196 			goto done;
1197 		}
1198 		if (cv_wait_sig_swap(&so->so_connind_cv, &so->so_lock) == 0) {
1199 			error = EINTR;
1200 			goto done;
1201 		}
1202 	}
1203 
1204 	/* Unlink the head; reset tail and flag if it was the only entry. */
1205 	so->so_conn_ind_head = first->b_next;
1206 	first->b_next = NULL;
1207 	if (so->so_conn_ind_head == NULL) {
1208 		ASSERT(so->so_conn_ind_tail == first);
1209 		so->so_conn_ind_tail = NULL;
1210 		so->so_state &= ~SS_HASCONNIND;
1211 	}
1212 	*mpp = first;
1213 done:
1214 	mutex_exit(&so->so_lock);
1215 	return (error);
1216 }
1217 
1218 /*
1219  * Flush a T_CONN_IND matching the sequence number from the list.
1220  * Return zero if found; non-zero otherwise.
1221  * This is called very infrequently thus it is ok to do a linear search.
      *
      * Takes and releases so_lock itself. On a match the message is unlinked
      * from so_conn_ind_head/so_conn_ind_tail, SS_HASCONNIND is cleared if the
      * list became empty, so_error is set to ECONNABORTED and the message is
      * freed. When nothing matches, -1 is returned and no state is changed.
1222  */
1223 int
1224 soflushconnind(struct sonode *so, t_scalar_t seqno)
1225 {
1226 	mblk_t *prevmp, *mp;
1227 	struct T_conn_ind *tci;
1228 
1229 	mutex_enter(&so->so_lock);
     	/* prevmp trails mp so that a match can be unlinked in place. */
1230 	for (prevmp = NULL, mp = so->so_conn_ind_head; mp != NULL;
1231 	    prevmp = mp, mp = mp->b_next) {
1232 		tci = (struct T_conn_ind *)mp->b_rptr;
1233 		if (tci->SEQ_number == seqno) {
1234 			dprintso(so, 1,
1235 			    ("t_discon_ind: found T_CONN_IND %d\n", seqno));
1236 			/* Deleting last? Then the predecessor is the new tail. */
1237 			if (so->so_conn_ind_tail == mp) {
1238 				so->so_conn_ind_tail = prevmp;
1239 			}
1240 			if (prevmp == NULL) {
1241 				/* Deleting first */
1242 				so->so_conn_ind_head = mp->b_next;
1243 			} else {
1244 				prevmp->b_next = mp->b_next;
1245 			}
1246 			mp->b_next = NULL;
1247 			if (so->so_conn_ind_head == NULL) {
1248 				ASSERT(so->so_conn_ind_tail == NULL);
1249 				so->so_state &= ~SS_HASCONNIND;
1250 			} else {
1251 				ASSERT(so->so_conn_ind_tail != NULL);
1252 			}
1253 			so->so_error = ECONNABORTED;
1254 			mutex_exit(&so->so_lock);
1255 
1256 			/*
1257 			 * A T_SSL_PROXY_CONN_IND may carry a handle for
1258 			 * an SSL context, which needs to be released.
     			 * mp was unlinked above, so it (and tci, which points
     			 * into it) is presumably private to this thread now.
1259 			 */
1260 			if ((tci->PRIM_type == T_SSL_PROXY_CONN_IND) &&
1261 			    (mp->b_cont != NULL)) {
1262 				kssl_ctx_t kssl_ctx;
1263 
1264 				ASSERT(MBLKL(mp->b_cont) ==
1265 				    sizeof (kssl_ctx_t));
1266 				kssl_ctx = *((kssl_ctx_t *)mp->b_cont->b_rptr);
1267 				kssl_release_ctx(kssl_ctx);
1268 			}
1269 			freemsg(mp);
1270 			return (0);
1271 		}
1272 	}
1273 	mutex_exit(&so->so_lock);
1274 	dprintso(so, 1,	("t_discon_ind: NOT found T_CONN_IND %d\n", seqno));
1275 	return (-1);
1276 }
1277 
1278 /*
1279  * Sleep on so_state_cv for as long as the socket is still connecting
1280  * (SS_ISCONNECTING set, SS_ISCONNECTED clear) and so_error is zero.
1281  * Caller holds so_lock. fmode carries the nonblocking flags; a nonzero
      * nosig makes the sleep uninterruptible by signals.
      * Returns EINPROGRESS instead of sleeping when fmode has FNDELAY or
      * FNONBLOCK set, EINTR when an interruptible sleep is cut short, the
      * pending socket error if there is one, ECONNREFUSED if the socket ended
      * up not connected, and 0 once it is connected.
1282  */
1283 int
1284 sowaitconnected(struct sonode *so, int fmode, int nosig)
1285 {
1286 	int error;
1287 
1288 	ASSERT(MUTEX_HELD(&so->so_lock));
1289 
1290 	for (;;) {
1291 		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) !=
1292 		    SS_ISCONNECTING)
1293 			break;
1294 		if (so->so_error != 0)
1295 			break;
1296 		dprintso(so, 1, ("waiting for SS_ISCONNECTED on %p\n",
1297 		    (void *)so));
1298 		if (fmode & (FNDELAY|FNONBLOCK))
1299 			return (EINPROGRESS);
1300 
1301 		if (nosig) {
1302 			cv_wait(&so->so_state_cv, &so->so_lock);
1303 		} else if (cv_wait_sig_swap(&so->so_state_cv,
1304 		    &so->so_lock) == 0) {
1305 			/*
1306 			 * Interrupted: the application can fall back to
1307 			 * nonblocking techniques to learn when the
1308 			 * connection has been established.
1309 			 */
1310 			return (EINTR);
1311 		}
1312 		dprintso(so, 1, ("awoken on %p\n", (void *)so));
1313 	}
1314 
1315 	if (so->so_error != 0) {
1316 		error = sogeterr(so);
1317 		ASSERT(error != 0);
1318 		dprintso(so, 1, ("sowaitconnected: error %d\n", error));
1319 		return (error);
1320 	}
1321 	if (so->so_state & SS_ISCONNECTED)
1322 		return (0);
1323 	/*
1324 	 * Not connected and no error left to report: a T_ORDREL_IND or a
1325 	 * T_DISCON_IND with zero errno could have arrived, or another
     	 * thread could have consumed so_error, e.g. by calling read.
1326 	 */
1327 	error = ECONNREFUSED;
     	dprintso(so, 1, ("sowaitconnected: error %d\n", error));
     	return (error);
1328 }
1329 
1330 
1331 /*
1332  * Handle the signal generation aspect of urgent data.
      *
      * Caller must hold so_lock. When a signal is to be generated, S_RDBAND is
      * ORed into *signals and POLLRDBAND into *pollwakeups.
      *
      * If so_oobsigcnt is already ahead of so_oobcnt, a signal has been
      * generated for this urgent event before; another one is produced only
      * when extrasig is nonzero and no other state is changed. Otherwise
      * so_oobsigcnt is incremented, SS_OOBPEND is set, SS_HAVEOOBDATA and
      * SS_HADOOBDATA are cleared and any message on so_oobmsg is freed.
1333  */
1334 static void
1335 so_oob_sig(struct sonode *so, int extrasig,
1336     strsigset_t *signals, strpollset_t *pollwakeups)
1337 {
1338 	ASSERT(MUTEX_HELD(&so->so_lock));
1339 
1340 	ASSERT(so_verify_oobstate(so));
1341 	ASSERT(so->so_oobsigcnt >= so->so_oobcnt);
1342 	if (so->so_oobsigcnt > so->so_oobcnt) {
1343 		/*
1344 		 * Signal has already been generated once for this
1345 		 * urgent "event". However, since TCP can receive updated
1346 		 * urgent pointers we still generate a signal when the
     		 * caller asks for one through extrasig.
1347 		 */
1348 		ASSERT(so->so_state & SS_OOBPEND);
1349 		if (extrasig) {
1350 			*signals |= S_RDBAND;
1351 			*pollwakeups |= POLLRDBAND;
1352 		}
1353 		return;
1354 	}
1355 
     	/* First notification for this urgent event: count it. */
1356 	so->so_oobsigcnt++;
1357 	ASSERT(so->so_oobsigcnt > 0);	/* Wraparound */
1358 	ASSERT(so->so_oobsigcnt > so->so_oobcnt);
1359 
1360 	/*
1361 	 * Record (for select/poll) that urgent data is pending.
1362 	 */
1363 	so->so_state |= SS_OOBPEND;
1364 	/*
1365 	 * New urgent data on the way so forget about any old
1366 	 * urgent data.
1367 	 */
1368 	so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);
1369 	if (so->so_oobmsg != NULL) {
1370 		dprintso(so, 1, ("sock: discarding old oob\n"));
1371 		freemsg(so->so_oobmsg);
1372 		so->so_oobmsg = NULL;
1373 	}
1374 	*signals |= S_RDBAND;
1375 	*pollwakeups |= POLLRDBAND;
1376 	ASSERT(so_verify_oobstate(so));
1377 }
1378 
1379 /*
1380  * Account for a newly arrived T_EXDATA_IND: so_oobcnt is incremented and
1381  * the message is flagged with MSGMARK. Caller holds so_lock.
      * The message is always returned so that it gets queued on the read
      * queue; 'signals' and 'pollwakeups' are not touched here.
1382  */
1383 /* ARGSUSED2 */
1384 static mblk_t *
1385 so_oob_exdata(struct sonode *so, mblk_t *mp,
1386 	strsigset_t *signals, strpollset_t *pollwakeups)
1387 {
1388 	ASSERT(MUTEX_HELD(&so->so_lock));
1389 	ASSERT(so_verify_oobstate(so));
1390 
1391 	/* The signal count is expected to be ahead of the data count. */
1392 	ASSERT(so->so_oobcnt < so->so_oobsigcnt);
1393 	so->so_oobcnt += 1;
1394 	ASSERT(so->so_oobcnt > 0);	/* wraparound? */
1395 	ASSERT(so->so_oobcnt <= so->so_oobsigcnt);
1396 
1397 	/* The mark is what SIOCATMARK looks for. */
1398 	mp->b_flag |= MSGMARK;
1399 
1400 	ASSERT(so_verify_oobstate(so));
1401 	return (mp);
1402 }
1406 
1407 /*
1408  * Dispose of the data block that carries the urgent data. Caller holds
1409  * so_lock. With SO_OOBINLINE set the block is returned so that it is
      * queued on the read queue; otherwise it is stored on so_oobmsg,
      * SS_HAVEOOBDATA is set and NULL is returned. The matching poll events
      * and signals are ORed into *pollwakeups and *signals.
1410  */
1411 static mblk_t *
1412 so_oob_data(struct sonode *so, mblk_t *mp,
1413 	strsigset_t *signals, strpollset_t *pollwakeups)
1414 {
1415 	ASSERT(MUTEX_HELD(&so->so_lock));
1416 	ASSERT(so_verify_oobstate(so));
1417 	ASSERT(so->so_oobsigcnt >= so->so_oobcnt);
1418 	ASSERT(mp != NULL);
1419 	/* No earlier out-of-line urgent data may still be parked. */
1420 	ASSERT(so->so_oobmsg == NULL);
1421 
1422 	if (!(so->so_options & SO_OOBINLINE)) {
1423 		so->so_oobmsg = mp;
1424 		so->so_state |= SS_HAVEOOBDATA;
1425 		*pollwakeups |= POLLRDBAND;
1426 		mp = NULL;
1427 	} else {
1428 		*signals |= S_INPUT | S_RDNORM;
1429 		*pollwakeups |= POLLIN | POLLRDNORM | POLLRDBAND;
1430 	}
1431 	ASSERT(so_verify_oobstate(so));
1432 	return (mp);
1433 }
1438 
1439 /*
1440  * Caller must hold the mutex.
1441  * Park a T_DISCON_IND received from below on so_discon_ind_mp so that it
1442  * can be processed later. The callback is carried in the message's
1443  * b_prev field; when the message is processed the framework will call:
1444  *      (*func)(so, mp);
      * A T_DISCON_IND that arrives while another one is parked, or while
      * SOASYNC_UNBIND is set, is logged and freed.
1445  */
1446 static void
1447 so_save_discon_ind(struct sonode *so,
1448 	mblk_t *mp,
1449 	void (*func)(struct sonode *so, mblk_t *))
1450 {
1451 	ASSERT(MUTEX_HELD(&so->so_lock));
1452 
1453 	/*
1454 	 * Save only when no earlier T_DISCON_IND is outstanding, i.e.
1455 	 * none is parked on so_discon_ind_mp and none is being processed.
1456 	 */
1457 	if (so->so_discon_ind_mp == NULL && !(so->so_flag & SOASYNC_UNBIND)) {
1458 		mp->b_next = NULL;
1459 		mp->b_prev = (mblk_t *)func;
1460 		so->so_discon_ind_mp = mp;
1461 		return;
1462 	}
1463 
1464 	zcmn_err(getzoneid(), CE_WARN,
1465 	    "sockfs: received unexpected additional T_DISCON_IND\n");
1466 	freemsg(mp);
1467 }
1468 
1469 /*
1470  * Caller must hold the mutex and make sure that either SOLOCKED
1471  * or SOASYNC_UNBIND is set. Called from so_unlock_single().
1472  * Perform delayed processing of T_DISCON_IND message on so_discon_ind_mp.
1473  * Need to ensure that strsock_proto() will not end up sleeping for
1474  * SOASYNC_UNBIND, while executing this function.
      *
      * Does nothing when no message is parked. Otherwise the message is taken
      * off so_discon_ind_mp and passed to the callback that
      * so_save_discon_ind() stored in its b_prev field.
1475  */
1476 void
1477 so_drain_discon_ind(struct sonode *so)
1478 {
1479 	mblk_t	*bp;
1480 	void (*func)(struct sonode *so, mblk_t *);
1481 
1482 	ASSERT(MUTEX_HELD(&so->so_lock));
1483 	ASSERT(so->so_flag & (SOLOCKED|SOASYNC_UNBIND));
1484 
1485 	/* Process T_DISCON_IND on so_discon_ind_mp */
1486 	if ((bp = so->so_discon_ind_mp) != NULL) {
1487 		so->so_discon_ind_mp = NULL;
     		/* Recover the callback and scrub it from the message. */
1488 		func = (void (*)())bp->b_prev;
1489 		bp->b_prev = NULL;
1490 
1491 		/*
1492 		 * This (*func) is supposed to generate a message downstream
1493 		 * and we need to have a flag set until the corresponding
1494 		 * upstream message reaches stream head.
1495 		 * When processing T_DISCON_IND in strsock_discon_ind
1496 		 * we hold SOASYNC_UNBIND when sending T_UNBIND_REQ down and
1497 		 * drop the flag after we get the ACK in strsock_proto.
1498 		 */
1499 		(void) (*func)(so, bp);
1500 	}
1501 }
1502 
1503 /*
1504  * Caller must hold the mutex.
1505  * Discard a T_DISCON_IND parked on so_discon_ind_mp, if there is one,
      * without running its callback.
1506  */
1507 void
1508 so_flush_discon_ind(struct sonode *so)
1509 {
1510 	mblk_t *pending;
1511 
1512 	ASSERT(MUTEX_HELD(&so->so_lock));
1513 
1514 	pending = so->so_discon_ind_mp;
1515 	if (pending == NULL)
1516 		return;
1517 
1518 	/* Clear the callback stored in b_prev before freeing. */
1519 	so->so_discon_ind_mp = NULL;
1520 	pending->b_prev = NULL;
1521 	freemsg(pending);
1522 }
1523 
1524 /*
1525  * Caller must hold the mutex.
1526  *
1527  * This function is used to process the T_DISCON_IND message. It does
1528  * immediate processing when called from strsock_proto and delayed
1529  * processing of discon_ind saved on so_discon_ind_mp when called from
1530  * so_drain_discon_ind. When a T_DISCON_IND message is saved in
1531  * so_discon_ind_mp for delayed processing, this function is registered
1532  * as the callback function to process the message.
1533  *
1534  * SOASYNC_UNBIND should be held in this function, during the non-blocking
1535  * unbind operation, and should be released only after we receive the ACK
1536  * in strsock_proto, for the T_UNBIND_REQ sent here. Since SOLOCKED is not set,
1537  * no TPI messages would be sent down at this time. This is to prevent M_FLUSH
1538  * sent from either this function or tcp_unbind(), flushing away any TPI
1539  * message that is being sent down and stays in a lower module's queue.
1540  *
1541  * This function drops so_lock and grabs it again.
      *
      * In order, the function: marks the socket disconnected with the
      * DISCON_reason from the message; if the socket is bound and a
      * preallocated so_unbind_mp exists, sends M_FLUSH and T_UNBIND_REQ
      * downstream with SOASYNC_UNBIND set; sets the stream head read error
      * (only for a nonzero DISCON_reason), the write error and EOF; frees
      * discon_mp; and wakes writers (POLLOUT, WSLEEP, S_OUTPUT).
1542  */
1543 static void
1544 strsock_discon_ind(struct sonode *so, mblk_t *discon_mp)
1545 {
1546 	struct vnode *vp;
1547 	struct stdata *stp;
1548 	union T_primitives *tpr;
1549 	struct T_unbind_req *ubr;
1550 	mblk_t *mp;
1551 	int error;
1552 
1553 	ASSERT(MUTEX_HELD(&so->so_lock));
1554 	ASSERT(discon_mp);
1555 	ASSERT(discon_mp->b_rptr);
1556 
1557 	tpr = (union T_primitives *)discon_mp->b_rptr;
1558 	ASSERT(tpr->type == T_DISCON_IND);
1559 
1560 	vp = SOTOV(so);
1561 	stp = vp->v_stream;
1562 	ASSERT(stp);
1563 
1564 	/*
1565 	 * Not a listener
1566 	 */
1567 	ASSERT((so->so_state & SS_ACCEPTCONN) == 0);
1568 
1569 	/*
1570 	 * This assumes that the name space for DISCON_reason
1571 	 * is the errno name space.
1572 	 */
1573 	soisdisconnected(so, tpr->discon_ind.DISCON_reason);
1574 
1575 	/*
1576 	 * Unbind with the transport without blocking.
1577 	 * If we've already received a T_DISCON_IND do not unbind.
1578 	 *
1579 	 * If there is no preallocated unbind message, we have already
1580 	 * unbound with the transport.
1581 	 *
1582 	 * If the socket is not bound, no need to unbind.
     	 *
     	 * Every branch below releases so_lock.
1583 	 */
1584 	mp = so->so_unbind_mp;
1585 	if (mp == NULL) {
1586 		ASSERT(!(so->so_state & SS_ISBOUND));
1587 		mutex_exit(&so->so_lock);
1588 	} else if (!(so->so_state & SS_ISBOUND))  {
1589 		mutex_exit(&so->so_lock);
1590 	} else {
     		/* Consume the preallocated unbind message. */
1591 		so->so_unbind_mp = NULL;
1592 
1593 		/*
1594 		 * No other T_DISCON_IND may be in the middle of an
     		 * asynchronous unbind.
1595 		 */
1596 		ASSERT((so->so_flag & SOASYNC_UNBIND) == 0);
1597 
1598 		/*
1599 		 * Make strsock_proto ignore T_OK_ACK and T_ERROR_ACK for
1600 		 * this unbind. Set SOASYNC_UNBIND. This should be cleared
1601 		 * only after we receive the ACK in strsock_proto.
1602 		 */
1603 		so->so_flag |= SOASYNC_UNBIND;
1604 		ASSERT(!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)));
1605 		so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN|SS_LADDR_VALID);
1606 		mutex_exit(&so->so_lock);
1607 
1608 		/*
1609 		 * Send down T_UNBIND_REQ ignoring flow control.
1610 		 * XXX Assumes that MSG_IGNFLOW implies that this thread
1611 		 * does not run service procedures.
1612 		 */
1613 		ASSERT(DB_TYPE(mp) == M_PROTO);
1614 		ubr = (struct T_unbind_req *)mp->b_rptr;
1615 		mp->b_wptr += sizeof (*ubr);
1616 		ubr->PRIM_type = T_UNBIND_REQ;
1617 
1618 		/*
1619 		 * Flush the read and write side (except stream head read queue)
1620 		 * and send down T_UNBIND_REQ.
1621 		 */
1622 		(void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW);
1623 		error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
1624 		    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
1625 		/* LINTED - warning: statement has no consequent: if */
1626 		if (error) {
1627 			eprintsoline(so, error);
1628 		}
1629 	}
1630 
     	/* so_lock is not held from here until the end of the function. */
1631 	if (tpr->discon_ind.DISCON_reason != 0)
1632 		strsetrerror(SOTOV(so), 0, 0, sogetrderr);
1633 	strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
1634 	strseteof(SOTOV(so), 1);
1635 	/*
1636 	 * strseteof takes care of read side wakeups,
1637 	 * pollwakeups, and signals.
1638 	 */
1639 	dprintso(so, 1, ("T_DISCON_IND: error %d\n", so->so_error));
     	/* tpr points into discon_mp and must not be used after this. */
1640 	freemsg(discon_mp);
1641 
1642 
     	/* The write side is woken by hand below. */
1643 	pollwakeup(&stp->sd_pollist, POLLOUT);
1644 	mutex_enter(&stp->sd_lock);
1645 
1646 	/*
1647 	 * Wake sleeping write
1648 	 */
1649 	if (stp->sd_flag & WSLEEP) {
1650 		stp->sd_flag &= ~WSLEEP;
1651 		cv_broadcast(&stp->sd_wrq->q_wait);
1652 	}
1653 
1654 	/*
1655 	 * strsendsig can handle multiple signals with a
1656 	 * single call.  Send SIGPOLL for S_OUTPUT event.
1657 	 */
1658 	if (stp->sd_sigflags & S_OUTPUT)
1659 		strsendsig(stp->sd_siglist, S_OUTPUT, 0, 0);
1660 
1661 	mutex_exit(&stp->sd_lock);
     	/* Return to the caller with so_lock held, as on entry. */
1662 	mutex_enter(&so->so_lock);
1663 }
1664 
1665 /*
1666  * This routine is registered with the stream head to receive M_PROTO
1667  * and M_PCPROTO messages.
1668  *
1669  * Returns NULL if the message was consumed.
1670  * Returns an mblk to make that mblk be processed (and queued) by the stream
1671  * head.
1672  *
1673  * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and
1674  * *pollwakeups) for the stream head to take action on. Note that since
1675  * sockets always deliver SIGIO for every new piece of data this routine
1676  * never sets *firstmsgsigs; any signals are returned in *allmsgsigs.
1677  *
1678  * This routine handles all data related TPI messages independent of
1679  * the type of the socket i.e. it doesn't care if T_UNITDATA_IND message
1680  * arrive on a SOCK_STREAM.
1681  */
1682 static mblk_t *
1683 strsock_proto(vnode_t *vp, mblk_t *mp,
1684 		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
1685 		strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
1686 {
1687 	union T_primitives *tpr;
1688 	struct sonode *so;
1689 
1690 	so = VTOSO(vp);
1691 
1692 	dprintso(so, 1, ("strsock_proto(%p, %p)\n", (void *)vp, (void *)mp));
1693 
1694 	/* Set default return values */
1695 	*firstmsgsigs = *wakeups = *allmsgsigs = *pollwakeups = 0;
1696 
1697 	ASSERT(DB_TYPE(mp) == M_PROTO ||
1698 	    DB_TYPE(mp) == M_PCPROTO);
1699 
1700 	if (MBLKL(mp) < sizeof (tpr->type)) {
1701 		/* The message is too short to even contain the primitive */
1702 		zcmn_err(getzoneid(), CE_WARN,
1703 		    "sockfs: Too short TPI message received. Len = %ld\n",
1704 		    (ptrdiff_t)(MBLKL(mp)));
1705 		freemsg(mp);
1706 		return (NULL);
1707 	}
1708 	if (!__TPI_PRIM_ISALIGNED(mp->b_rptr)) {
1709 		/* The read pointer is not aligned correctly for TPI */
1710 		zcmn_err(getzoneid(), CE_WARN,
1711 		    "sockfs: Unaligned TPI message received. rptr = %p\n",
1712 		    (void *)mp->b_rptr);
1713 		freemsg(mp);
1714 		return (NULL);
1715 	}
1716 	tpr = (union T_primitives *)mp->b_rptr;
1717 	dprintso(so, 1, ("strsock_proto: primitive %d\n", tpr->type));
1718 
1719 	switch (tpr->type) {
1720 
1721 	case T_DATA_IND:
1722 		if (MBLKL(mp) < sizeof (struct T_data_ind)) {
1723 			zcmn_err(getzoneid(), CE_WARN,
1724 			    "sockfs: Too short T_DATA_IND. Len = %ld\n",
1725 			    (ptrdiff_t)(MBLKL(mp)));
1726 			freemsg(mp);
1727 			return (NULL);
1728 		}
1729 		/*
1730 		 * Ignore zero-length T_DATA_IND messages. These might be
1731 		 * generated by some transports.
1732 		 * This is needed to prevent read (which skips the M_PROTO
1733 		 * part) to unexpectedly return 0 (or return EWOULDBLOCK
1734 		 * on a non-blocking socket after select/poll has indicated
1735 		 * that data is available).
1736 		 */
1737 		if (msgdsize(mp->b_cont) == 0) {
1738 			dprintso(so, 0,
1739 			    ("strsock_proto: zero length T_DATA_IND\n"));
1740 			freemsg(mp);
1741 			return (NULL);
1742 		}
1743 		*allmsgsigs = S_INPUT | S_RDNORM;
1744 		*pollwakeups = POLLIN | POLLRDNORM;
1745 		*wakeups = RSLEEP;
1746 		return (mp);
1747 
1748 	case T_UNITDATA_IND: {
1749 		struct T_unitdata_ind	*tudi = &tpr->unitdata_ind;
1750 		void			*addr;
1751 		t_uscalar_t		addrlen;
1752 
1753 		if (MBLKL(mp) < sizeof (struct T_unitdata_ind)) {
1754 			zcmn_err(getzoneid(), CE_WARN,
1755 			    "sockfs: Too short T_UNITDATA_IND. Len = %ld\n",
1756 			    (ptrdiff_t)(MBLKL(mp)));
1757 			freemsg(mp);
1758 			return (NULL);
1759 		}
1760 
1761 		/* Is this not a connected datagram socket? */
1762 		if ((so->so_mode & SM_CONNREQUIRED) ||
1763 		    !(so->so_state & SS_ISCONNECTED)) {
1764 			/*
1765 			 * Not a connected datagram socket. Look for
1766 			 * the SO_UNIX_CLOSE option. If such an option is found
1767 			 * discard the message (since it has no meaning
1768 			 * unless connected).
1769 			 */
1770 			if (so->so_family == AF_UNIX && msgdsize(mp) == 0 &&
1771 			    tudi->OPT_length != 0) {
1772 				void *opt;
1773 				t_uscalar_t optlen = tudi->OPT_length;
1774 
1775 				opt = sogetoff(mp, tudi->OPT_offset,
1776 				    optlen, __TPI_ALIGN_SIZE);
1777 				if (opt == NULL) {
1778 					/* The len/off falls outside mp */
1779 					freemsg(mp);
1780 					mutex_enter(&so->so_lock);
1781 					soseterror(so, EPROTO);
1782 					mutex_exit(&so->so_lock);
1783 					zcmn_err(getzoneid(), CE_WARN,
1784 					    "sockfs: T_unidata_ind with "
1785 					    "invalid optlen/offset %u/%d\n",
1786 					    optlen, tudi->OPT_offset);
1787 					return (NULL);
1788 				}
1789 				if (so_getopt_unix_close(opt, optlen)) {
1790 					freemsg(mp);
1791 					return (NULL);
1792 				}
1793 			}
1794 			*allmsgsigs = S_INPUT | S_RDNORM;
1795 			*pollwakeups = POLLIN | POLLRDNORM;
1796 			*wakeups = RSLEEP;
1797 			if (audit_active)
1798 				audit_sock(T_UNITDATA_IND, strvp2wq(vp),
1799 				    mp, 0);
1800 			return (mp);
1801 		}
1802 
1803 		/*
1804 		 * A connect datagram socket. For AF_INET{,6} we verify that
1805 		 * the source address matches the "connected to" address.
1806 		 * The semantics of AF_UNIX sockets is to not verify
1807 		 * the source address.
1808 		 * Note that this source address verification is transport
1809 		 * specific. Thus the real fix would be to extend TPI
1810 		 * to allow T_CONN_REQ messages to be sent to connectionless
1811 		 * transport providers and always let the transport provider
1812 		 * do whatever filtering is needed.
1813 		 *
1814 		 * The verification/filtering semantics for transports
1815 		 * other than AF_INET and AF_UNIX are unknown. The choice
1816 		 * would be to either filter using bcmp or let all messages
1817 		 * get through. This code does not filter other address
1818 		 * families since this at least allows the application to
1819 		 * work around any missing filtering.
1820 		 *
1821 		 * XXX Should we move filtering to UDP/ICMP???
1822 		 * That would require passing e.g. a T_DISCON_REQ to UDP
1823 		 * when the socket becomes unconnected.
1824 		 */
1825 		addrlen = tudi->SRC_length;
1826 		/*
1827 		 * The alignment restriction is really too strict but
1828 		 * we want enough alignment to inspect the fields of
1829 		 * a sockaddr_in.
1830 		 */
1831 		addr = sogetoff(mp, tudi->SRC_offset, addrlen,
1832 		    __TPI_ALIGN_SIZE);
1833 		if (addr == NULL) {
1834 			freemsg(mp);
1835 			mutex_enter(&so->so_lock);
1836 			soseterror(so, EPROTO);
1837 			mutex_exit(&so->so_lock);
1838 			zcmn_err(getzoneid(), CE_WARN,
1839 			    "sockfs: T_unidata_ind with invalid "
1840 			    "addrlen/offset %u/%d\n",
1841 			    addrlen, tudi->SRC_offset);
1842 			return (NULL);
1843 		}
1844 
1845 		if (so->so_family == AF_INET) {
1846 			/*
1847 			 * For AF_INET we allow wildcarding both sin_addr
1848 			 * and sin_port.
1849 			 */
1850 			struct sockaddr_in *faddr, *sin;
1851 
1852 			/* Prevent so_faddr_sa from changing while accessed */
1853 			mutex_enter(&so->so_lock);
1854 			ASSERT(so->so_faddr_len ==
1855 			    (socklen_t)sizeof (struct sockaddr_in));
1856 			faddr = (struct sockaddr_in *)so->so_faddr_sa;
1857 			sin = (struct sockaddr_in *)addr;
1858 			if (addrlen !=
1859 			    (t_uscalar_t)sizeof (struct sockaddr_in) ||
1860 			    (sin->sin_addr.s_addr != faddr->sin_addr.s_addr &&
1861 			    faddr->sin_addr.s_addr != INADDR_ANY) ||
1862 			    (so->so_type != SOCK_RAW &&
1863 			    sin->sin_port != faddr->sin_port &&
1864 			    faddr->sin_port != 0)) {
1865 #ifdef DEBUG
1866 				dprintso(so, 0,
1867 				    ("sockfs: T_UNITDATA_IND mismatch: %s",
1868 				    pr_addr(so->so_family,
1869 				    (struct sockaddr *)addr,
1870 				    addrlen)));
1871 				dprintso(so, 0, (" - %s\n",
1872 				    pr_addr(so->so_family, so->so_faddr_sa,
1873 				    (t_uscalar_t)so->so_faddr_len)));
1874 #endif /* DEBUG */
1875 				mutex_exit(&so->so_lock);
1876 				freemsg(mp);
1877 				return (NULL);
1878 			}
1879 			mutex_exit(&so->so_lock);
1880 		} else if (so->so_family == AF_INET6) {
1881 			/*
1882 			 * For AF_INET6 we allow wildcarding both sin6_addr
1883 			 * and sin6_port.
1884 			 */
1885 			struct sockaddr_in6 *faddr6, *sin6;
1886 			static struct in6_addr zeroes; /* inits to all zeros */
1887 
1888 			/* Prevent so_faddr_sa from changing while accessed */
1889 			mutex_enter(&so->so_lock);
1890 			ASSERT(so->so_faddr_len ==
1891 			    (socklen_t)sizeof (struct sockaddr_in6));
1892 			faddr6 = (struct sockaddr_in6 *)so->so_faddr_sa;
1893 			sin6 = (struct sockaddr_in6 *)addr;
1894 			/* XXX could we get a mapped address ::ffff:0.0.0.0 ? */
1895 			if (addrlen !=
1896 			    (t_uscalar_t)sizeof (struct sockaddr_in6) ||
1897 			    (!IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
1898 			    &faddr6->sin6_addr) &&
1899 			    !IN6_ARE_ADDR_EQUAL(&faddr6->sin6_addr, &zeroes)) ||
1900 			    (so->so_type != SOCK_RAW &&
1901 			    sin6->sin6_port != faddr6->sin6_port &&
1902 			    faddr6->sin6_port != 0)) {
1903 #ifdef DEBUG
1904 				dprintso(so, 0,
1905 				    ("sockfs: T_UNITDATA_IND mismatch: %s",
1906 				    pr_addr(so->so_family,
1907 				    (struct sockaddr *)addr,
1908 				    addrlen)));
1909 				dprintso(so, 0, (" - %s\n",
1910 				    pr_addr(so->so_family, so->so_faddr_sa,
1911 				    (t_uscalar_t)so->so_faddr_len)));
1912 #endif /* DEBUG */
1913 				mutex_exit(&so->so_lock);
1914 				freemsg(mp);
1915 				return (NULL);
1916 			}
1917 			mutex_exit(&so->so_lock);
1918 		} else if (so->so_family == AF_UNIX &&
1919 		    msgdsize(mp->b_cont) == 0 &&
1920 		    tudi->OPT_length != 0) {
1921 			/*
1922 			 * Attempt to extract AF_UNIX
1923 			 * SO_UNIX_CLOSE indication from options.
1924 			 */
1925 			void *opt;
1926 			t_uscalar_t optlen = tudi->OPT_length;
1927 
1928 			opt = sogetoff(mp, tudi->OPT_offset,
1929 			    optlen, __TPI_ALIGN_SIZE);
1930 			if (opt == NULL) {
1931 				/* The len/off falls outside mp */
1932 				freemsg(mp);
1933 				mutex_enter(&so->so_lock);
1934 				soseterror(so, EPROTO);
1935 				mutex_exit(&so->so_lock);
1936 				zcmn_err(getzoneid(), CE_WARN,
1937 				    "sockfs: T_unidata_ind with invalid "
1938 				    "optlen/offset %u/%d\n",
1939 				    optlen, tudi->OPT_offset);
1940 				return (NULL);
1941 			}
1942 			/*
1943 			 * If we received a unix close indication mark the
1944 			 * socket and discard this message.
1945 			 */
1946 			if (so_getopt_unix_close(opt, optlen)) {
1947 				mutex_enter(&so->so_lock);
1948 				sobreakconn(so, ECONNRESET);
1949 				mutex_exit(&so->so_lock);
1950 				strsetrerror(SOTOV(so), 0, 0, sogetrderr);
1951 				freemsg(mp);
1952 				*pollwakeups = POLLIN | POLLRDNORM;
1953 				*allmsgsigs = S_INPUT | S_RDNORM;
1954 				*wakeups = RSLEEP;
1955 				return (NULL);
1956 			}
1957 		}
1958 		*allmsgsigs = S_INPUT | S_RDNORM;
1959 		*pollwakeups = POLLIN | POLLRDNORM;
1960 		*wakeups = RSLEEP;
1961 		return (mp);
1962 	}
1963 
1964 	case T_OPTDATA_IND: {
1965 		struct T_optdata_ind	*tdi = &tpr->optdata_ind;
1966 
1967 		if (MBLKL(mp) < sizeof (struct T_optdata_ind)) {
1968 			zcmn_err(getzoneid(), CE_WARN,
1969 			    "sockfs: Too short T_OPTDATA_IND. Len = %ld\n",
1970 			    (ptrdiff_t)(MBLKL(mp)));
1971 			freemsg(mp);
1972 			return (NULL);
1973 		}
1974 		/*
1975 		 * Allow zero-length messages carrying options.
1976 		 * This is used when carrying the SO_UNIX_CLOSE option.
1977 		 */
1978 		if (so->so_family == AF_UNIX && msgdsize(mp->b_cont) == 0 &&
1979 		    tdi->OPT_length != 0) {
1980 			/*
1981 			 * Attempt to extract AF_UNIX close indication
1982 			 * from the options. Ignore any other options -
1983 			 * those are handled once the message is removed
1984 			 * from the queue.
1985 			 * The close indication message should not carry data.
1986 			 */
1987 			void *opt;
1988 			t_uscalar_t optlen = tdi->OPT_length;
1989 
1990 			opt = sogetoff(mp, tdi->OPT_offset,
1991 			    optlen, __TPI_ALIGN_SIZE);
1992 			if (opt == NULL) {
1993 				/* The len/off falls outside mp */
1994 				freemsg(mp);
1995 				mutex_enter(&so->so_lock);
1996 				soseterror(so, EPROTO);
1997 				mutex_exit(&so->so_lock);
1998 				zcmn_err(getzoneid(), CE_WARN,
1999 				    "sockfs: T_optdata_ind with invalid "
2000 				    "optlen/offset %u/%d\n",
2001 				    optlen, tdi->OPT_offset);
2002 				return (NULL);
2003 			}
2004 			/*
2005 			 * If we received a close indication mark the
2006 			 * socket and discard this message.
2007 			 */
2008 			if (so_getopt_unix_close(opt, optlen)) {
2009 				mutex_enter(&so->so_lock);
2010 				socantsendmore(so);
2011 				mutex_exit(&so->so_lock);
2012 				strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2013 				freemsg(mp);
2014 				return (NULL);
2015 			}
2016 		}
2017 		*allmsgsigs = S_INPUT | S_RDNORM;
2018 		*pollwakeups = POLLIN | POLLRDNORM;
2019 		*wakeups = RSLEEP;
2020 		return (mp);
2021 	}
2022 
2023 	case T_EXDATA_IND: {
2024 		mblk_t		*mctl, *mdata;
2025 		mblk_t *lbp;
2026 		union T_primitives *tprp;
2027 		struct stdata   *stp;
2028 		queue_t *qp;
2029 
2030 		if (MBLKL(mp) < sizeof (struct T_exdata_ind)) {
2031 			zcmn_err(getzoneid(), CE_WARN,
2032 			    "sockfs: Too short T_EXDATA_IND. Len = %ld\n",
2033 			    (ptrdiff_t)(MBLKL(mp)));
2034 			freemsg(mp);
2035 			return (NULL);
2036 		}
2037 		/*
2038 		 * Ignore zero-length T_EXDATA_IND messages. These might be
2039 		 * generated by some transports.
2040 		 *
2041 		 * This is needed to prevent read (which skips the M_PROTO
2042 		 * part) to unexpectedly return 0 (or return EWOULDBLOCK
2043 		 * on a non-blocking socket after select/poll has indicated
2044 		 * that data is available).
2045 		 */
2046 		dprintso(so, 1,
2047 		    ("T_EXDATA_IND(%p): counts %d/%d state %s\n",
2048 		    (void *)vp, so->so_oobsigcnt, so->so_oobcnt,
2049 		    pr_state(so->so_state, so->so_mode)));
2050 
2051 		if (msgdsize(mp->b_cont) == 0) {
2052 			dprintso(so, 0,
2053 			    ("strsock_proto: zero length T_EXDATA_IND\n"));
2054 			freemsg(mp);
2055 			return (NULL);
2056 		}
2057 
2058 		/*
2059 		 * Split into the T_EXDATA_IND and the M_DATA part.
2060 		 * We process these three pieces separately:
2061 		 *	signal generation
2062 		 *	handling T_EXDATA_IND
2063 		 *	handling M_DATA component
2064 		 */
2065 		mctl = mp;
2066 		mdata = mctl->b_cont;
2067 		mctl->b_cont = NULL;
2068 		mutex_enter(&so->so_lock);
2069 		so_oob_sig(so, 0, allmsgsigs, pollwakeups);
2070 		mctl = so_oob_exdata(so, mctl, allmsgsigs, pollwakeups);
2071 		mdata = so_oob_data(so, mdata, allmsgsigs, pollwakeups);
2072 
2073 		stp = vp->v_stream;
2074 		ASSERT(stp != NULL);
2075 		qp = _RD(stp->sd_wrq);
2076 
2077 		mutex_enter(QLOCK(qp));
2078 		lbp = qp->q_last;
2079 
2080 		/*
2081 		 * We want to avoid queueing up a string of T_EXDATA_IND
2082 		 * messages with no intervening data messages at the stream
2083 		 * head. These messages contribute to the total message
2084 		 * count. Eventually this can lead to STREAMS flow control
2085 		 * and also cause TCP to advertise a zero window condition
2086 		 * to the peer. This can happen in the degenerate case where
2087 		 * the sender and receiver exchange only OOB data. The sender
2088 		 * only sends messages with MSG_OOB flag and the receiver
2089 		 * receives only MSG_OOB messages and does not use SO_OOBINLINE.
2090 		 * An example of this scenario has been reported in applications
2091 		 * that use OOB data to exchange heart beats. Flow control
2092 		 * relief will never happen if the application only reads OOB
2093 		 * data which is done directly by sorecvoob() and the
2094 		 * T_EXDATA_IND messages at the streamhead won't be consumed.
2095 		 * Note that there is no correctness issue in compressing the
2096 		 * string of T_EXDATA_IND messages into a single T_EXDATA_IND
2097 		 * message. A single read that does not specify MSG_OOB will
2098 		 * read across all the marks in a loop in sotpi_recvmsg().
2099 		 * Each mark is individually distinguishable only if the
2100 		 * T_EXDATA_IND messages are separated by data messages.
2101 		 */
2102 		if ((qp->q_first != NULL) && (DB_TYPE(lbp) == M_PROTO)) {
2103 			tprp = (union T_primitives *)lbp->b_rptr;
2104 			if ((tprp->type == T_EXDATA_IND) &&
2105 			    !(so->so_options & SO_OOBINLINE)) {
2106 
2107 				/*
2108 				 * free the new M_PROTO message
2109 				 */
2110 				freemsg(mctl);
2111 
2112 				/*
2113 				 * adjust the OOB count and OOB	signal count
2114 				 * just incremented for the new OOB data.
2115 				 */
2116 				so->so_oobcnt--;
2117 				so->so_oobsigcnt--;
2118 				mutex_exit(QLOCK(qp));
2119 				mutex_exit(&so->so_lock);
2120 				return (NULL);
2121 			}
2122 		}
2123 		mutex_exit(QLOCK(qp));
2124 
2125 		/*
2126 		 * Pass the T_EXDATA_IND and the M_DATA back separately
2127 		 * by using b_next linkage. (The stream head will queue any
2128 		 * b_next linked messages separately.) This is needed
2129 		 * since MSGMARK applies to the last byte of the message
2130 		 * hence we can not have any M_DATA component attached
2131 		 * to the marked T_EXDATA_IND. Note that the stream head
2132 		 * will not consolidate M_DATA messages onto an MSGMARK'ed
2133 		 * message in order to preserve the constraint that
2134 		 * the T_EXDATA_IND always is a separate message.
2135 		 */
2136 		ASSERT(mctl != NULL);
2137 		mctl->b_next = mdata;
2138 		mp = mctl;
2139 #ifdef DEBUG
2140 		if (mdata == NULL) {
2141 			dprintso(so, 1,
2142 			    ("after outofline T_EXDATA_IND(%p): "
2143 			    "counts %d/%d  poll 0x%x sig 0x%x state %s\n",
2144 			    (void *)vp, so->so_oobsigcnt,
2145 			    so->so_oobcnt, *pollwakeups, *allmsgsigs,
2146 			    pr_state(so->so_state, so->so_mode)));
2147 		} else {
2148 			dprintso(so, 1,
2149 			    ("after inline T_EXDATA_IND(%p): "
2150 			    "counts %d/%d  poll 0x%x sig 0x%x state %s\n",
2151 			    (void *)vp, so->so_oobsigcnt,
2152 			    so->so_oobcnt, *pollwakeups, *allmsgsigs,
2153 			    pr_state(so->so_state, so->so_mode)));
2154 		}
2155 #endif /* DEBUG */
2156 		mutex_exit(&so->so_lock);
2157 		*wakeups = RSLEEP;
2158 		return (mp);
2159 	}
2160 
2161 	case T_CONN_CON: {
2162 		struct T_conn_con	*conn_con;
2163 		void			*addr;
2164 		t_uscalar_t		addrlen;
2165 
2166 		/*
2167 		 * Verify the state, update the state to ISCONNECTED,
2168 		 * record the potentially new address in the message,
2169 		 * and drop the message.
2170 		 */
2171 		if (MBLKL(mp) < sizeof (struct T_conn_con)) {
2172 			zcmn_err(getzoneid(), CE_WARN,
2173 			    "sockfs: Too short T_CONN_CON. Len = %ld\n",
2174 			    (ptrdiff_t)(MBLKL(mp)));
2175 			freemsg(mp);
2176 			return (NULL);
2177 		}
2178 
2179 		mutex_enter(&so->so_lock);
2180 		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) !=
2181 		    SS_ISCONNECTING) {
2182 			mutex_exit(&so->so_lock);
2183 			dprintso(so, 1,
2184 			    ("T_CONN_CON: state %x\n", so->so_state));
2185 			freemsg(mp);
2186 			return (NULL);
2187 		}
2188 
2189 		conn_con = &tpr->conn_con;
2190 		addrlen = conn_con->RES_length;
2191 		/*
2192 		 * Allow the address to be of different size than sent down
2193 		 * in the T_CONN_REQ as long as it doesn't exceed the maxlen.
2194 		 * For AF_UNIX require the identical length.
2195 		 */
2196 		if (so->so_family == AF_UNIX ?
2197 		    addrlen != (t_uscalar_t)sizeof (so->so_ux_laddr) :
2198 		    addrlen > (t_uscalar_t)so->so_faddr_maxlen) {
2199 			zcmn_err(getzoneid(), CE_WARN,
2200 			    "sockfs: T_conn_con with different "
2201 			    "length %u/%d\n",
2202 			    addrlen, conn_con->RES_length);
2203 			soisdisconnected(so, EPROTO);
2204 			mutex_exit(&so->so_lock);
2205 			strsetrerror(SOTOV(so), 0, 0, sogetrderr);
2206 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2207 			strseteof(SOTOV(so), 1);
2208 			freemsg(mp);
2209 			/*
2210 			 * strseteof takes care of read side wakeups,
2211 			 * pollwakeups, and signals.
2212 			 */
2213 			*wakeups = WSLEEP;
2214 			*allmsgsigs = S_OUTPUT;
2215 			*pollwakeups = POLLOUT;
2216 			return (NULL);
2217 		}
2218 		addr = sogetoff(mp, conn_con->RES_offset, addrlen, 1);
2219 		if (addr == NULL) {
2220 			zcmn_err(getzoneid(), CE_WARN,
2221 			    "sockfs: T_conn_con with invalid "
2222 			    "addrlen/offset %u/%d\n",
2223 			    addrlen, conn_con->RES_offset);
2224 			mutex_exit(&so->so_lock);
2225 			strsetrerror(SOTOV(so), 0, 0, sogetrderr);
2226 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2227 			strseteof(SOTOV(so), 1);
2228 			freemsg(mp);
2229 			/*
2230 			 * strseteof takes care of read side wakeups,
2231 			 * pollwakeups, and signals.
2232 			 */
2233 			*wakeups = WSLEEP;
2234 			*allmsgsigs = S_OUTPUT;
2235 			*pollwakeups = POLLOUT;
2236 			return (NULL);
2237 		}
2238 
2239 		/*
2240 		 * Save for getpeername.
2241 		 */
2242 		if (so->so_family != AF_UNIX) {
2243 			so->so_faddr_len = (socklen_t)addrlen;
2244 			ASSERT(so->so_faddr_len <= so->so_faddr_maxlen);
2245 			bcopy(addr, so->so_faddr_sa, addrlen);
2246 			so->so_state |= SS_FADDR_VALID;
2247 		}
2248 
2249 		if (so->so_peercred != NULL)
2250 			crfree(so->so_peercred);
2251 		so->so_peercred = DB_CRED(mp);
2252 		so->so_cpid = DB_CPID(mp);
2253 		if (so->so_peercred != NULL)
2254 			crhold(so->so_peercred);
2255 
2256 		/* Wakeup anybody sleeping in sowaitconnected */
2257 		soisconnected(so);
2258 		mutex_exit(&so->so_lock);
2259 
2260 		/*
2261 		 * The socket is now available for sending data.
2262 		 */
2263 		*wakeups = WSLEEP;
2264 		*allmsgsigs = S_OUTPUT;
2265 		*pollwakeups = POLLOUT;
2266 		freemsg(mp);
2267 		return (NULL);
2268 	}
2269 
2270 	/*
2271 	 * Extra processing in case of an SSL proxy, before queuing or
2272 	 * forwarding to the fallback endpoint
2273 	 */
2274 	case T_SSL_PROXY_CONN_IND:
2275 	case T_CONN_IND:
2276 		/*
2277 		 * Verify the min size and queue the message on
2278 		 * the so_conn_ind_head/tail list.
2279 		 */
2280 		if (MBLKL(mp) < sizeof (struct T_conn_ind)) {
2281 			zcmn_err(getzoneid(), CE_WARN,
2282 			    "sockfs: Too short T_CONN_IND. Len = %ld\n",
2283 			    (ptrdiff_t)(MBLKL(mp)));
2284 			freemsg(mp);
2285 			return (NULL);
2286 		}
2287 
2288 		if (audit_active)
2289 			audit_sock(T_CONN_IND, strvp2wq(vp), mp, 0);
2290 		if (!(so->so_state & SS_ACCEPTCONN)) {
2291 			zcmn_err(getzoneid(), CE_WARN,
2292 			    "sockfs: T_conn_ind on non-listening socket\n");
2293 			freemsg(mp);
2294 			return (NULL);
2295 		}
2296 
2297 		if (tpr->type == T_SSL_PROXY_CONN_IND && mp->b_cont == NULL) {
2298 			/* No context: need to fall back */
2299 			struct sonode *fbso;
2300 			stdata_t *fbstp;
2301 
2302 			tpr->type = T_CONN_IND;
2303 
2304 			fbso = kssl_find_fallback(so->so_kssl_ent);
2305 
2306 			/*
2307 			 * No fallback: the remote will timeout and
2308 			 * disconnect.
2309 			 */
2310 			if (fbso == NULL) {
2311 				freemsg(mp);
2312 				return (NULL);
2313 			}
2314 			fbstp = SOTOV(fbso)->v_stream;
2315 			qreply(fbstp->sd_wrq->q_next, mp);
2316 			return (NULL);
2317 		}
2318 		soqueueconnind(so, mp);
2319 		*allmsgsigs = S_INPUT | S_RDNORM;
2320 		*pollwakeups = POLLIN | POLLRDNORM;
2321 		*wakeups = RSLEEP;
2322 		return (NULL);
2323 
2324 	case T_ORDREL_IND:
2325 		if (MBLKL(mp) < sizeof (struct T_ordrel_ind)) {
2326 			zcmn_err(getzoneid(), CE_WARN,
2327 			    "sockfs: Too short T_ORDREL_IND. Len = %ld\n",
2328 			    (ptrdiff_t)(MBLKL(mp)));
2329 			freemsg(mp);
2330 			return (NULL);
2331 		}
2332 
2333 		/*
2334 		 * Some providers send this when not fully connected.
2335 		 * SunLink X.25 needs to retrieve disconnect reason after
2336 		 * disconnect for compatibility. It uses T_ORDREL_IND
2337 		 * instead of T_DISCON_IND so that it may use the
2338 		 * endpoint after a connect failure to retrieve the
2339 		 * reason using an ioctl. Thus we explicitly clear
2340 		 * SS_ISCONNECTING here for SunLink X.25.
2341 		 * This is a needed TPI violation.
2342 		 */
2343 		mutex_enter(&so->so_lock);
2344 		so->so_state &= ~SS_ISCONNECTING;
2345 		socantrcvmore(so);
2346 		mutex_exit(&so->so_lock);
2347 		strseteof(SOTOV(so), 1);
2348 		/*
2349 		 * strseteof takes care of read side wakeups,
2350 		 * pollwakeups, and signals.
2351 		 */
2352 		freemsg(mp);
2353 		return (NULL);
2354 
2355 	case T_DISCON_IND:
2356 		if (MBLKL(mp) < sizeof (struct T_discon_ind)) {
2357 			zcmn_err(getzoneid(), CE_WARN,
2358 			    "sockfs: Too short T_DISCON_IND. Len = %ld\n",
2359 			    (ptrdiff_t)(MBLKL(mp)));
2360 			freemsg(mp);
2361 			return (NULL);
2362 		}
2363 		if (so->so_state & SS_ACCEPTCONN) {
2364 			/*
2365 			 * This is a listener. Look for a queued T_CONN_IND
2366 			 * with a matching sequence number and remove it
2367 			 * from the list.
2368 			 * It is normal to not find the sequence number since
2369 			 * the soaccept might have already dequeued it
2370 			 * (in which case the T_CONN_RES will fail with
2371 			 * TBADSEQ).
2372 			 */
2373 			(void) soflushconnind(so, tpr->discon_ind.SEQ_number);
2374 			freemsg(mp);
2375 			return (0);
2376 		}
2377 
2378 		/*
2379 		 * Not a listener
2380 		 *
2381 		 * If SS_CANTRCVMORE for AF_UNIX ignore the discon_reason.
2382 		 * Such a discon_ind appears when the peer has first done
2383 		 * a shutdown() followed by a close() in which case we just
2384 		 * want to record socantsendmore.
2385 		 * In this case sockfs first receives a T_ORDREL_IND followed
2386 		 * by a T_DISCON_IND.
2387 		 * Note that for other transports (e.g. TCP) we need to handle
2388 		 * the discon_ind in this case since it signals an error.
2389 		 */
2390 		mutex_enter(&so->so_lock);
2391 		if ((so->so_state & SS_CANTRCVMORE) &&
2392 		    (so->so_family == AF_UNIX)) {
2393 			socantsendmore(so);
2394 			mutex_exit(&so->so_lock);
2395 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2396 			dprintso(so, 1,
2397 			    ("T_DISCON_IND: error %d\n", so->so_error));
2398 			freemsg(mp);
2399 			/*
2400 			 * Set these variables for caller to process them.
2401 			 * For the else part where T_DISCON_IND is processed,
2402 			 * this will be done in the function being called
2403 			 * (strsock_discon_ind())
2404 			 */
2405 			*wakeups = WSLEEP;
2406 			*allmsgsigs = S_OUTPUT;
2407 			*pollwakeups = POLLOUT;
2408 		} else if (so->so_flag & (SOASYNC_UNBIND | SOLOCKED)) {
2409 			/*
2410 			 * Deferred processing of T_DISCON_IND
2411 			 */
2412 			so_save_discon_ind(so, mp, strsock_discon_ind);
2413 			mutex_exit(&so->so_lock);
2414 		} else {
2415 			/*
2416 			 * Process T_DISCON_IND now
2417 			 */
2418 			(void) strsock_discon_ind(so, mp);
2419 			mutex_exit(&so->so_lock);
2420 		}
2421 		return (NULL);
2422 
2423 	case T_UDERROR_IND: {
2424 		struct T_uderror_ind	*tudi = &tpr->uderror_ind;
2425 		void			*addr;
2426 		t_uscalar_t		addrlen;
2427 		int			error;
2428 
2429 		dprintso(so, 0,
2430 		    ("T_UDERROR_IND: error %d\n", tudi->ERROR_type));
2431 
2432 		if (MBLKL(mp) < sizeof (struct T_uderror_ind)) {
2433 			zcmn_err(getzoneid(), CE_WARN,
2434 			    "sockfs: Too short T_UDERROR_IND. Len = %ld\n",
2435 			    (ptrdiff_t)(MBLKL(mp)));
2436 			freemsg(mp);
2437 			return (NULL);
2438 		}
2439 		/* Ignore on connection-oriented transports */
2440 		if (so->so_mode & SM_CONNREQUIRED) {
2441 			freemsg(mp);
2442 			eprintsoline(so, 0);
2443 			zcmn_err(getzoneid(), CE_WARN,
2444 			    "sockfs: T_uderror_ind on connection-oriented "
2445 			    "transport\n");
2446 			return (NULL);
2447 		}
2448 		addrlen = tudi->DEST_length;
2449 		addr = sogetoff(mp, tudi->DEST_offset, addrlen, 1);
2450 		if (addr == NULL) {
2451 			zcmn_err(getzoneid(), CE_WARN,
2452 			    "sockfs: T_uderror_ind with invalid "
2453 			    "addrlen/offset %u/%d\n",
2454 			    addrlen, tudi->DEST_offset);
2455 			freemsg(mp);
2456 			return (NULL);
2457 		}
2458 
2459 		/* Verify source address for connected socket. */
2460 		mutex_enter(&so->so_lock);
2461 		if (so->so_state & SS_ISCONNECTED) {
2462 			void *faddr;
2463 			t_uscalar_t faddr_len;
2464 			boolean_t match = B_FALSE;
2465 
2466 			switch (so->so_family) {
2467 			case AF_INET: {
2468 				/* Compare just IP address and port */
2469 				struct sockaddr_in *sin1, *sin2;
2470 
2471 				sin1 = (struct sockaddr_in *)so->so_faddr_sa;
2472 				sin2 = (struct sockaddr_in *)addr;
2473 				if (addrlen == sizeof (struct sockaddr_in) &&
2474 				    sin1->sin_port == sin2->sin_port &&
2475 				    sin1->sin_addr.s_addr ==
2476 				    sin2->sin_addr.s_addr)
2477 					match = B_TRUE;
2478 				break;
2479 			}
2480 			case AF_INET6: {
2481 				/* Compare just IP address and port. Not flow */
2482 				struct sockaddr_in6 *sin1, *sin2;
2483 
2484 				sin1 = (struct sockaddr_in6 *)so->so_faddr_sa;
2485 				sin2 = (struct sockaddr_in6 *)addr;
2486 				if (addrlen == sizeof (struct sockaddr_in6) &&
2487 				    sin1->sin6_port == sin2->sin6_port &&
2488 				    IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr,
2489 				    &sin2->sin6_addr))
2490 					match = B_TRUE;
2491 				break;
2492 			}
2493 			case AF_UNIX:
2494 				faddr = &so->so_ux_faddr;
2495 				faddr_len =
2496 				    (t_uscalar_t)sizeof (so->so_ux_faddr);
2497 				if (faddr_len == addrlen &&
2498 				    bcmp(addr, faddr, addrlen) == 0)
2499 					match = B_TRUE;
2500 				break;
2501 			default:
2502 				faddr = so->so_faddr_sa;
2503 				faddr_len = (t_uscalar_t)so->so_faddr_len;
2504 				if (faddr_len == addrlen &&
2505 				    bcmp(addr, faddr, addrlen) == 0)
2506 					match = B_TRUE;
2507 				break;
2508 			}
2509 
2510 			if (!match) {
2511 #ifdef DEBUG
2512 				dprintso(so, 0,
2513 				    ("sockfs: T_UDERR_IND mismatch: %s - ",
2514 				    pr_addr(so->so_family,
2515 				    (struct sockaddr *)addr,
2516 				    addrlen)));
2517 				dprintso(so, 0, ("%s\n",
2518 				    pr_addr(so->so_family, so->so_faddr_sa,
2519 				    so->so_faddr_len)));
2520 #endif /* DEBUG */
2521 				mutex_exit(&so->so_lock);
2522 				freemsg(mp);
2523 				return (NULL);
2524 			}
2525 			/*
2526 			 * Make the write error nonpersistent. If the error
2527 			 * is zero we use ECONNRESET.
2528 			 * This assumes that the name space for ERROR_type
2529 			 * is the errno name space.
2530 			 */
2531 			if (tudi->ERROR_type != 0)
2532 				error = tudi->ERROR_type;
2533 			else
2534 				error = ECONNRESET;
2535 
2536 			soseterror(so, error);
2537 			mutex_exit(&so->so_lock);
2538 			strsetrerror(SOTOV(so), 0, 0, sogetrderr);
2539 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2540 			*wakeups = RSLEEP | WSLEEP;
2541 			*allmsgsigs = S_INPUT | S_RDNORM | S_OUTPUT;
2542 			*pollwakeups = POLLIN | POLLRDNORM | POLLOUT;
2543 			freemsg(mp);
2544 			return (NULL);
2545 		}
2546 		/*
2547 		 * If the application asked for delayed errors
2548 		 * record the T_UDERROR_IND so_eaddr_mp and the reason in
2549 		 * so_delayed_error for delayed error posting. If the reason
2550 		 * is zero use ECONNRESET.
2551 		 * Note that delayed error indications do not make sense for
2552 		 * AF_UNIX sockets since sendto checks that the destination
2553 		 * address is valid at the time of the sendto.
2554 		 */
2555 		if (!(so->so_options & SO_DGRAM_ERRIND)) {
2556 			mutex_exit(&so->so_lock);
2557 			freemsg(mp);
2558 			return (NULL);
2559 		}
2560 		if (so->so_eaddr_mp != NULL)
2561 			freemsg(so->so_eaddr_mp);
2562 
2563 		so->so_eaddr_mp = mp;
2564 		if (tudi->ERROR_type != 0)
2565 			error = tudi->ERROR_type;
2566 		else
2567 			error = ECONNRESET;
2568 		so->so_delayed_error = (ushort_t)error;
2569 		mutex_exit(&so->so_lock);
2570 		return (NULL);
2571 	}
2572 
2573 	case T_ERROR_ACK:
2574 		dprintso(so, 0,
2575 		    ("strsock_proto: T_ERROR_ACK for %d, error %d/%d\n",
2576 		    tpr->error_ack.ERROR_prim,
2577 		    tpr->error_ack.TLI_error,
2578 		    tpr->error_ack.UNIX_error));
2579 
2580 		if (MBLKL(mp) < sizeof (struct T_error_ack)) {
2581 			zcmn_err(getzoneid(), CE_WARN,
2582 			    "sockfs: Too short T_ERROR_ACK. Len = %ld\n",
2583 			    (ptrdiff_t)(MBLKL(mp)));
2584 			freemsg(mp);
2585 			return (NULL);
2586 		}
2587 		/*
2588 		 * Check if we were waiting for the async message
2589 		 */
2590 		mutex_enter(&so->so_lock);
2591 		if ((so->so_flag & SOASYNC_UNBIND) &&
2592 		    tpr->error_ack.ERROR_prim == T_UNBIND_REQ) {
2593 			so_unlock_single(so, SOASYNC_UNBIND);
2594 			mutex_exit(&so->so_lock);
2595 			freemsg(mp);
2596 			return (NULL);
2597 		}
2598 		mutex_exit(&so->so_lock);
2599 		soqueueack(so, mp);
2600 		return (NULL);
2601 
2602 	case T_OK_ACK:
2603 		if (MBLKL(mp) < sizeof (struct T_ok_ack)) {
2604 			zcmn_err(getzoneid(), CE_WARN,
2605 			    "sockfs: Too short T_OK_ACK. Len = %ld\n",
2606 			    (ptrdiff_t)(MBLKL(mp)));
2607 			freemsg(mp);
2608 			return (NULL);
2609 		}
2610 		/*
2611 		 * Check if we were waiting for the async message
2612 		 */
2613 		mutex_enter(&so->so_lock);
2614 		if ((so->so_flag & SOASYNC_UNBIND) &&
2615 		    tpr->ok_ack.CORRECT_prim == T_UNBIND_REQ) {
2616 			dprintso(so, 1,
2617 			    ("strsock_proto: T_OK_ACK async unbind\n"));
2618 			so_unlock_single(so, SOASYNC_UNBIND);
2619 			mutex_exit(&so->so_lock);
2620 			freemsg(mp);
2621 			return (NULL);
2622 		}
2623 		mutex_exit(&so->so_lock);
2624 		soqueueack(so, mp);
2625 		return (NULL);
2626 
2627 	case T_INFO_ACK:
2628 		if (MBLKL(mp) < sizeof (struct T_info_ack)) {
2629 			zcmn_err(getzoneid(), CE_WARN,
2630 			    "sockfs: Too short T_INFO_ACK. Len = %ld\n",
2631 			    (ptrdiff_t)(MBLKL(mp)));
2632 			freemsg(mp);
2633 			return (NULL);
2634 		}
2635 		soqueueack(so, mp);
2636 		return (NULL);
2637 
2638 	case T_CAPABILITY_ACK:
2639 		/*
2640 		 * A T_capability_ack need only be large enough to hold
2641 		 * the PRIM_type and CAP_bits1 fields; checking for anything
2642 		 * larger might reject a correct response from an older
2643 		 * provider.
2644 		 */
2645 		if (MBLKL(mp) < 2 * sizeof (t_uscalar_t)) {
2646 			zcmn_err(getzoneid(), CE_WARN,
2647 			    "sockfs: Too short T_CAPABILITY_ACK. Len = %ld\n",
2648 			    (ptrdiff_t)(MBLKL(mp)));
2649 			freemsg(mp);
2650 			return (NULL);
2651 		}
2652 		soqueueack(so, mp);
2653 		return (NULL);
2654 
2655 	case T_BIND_ACK:
2656 		if (MBLKL(mp) < sizeof (struct T_bind_ack)) {
2657 			zcmn_err(getzoneid(), CE_WARN,
2658 			    "sockfs: Too short T_BIND_ACK. Len = %ld\n",
2659 			    (ptrdiff_t)(MBLKL(mp)));
2660 			freemsg(mp);
2661 			return (NULL);
2662 		}
2663 		soqueueack(so, mp);
2664 		return (NULL);
2665 
2666 	case T_OPTMGMT_ACK:
2667 		if (MBLKL(mp) < sizeof (struct T_optmgmt_ack)) {
2668 			zcmn_err(getzoneid(), CE_WARN,
2669 			    "sockfs: Too short T_OPTMGMT_ACK. Len = %ld\n",
2670 			    (ptrdiff_t)(MBLKL(mp)));
2671 			freemsg(mp);
2672 			return (NULL);
2673 		}
2674 		soqueueack(so, mp);
2675 		return (NULL);
2676 	default:
2677 #ifdef DEBUG
2678 		zcmn_err(getzoneid(), CE_WARN,
2679 		    "sockfs: unknown TPI primitive %d received\n",
2680 		    tpr->type);
2681 #endif /* DEBUG */
2682 		freemsg(mp);
2683 		return (NULL);
2684 	}
2685 }
2686 
2687 /*
2688  * This routine is registered with the stream head to receive other
2689  * (non-data, and non-proto) messages.
2690  *
2691  * Returns NULL if the message was consumed.
2692  * Returns an mblk to make that mblk be processed by the stream head.
2693  *
2694  * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and
2695  * *pollwakeups) for the stream head to take action on.
2696  */
2697 static mblk_t *
2698 strsock_misc(vnode_t *vp, mblk_t *mp,
2699 		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
2700 		strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
2701 {
2702 	struct sonode *so;
2703 
2704 	so = VTOSO(vp);
2705 
2706 	dprintso(so, 1, ("strsock_misc(%p, %p, 0x%x)\n",
2707 	    (void *)vp, (void *)mp, DB_TYPE(mp)));
2708 
2709 	/* Set default return values */
2710 	*wakeups = *allmsgsigs = *firstmsgsigs = *pollwakeups = 0;
2711 
2712 	switch (DB_TYPE(mp)) {
2713 	case M_PCSIG:
2714 		/*
2715 		 * This assumes that an M_PCSIG for the urgent data arrives
2716 		 * before the corresponding T_EXDATA_IND.
2717 		 *
2718 		 * Note: Just like in SunOS 4.X and 4.4BSD a poll will be
2719 		 * awoken before the urgent data shows up.
2720 		 * For OOBINLINE this can result in select returning
2721 		 * only exceptions as opposed to except|read.
2722 		 */
2723 		if (*mp->b_rptr == SIGURG) {
2724 			mutex_enter(&so->so_lock);
2725 			dprintso(so, 1,
2726 			    ("SIGURG(%p): counts %d/%d state %s\n",
2727 			    (void *)vp, so->so_oobsigcnt,
2728 			    so->so_oobcnt,
2729 			    pr_state(so->so_state, so->so_mode)));
2730 			so_oob_sig(so, 1, allmsgsigs, pollwakeups);
2731 			dprintso(so, 1,
2732 			    ("after SIGURG(%p): counts %d/%d "
2733 			    " poll 0x%x sig 0x%x state %s\n",
2734 			    (void *)vp, so->so_oobsigcnt,
2735 			    so->so_oobcnt, *pollwakeups, *allmsgsigs,
2736 			    pr_state(so->so_state, so->so_mode)));
2737 			mutex_exit(&so->so_lock);
2738 		}
2739 		freemsg(mp);
2740 		return (NULL);
2741 
2742 	case M_SIG:
2743 	case M_HANGUP:
2744 	case M_UNHANGUP:
2745 	case M_ERROR:
2746 		/* M_ERRORs etc are ignored */
2747 		freemsg(mp);
2748 		return (NULL);
2749 
2750 	case M_FLUSH:
2751 		/*
2752 		 * Do not flush read queue. If the M_FLUSH
2753 		 * arrives because of an impending T_discon_ind
2754 		 * we still have to keep any queued data - this is part of
2755 		 * socket semantics.
2756 		 */
2757 		if (*mp->b_rptr & FLUSHW) {
2758 			*mp->b_rptr &= ~FLUSHR;
2759 			return (mp);
2760 		}
2761 		freemsg(mp);
2762 		return (NULL);
2763 
2764 	default:
2765 		return (mp);
2766 	}
2767 }
2768 
2769 
2770 /* Register to receive signals for certain events */
2771 int
2772 so_set_asyncsigs(vnode_t *vp, pid_t pgrp, int events, int mode, cred_t *cr)
2773 {
2774 	struct strsigset ss;
2775 	int32_t rval;
2776 
2777 	/*
2778 	 * Note that SOLOCKED will be set except for the call from soaccept().
2779 	 */
2780 	ASSERT(!mutex_owned(&VTOSO(vp)->so_lock));
2781 	ss.ss_pid = pgrp;
2782 	ss.ss_events = events;
2783 	return (strioctl(vp, I_ESETSIG, (intptr_t)&ss, mode, K_TO_K, cr,
2784 	    &rval));
2785 }
2786 
2787 
2788 /* Register for events matching the SS_ASYNC flag */
2789 int
2790 so_set_events(struct sonode *so, vnode_t *vp, cred_t *cr)
2791 {
2792 	int events = so->so_state & SS_ASYNC ?
2793 	    S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT :
2794 	    S_RDBAND | S_BANDURG;
2795 
2796 	return (so_set_asyncsigs(vp, so->so_pgrp, events, 0, cr));
2797 }
2798 
2799 
2800 /* Change the SS_ASYNC flag, and update signal delivery if needed */
2801 int
2802 so_flip_async(struct sonode *so, vnode_t *vp, int mode, cred_t *cr)
2803 {
2804 	ASSERT(mutex_owned(&so->so_lock));
2805 	if (so->so_pgrp != 0) {
2806 		int error;
2807 		int events = so->so_state & SS_ASYNC ?		/* Old flag */
2808 		    S_RDBAND | S_BANDURG :			/* New sigs */
2809 		    S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT;
2810 
2811 		so_lock_single(so);
2812 		mutex_exit(&so->so_lock);
2813 
2814 		error = so_set_asyncsigs(vp, so->so_pgrp, events, mode, cr);
2815 
2816 		mutex_enter(&so->so_lock);
2817 		so_unlock_single(so, SOLOCKED);
2818 		if (error)
2819 			return (error);
2820 	}
2821 	so->so_state ^= SS_ASYNC;
2822 	return (0);
2823 }
2824 
2825 /*
2826  * Set new pid/pgrp for SIGPOLL (or SIGIO for FIOASYNC mode), replacing
2827  * any existing one.  If passed zero, just clear the existing one.
2828  */
2829 int
2830 so_set_siggrp(struct sonode *so, vnode_t *vp, pid_t pgrp, int mode, cred_t *cr)
2831 {
2832 	int events = so->so_state & SS_ASYNC ?
2833 	    S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT :
2834 	    S_RDBAND | S_BANDURG;
2835 	int error;
2836 
2837 	ASSERT(mutex_owned(&so->so_lock));
2838 
2839 	/*
2840 	 * Change socket process (group).
2841 	 *
2842 	 * strioctl (via so_set_asyncsigs) will perform permission check and
2843 	 * also keep a PID_HOLD to prevent the pid from being reused.
2844 	 */
2845 	so_lock_single(so);
2846 	mutex_exit(&so->so_lock);
2847 
2848 	if (pgrp != 0) {
2849 		dprintso(so, 1, ("setown: adding pgrp %d ev 0x%x\n",
2850 		    pgrp, events));
2851 		error = so_set_asyncsigs(vp, pgrp, events, mode, cr);
2852 		if (error != 0) {
2853 			eprintsoline(so, error);
2854 			goto bad;
2855 		}
2856 	}
2857 	/* Remove the previously registered process/group */
2858 	if (so->so_pgrp != 0) {
2859 		dprintso(so, 1, ("setown: removing pgrp %d\n", so->so_pgrp));
2860 		error = so_set_asyncsigs(vp, so->so_pgrp, 0, mode, cr);
2861 		if (error != 0) {
2862 			eprintsoline(so, error);
2863 			error = 0;
2864 		}
2865 	}
2866 	mutex_enter(&so->so_lock);
2867 	so_unlock_single(so, SOLOCKED);
2868 	so->so_pgrp = pgrp;
2869 	return (0);
2870 bad:
2871 	mutex_enter(&so->so_lock);
2872 	so_unlock_single(so, SOLOCKED);
2873 	return (error);
2874 }
2875 
2876 
2877 
2878 /*
2879  * Translate a TLI(/XTI) error into a system error as best we can.
2880  */
2881 static const int tli_errs[] = {
2882 		0,		/* no error	*/
2883 		EADDRNOTAVAIL,  /* TBADADDR	*/
2884 		ENOPROTOOPT,	/* TBADOPT	*/
2885 		EACCES,		/* TACCES	*/
2886 		EBADF,		/* TBADF	*/
2887 		EADDRNOTAVAIL,	/* TNOADDR	*/
2888 		EPROTO,		/* TOUTSTATE	*/
2889 		ECONNABORTED,	/* TBADSEQ	*/
2890 		0,		/* TSYSERR - will never get	*/
2891 		EPROTO,		/* TLOOK - should never be sent by transport */
2892 		EMSGSIZE,	/* TBADDATA	*/
2893 		EMSGSIZE,	/* TBUFOVFLW	*/
2894 		EPROTO,		/* TFLOW	*/
2895 		EWOULDBLOCK,	/* TNODATA	*/
2896 		EPROTO,		/* TNODIS	*/
2897 		EPROTO,		/* TNOUDERR	*/
2898 		EINVAL,		/* TBADFLAG	*/
2899 		EPROTO,		/* TNOREL	*/
2900 		EOPNOTSUPP,	/* TNOTSUPPORT	*/
2901 		EPROTO,		/* TSTATECHNG	*/
2902 		/* following represent error namespace expansion with XTI */
2903 		EPROTO,		/* TNOSTRUCTYPE - never sent by transport */
2904 		EPROTO,		/* TBADNAME - never sent by transport */
2905 		EPROTO,		/* TBADQLEN - never sent by transport */
2906 		EADDRINUSE,	/* TADDRBUSY	*/
2907 		EBADF,		/* TINDOUT	*/
2908 		EBADF,		/* TPROVMISMATCH */
2909 		EBADF,		/* TRESQLEN	*/
2910 		EBADF,		/* TRESADDR	*/
2911 		EPROTO,		/* TQFULL - never sent by transport */
2912 		EPROTO,		/* TPROTO	*/
2913 };
2914 
2915 static int
2916 tlitosyserr(int terr)
2917 {
2918 	ASSERT(terr != TSYSERR);
2919 	if (terr >= (sizeof (tli_errs) / sizeof (tli_errs[0])))
2920 		return (EPROTO);
2921 	else
2922 		return (tli_errs[terr]);
2923 }
2924 
2925 /*
2926  * Sockfs sodirect STREAMS read put procedure. Called from sodirect enable
2927  * transport driver/module with an mblk_t chain.
2928  *
2929  * Note, we in-line putq() for the fast-path cases of q is empty, q_last and
2930  * bp are of type M_DATA. All other cases we call putq().
2931  *
2932  * On success a zero will be return, else an errno will be returned.
2933  */
2934 int
2935 sodput(sodirect_t *sodp, mblk_t *bp)
2936 {
2937 	queue_t		*q = sodp->sod_q;
2938 	struct stdata	*stp = (struct stdata *)q->q_ptr;
2939 	mblk_t		*nbp;
2940 	mblk_t		*last = q->q_last;
2941 	int		bytecnt = 0;
2942 	int		mblkcnt = 0;
2943 
2944 
2945 	ASSERT(MUTEX_HELD(sodp->sod_lockp));
2946 
2947 	if (stp->sd_flag == STREOF) {
2948 		do {
2949 			if ((nbp = bp->b_next) != NULL)
2950 				bp->b_next = NULL;
2951 			freemsg(bp);
2952 		} while ((bp = nbp) != NULL);
2953 
2954 		return (0);
2955 	}
2956 
2957 	mutex_enter(QLOCK(q));
2958 	if (q->q_first == NULL) {
2959 		/* Q empty, really fast fast-path */
2960 		bp->b_prev = NULL;
2961 		bp->b_next = NULL;
2962 		q->q_first = bp;
2963 		q->q_last = bp;
2964 
2965 	} else if (last->b_datap->db_type == M_DATA &&
2966 	    bp->b_datap->db_type == M_DATA) {
2967 		/*
2968 		 * Last mblk_t chain and bp are both type M_DATA so
2969 		 * in-line putq() here, if the DBLK_UIOA state match
2970 		 * add bp to the end of the current last chain, else
2971 		 * start a new last chain with bp.
2972 		 */
2973 		if ((last->b_datap->db_flags & DBLK_UIOA) ==
2974 		    (bp->b_datap->db_flags & DBLK_UIOA)) {
2975 			/* Added to end */
2976 			while ((nbp = last->b_cont) != NULL)
2977 				last = nbp;
2978 			last->b_cont = bp;
2979 		} else {
2980 			/* New last */
2981 			ASSERT((bp->b_datap->db_flags & DBLK_UIOA) == 0 ||
2982 			    msgdsize(bp) == sodp->sod_uioa.uioa_mbytes);
2983 			last->b_next = bp;
2984 			bp->b_next = NULL;
2985 			bp->b_prev = last;
2986 			q->q_last = bp;
2987 		}
2988 	} else {
2989 		/*
2990 		 * Can't use q_last so just call putq().
2991 		 */
2992 		mutex_exit(QLOCK(q));
2993 
2994 		ASSERT((bp->b_datap->db_flags & DBLK_UIOA) == 0 ||
2995 		    msgdsize(bp) == sodp->sod_uioa.uioa_mbytes);
2996 		(void) putq(q, bp);
2997 		return (0);
2998 	}
2999 
3000 	/* Count bytes and mblk_t's */
3001 	do {
3002 		bytecnt += MBLKL(bp);
3003 		mblkcnt++;
3004 	} while ((bp = bp->b_cont) != NULL);
3005 	q->q_count += bytecnt;
3006 	q->q_mblkcnt += mblkcnt;
3007 
3008 	/* Check for QFULL */
3009 	if (q->q_count >= q->q_hiwat + sodp->sod_want ||
3010 	    q->q_mblkcnt >= q->q_hiwat) {
3011 		q->q_flag |= QFULL;
3012 	}
3013 
3014 	mutex_exit(QLOCK(q));
3015 	return (0);
3016 }
3017 
3018 /*
3019  * Sockfs sodirect read wakeup. Called from a sodirect enabled transport
3020  * driver/module to indicate that read-side data is available.
3021  *
3022  * On return the sodirect_t.lock mutex will be exited so this must be the
3023  * last sodirect_t call to guarantee atomic access of *sodp.
3024  */
3025 void
3026 sodwakeup(sodirect_t *sodp)
3027 {
3028 	queue_t		*q = sodp->sod_q;
3029 	struct stdata	*stp = (struct stdata *)q->q_ptr;
3030 
3031 	ASSERT(MUTEX_HELD(sodp->sod_lockp));
3032 
3033 	if (stp->sd_flag & RSLEEP) {
3034 		stp->sd_flag &= ~RSLEEP;
3035 		cv_broadcast(&q->q_wait);
3036 	}
3037 
3038 	if (stp->sd_rput_opt & SR_POLLIN) {
3039 		stp->sd_rput_opt &= ~SR_POLLIN;
3040 		mutex_exit(sodp->sod_lockp);
3041 		pollwakeup(&stp->sd_pollist, POLLIN | POLLRDNORM);
3042 	} else
3043 		mutex_exit(sodp->sod_lockp);
3044 }
3045