xref: /titanic_44/usr/src/uts/common/fs/sockfs/sockstr.c (revision 0dc974a9a2e66d676505db23524ebff105fb36a9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/inttypes.h>
31 #include <sys/t_lock.h>
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/buf.h>
35 #include <sys/conf.h>
36 #include <sys/cred.h>
37 #include <sys/kmem.h>
38 #include <sys/sysmacros.h>
39 #include <sys/vfs.h>
40 #include <sys/vnode.h>
41 #include <sys/debug.h>
42 #include <sys/errno.h>
43 #include <sys/time.h>
44 #include <sys/file.h>
45 #include <sys/user.h>
46 #include <sys/stream.h>
47 #include <sys/strsubr.h>
48 #include <sys/esunddi.h>
49 #include <sys/flock.h>
50 #include <sys/modctl.h>
51 #include <sys/vtrace.h>
52 #include <sys/strsun.h>
53 #include <sys/cmn_err.h>
54 #include <sys/proc.h>
55 #include <sys/ddi.h>
56 #include <sys/kmem_impl.h>
57 
58 #include <sys/suntpi.h>
59 #include <sys/socket.h>
60 #include <sys/sockio.h>
61 #include <sys/socketvar.h>
62 #include <netinet/in.h>
63 
64 #include <sys/tiuser.h>
65 #define	_SUN_TPI_VERSION	2
66 #include <sys/tihdr.h>
67 
68 #include <inet/kssl/ksslapi.h>
69 
70 #include <c2/audit.h>
71 
72 #include <sys/dcopy.h>
73 
/* Default socket version; initialized to SOV_SOCKSTREAM. */
74 int so_default_version = SOV_SOCKSTREAM;
75 
76 #ifdef DEBUG
77 /* Set sockdebug to print debug messages when SO_DEBUG is set */
78 int sockdebug = 0;
79 
80 /* Set sockprinterr to print error messages when SO_DEBUG is set */
81 int sockprinterr = 0;
82 
83 /*
84  * Set so_default_options to SO_DEBUG if all sockets should be created
85  * with SO_DEBUG set. This is needed to get debug printouts from the
86  * socket() call itself. so_strinit() copies this value into so_options.
87  */
88 int so_default_options = 0;
89 #endif /* DEBUG */
90 
91 #ifdef SOCK_TEST
92 /*
93  * Set to number of ticks to limit cv_waits for code coverage testing.
94  * Set to 1000 when SO_DEBUG is set to 2.
 * When nonzero, sowaitack() uses it as the wait limit for callers that
 * passed a wait of zero.
95  */
96 clock_t sock_test_timelimit = 0;
97 #endif /* SOCK_TEST */
98 
99 /*
100  * For concurrency testing of e.g. opening /dev/ip which does not
101  * handle T_INFO_REQ messages.
 * When nonzero, do_tinfo() and do_tcapability() skip the info request and
 * set so_addr_size to zero instead.
102  */
103 int so_no_tinfo = 0;
104 
105 /*
106  * Timeout for getting a T_CAPABILITY_ACK - it is possible for a provider
107  * to simply ignore the T_CAPABILITY_REQ.
 * do_tcapability() multiplies this value by hz before waiting.
108  */
109 clock_t	sock_capability_timeout	= 2;	/* seconds */
110 
/*
 * Forward declarations of routines local to this file.
 */
111 static int	do_tcapability(struct sonode *so, t_uscalar_t cap_bits1);
112 static void	so_removehooks(struct sonode *so);
113 
/*
 * Stream head read-side hooks; so_installhooks() registers both of them
 * through strsetrputhooks().
 */
114 static mblk_t *strsock_proto(vnode_t *vp, mblk_t *mp,
115 		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
116 		strsigset_t *allmsgsigs, strpollset_t *pollwakeups);
117 static mblk_t *strsock_misc(vnode_t *vp, mblk_t *mp,
118 		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
119 		strsigset_t *allmsgsigs, strpollset_t *pollwakeups);
120 
/*
 * Used by sowaitprim() to turn the TLI_error of a T_ERROR_ACK into the
 * error value it returns.
 */
121 static int tlitosyserr(int terr);
122 
123 /*
124  * Sodirect kmem_cache and put/wakeup functions.
 * The cache is created in sostr_init(); so_strinit() allocates from it and
 * installs sodput/sodwakeup as the sod_enqueue/sod_wakeup callbacks.
125  */
126 struct kmem_cache *socktpi_sod_cache;
127 static int sodput(sodirect_t *, mblk_t *);
128 static void sodwakeup(sodirect_t *);
129 
130 /*
131  * Called by sockinit() when sockfs is loaded.
132  */
133 int
134 sostr_init()
135 {
136 	/* Allocate sodirect_t kmem_cache */
137 	socktpi_sod_cache = kmem_cache_create("socktpi_sod_cache",
138 	    sizeof (sodirect_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
139 
140 	return (0);
141 }
142 
143 /*
144  * Convert a socket to a stream. Invoked when the illusory sockmod
145  * is popped from the stream.
146  * Change the stream head back to default operation without losing
147  * any messages (T_conn_ind's are moved to the stream head queue).
148  */
149 int
150 so_sock2stream(struct sonode *so)
151 {
152 	struct vnode		*vp = SOTOV(so);
153 	queue_t			*rq;
154 	mblk_t			*mp;
155 	int			error = 0;
156 
157 	ASSERT(MUTEX_HELD(&so->so_plumb_lock));
158 
159 	mutex_enter(&so->so_lock);
160 	so_lock_single(so);
161 
162 	ASSERT(so->so_version != SOV_STREAM);
163 
164 	if (so->so_state & SS_DIRECT) {
165 		mblk_t **mpp;
166 		int rval;
167 
168 		/*
169 		 * Tell the transport below that sockmod is being popped
170 		 */
171 		mutex_exit(&so->so_lock);
172 		error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K, CRED(),
173 		    &rval);
174 		mutex_enter(&so->so_lock);
175 		if (error != 0) {
176 			dprintso(so, 0, ("so_sock2stream(%p): "
177 			    "_SIOCSOCKFALLBACK failed\n", (void *)so));
178 			goto exit;
179 		}
180 		so->so_state &= ~SS_DIRECT;
181 
182 		for (mpp = &so->so_conn_ind_head; (mp = *mpp) != NULL;
183 		    mpp = &mp->b_next) {
184 			struct T_conn_ind	*conn_ind;
185 
186 			/*
187 			 * strsock_proto() has already verified the length of
188 			 * this message block.
189 			 */
190 			ASSERT(MBLKL(mp) >= sizeof (struct T_conn_ind));
191 
192 			conn_ind = (struct T_conn_ind *)mp->b_rptr;
193 			if (conn_ind->OPT_length == 0 &&
194 			    conn_ind->OPT_offset == 0)
195 				continue;
196 
197 			if (DB_REF(mp) > 1) {
198 				mblk_t	*newmp;
199 				size_t	length;
200 				cred_t	*cr;
201 
202 				/*
203 				 * Copy the message block because it is used
204 				 * elsewhere, too.
205 				 */
206 				length = MBLKL(mp);
207 				newmp = soallocproto(length, _ALLOC_INTR);
208 				if (newmp == NULL) {
209 					error = EINTR;
210 					goto exit;
211 				}
212 				bcopy(mp->b_rptr, newmp->b_wptr, length);
213 				newmp->b_wptr += length;
214 				newmp->b_next = mp->b_next;
215 				cr = DB_CRED(mp);
216 				if (cr != NULL)
217 					mblk_setcred(newmp, cr);
218 				DB_CPID(newmp) = DB_CPID(mp);
219 
220 				/*
221 				 * Link the new message block into the queue
222 				 * and free the old one.
223 				 */
224 				*mpp = newmp;
225 				mp->b_next = NULL;
226 				freemsg(mp);
227 
228 				mp = newmp;
229 				conn_ind = (struct T_conn_ind *)mp->b_rptr;
230 			}
231 
232 			/*
233 			 * Remove options added by TCP for accept fast-path.
234 			 */
235 			conn_ind->OPT_length = 0;
236 			conn_ind->OPT_offset = 0;
237 		}
238 	}
239 
240 	so->so_version = SOV_STREAM;
241 	so->so_priv = NULL;
242 
243 	/*
244 	 * Remove the hooks in the stream head to avoid queuing more
245 	 * packets in sockfs.
246 	 */
247 	mutex_exit(&so->so_lock);
248 	so_removehooks(so);
249 	mutex_enter(&so->so_lock);
250 
251 	/*
252 	 * Clear any state related to urgent data. Leave any T_EXDATA_IND
253 	 * on the queue - the behavior of urgent data after a switch is
254 	 * left undefined.
255 	 */
256 	so->so_error = so->so_delayed_error = 0;
257 	freemsg(so->so_oobmsg);
258 	so->so_oobmsg = NULL;
259 	so->so_oobsigcnt = so->so_oobcnt = 0;
260 
261 	so->so_state &= ~(SS_RCVATMARK|SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA|
262 	    SS_HASCONNIND|SS_SAVEDEOR);
263 	ASSERT(so_verify_oobstate(so));
264 
265 	freemsg(so->so_ack_mp);
266 	so->so_ack_mp = NULL;
267 
268 	/*
269 	 * Flush the T_DISCON_IND on so_discon_ind_mp.
270 	 */
271 	so_flush_discon_ind(so);
272 
273 	/*
274 	 * Move any queued T_CONN_IND messages to stream head queue.
275 	 */
276 	rq = RD(strvp2wq(vp));
277 	while ((mp = so->so_conn_ind_head) != NULL) {
278 		so->so_conn_ind_head = mp->b_next;
279 		mp->b_next = NULL;
280 		if (so->so_conn_ind_head == NULL) {
281 			ASSERT(so->so_conn_ind_tail == mp);
282 			so->so_conn_ind_tail = NULL;
283 		}
284 		dprintso(so, 0,
285 		    ("so_sock2stream(%p): moving T_CONN_IND\n",
286 		    (void *)so));
287 
288 		/* Drop lock across put() */
289 		mutex_exit(&so->so_lock);
290 		put(rq, mp);
291 		mutex_enter(&so->so_lock);
292 	}
293 
294 exit:
295 	ASSERT(MUTEX_HELD(&so->so_lock));
296 	so_unlock_single(so, SOLOCKED);
297 	mutex_exit(&so->so_lock);
298 	return (error);
299 }
300 
301 /*
302  * Covert a stream back to a socket. This is invoked when the illusory
303  * sockmod is pushed on a stream (where the stream was "created" by
304  * popping the illusory sockmod).
305  * This routine can not recreate the socket state (certain aspects of
306  * it like urgent data state and the bound/connected addresses for AF_UNIX
307  * sockets can not be recreated by asking the transport for information).
308  * Thus this routine implicitly assumes that the socket is in an initial
309  * state (as if it was just created). It flushes any messages queued on the
310  * read queue to avoid dealing with e.g. TPI acks or T_exdata_ind messages.
311  */
312 void
313 so_stream2sock(struct sonode *so)
314 {
315 	struct vnode *vp = SOTOV(so);
316 
317 	ASSERT(MUTEX_HELD(&so->so_plumb_lock));
318 
319 	mutex_enter(&so->so_lock);
320 	so_lock_single(so);
321 	ASSERT(so->so_version == SOV_STREAM);
322 	so->so_version = SOV_SOCKSTREAM;
323 	so->so_pushcnt = 0;
324 	mutex_exit(&so->so_lock);
325 
326 	/*
327 	 * Set a permenent error to force any thread in sorecvmsg to
328 	 * return (and drop SOREADLOCKED). Clear the error once
329 	 * we have SOREADLOCKED.
330 	 * This makes a read sleeping during the I_PUSH of sockmod return
331 	 * EIO.
332 	 */
333 	strsetrerror(SOTOV(so), EIO, 1, NULL);
334 
335 	/*
336 	 * Get the read lock before flushing data to avoid
337 	 * problems with the T_EXDATA_IND MSG_PEEK code in sorecvmsg.
338 	 */
339 	mutex_enter(&so->so_lock);
340 	(void) so_lock_read(so, 0);	/* Set SOREADLOCKED */
341 	mutex_exit(&so->so_lock);
342 
343 	strsetrerror(SOTOV(so), 0, 0, NULL);
344 	so_installhooks(so);
345 
346 	/*
347 	 * Flush everything on the read queue.
348 	 * This ensures that no T_CONN_IND remain and that no T_EXDATA_IND
349 	 * remain; those types of messages would confuse sockfs.
350 	 */
351 	strflushrq(vp, FLUSHALL);
352 	mutex_enter(&so->so_lock);
353 
354 	/*
355 	 * Flush the T_DISCON_IND on so_discon_ind_mp.
356 	 */
357 	so_flush_discon_ind(so);
358 	so_unlock_read(so);	/* Clear SOREADLOCKED */
359 
360 	so_unlock_single(so, SOLOCKED);
361 	mutex_exit(&so->so_lock);
362 }
363 
364 /*
365  * Install the hooks in the stream head.
366  */
367 void
368 so_installhooks(struct sonode *so)
369 {
370 	struct vnode *vp = SOTOV(so);
371 
372 	strsetrputhooks(vp, SH_SIGALLDATA | SH_IGN_ZEROLEN | SH_CONSOL_DATA,
373 	    strsock_proto, strsock_misc);
374 	strsetwputhooks(vp, SH_SIGPIPE | SH_RECHECK_ERR, 0);
375 }
376 
377 /*
378  * Remove the hooks in the stream head.
379  */
380 static void
381 so_removehooks(struct sonode *so)
382 {
383 	struct vnode *vp = SOTOV(so);
384 
385 	strsetrputhooks(vp, 0, NULL, NULL);
386 	strsetwputhooks(vp, 0, STRTIMOUT);
387 	/*
388 	 * Leave read behavior as it would have been for a normal
389 	 * stream i.e. a read of an M_PROTO will fail.
390 	 */
391 }
392 
393 /*
394  * Initialize the streams side of a socket including
395  * T_info_req/ack processing. If tso is not NULL its values are used thereby
396  * avoiding the T_INFO_REQ.
397  */
398 int
399 so_strinit(struct sonode *so, struct sonode *tso)
400 {
401 	struct vnode *vp = SOTOV(so);
402 	struct stdata *stp;
403 	mblk_t *mp;
404 	int error;
405 
406 	dprintso(so, 1, ("so_strinit(%p)\n", (void *)so));
407 
408 	/* Preallocate an unbind_req message */
409 	mp = soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP);
410 	mutex_enter(&so->so_lock);
411 	so->so_unbind_mp = mp;
412 #ifdef DEBUG
413 	so->so_options = so_default_options;
414 #endif /* DEBUG */
415 	mutex_exit(&so->so_lock);
416 
417 	so_installhooks(so);
418 
419 	/*
420 	 * The T_CAPABILITY_REQ should be the first message sent down because
421 	 * at least TCP has a fast-path for this which avoids timeouts while
422 	 * waiting for the T_CAPABILITY_ACK under high system load.
423 	 */
424 	if (tso == NULL) {
425 		error = do_tcapability(so, TC1_ACCEPTOR_ID | TC1_INFO);
426 		if (error)
427 			return (error);
428 	} else {
429 		mutex_enter(&so->so_lock);
430 		so->so_tsdu_size = tso->so_tsdu_size;
431 		so->so_etsdu_size = tso->so_etsdu_size;
432 		so->so_addr_size = tso->so_addr_size;
433 		so->so_opt_size = tso->so_opt_size;
434 		so->so_tidu_size = tso->so_tidu_size;
435 		so->so_serv_type = tso->so_serv_type;
436 		so->so_mode = tso->so_mode & ~SM_ACCEPTOR_ID;
437 		mutex_exit(&so->so_lock);
438 
439 		/* the following do_tcapability may update so->so_mode */
440 		if ((tso->so_serv_type != T_CLTS) &&
441 		    !(tso->so_state & SS_DIRECT)) {
442 			error = do_tcapability(so, TC1_ACCEPTOR_ID);
443 			if (error)
444 				return (error);
445 		}
446 	}
447 	/*
448 	 * If the addr_size is 0 we treat it as already bound
449 	 * and connected. This is used by the routing socket.
450 	 * We set the addr_size to something to allocate a the address
451 	 * structures.
452 	 */
453 	if (so->so_addr_size == 0) {
454 		so->so_state |= SS_ISBOUND | SS_ISCONNECTED;
455 		/* Address size can vary with address families. */
456 		if (so->so_family == AF_INET6)
457 			so->so_addr_size =
458 			    (t_scalar_t)sizeof (struct sockaddr_in6);
459 		else
460 			so->so_addr_size =
461 			    (t_scalar_t)sizeof (struct sockaddr_in);
462 		ASSERT(so->so_unbind_mp);
463 	}
464 	/*
465 	 * Allocate the addresses.
466 	 */
467 	ASSERT(so->so_laddr_sa == NULL && so->so_faddr_sa == NULL);
468 	ASSERT(so->so_laddr_len == 0 && so->so_faddr_len == 0);
469 	so->so_laddr_maxlen = so->so_faddr_maxlen =
470 	    P2ROUNDUP(so->so_addr_size, KMEM_ALIGN);
471 	so->so_laddr_sa = kmem_alloc(so->so_laddr_maxlen * 2, KM_SLEEP);
472 	so->so_faddr_sa = (struct sockaddr *)((caddr_t)so->so_laddr_sa
473 	    + so->so_laddr_maxlen);
474 
475 	if (so->so_family == AF_UNIX) {
476 		/*
477 		 * Initialize AF_UNIX related fields.
478 		 */
479 		bzero(&so->so_ux_laddr, sizeof (so->so_ux_laddr));
480 		bzero(&so->so_ux_faddr, sizeof (so->so_ux_faddr));
481 	}
482 
483 	stp = vp->v_stream;
484 	/*
485 	 * Have to keep minpsz at zero in order to allow write/send of zero
486 	 * bytes.
487 	 */
488 	mutex_enter(&stp->sd_lock);
489 	if (stp->sd_qn_minpsz == 1)
490 		stp->sd_qn_minpsz = 0;
491 	mutex_exit(&stp->sd_lock);
492 
493 	/*
494 	 * If sodirect capable allocate and initialize sodirect_t.
495 	 * Note, SS_SODIRECT is set in socktpi_open().
496 	 */
497 	if (so->so_state & SS_SODIRECT) {
498 		sodirect_t	*sodp;
499 
500 		ASSERT(so->so_direct == NULL);
501 
502 		sodp = kmem_cache_alloc(socktpi_sod_cache, KM_SLEEP);
503 		sodp->sod_state = SOD_ENABLED | SOD_WAKE_NOT;
504 		sodp->sod_want = 0;
505 		sodp->sod_q = RD(stp->sd_wrq);
506 		sodp->sod_enqueue = sodput;
507 		sodp->sod_wakeup = sodwakeup;
508 		sodp->sod_uioafh = NULL;
509 		sodp->sod_uioaft = NULL;
510 		sodp->sod_lock = &stp->sd_lock;
511 		/*
512 		 * Remainder of the sod_uioa members are left uninitialized
513 		 * but will be initialized later by uioainit() before uioa
514 		 * is enabled.
515 		 */
516 		sodp->sod_uioa.uioa_state = UIOA_ALLOC;
517 		so->so_direct = sodp;
518 		stp->sd_sodirect = sodp;
519 	}
520 
521 	return (0);
522 }
523 
524 static void
525 copy_tinfo(struct sonode *so, struct T_info_ack *tia)
526 {
527 	so->so_tsdu_size = tia->TSDU_size;
528 	so->so_etsdu_size = tia->ETSDU_size;
529 	so->so_addr_size = tia->ADDR_size;
530 	so->so_opt_size = tia->OPT_size;
531 	so->so_tidu_size = tia->TIDU_size;
532 	so->so_serv_type = tia->SERV_type;
533 	switch (tia->CURRENT_state) {
534 	case TS_UNBND:
535 		break;
536 	case TS_IDLE:
537 		so->so_state |= SS_ISBOUND;
538 		so->so_laddr_len = 0;
539 		so->so_state &= ~SS_LADDR_VALID;
540 		break;
541 	case TS_DATA_XFER:
542 		so->so_state |= SS_ISBOUND|SS_ISCONNECTED;
543 		so->so_laddr_len = 0;
544 		so->so_faddr_len = 0;
545 		so->so_state &= ~(SS_LADDR_VALID | SS_FADDR_VALID);
546 		break;
547 	}
548 
549 	/*
550 	 * Heuristics for determining the socket mode flags
551 	 * (SM_ATOMIC, SM_CONNREQUIRED, SM_ADDR, SM_FDPASSING,
552 	 * and SM_EXDATA, SM_OPTDATA, and SM_BYTESTREAM)
553 	 * from the info ack.
554 	 */
555 	if (so->so_serv_type == T_CLTS) {
556 		so->so_mode |= SM_ATOMIC | SM_ADDR;
557 	} else {
558 		so->so_mode |= SM_CONNREQUIRED;
559 		if (so->so_etsdu_size != 0 && so->so_etsdu_size != -2)
560 			so->so_mode |= SM_EXDATA;
561 	}
562 	if (so->so_type == SOCK_SEQPACKET || so->so_type == SOCK_RAW) {
563 		/* Semantics are to discard tail end of messages */
564 		so->so_mode |= SM_ATOMIC;
565 	}
566 	if (so->so_family == AF_UNIX) {
567 		so->so_mode |= SM_FDPASSING | SM_OPTDATA;
568 		if (so->so_addr_size == -1) {
569 			/* MAXPATHLEN + soun_family + nul termination */
570 			so->so_addr_size = (t_scalar_t)(MAXPATHLEN +
571 			    sizeof (short) + 1);
572 		}
573 		if (so->so_type == SOCK_STREAM) {
574 			/*
575 			 * Make it into a byte-stream transport.
576 			 * SOCK_SEQPACKET sockets are unchanged.
577 			 */
578 			so->so_tsdu_size = 0;
579 		}
580 	} else if (so->so_addr_size == -1) {
581 		/*
582 		 * Logic extracted from sockmod - have to pick some max address
583 		 * length in order to preallocate the addresses.
584 		 */
585 		so->so_addr_size = SOA_DEFSIZE;
586 	}
587 	if (so->so_tsdu_size == 0)
588 		so->so_mode |= SM_BYTESTREAM;
589 }
590 
591 static int
592 check_tinfo(struct sonode *so)
593 {
594 	/* Consistency checks */
595 	if (so->so_type == SOCK_DGRAM && so->so_serv_type != T_CLTS) {
596 		eprintso(so, ("service type and socket type mismatch\n"));
597 		eprintsoline(so, EPROTO);
598 		return (EPROTO);
599 	}
600 	if (so->so_type == SOCK_STREAM && so->so_serv_type == T_CLTS) {
601 		eprintso(so, ("service type and socket type mismatch\n"));
602 		eprintsoline(so, EPROTO);
603 		return (EPROTO);
604 	}
605 	if (so->so_type == SOCK_SEQPACKET && so->so_serv_type == T_CLTS) {
606 		eprintso(so, ("service type and socket type mismatch\n"));
607 		eprintsoline(so, EPROTO);
608 		return (EPROTO);
609 	}
610 	if (so->so_family == AF_INET &&
611 	    so->so_addr_size != (t_scalar_t)sizeof (struct sockaddr_in)) {
612 		eprintso(so,
613 		    ("AF_INET must have sockaddr_in address length. Got %d\n",
614 		    so->so_addr_size));
615 		eprintsoline(so, EMSGSIZE);
616 		return (EMSGSIZE);
617 	}
618 	if (so->so_family == AF_INET6 &&
619 	    so->so_addr_size != (t_scalar_t)sizeof (struct sockaddr_in6)) {
620 		eprintso(so,
621 		    ("AF_INET6 must have sockaddr_in6 address length. Got %d\n",
622 		    so->so_addr_size));
623 		eprintsoline(so, EMSGSIZE);
624 		return (EMSGSIZE);
625 	}
626 
627 	dprintso(so, 1, (
628 	    "tinfo: serv %d tsdu %d, etsdu %d, addr %d, opt %d, tidu %d\n",
629 	    so->so_serv_type, so->so_tsdu_size, so->so_etsdu_size,
630 	    so->so_addr_size, so->so_opt_size,
631 	    so->so_tidu_size));
632 	dprintso(so, 1, ("tinfo: so_state %s\n",
633 	    pr_state(so->so_state, so->so_mode)));
634 	return (0);
635 }
636 
637 /*
638  * Send down T_info_req and wait for the ack.
639  * Record interesting T_info_ack values in the sonode.
640  */
641 static int
642 do_tinfo(struct sonode *so)
643 {
644 	struct T_info_req tir;
645 	mblk_t *mp;
646 	int error;
647 
648 	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
649 
650 	if (so_no_tinfo) {
651 		so->so_addr_size = 0;
652 		return (0);
653 	}
654 
655 	dprintso(so, 1, ("do_tinfo(%p)\n", (void *)so));
656 
657 	/* Send T_INFO_REQ */
658 	tir.PRIM_type = T_INFO_REQ;
659 	mp = soallocproto1(&tir, sizeof (tir),
660 	    sizeof (struct T_info_req) + sizeof (struct T_info_ack),
661 	    _ALLOC_INTR);
662 	if (mp == NULL) {
663 		eprintsoline(so, ENOBUFS);
664 		return (ENOBUFS);
665 	}
666 	/* T_INFO_REQ has to be M_PCPROTO */
667 	DB_TYPE(mp) = M_PCPROTO;
668 
669 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
670 	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
671 	if (error) {
672 		eprintsoline(so, error);
673 		return (error);
674 	}
675 	mutex_enter(&so->so_lock);
676 	/* Wait for T_INFO_ACK */
677 	if ((error = sowaitprim(so, T_INFO_REQ, T_INFO_ACK,
678 	    (t_uscalar_t)sizeof (struct T_info_ack), &mp, 0))) {
679 		mutex_exit(&so->so_lock);
680 		eprintsoline(so, error);
681 		return (error);
682 	}
683 
684 	ASSERT(mp);
685 	copy_tinfo(so, (struct T_info_ack *)mp->b_rptr);
686 	mutex_exit(&so->so_lock);
687 	freemsg(mp);
688 	return (check_tinfo(so));
689 }
690 
691 /*
692  * Send down T_capability_req and wait for the ack.
693  * Record interesting T_capability_ack values in the sonode.
694  */
695 static int
696 do_tcapability(struct sonode *so, t_uscalar_t cap_bits1)
697 {
698 	struct T_capability_req tcr;
699 	struct T_capability_ack *tca;
700 	mblk_t *mp;
701 	int error;
702 
703 	ASSERT(cap_bits1 != 0);
704 	ASSERT((cap_bits1 & ~(TC1_ACCEPTOR_ID | TC1_INFO)) == 0);
705 	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
706 
707 	if (so->so_provinfo->tpi_capability == PI_NO)
708 		return (do_tinfo(so));
709 
710 	if (so_no_tinfo) {
711 		so->so_addr_size = 0;
712 		if ((cap_bits1 &= ~TC1_INFO) == 0)
713 			return (0);
714 	}
715 
716 	dprintso(so, 1, ("do_tcapability(%p)\n", (void *)so));
717 
718 	/* Send T_CAPABILITY_REQ */
719 	tcr.PRIM_type = T_CAPABILITY_REQ;
720 	tcr.CAP_bits1 = cap_bits1;
721 	mp = soallocproto1(&tcr, sizeof (tcr),
722 	    sizeof (struct T_capability_req) + sizeof (struct T_capability_ack),
723 	    _ALLOC_INTR);
724 	if (mp == NULL) {
725 		eprintsoline(so, ENOBUFS);
726 		return (ENOBUFS);
727 	}
728 	/* T_CAPABILITY_REQ should be M_PCPROTO here */
729 	DB_TYPE(mp) = M_PCPROTO;
730 
731 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
732 	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
733 	if (error) {
734 		eprintsoline(so, error);
735 		return (error);
736 	}
737 	mutex_enter(&so->so_lock);
738 	/* Wait for T_CAPABILITY_ACK */
739 	if ((error = sowaitprim(so, T_CAPABILITY_REQ, T_CAPABILITY_ACK,
740 	    (t_uscalar_t)sizeof (*tca), &mp, sock_capability_timeout * hz))) {
741 		mutex_exit(&so->so_lock);
742 		PI_PROVLOCK(so->so_provinfo);
743 		if (so->so_provinfo->tpi_capability == PI_DONTKNOW)
744 			so->so_provinfo->tpi_capability = PI_NO;
745 		PI_PROVUNLOCK(so->so_provinfo);
746 		ASSERT((so->so_mode & SM_ACCEPTOR_ID) == 0);
747 		if (cap_bits1 & TC1_INFO) {
748 			/*
749 			 * If the T_CAPABILITY_REQ timed out and then a
750 			 * T_INFO_REQ gets a protocol error, most likely
751 			 * the capability was slow (vs. unsupported). Return
752 			 * ENOSR for this case as a best guess.
753 			 */
754 			if (error == ETIME) {
755 				return ((error = do_tinfo(so)) == EPROTO ?
756 				    ENOSR : error);
757 			}
758 			return (do_tinfo(so));
759 		}
760 		return (0);
761 	}
762 
763 	if (so->so_provinfo->tpi_capability == PI_DONTKNOW) {
764 		PI_PROVLOCK(so->so_provinfo);
765 		so->so_provinfo->tpi_capability = PI_YES;
766 		PI_PROVUNLOCK(so->so_provinfo);
767 	}
768 
769 	ASSERT(mp);
770 	tca = (struct T_capability_ack *)mp->b_rptr;
771 
772 	ASSERT((cap_bits1 & TC1_INFO) == (tca->CAP_bits1 & TC1_INFO));
773 
774 	cap_bits1 = tca->CAP_bits1;
775 
776 	if (cap_bits1 & TC1_ACCEPTOR_ID) {
777 		so->so_acceptor_id = tca->ACCEPTOR_id;
778 		so->so_mode |= SM_ACCEPTOR_ID;
779 	}
780 
781 	if (cap_bits1 & TC1_INFO)
782 		copy_tinfo(so, &tca->INFO_ack);
783 
784 	mutex_exit(&so->so_lock);
785 	freemsg(mp);
786 
787 	if (cap_bits1 & TC1_INFO)
788 		return (check_tinfo(so));
789 
790 	return (0);
791 }
792 
793 /*
794  * Retrieve and clear the socket error.
795  */
796 int
797 sogeterr(struct sonode *so)
798 {
799 	int error;
800 
801 	ASSERT(MUTEX_HELD(&so->so_lock));
802 
803 	error = so->so_error;
804 	so->so_error = 0;
805 
806 	return (error);
807 }
808 
809 /*
810  * This routine is registered with the stream head to retrieve read
811  * side errors.
812  * It does not clear the socket error for a peeking read side operation.
813  * It the error is to be cleared it sets *clearerr.
814  */
815 int
816 sogetrderr(vnode_t *vp, int ispeek, int *clearerr)
817 {
818 	struct sonode *so = VTOSO(vp);
819 	int error;
820 
821 	mutex_enter(&so->so_lock);
822 	if (ispeek) {
823 		error = so->so_error;
824 		*clearerr = 0;
825 	} else {
826 		error = so->so_error;
827 		so->so_error = 0;
828 		*clearerr = 1;
829 	}
830 	mutex_exit(&so->so_lock);
831 	return (error);
832 }
833 
834 /*
835  * This routine is registered with the stream head to retrieve write
836  * side errors.
837  * It does not clear the socket error for a peeking read side operation.
838  * It the error is to be cleared it sets *clearerr.
839  */
840 int
841 sogetwrerr(vnode_t *vp, int ispeek, int *clearerr)
842 {
843 	struct sonode *so = VTOSO(vp);
844 	int error;
845 
846 	mutex_enter(&so->so_lock);
847 	if (so->so_state & SS_CANTSENDMORE) {
848 		error = EPIPE;
849 		*clearerr = 0;
850 	} else {
851 		error = so->so_error;
852 		if (ispeek) {
853 			*clearerr = 0;
854 		} else {
855 			so->so_error = 0;
856 			*clearerr = 1;
857 		}
858 	}
859 	mutex_exit(&so->so_lock);
860 	return (error);
861 }
862 
863 /*
864  * Set a nonpersistent read and write error on the socket.
865  * Used when there is a T_uderror_ind for a connected socket.
866  * The caller also needs to call strsetrerror and strsetwerror
867  * after dropping the lock.
868  */
869 void
870 soseterror(struct sonode *so, int error)
871 {
872 	ASSERT(error != 0);
873 
874 	ASSERT(MUTEX_HELD(&so->so_lock));
875 	so->so_error = (ushort_t)error;
876 }
877 
878 void
879 soisconnecting(struct sonode *so)
880 {
881 	ASSERT(MUTEX_HELD(&so->so_lock));
882 	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
883 	so->so_state |= SS_ISCONNECTING;
884 	cv_broadcast(&so->so_state_cv);
885 }
886 
887 void
888 soisconnected(struct sonode *so)
889 {
890 	ASSERT(MUTEX_HELD(&so->so_lock));
891 	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING);
892 	so->so_state |= SS_ISCONNECTED;
893 	cv_broadcast(&so->so_state_cv);
894 }
895 
896 /*
897  * The caller also needs to call strsetrerror, strsetwerror and strseteof.
898  */
899 void
900 soisdisconnected(struct sonode *so, int error)
901 {
902 	ASSERT(MUTEX_HELD(&so->so_lock));
903 	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING|
904 	    SS_LADDR_VALID|SS_FADDR_VALID);
905 	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE);
906 	so->so_error = (ushort_t)error;
907 	if (so->so_peercred != NULL) {
908 		crfree(so->so_peercred);
909 		so->so_peercred = NULL;
910 	}
911 	cv_broadcast(&so->so_state_cv);
912 }
913 
914 /*
915  * For connected AF_UNIX SOCK_DGRAM sockets when the peer closes.
916  * Does not affect write side.
917  * The caller also has to call strsetrerror.
918  */
919 static void
920 sobreakconn(struct sonode *so, int error)
921 {
922 	ASSERT(MUTEX_HELD(&so->so_lock));
923 	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
924 	so->so_error = (ushort_t)error;
925 	cv_broadcast(&so->so_state_cv);
926 }
927 
928 /*
929  * Can no longer send.
930  * Caller must also call strsetwerror.
931  *
932  * We mark the peer address as no longer valid for getpeername, but
933  * leave it around for so_unix_close to notify the peer (that
934  * transport has no addressing held at that layer).
935  */
936 void
937 socantsendmore(struct sonode *so)
938 {
939 	ASSERT(MUTEX_HELD(&so->so_lock));
940 	so->so_state = so->so_state & ~SS_FADDR_VALID | SS_CANTSENDMORE;
941 	cv_broadcast(&so->so_state_cv);
942 }
943 
944 /*
945  * The caller must call strseteof(,1) as well as this routine
946  * to change the socket state.
947  */
948 void
949 socantrcvmore(struct sonode *so)
950 {
951 	ASSERT(MUTEX_HELD(&so->so_lock));
952 	so->so_state |= SS_CANTRCVMORE;
953 	cv_broadcast(&so->so_state_cv);
954 }
955 
956 /*
957  * The caller has sent down a "request_prim" primitive and wants to wait for
958  * an ack ("ack_prim") or an T_ERROR_ACK for it.
959  * The specified "ack_prim" can be a T_OK_ACK.
960  *
961  * Assumes that all the TPI acks are M_PCPROTO messages.
962  *
963  * Note that the socket is single-threaded (using so_lock_single)
964  * for all operations that generate TPI ack messages. Since
965  * only TPI ack messages are M_PCPROTO we should never receive
966  * anything except either the ack we are expecting or a T_ERROR_ACK
967  * for the same primitive.
968  */
969 int
970 sowaitprim(struct sonode *so, t_scalar_t request_prim, t_scalar_t ack_prim,
971 	    t_uscalar_t min_size, mblk_t **mpp, clock_t wait)
972 {
973 	mblk_t *mp;
974 	union T_primitives *tpr;
975 	int error;
976 
977 	dprintso(so, 1, ("sowaitprim(%p, %d, %d, %d, %p, %lu)\n",
978 	    (void *)so, request_prim, ack_prim, min_size, (void *)mpp, wait));
979 
980 	ASSERT(MUTEX_HELD(&so->so_lock));
981 
982 	error = sowaitack(so, &mp, wait);
983 	if (error)
984 		return (error);
985 
986 	dprintso(so, 1, ("got msg %p\n", (void *)mp));
987 	if (DB_TYPE(mp) != M_PCPROTO ||
988 	    MBLKL(mp) < sizeof (tpr->type)) {
989 		freemsg(mp);
990 		eprintsoline(so, EPROTO);
991 		return (EPROTO);
992 	}
993 	tpr = (union T_primitives *)mp->b_rptr;
994 	/*
995 	 * Did we get the primitive that we were asking for?
996 	 * For T_OK_ACK we also check that it matches the request primitive.
997 	 */
998 	if (tpr->type == ack_prim &&
999 	    (ack_prim != T_OK_ACK ||
1000 	    tpr->ok_ack.CORRECT_prim == request_prim)) {
1001 		if (MBLKL(mp) >= (ssize_t)min_size) {
1002 			/* Found what we are looking for */
1003 			*mpp = mp;
1004 			return (0);
1005 		}
1006 		/* Too short */
1007 		freemsg(mp);
1008 		eprintsoline(so, EPROTO);
1009 		return (EPROTO);
1010 	}
1011 
1012 	if (tpr->type == T_ERROR_ACK &&
1013 	    tpr->error_ack.ERROR_prim == request_prim) {
1014 		/* Error to the primitive we were looking for */
1015 		if (tpr->error_ack.TLI_error == TSYSERR) {
1016 			error = tpr->error_ack.UNIX_error;
1017 		} else {
1018 			error = tlitosyserr(tpr->error_ack.TLI_error);
1019 		}
1020 		dprintso(so, 0, ("error_ack for %d: %d/%d ->%d\n",
1021 		    tpr->error_ack.ERROR_prim,
1022 		    tpr->error_ack.TLI_error,
1023 		    tpr->error_ack.UNIX_error,
1024 		    error));
1025 		freemsg(mp);
1026 		return (error);
1027 	}
1028 	/*
1029 	 * Wrong primitive or T_ERROR_ACK for the wrong primitive
1030 	 */
1031 #ifdef DEBUG
1032 	if (tpr->type == T_ERROR_ACK) {
1033 		dprintso(so, 0, ("error_ack for %d: %d/%d\n",
1034 		    tpr->error_ack.ERROR_prim,
1035 		    tpr->error_ack.TLI_error,
1036 		    tpr->error_ack.UNIX_error));
1037 	} else if (tpr->type == T_OK_ACK) {
1038 		dprintso(so, 0, ("ok_ack for %d, expected %d for %d\n",
1039 		    tpr->ok_ack.CORRECT_prim,
1040 		    ack_prim, request_prim));
1041 	} else {
1042 		dprintso(so, 0,
1043 		    ("unexpected primitive %d, expected %d for %d\n",
1044 		    tpr->type, ack_prim, request_prim));
1045 	}
1046 #endif /* DEBUG */
1047 
1048 	freemsg(mp);
1049 	eprintsoline(so, EPROTO);
1050 	return (EPROTO);
1051 }
1052 
1053 /*
1054  * Wait for a T_OK_ACK for the specified primitive.
1055  */
1056 int
1057 sowaitokack(struct sonode *so, t_scalar_t request_prim)
1058 {
1059 	mblk_t *mp;
1060 	int error;
1061 
1062 	error = sowaitprim(so, request_prim, T_OK_ACK,
1063 	    (t_uscalar_t)sizeof (struct T_ok_ack), &mp, 0);
1064 	if (error)
1065 		return (error);
1066 	freemsg(mp);
1067 	return (0);
1068 }
1069 
1070 /*
1071  * Queue a received TPI ack message on so_ack_mp.
1072  */
1073 void
1074 soqueueack(struct sonode *so, mblk_t *mp)
1075 {
1076 	if (DB_TYPE(mp) != M_PCPROTO) {
1077 		zcmn_err(getzoneid(), CE_WARN,
1078 		    "sockfs: received unexpected M_PROTO TPI ack. Prim %d\n",
1079 		    *(t_scalar_t *)mp->b_rptr);
1080 		freemsg(mp);
1081 		return;
1082 	}
1083 
1084 	mutex_enter(&so->so_lock);
1085 	if (so->so_ack_mp != NULL) {
1086 		dprintso(so, 1, ("so_ack_mp already set\n"));
1087 		freemsg(so->so_ack_mp);
1088 		so->so_ack_mp = NULL;
1089 	}
1090 	so->so_ack_mp = mp;
1091 	cv_broadcast(&so->so_ack_cv);
1092 	mutex_exit(&so->so_lock);
1093 }
1094 
1095 /*
1096  * Wait for a TPI ack ignoring signals and errors.
1097  */
1098 int
1099 sowaitack(struct sonode *so, mblk_t **mpp, clock_t wait)
1100 {
1101 	ASSERT(MUTEX_HELD(&so->so_lock));
1102 
1103 	while (so->so_ack_mp == NULL) {
1104 #ifdef SOCK_TEST
1105 		if (wait == 0 && sock_test_timelimit != 0)
1106 			wait = sock_test_timelimit;
1107 #endif
1108 		if (wait != 0) {
1109 			/*
1110 			 * Only wait for the time limit.
1111 			 */
1112 			clock_t now;
1113 
1114 			time_to_wait(&now, wait);
1115 			if (cv_timedwait(&so->so_ack_cv, &so->so_lock,
1116 			    now) == -1) {
1117 				eprintsoline(so, ETIME);
1118 				return (ETIME);
1119 			}
1120 		}
1121 		else
1122 			cv_wait(&so->so_ack_cv, &so->so_lock);
1123 	}
1124 	*mpp = so->so_ack_mp;
1125 #ifdef DEBUG
1126 	{
1127 		union T_primitives *tpr;
1128 		mblk_t *mp = *mpp;
1129 
1130 		tpr = (union T_primitives *)mp->b_rptr;
1131 		ASSERT(DB_TYPE(mp) == M_PCPROTO);
1132 		ASSERT(tpr->type == T_OK_ACK ||
1133 		    tpr->type == T_ERROR_ACK ||
1134 		    tpr->type == T_BIND_ACK ||
1135 		    tpr->type == T_CAPABILITY_ACK ||
1136 		    tpr->type == T_INFO_ACK ||
1137 		    tpr->type == T_OPTMGMT_ACK);
1138 	}
1139 #endif /* DEBUG */
1140 	so->so_ack_mp = NULL;
1141 	return (0);
1142 }
1143 
1144 /*
1145  * Queue a received T_CONN_IND message on so_conn_ind_head/tail.
1146  */
1147 void
1148 soqueueconnind(struct sonode *so, mblk_t *mp)
1149 {
1150 	if (DB_TYPE(mp) != M_PROTO) {
1151 		zcmn_err(getzoneid(), CE_WARN,
1152 		    "sockfs: received unexpected M_PCPROTO T_CONN_IND\n");
1153 		freemsg(mp);
1154 		return;
1155 	}
1156 
1157 	mutex_enter(&so->so_lock);
1158 	ASSERT(mp->b_next == NULL);
1159 	if (so->so_conn_ind_head == NULL) {
1160 		so->so_conn_ind_head = mp;
1161 		so->so_state |= SS_HASCONNIND;
1162 	} else {
1163 		ASSERT(so->so_state & SS_HASCONNIND);
1164 		ASSERT(so->so_conn_ind_tail->b_next == NULL);
1165 		so->so_conn_ind_tail->b_next = mp;
1166 	}
1167 	so->so_conn_ind_tail = mp;
1168 	/* Wakeup a single consumer of the T_CONN_IND */
1169 	cv_signal(&so->so_connind_cv);
1170 	mutex_exit(&so->so_lock);
1171 }
1172 
1173 /*
1174  * Wait for a T_CONN_IND.
1175  * Don't wait if nonblocking.
1176  * Accept signals and socket errors.
1177  */
1178 int
1179 sowaitconnind(struct sonode *so, int fmode, mblk_t **mpp)
1180 {
1181 	mblk_t *mp;
1182 	int error = 0;
1183 
1184 	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
1185 	mutex_enter(&so->so_lock);
1186 check_error:
1187 	if (so->so_error) {
1188 		error = sogeterr(so);
1189 		if (error) {
1190 			mutex_exit(&so->so_lock);
1191 			return (error);
1192 		}
1193 	}
1194 
1195 	if (so->so_conn_ind_head == NULL) {
1196 		if (fmode & (FNDELAY|FNONBLOCK)) {
1197 			error = EWOULDBLOCK;
1198 			goto done;
1199 		}
1200 		if (!cv_wait_sig_swap(&so->so_connind_cv, &so->so_lock)) {
1201 			error = EINTR;
1202 			goto done;
1203 		}
1204 		goto check_error;
1205 	}
1206 	mp = so->so_conn_ind_head;
1207 	so->so_conn_ind_head = mp->b_next;
1208 	mp->b_next = NULL;
1209 	if (so->so_conn_ind_head == NULL) {
1210 		ASSERT(so->so_conn_ind_tail == mp);
1211 		so->so_conn_ind_tail = NULL;
1212 		so->so_state &= ~SS_HASCONNIND;
1213 	}
1214 	*mpp = mp;
1215 done:
1216 	mutex_exit(&so->so_lock);
1217 	return (error);
1218 }
1219 
1220 /*
1221  * Flush a T_CONN_IND matching the sequence number from the list.
1222  * Return zero if found; non-zero otherwise.
1223  * This is called very infrequently thus it is ok to do a linear search.
1224  */
1225 int
1226 soflushconnind(struct sonode *so, t_scalar_t seqno)
1227 {
1228 	mblk_t *prevmp, *mp;
1229 	struct T_conn_ind *tci;
1230 
1231 	mutex_enter(&so->so_lock);
1232 	for (prevmp = NULL, mp = so->so_conn_ind_head; mp != NULL;
1233 	    prevmp = mp, mp = mp->b_next) {
1234 		tci = (struct T_conn_ind *)mp->b_rptr;
1235 		if (tci->SEQ_number == seqno) {
1236 			dprintso(so, 1,
1237 			    ("t_discon_ind: found T_CONN_IND %d\n", seqno));
1238 			/* Deleting last? */
1239 			if (so->so_conn_ind_tail == mp) {
1240 				so->so_conn_ind_tail = prevmp;
1241 			}
1242 			if (prevmp == NULL) {
1243 				/* Deleting first */
1244 				so->so_conn_ind_head = mp->b_next;
1245 			} else {
1246 				prevmp->b_next = mp->b_next;
1247 			}
1248 			mp->b_next = NULL;
1249 			if (so->so_conn_ind_head == NULL) {
1250 				ASSERT(so->so_conn_ind_tail == NULL);
1251 				so->so_state &= ~SS_HASCONNIND;
1252 			} else {
1253 				ASSERT(so->so_conn_ind_tail != NULL);
1254 			}
1255 			so->so_error = ECONNABORTED;
1256 			mutex_exit(&so->so_lock);
1257 
1258 			/*
1259 			 * T_KSSL_PROXY_CONN_IND may carry a handle for
1260 			 * an SSL context, and needs to be released.
1261 			 */
1262 			if ((tci->PRIM_type == T_SSL_PROXY_CONN_IND) &&
1263 			    (mp->b_cont != NULL)) {
1264 				kssl_ctx_t kssl_ctx;
1265 
1266 				ASSERT(MBLKL(mp->b_cont) ==
1267 				    sizeof (kssl_ctx_t));
1268 				kssl_ctx = *((kssl_ctx_t *)mp->b_cont->b_rptr);
1269 				kssl_release_ctx(kssl_ctx);
1270 			}
1271 			freemsg(mp);
1272 			return (0);
1273 		}
1274 	}
1275 	mutex_exit(&so->so_lock);
1276 	dprintso(so, 1,	("t_discon_ind: NOT found T_CONN_IND %d\n", seqno));
1277 	return (-1);
1278 }
1279 
/*
 * Wait for a connection attempt in progress to finish.
 *
 * The caller must hold so_lock. While the socket is SS_ISCONNECTING (and
 * not yet SS_ISCONNECTED) and has no pending error, the thread sleeps on
 * so_state_cv.
 *
 *   fmode - if FNDELAY or FNONBLOCK is set, EINPROGRESS is returned
 *	     instead of sleeping.
 *   nosig - non-zero if the sleep must not be interrupted by a signal;
 *	     otherwise an interrupted sleep returns EINTR.
 *
 * Returns 0 once the socket is connected, the pending socket error if
 * there is one, or ECONNREFUSED if the socket ended up neither connected
 * nor carrying an error.
 */
int
sowaitconnected(struct sonode *so, int fmode, int nosig)
{
	int error;

	ASSERT(MUTEX_HELD(&so->so_lock));

	for (;;) {
		if (so->so_error != 0)
			break;
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) !=
		    SS_ISCONNECTING)
			break;

		dprintso(so, 1, ("waiting for SS_ISCONNECTED on %p\n",
		    (void *)so));
		if (fmode & (FNDELAY|FNONBLOCK))
			return (EINPROGRESS);

		if (nosig) {
			cv_wait(&so->so_state_cv, &so->so_lock);
		} else if (!cv_wait_sig_swap(&so->so_state_cv,
		    &so->so_lock)) {
			/*
			 * Return EINTR and let the application use
			 * nonblocking techniques for detecting when
			 * the connection has been established.
			 */
			return (EINTR);
		}
		dprintso(so, 1, ("awoken on %p\n", (void *)so));
	}

	if (so->so_error != 0) {
		error = sogeterr(so);
		ASSERT(error != 0);
	} else if (so->so_state & SS_ISCONNECTED) {
		return (0);
	} else {
		/*
		 * Could have received a T_ORDREL_IND or a T_DISCON_IND with
		 * zero errno. Or another thread could have consumed so_error
		 * e.g. by calling read.
		 */
		error = ECONNREFUSED;
	}
	dprintso(so, 1, ("sowaitconnected: error %d\n", error));
	return (error);
}
1331 
1332 
1333 /*
1334  * Handle the signal generation aspect of urgent data.
1335  */
1336 static void
1337 so_oob_sig(struct sonode *so, int extrasig,
1338     strsigset_t *signals, strpollset_t *pollwakeups)
1339 {
1340 	ASSERT(MUTEX_HELD(&so->so_lock));
1341 
1342 	ASSERT(so_verify_oobstate(so));
1343 	ASSERT(so->so_oobsigcnt >= so->so_oobcnt);
1344 	if (so->so_oobsigcnt > so->so_oobcnt) {
1345 		/*
1346 		 * Signal has already been generated once for this
1347 		 * urgent "event". However, since TCP can receive updated
1348 		 * urgent pointers we still generate a signal.
1349 		 */
1350 		ASSERT(so->so_state & SS_OOBPEND);
1351 		if (extrasig) {
1352 			*signals |= S_RDBAND;
1353 			*pollwakeups |= POLLRDBAND;
1354 		}
1355 		return;
1356 	}
1357 
1358 	so->so_oobsigcnt++;
1359 	ASSERT(so->so_oobsigcnt > 0);	/* Wraparound */
1360 	ASSERT(so->so_oobsigcnt > so->so_oobcnt);
1361 
1362 	/*
1363 	 * Record (for select/poll) that urgent data is pending.
1364 	 */
1365 	so->so_state |= SS_OOBPEND;
1366 	/*
1367 	 * New urgent data on the way so forget about any old
1368 	 * urgent data.
1369 	 */
1370 	so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);
1371 	if (so->so_oobmsg != NULL) {
1372 		dprintso(so, 1, ("sock: discarding old oob\n"));
1373 		freemsg(so->so_oobmsg);
1374 		so->so_oobmsg = NULL;
1375 	}
1376 	*signals |= S_RDBAND;
1377 	*pollwakeups |= POLLRDBAND;
1378 	ASSERT(so_verify_oobstate(so));
1379 }
1380 
1381 /*
1382  * Handle the processing of the T_EXDATA_IND with urgent data.
1383  * Returns the T_EXDATA_IND if it should be queued on the read queue.
1384  */
1385 /* ARGSUSED2 */
1386 static mblk_t *
1387 so_oob_exdata(struct sonode *so, mblk_t *mp,
1388 	strsigset_t *signals, strpollset_t *pollwakeups)
1389 {
1390 	ASSERT(MUTEX_HELD(&so->so_lock));
1391 
1392 	ASSERT(so_verify_oobstate(so));
1393 
1394 	ASSERT(so->so_oobsigcnt > so->so_oobcnt);
1395 
1396 	so->so_oobcnt++;
1397 	ASSERT(so->so_oobcnt > 0);	/* wraparound? */
1398 	ASSERT(so->so_oobsigcnt >= so->so_oobcnt);
1399 
1400 	/*
1401 	 * Set MSGMARK for SIOCATMARK.
1402 	 */
1403 	mp->b_flag |= MSGMARK;
1404 
1405 	ASSERT(so_verify_oobstate(so));
1406 	return (mp);
1407 }
1408 
1409 /*
1410  * Handle the processing of the actual urgent data.
1411  * Returns the data mblk if it should be queued on the read queue.
1412  */
1413 static mblk_t *
1414 so_oob_data(struct sonode *so, mblk_t *mp,
1415 	strsigset_t *signals, strpollset_t *pollwakeups)
1416 {
1417 	ASSERT(MUTEX_HELD(&so->so_lock));
1418 
1419 	ASSERT(so_verify_oobstate(so));
1420 
1421 	ASSERT(so->so_oobsigcnt >= so->so_oobcnt);
1422 	ASSERT(mp != NULL);
1423 	/*
1424 	 * For OOBINLINE we keep the data in the T_EXDATA_IND.
1425 	 * Otherwise we store it in so_oobmsg.
1426 	 */
1427 	ASSERT(so->so_oobmsg == NULL);
1428 	if (so->so_options & SO_OOBINLINE) {
1429 		*pollwakeups |= POLLIN | POLLRDNORM | POLLRDBAND;
1430 		*signals |= S_INPUT | S_RDNORM;
1431 	} else {
1432 		*pollwakeups |= POLLRDBAND;
1433 		so->so_state |= SS_HAVEOOBDATA;
1434 		so->so_oobmsg = mp;
1435 		mp = NULL;
1436 	}
1437 	ASSERT(so_verify_oobstate(so));
1438 	return (mp);
1439 }
1440 
1441 /*
1442  * Caller must hold the mutex.
1443  * For delayed processing, save the T_DISCON_IND received
1444  * from below on so_discon_ind_mp.
1445  * When the message is processed the framework will call:
1446  *      (*func)(so, mp);
1447  */
1448 static void
1449 so_save_discon_ind(struct sonode *so,
1450 	mblk_t *mp,
1451 	void (*func)(struct sonode *so, mblk_t *))
1452 {
1453 	ASSERT(MUTEX_HELD(&so->so_lock));
1454 
1455 	/*
1456 	 * Discard new T_DISCON_IND if we have already received another.
1457 	 * Currently the earlier message can either be on so_discon_ind_mp
1458 	 * or being processed.
1459 	 */
1460 	if (so->so_discon_ind_mp != NULL || (so->so_flag & SOASYNC_UNBIND)) {
1461 		zcmn_err(getzoneid(), CE_WARN,
1462 		    "sockfs: received unexpected additional T_DISCON_IND\n");
1463 		freemsg(mp);
1464 		return;
1465 	}
1466 	mp->b_prev = (mblk_t *)func;
1467 	mp->b_next = NULL;
1468 	so->so_discon_ind_mp = mp;
1469 }
1470 
1471 /*
1472  * Caller must hold the mutex and make sure that either SOLOCKED
1473  * or SOASYNC_UNBIND is set. Called from so_unlock_single().
1474  * Perform delayed processing of T_DISCON_IND message on so_discon_ind_mp.
1475  * Need to ensure that strsock_proto() will not end up sleeping for
1476  * SOASYNC_UNBIND, while executing this function.
1477  */
1478 void
1479 so_drain_discon_ind(struct sonode *so)
1480 {
1481 	mblk_t	*bp;
1482 	void (*func)(struct sonode *so, mblk_t *);
1483 
1484 	ASSERT(MUTEX_HELD(&so->so_lock));
1485 	ASSERT(so->so_flag & (SOLOCKED|SOASYNC_UNBIND));
1486 
1487 	/* Process T_DISCON_IND on so_discon_ind_mp */
1488 	if ((bp = so->so_discon_ind_mp) != NULL) {
1489 		so->so_discon_ind_mp = NULL;
1490 		func = (void (*)())bp->b_prev;
1491 		bp->b_prev = NULL;
1492 
1493 		/*
1494 		 * This (*func) is supposed to generate a message downstream
1495 		 * and we need to have a flag set until the corresponding
1496 		 * upstream message reaches stream head.
1497 		 * When processing T_DISCON_IND in strsock_discon_ind
1498 		 * we hold SOASYN_UNBIND when sending T_UNBIND_REQ down and
1499 		 * drop the flag after we get the ACK in strsock_proto.
1500 		 */
1501 		(void) (*func)(so, bp);
1502 	}
1503 }
1504 
1505 /*
1506  * Caller must hold the mutex.
1507  * Remove the T_DISCON_IND on so_discon_ind_mp.
1508  */
1509 void
1510 so_flush_discon_ind(struct sonode *so)
1511 {
1512 	mblk_t	*bp;
1513 
1514 	ASSERT(MUTEX_HELD(&so->so_lock));
1515 
1516 	/*
1517 	 * Remove T_DISCON_IND mblk at so_discon_ind_mp.
1518 	 */
1519 	if ((bp = so->so_discon_ind_mp) != NULL) {
1520 		so->so_discon_ind_mp = NULL;
1521 		bp->b_prev = NULL;
1522 		freemsg(bp);
1523 	}
1524 }
1525 
1526 /*
1527  * Caller must hold the mutex.
1528  *
1529  * This function is used to process the T_DISCON_IND message. It does
1530  * immediate processing when called from strsock_proto and delayed
1531  * processing of discon_ind saved on so_discon_ind_mp when called from
1532  * so_drain_discon_ind. When a T_DISCON_IND message is saved in
1533  * so_discon_ind_mp for delayed processing, this function is registered
1534  * as the callback function to process the message.
1535  *
1536  * SOASYNC_UNBIND should be held in this function, during the non-blocking
1537  * unbind operation, and should be released only after we receive the ACK
1538  * in strsock_proto, for the T_UNBIND_REQ sent here. Since SOLOCKED is not set,
1539  * no TPI messages would be sent down at this time. This is to prevent M_FLUSH
1540  * sent from either this function or tcp_unbind(), flushing away any TPI
1541  * message that is being sent down and stays in a lower module's queue.
1542  *
1543  * This function drops so_lock and grabs it again.
1544  */
1545 static void
1546 strsock_discon_ind(struct sonode *so, mblk_t *discon_mp)
1547 {
1548 	struct vnode *vp;
1549 	struct stdata *stp;
1550 	union T_primitives *tpr;
1551 	struct T_unbind_req *ubr;
1552 	mblk_t *mp;
1553 	int error;
1554 
1555 	ASSERT(MUTEX_HELD(&so->so_lock));
1556 	ASSERT(discon_mp);
1557 	ASSERT(discon_mp->b_rptr);
1558 
1559 	tpr = (union T_primitives *)discon_mp->b_rptr;
1560 	ASSERT(tpr->type == T_DISCON_IND);
1561 
1562 	vp = SOTOV(so);
1563 	stp = vp->v_stream;
1564 	ASSERT(stp);
1565 
1566 	/*
1567 	 * Not a listener
1568 	 */
1569 	ASSERT((so->so_state & SS_ACCEPTCONN) == 0);
1570 
1571 	/*
1572 	 * This assumes that the name space for DISCON_reason
1573 	 * is the errno name space.
1574 	 */
1575 	soisdisconnected(so, tpr->discon_ind.DISCON_reason);
1576 
1577 	/*
1578 	 * Unbind with the transport without blocking.
1579 	 * If we've already received a T_DISCON_IND do not unbind.
1580 	 *
1581 	 * If there is no preallocated unbind message, we have already
1582 	 * unbound with the transport
1583 	 *
1584 	 * If the socket is not bound, no need to unbind.
1585 	 */
1586 	mp = so->so_unbind_mp;
1587 	if (mp == NULL) {
1588 		ASSERT(!(so->so_state & SS_ISBOUND));
1589 		mutex_exit(&so->so_lock);
1590 	} else if (!(so->so_state & SS_ISBOUND))  {
1591 		mutex_exit(&so->so_lock);
1592 	} else {
1593 		so->so_unbind_mp = NULL;
1594 
1595 		/*
1596 		 * Is another T_DISCON_IND being processed.
1597 		 */
1598 		ASSERT((so->so_flag & SOASYNC_UNBIND) == 0);
1599 
1600 		/*
1601 		 * Make strsock_proto ignore T_OK_ACK and T_ERROR_ACK for
1602 		 * this unbind. Set SOASYNC_UNBIND. This should be cleared
1603 		 * only after we receive the ACK in strsock_proto.
1604 		 */
1605 		so->so_flag |= SOASYNC_UNBIND;
1606 		ASSERT(!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)));
1607 		so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN|SS_LADDR_VALID);
1608 		mutex_exit(&so->so_lock);
1609 
1610 		/*
1611 		 * Send down T_UNBIND_REQ ignoring flow control.
1612 		 * XXX Assumes that MSG_IGNFLOW implies that this thread
1613 		 * does not run service procedures.
1614 		 */
1615 		ASSERT(DB_TYPE(mp) == M_PROTO);
1616 		ubr = (struct T_unbind_req *)mp->b_rptr;
1617 		mp->b_wptr += sizeof (*ubr);
1618 		ubr->PRIM_type = T_UNBIND_REQ;
1619 
1620 		/*
1621 		 * Flush the read and write side (except stream head read queue)
1622 		 * and send down T_UNBIND_REQ.
1623 		 */
1624 		(void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW);
1625 		error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
1626 		    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
1627 		/* LINTED - warning: statement has no consequent: if */
1628 		if (error) {
1629 			eprintsoline(so, error);
1630 		}
1631 	}
1632 
1633 	if (tpr->discon_ind.DISCON_reason != 0)
1634 		strsetrerror(SOTOV(so), 0, 0, sogetrderr);
1635 	strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
1636 	strseteof(SOTOV(so), 1);
1637 	/*
1638 	 * strseteof takes care of read side wakeups,
1639 	 * pollwakeups, and signals.
1640 	 */
1641 	dprintso(so, 1, ("T_DISCON_IND: error %d\n", so->so_error));
1642 	freemsg(discon_mp);
1643 
1644 
1645 	pollwakeup(&stp->sd_pollist, POLLOUT);
1646 	mutex_enter(&stp->sd_lock);
1647 
1648 	/*
1649 	 * Wake sleeping write
1650 	 */
1651 	if (stp->sd_flag & WSLEEP) {
1652 		stp->sd_flag &= ~WSLEEP;
1653 		cv_broadcast(&stp->sd_wrq->q_wait);
1654 	}
1655 
1656 	/*
1657 	 * strsendsig can handle multiple signals with a
1658 	 * single call.  Send SIGPOLL for S_OUTPUT event.
1659 	 */
1660 	if (stp->sd_sigflags & S_OUTPUT)
1661 		strsendsig(stp->sd_siglist, S_OUTPUT, 0, 0);
1662 
1663 	mutex_exit(&stp->sd_lock);
1664 	mutex_enter(&so->so_lock);
1665 }
1666 
1667 /*
1668  * This routine is registered with the stream head to receive M_PROTO
1669  * and M_PCPROTO messages.
1670  *
1671  * Returns NULL if the message was consumed.
1672  * Returns an mblk to make that mblk be processed (and queued) by the stream
1673  * head.
1674  *
1675  * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and
1676  * *pollwakeups) for the stream head to take action on. Note that since
1677  * sockets always deliver SIGIO for every new piece of data this routine
1678  * never sets *firstmsgsigs; any signals are returned in *allmsgsigs.
1679  *
1680  * This routine handles all data related TPI messages independent of
1681  * the type of the socket i.e. it doesn't care if T_UNITDATA_IND message
1682  * arrive on a SOCK_STREAM.
1683  */
1684 static mblk_t *
1685 strsock_proto(vnode_t *vp, mblk_t *mp,
1686 		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
1687 		strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
1688 {
1689 	union T_primitives *tpr;
1690 	struct sonode *so;
1691 
1692 	so = VTOSO(vp);
1693 
1694 	dprintso(so, 1, ("strsock_proto(%p, %p)\n", (void *)vp, (void *)mp));
1695 
1696 	/* Set default return values */
1697 	*firstmsgsigs = *wakeups = *allmsgsigs = *pollwakeups = 0;
1698 
1699 	ASSERT(DB_TYPE(mp) == M_PROTO ||
1700 	    DB_TYPE(mp) == M_PCPROTO);
1701 
1702 	if (MBLKL(mp) < sizeof (tpr->type)) {
1703 		/* The message is too short to even contain the primitive */
1704 		zcmn_err(getzoneid(), CE_WARN,
1705 		    "sockfs: Too short TPI message received. Len = %ld\n",
1706 		    (ptrdiff_t)(MBLKL(mp)));
1707 		freemsg(mp);
1708 		return (NULL);
1709 	}
1710 	if (!__TPI_PRIM_ISALIGNED(mp->b_rptr)) {
1711 		/* The read pointer is not aligned correctly for TPI */
1712 		zcmn_err(getzoneid(), CE_WARN,
1713 		    "sockfs: Unaligned TPI message received. rptr = %p\n",
1714 		    (void *)mp->b_rptr);
1715 		freemsg(mp);
1716 		return (NULL);
1717 	}
1718 	tpr = (union T_primitives *)mp->b_rptr;
1719 	dprintso(so, 1, ("strsock_proto: primitive %d\n", tpr->type));
1720 
1721 	switch (tpr->type) {
1722 
1723 	case T_DATA_IND:
1724 		if (MBLKL(mp) < sizeof (struct T_data_ind)) {
1725 			zcmn_err(getzoneid(), CE_WARN,
1726 			    "sockfs: Too short T_DATA_IND. Len = %ld\n",
1727 			    (ptrdiff_t)(MBLKL(mp)));
1728 			freemsg(mp);
1729 			return (NULL);
1730 		}
1731 		/*
1732 		 * Ignore zero-length T_DATA_IND messages. These might be
1733 		 * generated by some transports.
1734 		 * This is needed to prevent read (which skips the M_PROTO
1735 		 * part) to unexpectedly return 0 (or return EWOULDBLOCK
1736 		 * on a non-blocking socket after select/poll has indicated
1737 		 * that data is available).
1738 		 */
1739 		if (msgdsize(mp->b_cont) == 0) {
1740 			dprintso(so, 0,
1741 			    ("strsock_proto: zero length T_DATA_IND\n"));
1742 			freemsg(mp);
1743 			return (NULL);
1744 		}
1745 		*allmsgsigs = S_INPUT | S_RDNORM;
1746 		*pollwakeups = POLLIN | POLLRDNORM;
1747 		*wakeups = RSLEEP;
1748 		return (mp);
1749 
1750 	case T_UNITDATA_IND: {
1751 		struct T_unitdata_ind	*tudi = &tpr->unitdata_ind;
1752 		void			*addr;
1753 		t_uscalar_t		addrlen;
1754 
1755 		if (MBLKL(mp) < sizeof (struct T_unitdata_ind)) {
1756 			zcmn_err(getzoneid(), CE_WARN,
1757 			    "sockfs: Too short T_UNITDATA_IND. Len = %ld\n",
1758 			    (ptrdiff_t)(MBLKL(mp)));
1759 			freemsg(mp);
1760 			return (NULL);
1761 		}
1762 
1763 		/* Is this is not a connected datagram socket? */
1764 		if ((so->so_mode & SM_CONNREQUIRED) ||
1765 		    !(so->so_state & SS_ISCONNECTED)) {
1766 			/*
1767 			 * Not a connected datagram socket. Look for
1768 			 * the SO_UNIX_CLOSE option. If such an option is found
1769 			 * discard the message (since it has no meaning
1770 			 * unless connected).
1771 			 */
1772 			if (so->so_family == AF_UNIX && msgdsize(mp) == 0 &&
1773 			    tudi->OPT_length != 0) {
1774 				void *opt;
1775 				t_uscalar_t optlen = tudi->OPT_length;
1776 
1777 				opt = sogetoff(mp, tudi->OPT_offset,
1778 				    optlen, __TPI_ALIGN_SIZE);
1779 				if (opt == NULL) {
1780 					/* The len/off falls outside mp */
1781 					freemsg(mp);
1782 					mutex_enter(&so->so_lock);
1783 					soseterror(so, EPROTO);
1784 					mutex_exit(&so->so_lock);
1785 					zcmn_err(getzoneid(), CE_WARN,
1786 					    "sockfs: T_unidata_ind with "
1787 					    "invalid optlen/offset %u/%d\n",
1788 					    optlen, tudi->OPT_offset);
1789 					return (NULL);
1790 				}
1791 				if (so_getopt_unix_close(opt, optlen)) {
1792 					freemsg(mp);
1793 					return (NULL);
1794 				}
1795 			}
1796 			*allmsgsigs = S_INPUT | S_RDNORM;
1797 			*pollwakeups = POLLIN | POLLRDNORM;
1798 			*wakeups = RSLEEP;
1799 			if (audit_active)
1800 				audit_sock(T_UNITDATA_IND, strvp2wq(vp),
1801 				    mp, 0);
1802 			return (mp);
1803 		}
1804 
1805 		/*
1806 		 * A connect datagram socket. For AF_INET{,6} we verify that
1807 		 * the source address matches the "connected to" address.
1808 		 * The semantics of AF_UNIX sockets is to not verify
1809 		 * the source address.
1810 		 * Note that this source address verification is transport
1811 		 * specific. Thus the real fix would be to extent TPI
1812 		 * to allow T_CONN_REQ messages to be send to connectionless
1813 		 * transport providers and always let the transport provider
1814 		 * do whatever filtering is needed.
1815 		 *
1816 		 * The verification/filtering semantics for transports
1817 		 * other than AF_INET and AF_UNIX are unknown. The choice
1818 		 * would be to either filter using bcmp or let all messages
1819 		 * get through. This code does not filter other address
1820 		 * families since this at least allows the application to
1821 		 * work around any missing filtering.
1822 		 *
1823 		 * XXX Should we move filtering to UDP/ICMP???
1824 		 * That would require passing e.g. a T_DISCON_REQ to UDP
1825 		 * when the socket becomes unconnected.
1826 		 */
1827 		addrlen = tudi->SRC_length;
1828 		/*
1829 		 * The alignment restriction is really to strict but
1830 		 * we want enough alignment to inspect the fields of
1831 		 * a sockaddr_in.
1832 		 */
1833 		addr = sogetoff(mp, tudi->SRC_offset, addrlen,
1834 		    __TPI_ALIGN_SIZE);
1835 		if (addr == NULL) {
1836 			freemsg(mp);
1837 			mutex_enter(&so->so_lock);
1838 			soseterror(so, EPROTO);
1839 			mutex_exit(&so->so_lock);
1840 			zcmn_err(getzoneid(), CE_WARN,
1841 			    "sockfs: T_unidata_ind with invalid "
1842 			    "addrlen/offset %u/%d\n",
1843 			    addrlen, tudi->SRC_offset);
1844 			return (NULL);
1845 		}
1846 
1847 		if (so->so_family == AF_INET) {
1848 			/*
1849 			 * For AF_INET we allow wildcarding both sin_addr
1850 			 * and sin_port.
1851 			 */
1852 			struct sockaddr_in *faddr, *sin;
1853 
1854 			/* Prevent so_faddr_sa from changing while accessed */
1855 			mutex_enter(&so->so_lock);
1856 			ASSERT(so->so_faddr_len ==
1857 			    (socklen_t)sizeof (struct sockaddr_in));
1858 			faddr = (struct sockaddr_in *)so->so_faddr_sa;
1859 			sin = (struct sockaddr_in *)addr;
1860 			if (addrlen !=
1861 			    (t_uscalar_t)sizeof (struct sockaddr_in) ||
1862 			    (sin->sin_addr.s_addr != faddr->sin_addr.s_addr &&
1863 			    faddr->sin_addr.s_addr != INADDR_ANY) ||
1864 			    (so->so_type != SOCK_RAW &&
1865 			    sin->sin_port != faddr->sin_port &&
1866 			    faddr->sin_port != 0)) {
1867 #ifdef DEBUG
1868 				dprintso(so, 0,
1869 				    ("sockfs: T_UNITDATA_IND mismatch: %s",
1870 				    pr_addr(so->so_family,
1871 				    (struct sockaddr *)addr,
1872 				    addrlen)));
1873 				dprintso(so, 0, (" - %s\n",
1874 				    pr_addr(so->so_family, so->so_faddr_sa,
1875 				    (t_uscalar_t)so->so_faddr_len)));
1876 #endif /* DEBUG */
1877 				mutex_exit(&so->so_lock);
1878 				freemsg(mp);
1879 				return (NULL);
1880 			}
1881 			mutex_exit(&so->so_lock);
1882 		} else if (so->so_family == AF_INET6) {
1883 			/*
1884 			 * For AF_INET6 we allow wildcarding both sin6_addr
1885 			 * and sin6_port.
1886 			 */
1887 			struct sockaddr_in6 *faddr6, *sin6;
1888 			static struct in6_addr zeroes; /* inits to all zeros */
1889 
1890 			/* Prevent so_faddr_sa from changing while accessed */
1891 			mutex_enter(&so->so_lock);
1892 			ASSERT(so->so_faddr_len ==
1893 			    (socklen_t)sizeof (struct sockaddr_in6));
1894 			faddr6 = (struct sockaddr_in6 *)so->so_faddr_sa;
1895 			sin6 = (struct sockaddr_in6 *)addr;
1896 			/* XXX could we get a mapped address ::ffff:0.0.0.0 ? */
1897 			if (addrlen !=
1898 			    (t_uscalar_t)sizeof (struct sockaddr_in6) ||
1899 			    (!IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
1900 			    &faddr6->sin6_addr) &&
1901 			    !IN6_ARE_ADDR_EQUAL(&faddr6->sin6_addr, &zeroes)) ||
1902 			    (so->so_type != SOCK_RAW &&
1903 			    sin6->sin6_port != faddr6->sin6_port &&
1904 			    faddr6->sin6_port != 0)) {
1905 #ifdef DEBUG
1906 				dprintso(so, 0,
1907 				    ("sockfs: T_UNITDATA_IND mismatch: %s",
1908 				    pr_addr(so->so_family,
1909 				    (struct sockaddr *)addr,
1910 				    addrlen)));
1911 				dprintso(so, 0, (" - %s\n",
1912 				    pr_addr(so->so_family, so->so_faddr_sa,
1913 				    (t_uscalar_t)so->so_faddr_len)));
1914 #endif /* DEBUG */
1915 				mutex_exit(&so->so_lock);
1916 				freemsg(mp);
1917 				return (NULL);
1918 			}
1919 			mutex_exit(&so->so_lock);
1920 		} else if (so->so_family == AF_UNIX &&
1921 		    msgdsize(mp->b_cont) == 0 &&
1922 		    tudi->OPT_length != 0) {
1923 			/*
1924 			 * Attempt to extract AF_UNIX
1925 			 * SO_UNIX_CLOSE indication from options.
1926 			 */
1927 			void *opt;
1928 			t_uscalar_t optlen = tudi->OPT_length;
1929 
1930 			opt = sogetoff(mp, tudi->OPT_offset,
1931 			    optlen, __TPI_ALIGN_SIZE);
1932 			if (opt == NULL) {
1933 				/* The len/off falls outside mp */
1934 				freemsg(mp);
1935 				mutex_enter(&so->so_lock);
1936 				soseterror(so, EPROTO);
1937 				mutex_exit(&so->so_lock);
1938 				zcmn_err(getzoneid(), CE_WARN,
1939 				    "sockfs: T_unidata_ind with invalid "
1940 				    "optlen/offset %u/%d\n",
1941 				    optlen, tudi->OPT_offset);
1942 				return (NULL);
1943 			}
1944 			/*
1945 			 * If we received a unix close indication mark the
1946 			 * socket and discard this message.
1947 			 */
1948 			if (so_getopt_unix_close(opt, optlen)) {
1949 				mutex_enter(&so->so_lock);
1950 				sobreakconn(so, ECONNRESET);
1951 				mutex_exit(&so->so_lock);
1952 				strsetrerror(SOTOV(so), 0, 0, sogetrderr);
1953 				freemsg(mp);
1954 				*pollwakeups = POLLIN | POLLRDNORM;
1955 				*allmsgsigs = S_INPUT | S_RDNORM;
1956 				*wakeups = RSLEEP;
1957 				return (NULL);
1958 			}
1959 		}
1960 		*allmsgsigs = S_INPUT | S_RDNORM;
1961 		*pollwakeups = POLLIN | POLLRDNORM;
1962 		*wakeups = RSLEEP;
1963 		return (mp);
1964 	}
1965 
1966 	case T_OPTDATA_IND: {
1967 		struct T_optdata_ind	*tdi = &tpr->optdata_ind;
1968 
1969 		if (MBLKL(mp) < sizeof (struct T_optdata_ind)) {
1970 			zcmn_err(getzoneid(), CE_WARN,
1971 			    "sockfs: Too short T_OPTDATA_IND. Len = %ld\n",
1972 			    (ptrdiff_t)(MBLKL(mp)));
1973 			freemsg(mp);
1974 			return (NULL);
1975 		}
1976 		/*
1977 		 * Allow zero-length messages carrying options.
1978 		 * This is used when carrying the SO_UNIX_CLOSE option.
1979 		 */
1980 		if (so->so_family == AF_UNIX && msgdsize(mp->b_cont) == 0 &&
1981 		    tdi->OPT_length != 0) {
1982 			/*
1983 			 * Attempt to extract AF_UNIX close indication
1984 			 * from the options. Ignore any other options -
1985 			 * those are handled once the message is removed
1986 			 * from the queue.
1987 			 * The close indication message should not carry data.
1988 			 */
1989 			void *opt;
1990 			t_uscalar_t optlen = tdi->OPT_length;
1991 
1992 			opt = sogetoff(mp, tdi->OPT_offset,
1993 			    optlen, __TPI_ALIGN_SIZE);
1994 			if (opt == NULL) {
1995 				/* The len/off falls outside mp */
1996 				freemsg(mp);
1997 				mutex_enter(&so->so_lock);
1998 				soseterror(so, EPROTO);
1999 				mutex_exit(&so->so_lock);
2000 				zcmn_err(getzoneid(), CE_WARN,
2001 				    "sockfs: T_optdata_ind with invalid "
2002 				    "optlen/offset %u/%d\n",
2003 				    optlen, tdi->OPT_offset);
2004 				return (NULL);
2005 			}
2006 			/*
2007 			 * If we received a close indication mark the
2008 			 * socket and discard this message.
2009 			 */
2010 			if (so_getopt_unix_close(opt, optlen)) {
2011 				mutex_enter(&so->so_lock);
2012 				socantsendmore(so);
2013 				mutex_exit(&so->so_lock);
2014 				strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2015 				freemsg(mp);
2016 				return (NULL);
2017 			}
2018 		}
2019 		*allmsgsigs = S_INPUT | S_RDNORM;
2020 		*pollwakeups = POLLIN | POLLRDNORM;
2021 		*wakeups = RSLEEP;
2022 		return (mp);
2023 	}
2024 
2025 	case T_EXDATA_IND: {
2026 		mblk_t		*mctl, *mdata;
2027 		mblk_t *lbp;
2028 		union T_primitives *tprp;
2029 		struct stdata   *stp;
2030 		queue_t *qp;
2031 
2032 		if (MBLKL(mp) < sizeof (struct T_exdata_ind)) {
2033 			zcmn_err(getzoneid(), CE_WARN,
2034 			    "sockfs: Too short T_EXDATA_IND. Len = %ld\n",
2035 			    (ptrdiff_t)(MBLKL(mp)));
2036 			freemsg(mp);
2037 			return (NULL);
2038 		}
2039 		/*
2040 		 * Ignore zero-length T_EXDATA_IND messages. These might be
2041 		 * generated by some transports.
2042 		 *
2043 		 * This is needed to prevent read (which skips the M_PROTO
2044 		 * part) to unexpectedly return 0 (or return EWOULDBLOCK
2045 		 * on a non-blocking socket after select/poll has indicated
2046 		 * that data is available).
2047 		 */
2048 		dprintso(so, 1,
2049 		    ("T_EXDATA_IND(%p): counts %d/%d state %s\n",
2050 		    (void *)vp, so->so_oobsigcnt, so->so_oobcnt,
2051 		    pr_state(so->so_state, so->so_mode)));
2052 
2053 		if (msgdsize(mp->b_cont) == 0) {
2054 			dprintso(so, 0,
2055 			    ("strsock_proto: zero length T_EXDATA_IND\n"));
2056 			freemsg(mp);
2057 			return (NULL);
2058 		}
2059 
2060 		/*
2061 		 * Split into the T_EXDATA_IND and the M_DATA part.
2062 		 * We process these three pieces separately:
2063 		 *	signal generation
2064 		 *	handling T_EXDATA_IND
2065 		 *	handling M_DATA component
2066 		 */
2067 		mctl = mp;
2068 		mdata = mctl->b_cont;
2069 		mctl->b_cont = NULL;
2070 		mutex_enter(&so->so_lock);
2071 		so_oob_sig(so, 0, allmsgsigs, pollwakeups);
2072 		mctl = so_oob_exdata(so, mctl, allmsgsigs, pollwakeups);
2073 		mdata = so_oob_data(so, mdata, allmsgsigs, pollwakeups);
2074 
2075 		stp = vp->v_stream;
2076 		ASSERT(stp != NULL);
2077 		qp = _RD(stp->sd_wrq);
2078 
2079 		mutex_enter(QLOCK(qp));
2080 		lbp = qp->q_last;
2081 
2082 		/*
2083 		 * We want to avoid queueing up a string of T_EXDATA_IND
2084 		 * messages with no intervening data messages at the stream
2085 		 * head. These messages contribute to the total message
2086 		 * count. Eventually this can lead to STREAMS flow control
2087 		 * and also cause TCP to advertise a zero window condition
2088 		 * to the peer. This can happen in the degenerate case where
2089 		 * the sender and receiver exchange only OOB data. The sender
2090 		 * only sends messages with MSG_OOB flag and the receiver
2091 		 * receives only MSG_OOB messages and does not use SO_OOBINLINE.
2092 		 * An example of this scenario has been reported in applications
2093 		 * that use OOB data to exchange heart beats. Flow control
2094 		 * relief will never happen if the application only reads OOB
2095 		 * data which is done directly by sorecvoob() and the
2096 		 * T_EXDATA_IND messages at the streamhead won't be consumed.
2097 		 * Note that there is no correctness issue in compressing the
2098 		 * string of T_EXDATA_IND messages into a single T_EXDATA_IND
2099 		 * message. A single read that does not specify MSG_OOB will
2100 		 * read across all the marks in a loop in sotpi_recvmsg().
2101 		 * Each mark is individually distinguishable only if the
2102 		 * T_EXDATA_IND messages are separated by data messages.
2103 		 */
2104 		if ((qp->q_first != NULL) && (DB_TYPE(lbp) == M_PROTO)) {
2105 			tprp = (union T_primitives *)lbp->b_rptr;
2106 			if ((tprp->type == T_EXDATA_IND) &&
2107 			    !(so->so_options & SO_OOBINLINE)) {
2108 
2109 				/*
2110 				 * free the new M_PROTO message
2111 				 */
2112 				freemsg(mctl);
2113 
2114 				/*
2115 				 * adjust the OOB count and OOB	signal count
2116 				 * just incremented for the new OOB data.
2117 				 */
2118 				so->so_oobcnt--;
2119 				so->so_oobsigcnt--;
2120 				mutex_exit(QLOCK(qp));
2121 				mutex_exit(&so->so_lock);
2122 				return (NULL);
2123 			}
2124 		}
2125 		mutex_exit(QLOCK(qp));
2126 
2127 		/*
2128 		 * Pass the T_EXDATA_IND and the M_DATA back separately
2129 		 * by using b_next linkage. (The stream head will queue any
2130 		 * b_next linked messages separately.) This is needed
2131 		 * since MSGMARK applies to the last byte of the message
2132 		 * hence we can not have any M_DATA component attached
2133 		 * to the marked T_EXDATA_IND. Note that the stream head
2134 		 * will not consolidate M_DATA messages onto an MSGMARK'ed
2135 		 * message in order to preserve the constraint that
2136 		 * the T_EXDATA_IND always is a separate message.
2137 		 */
2138 		ASSERT(mctl != NULL);
2139 		mctl->b_next = mdata;
2140 		mp = mctl;
2141 #ifdef DEBUG
2142 		if (mdata == NULL) {
2143 			dprintso(so, 1,
2144 			    ("after outofline T_EXDATA_IND(%p): "
2145 			    "counts %d/%d  poll 0x%x sig 0x%x state %s\n",
2146 			    (void *)vp, so->so_oobsigcnt,
2147 			    so->so_oobcnt, *pollwakeups, *allmsgsigs,
2148 			    pr_state(so->so_state, so->so_mode)));
2149 		} else {
2150 			dprintso(so, 1,
2151 			    ("after inline T_EXDATA_IND(%p): "
2152 			    "counts %d/%d  poll 0x%x sig 0x%x state %s\n",
2153 			    (void *)vp, so->so_oobsigcnt,
2154 			    so->so_oobcnt, *pollwakeups, *allmsgsigs,
2155 			    pr_state(so->so_state, so->so_mode)));
2156 		}
2157 #endif /* DEBUG */
2158 		mutex_exit(&so->so_lock);
2159 		*wakeups = RSLEEP;
2160 		return (mp);
2161 	}
2162 
2163 	case T_CONN_CON: {
2164 		struct T_conn_con	*conn_con;
2165 		void			*addr;
2166 		t_uscalar_t		addrlen;
2167 
2168 		/*
2169 		 * Verify the state, update the state to ISCONNECTED,
2170 		 * record the potentially new address in the message,
2171 		 * and drop the message.
2172 		 */
2173 		if (MBLKL(mp) < sizeof (struct T_conn_con)) {
2174 			zcmn_err(getzoneid(), CE_WARN,
2175 			    "sockfs: Too short T_CONN_CON. Len = %ld\n",
2176 			    (ptrdiff_t)(MBLKL(mp)));
2177 			freemsg(mp);
2178 			return (NULL);
2179 		}
2180 
2181 		mutex_enter(&so->so_lock);
2182 		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) !=
2183 		    SS_ISCONNECTING) {
2184 			mutex_exit(&so->so_lock);
2185 			dprintso(so, 1,
2186 			    ("T_CONN_CON: state %x\n", so->so_state));
2187 			freemsg(mp);
2188 			return (NULL);
2189 		}
2190 
2191 		conn_con = &tpr->conn_con;
2192 		addrlen = conn_con->RES_length;
2193 		/*
2194 		 * Allow the address to be of different size than sent down
2195 		 * in the T_CONN_REQ as long as it doesn't exceed the maxlen.
2196 		 * For AF_UNIX require the identical length.
2197 		 */
2198 		if (so->so_family == AF_UNIX ?
2199 		    addrlen != (t_uscalar_t)sizeof (so->so_ux_laddr) :
2200 		    addrlen > (t_uscalar_t)so->so_faddr_maxlen) {
2201 			zcmn_err(getzoneid(), CE_WARN,
2202 			    "sockfs: T_conn_con with different "
2203 			    "length %u/%d\n",
2204 			    addrlen, conn_con->RES_length);
2205 			soisdisconnected(so, EPROTO);
2206 			mutex_exit(&so->so_lock);
2207 			strsetrerror(SOTOV(so), 0, 0, sogetrderr);
2208 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2209 			strseteof(SOTOV(so), 1);
2210 			freemsg(mp);
2211 			/*
2212 			 * strseteof takes care of read side wakeups,
2213 			 * pollwakeups, and signals.
2214 			 */
2215 			*wakeups = WSLEEP;
2216 			*allmsgsigs = S_OUTPUT;
2217 			*pollwakeups = POLLOUT;
2218 			return (NULL);
2219 		}
2220 		addr = sogetoff(mp, conn_con->RES_offset, addrlen, 1);
2221 		if (addr == NULL) {
2222 			zcmn_err(getzoneid(), CE_WARN,
2223 			    "sockfs: T_conn_con with invalid "
2224 			    "addrlen/offset %u/%d\n",
2225 			    addrlen, conn_con->RES_offset);
2226 			mutex_exit(&so->so_lock);
2227 			strsetrerror(SOTOV(so), 0, 0, sogetrderr);
2228 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2229 			strseteof(SOTOV(so), 1);
2230 			freemsg(mp);
2231 			/*
2232 			 * strseteof takes care of read side wakeups,
2233 			 * pollwakeups, and signals.
2234 			 */
2235 			*wakeups = WSLEEP;
2236 			*allmsgsigs = S_OUTPUT;
2237 			*pollwakeups = POLLOUT;
2238 			return (NULL);
2239 		}
2240 
2241 		/*
2242 		 * Save for getpeername.
2243 		 */
2244 		if (so->so_family != AF_UNIX) {
2245 			so->so_faddr_len = (socklen_t)addrlen;
2246 			ASSERT(so->so_faddr_len <= so->so_faddr_maxlen);
2247 			bcopy(addr, so->so_faddr_sa, addrlen);
2248 			so->so_state |= SS_FADDR_VALID;
2249 		}
2250 
2251 		if (so->so_peercred != NULL)
2252 			crfree(so->so_peercred);
2253 		so->so_peercred = DB_CRED(mp);
2254 		so->so_cpid = DB_CPID(mp);
2255 		if (so->so_peercred != NULL)
2256 			crhold(so->so_peercred);
2257 
2258 		/* Wakeup anybody sleeping in sowaitconnected */
2259 		soisconnected(so);
2260 		mutex_exit(&so->so_lock);
2261 
2262 		/*
2263 		 * The socket is now available for sending data.
2264 		 */
2265 		*wakeups = WSLEEP;
2266 		*allmsgsigs = S_OUTPUT;
2267 		*pollwakeups = POLLOUT;
2268 		freemsg(mp);
2269 		return (NULL);
2270 	}
2271 
2272 	/*
2273 	 * Extra processing in case of an SSL proxy, before queuing or
2274 	 * forwarding to the fallback endpoint
2275 	 */
2276 	case T_SSL_PROXY_CONN_IND:
2277 	case T_CONN_IND:
2278 		/*
2279 		 * Verify the min size and queue the message on
2280 		 * the so_conn_ind_head/tail list.
2281 		 */
2282 		if (MBLKL(mp) < sizeof (struct T_conn_ind)) {
2283 			zcmn_err(getzoneid(), CE_WARN,
2284 			    "sockfs: Too short T_CONN_IND. Len = %ld\n",
2285 			    (ptrdiff_t)(MBLKL(mp)));
2286 			freemsg(mp);
2287 			return (NULL);
2288 		}
2289 
2290 		if (audit_active)
2291 			audit_sock(T_CONN_IND, strvp2wq(vp), mp, 0);
2292 		if (!(so->so_state & SS_ACCEPTCONN)) {
2293 			zcmn_err(getzoneid(), CE_WARN,
2294 			    "sockfs: T_conn_ind on non-listening socket\n");
2295 			freemsg(mp);
2296 			return (NULL);
2297 		}
2298 
2299 		if (tpr->type == T_SSL_PROXY_CONN_IND && mp->b_cont == NULL) {
2300 			/* No context: need to fall back */
2301 			struct sonode *fbso;
2302 			stdata_t *fbstp;
2303 
2304 			tpr->type = T_CONN_IND;
2305 
2306 			fbso = kssl_find_fallback(so->so_kssl_ent);
2307 
2308 			/*
2309 			 * No fallback: the remote will timeout and
2310 			 * disconnect.
2311 			 */
2312 			if (fbso == NULL) {
2313 				freemsg(mp);
2314 				return (NULL);
2315 			}
2316 			fbstp = SOTOV(fbso)->v_stream;
2317 			qreply(fbstp->sd_wrq->q_next, mp);
2318 			return (NULL);
2319 		}
2320 		soqueueconnind(so, mp);
2321 		*allmsgsigs = S_INPUT | S_RDNORM;
2322 		*pollwakeups = POLLIN | POLLRDNORM;
2323 		*wakeups = RSLEEP;
2324 		return (NULL);
2325 
2326 	case T_ORDREL_IND:
2327 		if (MBLKL(mp) < sizeof (struct T_ordrel_ind)) {
2328 			zcmn_err(getzoneid(), CE_WARN,
2329 			    "sockfs: Too short T_ORDREL_IND. Len = %ld\n",
2330 			    (ptrdiff_t)(MBLKL(mp)));
2331 			freemsg(mp);
2332 			return (NULL);
2333 		}
2334 
2335 		/*
2336 		 * Some providers send this when not fully connected.
2337 		 * SunLink X.25 needs to retrieve disconnect reason after
2338 		 * disconnect for compatibility. It uses T_ORDREL_IND
2339 		 * instead of T_DISCON_IND so that it may use the
2340 		 * endpoint after a connect failure to retrieve the
2341 		 * reason using an ioctl. Thus we explicitly clear
2342 		 * SS_ISCONNECTING here for SunLink X.25.
2343 		 * This is a needed TPI violation.
2344 		 */
2345 		mutex_enter(&so->so_lock);
2346 		so->so_state &= ~SS_ISCONNECTING;
2347 		socantrcvmore(so);
2348 		mutex_exit(&so->so_lock);
2349 		strseteof(SOTOV(so), 1);
2350 		/*
2351 		 * strseteof takes care of read side wakeups,
2352 		 * pollwakeups, and signals.
2353 		 */
2354 		freemsg(mp);
2355 		return (NULL);
2356 
2357 	case T_DISCON_IND:
2358 		if (MBLKL(mp) < sizeof (struct T_discon_ind)) {
2359 			zcmn_err(getzoneid(), CE_WARN,
2360 			    "sockfs: Too short T_DISCON_IND. Len = %ld\n",
2361 			    (ptrdiff_t)(MBLKL(mp)));
2362 			freemsg(mp);
2363 			return (NULL);
2364 		}
2365 		if (so->so_state & SS_ACCEPTCONN) {
2366 			/*
2367 			 * This is a listener. Look for a queued T_CONN_IND
2368 			 * with a matching sequence number and remove it
2369 			 * from the list.
2370 			 * It is normal to not find the sequence number since
2371 			 * the soaccept might have already dequeued it
2372 			 * (in which case the T_CONN_RES will fail with
2373 			 * TBADSEQ).
2374 			 */
2375 			(void) soflushconnind(so, tpr->discon_ind.SEQ_number);
2376 			freemsg(mp);
2377 			return (0);
2378 		}
2379 
2380 		/*
2381 		 * Not a listener
2382 		 *
2383 		 * If SS_CANTRCVMORE for AF_UNIX ignore the discon_reason.
2384 		 * Such a discon_ind appears when the peer has first done
2385 		 * a shutdown() followed by a close() in which case we just
2386 		 * want to record socantsendmore.
2387 		 * In this case sockfs first receives a T_ORDREL_IND followed
2388 		 * by a T_DISCON_IND.
2389 		 * Note that for other transports (e.g. TCP) we need to handle
2390 		 * the discon_ind in this case since it signals an error.
2391 		 */
2392 		mutex_enter(&so->so_lock);
2393 		if ((so->so_state & SS_CANTRCVMORE) &&
2394 		    (so->so_family == AF_UNIX)) {
2395 			socantsendmore(so);
2396 			mutex_exit(&so->so_lock);
2397 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2398 			dprintso(so, 1,
2399 			    ("T_DISCON_IND: error %d\n", so->so_error));
2400 			freemsg(mp);
2401 			/*
2402 			 * Set these variables for caller to process them.
2403 			 * For the else part where T_DISCON_IND is processed,
2404 			 * this will be done in the function being called
2405 			 * (strsock_discon_ind())
2406 			 */
2407 			*wakeups = WSLEEP;
2408 			*allmsgsigs = S_OUTPUT;
2409 			*pollwakeups = POLLOUT;
2410 		} else if (so->so_flag & (SOASYNC_UNBIND | SOLOCKED)) {
2411 			/*
2412 			 * Deferred processing of T_DISCON_IND
2413 			 */
2414 			so_save_discon_ind(so, mp, strsock_discon_ind);
2415 			mutex_exit(&so->so_lock);
2416 		} else {
2417 			/*
2418 			 * Process T_DISCON_IND now
2419 			 */
2420 			(void) strsock_discon_ind(so, mp);
2421 			mutex_exit(&so->so_lock);
2422 		}
2423 		return (NULL);
2424 
2425 	case T_UDERROR_IND: {
2426 		struct T_uderror_ind	*tudi = &tpr->uderror_ind;
2427 		void			*addr;
2428 		t_uscalar_t		addrlen;
2429 		int			error;
2430 
2431 		dprintso(so, 0,
2432 		    ("T_UDERROR_IND: error %d\n", tudi->ERROR_type));
2433 
2434 		if (MBLKL(mp) < sizeof (struct T_uderror_ind)) {
2435 			zcmn_err(getzoneid(), CE_WARN,
2436 			    "sockfs: Too short T_UDERROR_IND. Len = %ld\n",
2437 			    (ptrdiff_t)(MBLKL(mp)));
2438 			freemsg(mp);
2439 			return (NULL);
2440 		}
2441 		/* Ignore on connection-oriented transports */
2442 		if (so->so_mode & SM_CONNREQUIRED) {
2443 			freemsg(mp);
2444 			eprintsoline(so, 0);
2445 			zcmn_err(getzoneid(), CE_WARN,
2446 			    "sockfs: T_uderror_ind on connection-oriented "
2447 			    "transport\n");
2448 			return (NULL);
2449 		}
2450 		addrlen = tudi->DEST_length;
2451 		addr = sogetoff(mp, tudi->DEST_offset, addrlen, 1);
2452 		if (addr == NULL) {
2453 			zcmn_err(getzoneid(), CE_WARN,
2454 			    "sockfs: T_uderror_ind with invalid "
2455 			    "addrlen/offset %u/%d\n",
2456 			    addrlen, tudi->DEST_offset);
2457 			freemsg(mp);
2458 			return (NULL);
2459 		}
2460 
2461 		/* Verify source address for connected socket. */
2462 		mutex_enter(&so->so_lock);
2463 		if (so->so_state & SS_ISCONNECTED) {
2464 			void *faddr;
2465 			t_uscalar_t faddr_len;
2466 			boolean_t match = B_FALSE;
2467 
2468 			switch (so->so_family) {
2469 			case AF_INET: {
2470 				/* Compare just IP address and port */
2471 				struct sockaddr_in *sin1, *sin2;
2472 
2473 				sin1 = (struct sockaddr_in *)so->so_faddr_sa;
2474 				sin2 = (struct sockaddr_in *)addr;
2475 				if (addrlen == sizeof (struct sockaddr_in) &&
2476 				    sin1->sin_port == sin2->sin_port &&
2477 				    sin1->sin_addr.s_addr ==
2478 				    sin2->sin_addr.s_addr)
2479 					match = B_TRUE;
2480 				break;
2481 			}
2482 			case AF_INET6: {
2483 				/* Compare just IP address and port. Not flow */
2484 				struct sockaddr_in6 *sin1, *sin2;
2485 
2486 				sin1 = (struct sockaddr_in6 *)so->so_faddr_sa;
2487 				sin2 = (struct sockaddr_in6 *)addr;
2488 				if (addrlen == sizeof (struct sockaddr_in6) &&
2489 				    sin1->sin6_port == sin2->sin6_port &&
2490 				    IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr,
2491 				    &sin2->sin6_addr))
2492 					match = B_TRUE;
2493 				break;
2494 			}
2495 			case AF_UNIX:
2496 				faddr = &so->so_ux_faddr;
2497 				faddr_len =
2498 				    (t_uscalar_t)sizeof (so->so_ux_faddr);
2499 				if (faddr_len == addrlen &&
2500 				    bcmp(addr, faddr, addrlen) == 0)
2501 					match = B_TRUE;
2502 				break;
2503 			default:
2504 				faddr = so->so_faddr_sa;
2505 				faddr_len = (t_uscalar_t)so->so_faddr_len;
2506 				if (faddr_len == addrlen &&
2507 				    bcmp(addr, faddr, addrlen) == 0)
2508 					match = B_TRUE;
2509 				break;
2510 			}
2511 
2512 			if (!match) {
2513 #ifdef DEBUG
2514 				dprintso(so, 0,
2515 				    ("sockfs: T_UDERR_IND mismatch: %s - ",
2516 				    pr_addr(so->so_family,
2517 				    (struct sockaddr *)addr,
2518 				    addrlen)));
2519 				dprintso(so, 0, ("%s\n",
2520 				    pr_addr(so->so_family, so->so_faddr_sa,
2521 				    so->so_faddr_len)));
2522 #endif /* DEBUG */
2523 				mutex_exit(&so->so_lock);
2524 				freemsg(mp);
2525 				return (NULL);
2526 			}
2527 			/*
2528 			 * Make the write error nonpersistent. If the error
2529 			 * is zero we use ECONNRESET.
2530 			 * This assumes that the name space for ERROR_type
2531 			 * is the errno name space.
2532 			 */
2533 			if (tudi->ERROR_type != 0)
2534 				error = tudi->ERROR_type;
2535 			else
2536 				error = ECONNRESET;
2537 
2538 			soseterror(so, error);
2539 			mutex_exit(&so->so_lock);
2540 			strsetrerror(SOTOV(so), 0, 0, sogetrderr);
2541 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2542 			*wakeups = RSLEEP | WSLEEP;
2543 			*allmsgsigs = S_INPUT | S_RDNORM | S_OUTPUT;
2544 			*pollwakeups = POLLIN | POLLRDNORM | POLLOUT;
2545 			freemsg(mp);
2546 			return (NULL);
2547 		}
2548 		/*
2549 		 * If the application asked for delayed errors
2550 		 * record the T_UDERROR_IND so_eaddr_mp and the reason in
2551 		 * so_delayed_error for delayed error posting. If the reason
2552 		 * is zero use ECONNRESET.
2553 		 * Note that delayed error indications do not make sense for
2554 		 * AF_UNIX sockets since sendto checks that the destination
2555 		 * address is valid at the time of the sendto.
2556 		 */
2557 		if (!(so->so_options & SO_DGRAM_ERRIND)) {
2558 			mutex_exit(&so->so_lock);
2559 			freemsg(mp);
2560 			return (NULL);
2561 		}
2562 		if (so->so_eaddr_mp != NULL)
2563 			freemsg(so->so_eaddr_mp);
2564 
2565 		so->so_eaddr_mp = mp;
2566 		if (tudi->ERROR_type != 0)
2567 			error = tudi->ERROR_type;
2568 		else
2569 			error = ECONNRESET;
2570 		so->so_delayed_error = (ushort_t)error;
2571 		mutex_exit(&so->so_lock);
2572 		return (NULL);
2573 	}
2574 
2575 	case T_ERROR_ACK:
2576 		dprintso(so, 0,
2577 		    ("strsock_proto: T_ERROR_ACK for %d, error %d/%d\n",
2578 		    tpr->error_ack.ERROR_prim,
2579 		    tpr->error_ack.TLI_error,
2580 		    tpr->error_ack.UNIX_error));
2581 
2582 		if (MBLKL(mp) < sizeof (struct T_error_ack)) {
2583 			zcmn_err(getzoneid(), CE_WARN,
2584 			    "sockfs: Too short T_ERROR_ACK. Len = %ld\n",
2585 			    (ptrdiff_t)(MBLKL(mp)));
2586 			freemsg(mp);
2587 			return (NULL);
2588 		}
2589 		/*
2590 		 * Check if we were waiting for the async message
2591 		 */
2592 		mutex_enter(&so->so_lock);
2593 		if ((so->so_flag & SOASYNC_UNBIND) &&
2594 		    tpr->error_ack.ERROR_prim == T_UNBIND_REQ) {
2595 			so_unlock_single(so, SOASYNC_UNBIND);
2596 			mutex_exit(&so->so_lock);
2597 			freemsg(mp);
2598 			return (NULL);
2599 		}
2600 		mutex_exit(&so->so_lock);
2601 		soqueueack(so, mp);
2602 		return (NULL);
2603 
2604 	case T_OK_ACK:
2605 		if (MBLKL(mp) < sizeof (struct T_ok_ack)) {
2606 			zcmn_err(getzoneid(), CE_WARN,
2607 			    "sockfs: Too short T_OK_ACK. Len = %ld\n",
2608 			    (ptrdiff_t)(MBLKL(mp)));
2609 			freemsg(mp);
2610 			return (NULL);
2611 		}
2612 		/*
2613 		 * Check if we were waiting for the async message
2614 		 */
2615 		mutex_enter(&so->so_lock);
2616 		if ((so->so_flag & SOASYNC_UNBIND) &&
2617 		    tpr->ok_ack.CORRECT_prim == T_UNBIND_REQ) {
2618 			dprintso(so, 1,
2619 			    ("strsock_proto: T_OK_ACK async unbind\n"));
2620 			so_unlock_single(so, SOASYNC_UNBIND);
2621 			mutex_exit(&so->so_lock);
2622 			freemsg(mp);
2623 			return (NULL);
2624 		}
2625 		mutex_exit(&so->so_lock);
2626 		soqueueack(so, mp);
2627 		return (NULL);
2628 
2629 	case T_INFO_ACK:
2630 		if (MBLKL(mp) < sizeof (struct T_info_ack)) {
2631 			zcmn_err(getzoneid(), CE_WARN,
2632 			    "sockfs: Too short T_INFO_ACK. Len = %ld\n",
2633 			    (ptrdiff_t)(MBLKL(mp)));
2634 			freemsg(mp);
2635 			return (NULL);
2636 		}
2637 		soqueueack(so, mp);
2638 		return (NULL);
2639 
2640 	case T_CAPABILITY_ACK:
2641 		/*
2642 		 * A T_capability_ack need only be large enough to hold
2643 		 * the PRIM_type and CAP_bits1 fields; checking for anything
2644 		 * larger might reject a correct response from an older
2645 		 * provider.
2646 		 */
2647 		if (MBLKL(mp) < 2 * sizeof (t_uscalar_t)) {
2648 			zcmn_err(getzoneid(), CE_WARN,
2649 			    "sockfs: Too short T_CAPABILITY_ACK. Len = %ld\n",
2650 			    (ptrdiff_t)(MBLKL(mp)));
2651 			freemsg(mp);
2652 			return (NULL);
2653 		}
2654 		soqueueack(so, mp);
2655 		return (NULL);
2656 
2657 	case T_BIND_ACK:
2658 		if (MBLKL(mp) < sizeof (struct T_bind_ack)) {
2659 			zcmn_err(getzoneid(), CE_WARN,
2660 			    "sockfs: Too short T_BIND_ACK. Len = %ld\n",
2661 			    (ptrdiff_t)(MBLKL(mp)));
2662 			freemsg(mp);
2663 			return (NULL);
2664 		}
2665 		soqueueack(so, mp);
2666 		return (NULL);
2667 
2668 	case T_OPTMGMT_ACK:
2669 		if (MBLKL(mp) < sizeof (struct T_optmgmt_ack)) {
2670 			zcmn_err(getzoneid(), CE_WARN,
2671 			    "sockfs: Too short T_OPTMGMT_ACK. Len = %ld\n",
2672 			    (ptrdiff_t)(MBLKL(mp)));
2673 			freemsg(mp);
2674 			return (NULL);
2675 		}
2676 		soqueueack(so, mp);
2677 		return (NULL);
2678 	default:
2679 #ifdef DEBUG
2680 		zcmn_err(getzoneid(), CE_WARN,
2681 		    "sockfs: unknown TPI primitive %d received\n",
2682 		    tpr->type);
2683 #endif /* DEBUG */
2684 		freemsg(mp);
2685 		return (NULL);
2686 	}
2687 }
2688 
2689 /*
2690  * This routine is registered with the stream head to receive other
2691  * (non-data, and non-proto) messages.
2692  *
2693  * Returns NULL if the message was consumed.
2694  * Returns an mblk to make that mblk be processed by the stream head.
2695  *
2696  * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and
2697  * *pollwakeups) for the stream head to take action on.
2698  */
2699 static mblk_t *
2700 strsock_misc(vnode_t *vp, mblk_t *mp,
2701 		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
2702 		strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
2703 {
2704 	struct sonode *so;
2705 
2706 	so = VTOSO(vp);
2707 
2708 	dprintso(so, 1, ("strsock_misc(%p, %p, 0x%x)\n",
2709 	    (void *)vp, (void *)mp, DB_TYPE(mp)));
2710 
2711 	/* Set default return values */
2712 	*wakeups = *allmsgsigs = *firstmsgsigs = *pollwakeups = 0;
2713 
2714 	switch (DB_TYPE(mp)) {
2715 	case M_PCSIG:
2716 		/*
2717 		 * This assumes that an M_PCSIG for the urgent data arrives
2718 		 * before the corresponding T_EXDATA_IND.
2719 		 *
2720 		 * Note: Just like in SunOS 4.X and 4.4BSD a poll will be
2721 		 * awoken before the urgent data shows up.
2722 		 * For OOBINLINE this can result in select returning
2723 		 * only exceptions as opposed to except|read.
2724 		 */
2725 		if (*mp->b_rptr == SIGURG) {
2726 			mutex_enter(&so->so_lock);
2727 			dprintso(so, 1,
2728 			    ("SIGURG(%p): counts %d/%d state %s\n",
2729 			    (void *)vp, so->so_oobsigcnt,
2730 			    so->so_oobcnt,
2731 			    pr_state(so->so_state, so->so_mode)));
2732 			so_oob_sig(so, 1, allmsgsigs, pollwakeups);
2733 			dprintso(so, 1,
2734 			    ("after SIGURG(%p): counts %d/%d "
2735 			    " poll 0x%x sig 0x%x state %s\n",
2736 			    (void *)vp, so->so_oobsigcnt,
2737 			    so->so_oobcnt, *pollwakeups, *allmsgsigs,
2738 			    pr_state(so->so_state, so->so_mode)));
2739 			mutex_exit(&so->so_lock);
2740 		}
2741 		freemsg(mp);
2742 		return (NULL);
2743 
2744 	case M_SIG:
2745 	case M_HANGUP:
2746 	case M_UNHANGUP:
2747 	case M_ERROR:
2748 		/* M_ERRORs etc are ignored */
2749 		freemsg(mp);
2750 		return (NULL);
2751 
2752 	case M_FLUSH:
2753 		/*
2754 		 * Do not flush read queue. If the M_FLUSH
2755 		 * arrives because of an impending T_discon_ind
2756 		 * we still have to keep any queued data - this is part of
2757 		 * socket semantics.
2758 		 */
2759 		if (*mp->b_rptr & FLUSHW) {
2760 			*mp->b_rptr &= ~FLUSHR;
2761 			return (mp);
2762 		}
2763 		freemsg(mp);
2764 		return (NULL);
2765 
2766 	default:
2767 		return (mp);
2768 	}
2769 }
2770 
2771 
2772 /* Register to receive signals for certain events */
2773 int
2774 so_set_asyncsigs(vnode_t *vp, pid_t pgrp, int events, int mode, cred_t *cr)
2775 {
2776 	struct strsigset ss;
2777 	int32_t rval;
2778 
2779 	/*
2780 	 * Note that SOLOCKED will be set except for the call from soaccept().
2781 	 */
2782 	ASSERT(!mutex_owned(&VTOSO(vp)->so_lock));
2783 	ss.ss_pid = pgrp;
2784 	ss.ss_events = events;
2785 	return (strioctl(vp, I_ESETSIG, (intptr_t)&ss, mode, K_TO_K, cr,
2786 	    &rval));
2787 }
2788 
2789 
2790 /* Register for events matching the SS_ASYNC flag */
2791 int
2792 so_set_events(struct sonode *so, vnode_t *vp, cred_t *cr)
2793 {
2794 	int events = so->so_state & SS_ASYNC ?
2795 	    S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT :
2796 	    S_RDBAND | S_BANDURG;
2797 
2798 	return (so_set_asyncsigs(vp, so->so_pgrp, events, 0, cr));
2799 }
2800 
2801 
2802 /* Change the SS_ASYNC flag, and update signal delivery if needed */
2803 int
2804 so_flip_async(struct sonode *so, vnode_t *vp, int mode, cred_t *cr)
2805 {
2806 	ASSERT(mutex_owned(&so->so_lock));
2807 	if (so->so_pgrp != 0) {
2808 		int error;
2809 		int events = so->so_state & SS_ASYNC ?		/* Old flag */
2810 		    S_RDBAND | S_BANDURG :			/* New sigs */
2811 		    S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT;
2812 
2813 		so_lock_single(so);
2814 		mutex_exit(&so->so_lock);
2815 
2816 		error = so_set_asyncsigs(vp, so->so_pgrp, events, mode, cr);
2817 
2818 		mutex_enter(&so->so_lock);
2819 		so_unlock_single(so, SOLOCKED);
2820 		if (error)
2821 			return (error);
2822 	}
2823 	so->so_state ^= SS_ASYNC;
2824 	return (0);
2825 }
2826 
2827 /*
2828  * Set new pid/pgrp for SIGPOLL (or SIGIO for FIOASYNC mode), replacing
2829  * any existing one.  If passed zero, just clear the existing one.
2830  */
2831 int
2832 so_set_siggrp(struct sonode *so, vnode_t *vp, pid_t pgrp, int mode, cred_t *cr)
2833 {
2834 	int events = so->so_state & SS_ASYNC ?
2835 	    S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT :
2836 	    S_RDBAND | S_BANDURG;
2837 	int error;
2838 
2839 	ASSERT(mutex_owned(&so->so_lock));
2840 
2841 	/*
2842 	 * Change socket process (group).
2843 	 *
2844 	 * strioctl (via so_set_asyncsigs) will perform permission check and
2845 	 * also keep a PID_HOLD to prevent the pid from being reused.
2846 	 */
2847 	so_lock_single(so);
2848 	mutex_exit(&so->so_lock);
2849 
2850 	if (pgrp != 0) {
2851 		dprintso(so, 1, ("setown: adding pgrp %d ev 0x%x\n",
2852 		    pgrp, events));
2853 		error = so_set_asyncsigs(vp, pgrp, events, mode, cr);
2854 		if (error != 0) {
2855 			eprintsoline(so, error);
2856 			goto bad;
2857 		}
2858 	}
2859 	/* Remove the previously registered process/group */
2860 	if (so->so_pgrp != 0) {
2861 		dprintso(so, 1, ("setown: removing pgrp %d\n", so->so_pgrp));
2862 		error = so_set_asyncsigs(vp, so->so_pgrp, 0, mode, cr);
2863 		if (error != 0) {
2864 			eprintsoline(so, error);
2865 			error = 0;
2866 		}
2867 	}
2868 	mutex_enter(&so->so_lock);
2869 	so_unlock_single(so, SOLOCKED);
2870 	so->so_pgrp = pgrp;
2871 	return (0);
2872 bad:
2873 	mutex_enter(&so->so_lock);
2874 	so_unlock_single(so, SOLOCKED);
2875 	return (error);
2876 }
2877 
2878 
2879 
/*
 * Best-effort mapping from a TLI(/XTI) error to a system errno.
 *
 * tlitosyserr() indexes this table directly with the TLI error value; the
 * comment in front of each entry gives its array index and the TLI error
 * that slot stands for.
 */
static const int tli_errs[] = {
		/*  0: no error */	0,
		/*  1: TBADADDR */	EADDRNOTAVAIL,
		/*  2: TBADOPT */	ENOPROTOOPT,
		/*  3: TACCES */	EACCES,
		/*  4: TBADF */		EBADF,
		/*  5: TNOADDR */	EADDRNOTAVAIL,
		/*  6: TOUTSTATE */	EPROTO,
		/*  7: TBADSEQ */	ECONNABORTED,
		/*  8: TSYSERR - will never get */
					0,
		/*  9: TLOOK - should never be sent by transport */
					EPROTO,
		/* 10: TBADDATA */	EMSGSIZE,
		/* 11: TBUFOVFLW */	EMSGSIZE,
		/* 12: TFLOW */		EPROTO,
		/* 13: TNODATA */	EWOULDBLOCK,
		/* 14: TNODIS */	EPROTO,
		/* 15: TNOUDERR */	EPROTO,
		/* 16: TBADFLAG */	EINVAL,
		/* 17: TNOREL */	EPROTO,
		/* 18: TNOTSUPPORT */	EOPNOTSUPP,
		/* 19: TSTATECHNG */	EPROTO,

		/* The entries below are the XTI expansion of the namespace */
		/* 20: TNOSTRUCTYPE - never sent by transport */
					EPROTO,
		/* 21: TBADNAME - never sent by transport */
					EPROTO,
		/* 22: TBADQLEN - never sent by transport */
					EPROTO,
		/* 23: TADDRBUSY */	EADDRINUSE,
		/* 24: TINDOUT */	EBADF,
		/* 25: TPROVMISMATCH */	EBADF,
		/* 26: TRESQLEN */	EBADF,
		/* 27: TRESADDR */	EBADF,
		/* 28: TQFULL - never sent by transport */
					EPROTO,
		/* 29: TPROTO */	EPROTO,
};
2916 
2917 static int
2918 tlitosyserr(int terr)
2919 {
2920 	ASSERT(terr != TSYSERR);
2921 	if (terr >= (sizeof (tli_errs) / sizeof (tli_errs[0])))
2922 		return (EPROTO);
2923 	else
2924 		return (tli_errs[terr]);
2925 }
2926 
2927 /*
2928  * Sockfs sodirect STREAMS read put procedure. Called from sodirect enable
2929  * transport driver/module with an mblk_t chain.
2930  *
2931  * Note, we in-line putq() for the fast-path cases of q is empty, q_last and
2932  * bp are of type M_DATA. All other cases we call putq().
2933  *
2934  * On success a zero will be return, else an errno will be returned.
2935  */
2936 int
2937 sodput(sodirect_t *sodp, mblk_t *bp)
2938 {
2939 	queue_t		*q = sodp->sod_q;
2940 	struct stdata	*stp = (struct stdata *)q->q_ptr;
2941 	mblk_t		*nbp;
2942 	int		ret;
2943 	mblk_t		*last = q->q_last;
2944 	int		bytecnt = 0;
2945 	int		mblkcnt = 0;
2946 
2947 
2948 	ASSERT(MUTEX_HELD(sodp->sod_lock));
2949 
2950 	if (stp->sd_flag == STREOF) {
2951 		ret = 0;
2952 		goto error;
2953 	}
2954 
2955 	if (q->q_first == NULL) {
2956 		/* Q empty, really fast fast-path */
2957 		bp->b_prev = NULL;
2958 		bp->b_next = NULL;
2959 		q->q_first = bp;
2960 		q->q_last = bp;
2961 
2962 	} else if (last->b_datap->db_type == M_DATA &&
2963 	    bp->b_datap->db_type == M_DATA) {
2964 		/*
2965 		 * Last mblk_t chain and bp are both type M_DATA so
2966 		 * in-line putq() here, if the DBLK_UIOA state match
2967 		 * add bp to the end of the current last chain, else
2968 		 * start a new last chain with bp.
2969 		 */
2970 		if ((last->b_datap->db_flags & DBLK_UIOA) ==
2971 		    (bp->b_datap->db_flags & DBLK_UIOA)) {
2972 			/* Added to end */
2973 			while ((nbp = last->b_cont) != NULL)
2974 				last = nbp;
2975 			last->b_cont = bp;
2976 		} else {
2977 			/* New last */
2978 			last->b_next = bp;
2979 			bp->b_next = NULL;
2980 			bp->b_prev = last;
2981 			q->q_last = bp;
2982 		}
2983 	} else {
2984 		/*
2985 		 * Can't use q_last so just call putq().
2986 		 */
2987 		(void) putq(q, bp);
2988 		return (0);
2989 	}
2990 
2991 	/* Count bytes and mblk_t's */
2992 	do {
2993 		bytecnt += MBLKL(bp);
2994 		mblkcnt++;
2995 	} while ((bp = bp->b_cont) != NULL);
2996 	q->q_count += bytecnt;
2997 	q->q_mblkcnt += mblkcnt;
2998 
2999 	/* Check for QFULL */
3000 	if (q->q_count >= q->q_hiwat + sodp->sod_want ||
3001 	    q->q_mblkcnt >= q->q_hiwat) {
3002 		q->q_flag |= QFULL;
3003 	}
3004 
3005 	return (0);
3006 
3007 error:
3008 	do {
3009 		if ((nbp = bp->b_next) != NULL)
3010 			bp->b_next = NULL;
3011 		freemsg(bp);
3012 	} while ((bp = nbp) != NULL);
3013 
3014 	return (ret);
3015 }
3016 
3017 /*
3018  * Sockfs sodirect read wakeup. Called from a sodirect enabled transport
3019  * driver/module to indicate that read-side data is available.
3020  *
3021  * On return the sodirect_t.lock mutex will be exited so this must be the
3022  * last sodirect_t call to guarantee atomic access of *sodp.
3023  */
3024 void
3025 sodwakeup(sodirect_t *sodp)
3026 {
3027 	queue_t		*q = sodp->sod_q;
3028 	struct stdata	*stp = (struct stdata *)q->q_ptr;
3029 
3030 	ASSERT(MUTEX_HELD(sodp->sod_lock));
3031 
3032 	if (stp->sd_flag & RSLEEP) {
3033 		stp->sd_flag &= ~RSLEEP;
3034 		cv_broadcast(&q->q_wait);
3035 	}
3036 
3037 	if (stp->sd_rput_opt & SR_POLLIN) {
3038 		stp->sd_rput_opt &= ~SR_POLLIN;
3039 		mutex_exit(sodp->sod_lock);
3040 		pollwakeup(&stp->sd_pollist, POLLIN | POLLRDNORM);
3041 	} else
3042 		mutex_exit(sodp->sod_lock);
3043 }
3044