xref: /titanic_50/usr/src/uts/common/fs/sockfs/sockstr.c (revision c93c462eec9d46f84d567abf52eb29a27c2e134b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/inttypes.h>
29 #include <sys/t_lock.h>
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/buf.h>
33 #include <sys/conf.h>
34 #include <sys/cred.h>
35 #include <sys/kmem.h>
36 #include <sys/sysmacros.h>
37 #include <sys/vfs.h>
38 #include <sys/vnode.h>
39 #include <sys/debug.h>
40 #include <sys/errno.h>
41 #include <sys/time.h>
42 #include <sys/file.h>
43 #include <sys/user.h>
44 #include <sys/stream.h>
45 #include <sys/strsubr.h>
46 #include <sys/esunddi.h>
47 #include <sys/flock.h>
48 #include <sys/modctl.h>
49 #include <sys/vtrace.h>
50 #include <sys/strsun.h>
51 #include <sys/cmn_err.h>
52 #include <sys/proc.h>
53 #include <sys/ddi.h>
54 
55 #include <sys/suntpi.h>
56 #include <sys/socket.h>
57 #include <sys/sockio.h>
58 #include <sys/socketvar.h>
59 #include <sys/sodirect.h>
60 #include <netinet/in.h>
61 #include <inet/common.h>
62 #include <inet/proto_set.h>
63 
64 #include <sys/tiuser.h>
65 #define	_SUN_TPI_VERSION	2
66 #include <sys/tihdr.h>
67 
68 #include <inet/kssl/ksslapi.h>
69 
70 #include <c2/audit.h>
71 
72 #include <fs/sockfs/socktpi.h>
73 #include <fs/sockfs/socktpi_impl.h>
74 #include <sys/dcopy.h>
75 
/*
 * Default socket version. NOTE(review): presumably the value given to
 * so_version for newly created sockets - confirm against the users of this
 * variable, which are outside this part of the file.
 */
int so_default_version = SOV_SOCKSTREAM;

#ifdef DEBUG
/* Set sockdebug to print debug messages when SO_DEBUG is set */
int sockdebug = 0;

/* Set sockprinterr to print error messages when SO_DEBUG is set */
int sockprinterr = 0;

/*
 * Set so_default_options to SO_DEBUG if all sockets should be created
 * with SO_DEBUG set. This is needed to get debug printouts from the
 * socket() call itself.
 * The value is copied into so_options by so_basic_strinit().
 */
int so_default_options = 0;
#endif /* DEBUG */

#ifdef SOCK_TEST
/*
 * Set to number of ticks to limit cv_waits for code coverage testing.
 * Set to 1000 when SO_DEBUG is set to 2.
 * When non-zero, sowaitack() uses it as the time limit for callers that
 * asked for an unbounded wait.
 */
clock_t sock_test_timelimit = 0;
#endif /* SOCK_TEST */

/*
 * For concurrency testing of e.g. opening /dev/ip which does not
 * handle T_INFO_REQ messages.
 * When non-zero, do_tinfo() and do_tcapability() do not request transport
 * info and instead set sti_addr_size to 0.
 */
int so_no_tinfo = 0;

/*
 * Timeout for getting a T_CAPABILITY_ACK - it is possible for a provider
 * to simply ignore the T_CAPABILITY_REQ.
 * do_tcapability() multiplies this by hz to get the wait in ticks.
 */
clock_t	sock_capability_timeout	= 2;	/* seconds */

/* Forward declarations of file-local routines used before their definition. */
static int	do_tcapability(struct sonode *so, t_uscalar_t cap_bits1);
static void	so_removehooks(struct sonode *so);

/*
 * Stream head read-side hooks; so_installhooks() registers them through
 * strsetrputhooks().
 */
static mblk_t *strsock_proto(vnode_t *vp, mblk_t *mp,
		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
		strsigset_t *allmsgsigs, strpollset_t *pollwakeups);
static mblk_t *strsock_misc(vnode_t *vp, mblk_t *mp,
		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
		strsigset_t *allmsgsigs, strpollset_t *pollwakeups);
/*
 * STREAMS based sodirect put/wakeup functions.
 * Both are handed to sod_sock_init() by so_basic_strinit().
 */
static int sodput(sodirect_t *, mblk_t *);
static void sodwakeup(sodirect_t *);
127 
/*
 * Load-time initialization of the STREAMS side of sockfs; sockinit()
 * calls this when the module is loaded.
 * The only work done here is initializing sodirect support via sod_init().
 * Always returns 0.
 */
int
sostr_init()
{
	sod_init();

	return (0);
}
137 
138 /*
139  * Convert a socket to a stream. Invoked when the illusory sockmod
140  * is popped from the stream.
141  * Change the stream head back to default operation without losing
142  * any messages (T_conn_ind's are moved to the stream head queue).
143  */
144 int
145 so_sock2stream(struct sonode *so)
146 {
147 	struct vnode		*vp = SOTOV(so);
148 	queue_t			*rq;
149 	mblk_t			*mp;
150 	int			error = 0;
151 	sotpi_info_t		*sti = SOTOTPI(so);
152 
153 	ASSERT(MUTEX_HELD(&sti->sti_plumb_lock));
154 
155 	mutex_enter(&so->so_lock);
156 	so_lock_single(so);
157 
158 	ASSERT(so->so_version != SOV_STREAM);
159 
160 	if (sti->sti_direct) {
161 		mblk_t **mpp;
162 		int rval;
163 
164 		/*
165 		 * Tell the transport below that sockmod is being popped
166 		 */
167 		mutex_exit(&so->so_lock);
168 		error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K, CRED(),
169 		    &rval);
170 		mutex_enter(&so->so_lock);
171 		if (error != 0) {
172 			dprintso(so, 0, ("so_sock2stream(%p): "
173 			    "_SIOCSOCKFALLBACK failed\n", (void *)so));
174 			goto exit;
175 		}
176 		sti->sti_direct = 0;
177 
178 		for (mpp = &sti->sti_conn_ind_head; (mp = *mpp) != NULL;
179 		    mpp = &mp->b_next) {
180 			struct T_conn_ind	*conn_ind;
181 
182 			/*
183 			 * strsock_proto() has already verified the length of
184 			 * this message block.
185 			 */
186 			ASSERT(MBLKL(mp) >= sizeof (struct T_conn_ind));
187 
188 			conn_ind = (struct T_conn_ind *)mp->b_rptr;
189 			if (conn_ind->OPT_length == 0 &&
190 			    conn_ind->OPT_offset == 0)
191 				continue;
192 
193 			if (DB_REF(mp) > 1) {
194 				mblk_t	*newmp;
195 				size_t	length;
196 				cred_t	*cr;
197 
198 				/*
199 				 * Copy the message block because it is used
200 				 * elsewhere, too.
201 				 */
202 				length = MBLKL(mp);
203 				newmp = soallocproto(length, _ALLOC_INTR);
204 				if (newmp == NULL) {
205 					error = EINTR;
206 					goto exit;
207 				}
208 				bcopy(mp->b_rptr, newmp->b_wptr, length);
209 				newmp->b_wptr += length;
210 				newmp->b_next = mp->b_next;
211 				cr = DB_CRED(mp);
212 				if (cr != NULL)
213 					mblk_setcred(newmp, cr);
214 				DB_CPID(newmp) = DB_CPID(mp);
215 
216 				/*
217 				 * Link the new message block into the queue
218 				 * and free the old one.
219 				 */
220 				*mpp = newmp;
221 				mp->b_next = NULL;
222 				freemsg(mp);
223 
224 				mp = newmp;
225 				conn_ind = (struct T_conn_ind *)mp->b_rptr;
226 			}
227 
228 			/*
229 			 * Remove options added by TCP for accept fast-path.
230 			 */
231 			conn_ind->OPT_length = 0;
232 			conn_ind->OPT_offset = 0;
233 		}
234 	}
235 
236 	so->so_version = SOV_STREAM;
237 	so->so_proto_handle = NULL;
238 
239 	/*
240 	 * Remove the hooks in the stream head to avoid queuing more
241 	 * packets in sockfs.
242 	 */
243 	mutex_exit(&so->so_lock);
244 	so_removehooks(so);
245 	mutex_enter(&so->so_lock);
246 
247 	/*
248 	 * Clear any state related to urgent data. Leave any T_EXDATA_IND
249 	 * on the queue - the behavior of urgent data after a switch is
250 	 * left undefined.
251 	 */
252 	so->so_error = sti->sti_delayed_error = 0;
253 	freemsg(so->so_oobmsg);
254 	so->so_oobmsg = NULL;
255 	sti->sti_oobsigcnt = sti->sti_oobcnt = 0;
256 
257 	so->so_state &= ~(SS_RCVATMARK|SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA|
258 	    SS_SAVEDEOR);
259 	ASSERT(so_verify_oobstate(so));
260 
261 	freemsg(sti->sti_ack_mp);
262 	sti->sti_ack_mp = NULL;
263 
264 	/*
265 	 * Flush the T_DISCON_IND on sti_discon_ind_mp.
266 	 */
267 	so_flush_discon_ind(so);
268 
269 	/*
270 	 * Move any queued T_CONN_IND messages to stream head queue.
271 	 */
272 	rq = RD(strvp2wq(vp));
273 	while ((mp = sti->sti_conn_ind_head) != NULL) {
274 		sti->sti_conn_ind_head = mp->b_next;
275 		mp->b_next = NULL;
276 		if (sti->sti_conn_ind_head == NULL) {
277 			ASSERT(sti->sti_conn_ind_tail == mp);
278 			sti->sti_conn_ind_tail = NULL;
279 		}
280 		dprintso(so, 0,
281 		    ("so_sock2stream(%p): moving T_CONN_IND\n", (void *)so));
282 
283 		/* Drop lock across put() */
284 		mutex_exit(&so->so_lock);
285 		put(rq, mp);
286 		mutex_enter(&so->so_lock);
287 	}
288 
289 exit:
290 	ASSERT(MUTEX_HELD(&so->so_lock));
291 	so_unlock_single(so, SOLOCKED);
292 	mutex_exit(&so->so_lock);
293 	return (error);
294 }
295 
296 /*
297  * Covert a stream back to a socket. This is invoked when the illusory
298  * sockmod is pushed on a stream (where the stream was "created" by
299  * popping the illusory sockmod).
300  * This routine can not recreate the socket state (certain aspects of
301  * it like urgent data state and the bound/connected addresses for AF_UNIX
302  * sockets can not be recreated by asking the transport for information).
303  * Thus this routine implicitly assumes that the socket is in an initial
304  * state (as if it was just created). It flushes any messages queued on the
305  * read queue to avoid dealing with e.g. TPI acks or T_exdata_ind messages.
306  */
307 void
308 so_stream2sock(struct sonode *so)
309 {
310 	struct vnode *vp = SOTOV(so);
311 	sotpi_info_t *sti = SOTOTPI(so);
312 
313 	ASSERT(MUTEX_HELD(&sti->sti_plumb_lock));
314 
315 	mutex_enter(&so->so_lock);
316 	so_lock_single(so);
317 	ASSERT(so->so_version == SOV_STREAM);
318 	so->so_version = SOV_SOCKSTREAM;
319 	sti->sti_pushcnt = 0;
320 	mutex_exit(&so->so_lock);
321 
322 	/*
323 	 * Set a permenent error to force any thread in sorecvmsg to
324 	 * return (and drop SOREADLOCKED). Clear the error once
325 	 * we have SOREADLOCKED.
326 	 * This makes a read sleeping during the I_PUSH of sockmod return
327 	 * EIO.
328 	 */
329 	strsetrerror(SOTOV(so), EIO, 1, NULL);
330 
331 	/*
332 	 * Get the read lock before flushing data to avoid
333 	 * problems with the T_EXDATA_IND MSG_PEEK code in sorecvmsg.
334 	 */
335 	mutex_enter(&so->so_lock);
336 	(void) so_lock_read(so, 0);	/* Set SOREADLOCKED */
337 	mutex_exit(&so->so_lock);
338 
339 	strsetrerror(SOTOV(so), 0, 0, NULL);
340 	so_installhooks(so);
341 
342 	/*
343 	 * Flush everything on the read queue.
344 	 * This ensures that no T_CONN_IND remain and that no T_EXDATA_IND
345 	 * remain; those types of messages would confuse sockfs.
346 	 */
347 	strflushrq(vp, FLUSHALL);
348 	mutex_enter(&so->so_lock);
349 
350 	/*
351 	 * Flush the T_DISCON_IND on sti_discon_ind_mp.
352 	 */
353 	so_flush_discon_ind(so);
354 	so_unlock_read(so);	/* Clear SOREADLOCKED */
355 
356 	so_unlock_single(so, SOLOCKED);
357 	mutex_exit(&so->so_lock);
358 }
359 
360 /*
361  * Install the hooks in the stream head.
362  */
363 void
364 so_installhooks(struct sonode *so)
365 {
366 	struct vnode *vp = SOTOV(so);
367 
368 	strsetrputhooks(vp, SH_SIGALLDATA | SH_IGN_ZEROLEN | SH_CONSOL_DATA,
369 	    strsock_proto, strsock_misc);
370 	strsetwputhooks(vp, SH_SIGPIPE | SH_RECHECK_ERR, 0);
371 }
372 
373 /*
374  * Remove the hooks in the stream head.
375  */
376 static void
377 so_removehooks(struct sonode *so)
378 {
379 	struct vnode *vp = SOTOV(so);
380 
381 	strsetrputhooks(vp, 0, NULL, NULL);
382 	strsetwputhooks(vp, 0, STRTIMOUT);
383 	/*
384 	 * Leave read behavior as it would have been for a normal
385 	 * stream i.e. a read of an M_PROTO will fail.
386 	 */
387 }
388 
389 void
390 so_basic_strinit(struct sonode *so)
391 {
392 	struct vnode *vp = SOTOV(so);
393 	struct stdata *stp;
394 	mblk_t *mp;
395 	sotpi_info_t *sti = SOTOTPI(so);
396 
397 	/* Preallocate an unbind_req message */
398 	mp = soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP);
399 	mutex_enter(&so->so_lock);
400 	sti->sti_unbind_mp = mp;
401 #ifdef DEBUG
402 	so->so_options = so_default_options;
403 #endif /* DEBUG */
404 	mutex_exit(&so->so_lock);
405 
406 	so_installhooks(so);
407 
408 	stp = vp->v_stream;
409 	/*
410 	 * Have to keep minpsz at zero in order to allow write/send of zero
411 	 * bytes.
412 	 */
413 	mutex_enter(&stp->sd_lock);
414 	if (stp->sd_qn_minpsz == 1)
415 		stp->sd_qn_minpsz = 0;
416 	mutex_exit(&stp->sd_lock);
417 
418 	/*
419 	 * If sodirect capable allocate and initialize sodirect_t.
420 	 * Note, SS_SODIRECT is set in socktpi_open().
421 	 */
422 	if ((so->so_state & SS_SODIRECT) &&
423 	    !(so->so_state & SS_FALLBACK_PENDING)) {
424 		sod_sock_init(so, stp, sodput, sodwakeup, &stp->sd_lock);
425 	}
426 }
427 
428 /*
429  * Initialize the streams side of a socket including
430  * T_info_req/ack processing. If tso is not NULL its values are used thereby
431  * avoiding the T_INFO_REQ.
432  */
433 int
434 so_strinit(struct sonode *so, struct sonode *tso)
435 {
436 	sotpi_info_t *sti = SOTOTPI(so);
437 	sotpi_info_t *tsti;
438 	int error;
439 
440 	so_basic_strinit(so);
441 
442 	/*
443 	 * The T_CAPABILITY_REQ should be the first message sent down because
444 	 * at least TCP has a fast-path for this which avoids timeouts while
445 	 * waiting for the T_CAPABILITY_ACK under high system load.
446 	 */
447 	if (tso == NULL) {
448 		error = do_tcapability(so, TC1_ACCEPTOR_ID | TC1_INFO);
449 		if (error)
450 			return (error);
451 	} else {
452 		tsti = SOTOTPI(tso);
453 
454 		mutex_enter(&so->so_lock);
455 		sti->sti_tsdu_size = tsti->sti_tsdu_size;
456 		sti->sti_etsdu_size = tsti->sti_etsdu_size;
457 		sti->sti_addr_size = tsti->sti_addr_size;
458 		sti->sti_opt_size = tsti->sti_opt_size;
459 		sti->sti_tidu_size = tsti->sti_tidu_size;
460 		sti->sti_serv_type = tsti->sti_serv_type;
461 		so->so_mode = tso->so_mode & ~SM_ACCEPTOR_ID;
462 		mutex_exit(&so->so_lock);
463 
464 		/* the following do_tcapability may update so->so_mode */
465 		if ((tsti->sti_serv_type != T_CLTS) &&
466 		    (sti->sti_direct == 0)) {
467 			error = do_tcapability(so, TC1_ACCEPTOR_ID);
468 			if (error)
469 				return (error);
470 		}
471 	}
472 	/*
473 	 * If the addr_size is 0 we treat it as already bound
474 	 * and connected. This is used by the routing socket.
475 	 * We set the addr_size to something to allocate a the address
476 	 * structures.
477 	 */
478 	if (sti->sti_addr_size == 0) {
479 		so->so_state |= SS_ISBOUND | SS_ISCONNECTED;
480 		/* Address size can vary with address families. */
481 		if (so->so_family == AF_INET6)
482 			sti->sti_addr_size =
483 			    (t_scalar_t)sizeof (struct sockaddr_in6);
484 		else
485 			sti->sti_addr_size =
486 			    (t_scalar_t)sizeof (struct sockaddr_in);
487 		ASSERT(sti->sti_unbind_mp);
488 	}
489 
490 	so_alloc_addr(so, sti->sti_addr_size);
491 
492 	return (0);
493 }
494 
495 static void
496 copy_tinfo(struct sonode *so, struct T_info_ack *tia)
497 {
498 	sotpi_info_t *sti = SOTOTPI(so);
499 
500 	sti->sti_tsdu_size = tia->TSDU_size;
501 	sti->sti_etsdu_size = tia->ETSDU_size;
502 	sti->sti_addr_size = tia->ADDR_size;
503 	sti->sti_opt_size = tia->OPT_size;
504 	sti->sti_tidu_size = tia->TIDU_size;
505 	sti->sti_serv_type = tia->SERV_type;
506 	switch (tia->CURRENT_state) {
507 	case TS_UNBND:
508 		break;
509 	case TS_IDLE:
510 		so->so_state |= SS_ISBOUND;
511 		sti->sti_laddr_len = 0;
512 		sti->sti_laddr_valid = 0;
513 		break;
514 	case TS_DATA_XFER:
515 		so->so_state |= SS_ISBOUND|SS_ISCONNECTED;
516 		sti->sti_laddr_len = 0;
517 		sti->sti_faddr_len = 0;
518 		sti->sti_laddr_valid = 0;
519 		sti->sti_faddr_valid = 0;
520 		break;
521 	}
522 
523 	/*
524 	 * Heuristics for determining the socket mode flags
525 	 * (SM_ATOMIC, SM_CONNREQUIRED, SM_ADDR, SM_FDPASSING,
526 	 * and SM_EXDATA, SM_OPTDATA, and SM_BYTESTREAM)
527 	 * from the info ack.
528 	 */
529 	if (sti->sti_serv_type == T_CLTS) {
530 		so->so_mode |= SM_ATOMIC | SM_ADDR;
531 	} else {
532 		so->so_mode |= SM_CONNREQUIRED;
533 		if (sti->sti_etsdu_size != 0 && sti->sti_etsdu_size != -2)
534 			so->so_mode |= SM_EXDATA;
535 	}
536 	if (so->so_type == SOCK_SEQPACKET || so->so_type == SOCK_RAW) {
537 		/* Semantics are to discard tail end of messages */
538 		so->so_mode |= SM_ATOMIC;
539 	}
540 	if (so->so_family == AF_UNIX) {
541 		so->so_mode |= SM_FDPASSING | SM_OPTDATA;
542 		if (sti->sti_addr_size == -1) {
543 			/* MAXPATHLEN + soun_family + nul termination */
544 			sti->sti_addr_size = (t_scalar_t)(MAXPATHLEN +
545 			    sizeof (short) + 1);
546 		}
547 		if (so->so_type == SOCK_STREAM) {
548 			/*
549 			 * Make it into a byte-stream transport.
550 			 * SOCK_SEQPACKET sockets are unchanged.
551 			 */
552 			sti->sti_tsdu_size = 0;
553 		}
554 	} else if (sti->sti_addr_size == -1) {
555 		/*
556 		 * Logic extracted from sockmod - have to pick some max address
557 		 * length in order to preallocate the addresses.
558 		 */
559 		sti->sti_addr_size = SOA_DEFSIZE;
560 	}
561 	if (sti->sti_tsdu_size == 0)
562 		so->so_mode |= SM_BYTESTREAM;
563 }
564 
565 static int
566 check_tinfo(struct sonode *so)
567 {
568 	sotpi_info_t *sti = SOTOTPI(so);
569 
570 	/* Consistency checks */
571 	if (so->so_type == SOCK_DGRAM && sti->sti_serv_type != T_CLTS) {
572 		eprintso(so, ("service type and socket type mismatch\n"));
573 		eprintsoline(so, EPROTO);
574 		return (EPROTO);
575 	}
576 	if (so->so_type == SOCK_STREAM && sti->sti_serv_type == T_CLTS) {
577 		eprintso(so, ("service type and socket type mismatch\n"));
578 		eprintsoline(so, EPROTO);
579 		return (EPROTO);
580 	}
581 	if (so->so_type == SOCK_SEQPACKET && sti->sti_serv_type == T_CLTS) {
582 		eprintso(so, ("service type and socket type mismatch\n"));
583 		eprintsoline(so, EPROTO);
584 		return (EPROTO);
585 	}
586 	if (so->so_family == AF_INET &&
587 	    sti->sti_addr_size != (t_scalar_t)sizeof (struct sockaddr_in)) {
588 		eprintso(so,
589 		    ("AF_INET must have sockaddr_in address length. Got %d\n",
590 		    sti->sti_addr_size));
591 		eprintsoline(so, EMSGSIZE);
592 		return (EMSGSIZE);
593 	}
594 	if (so->so_family == AF_INET6 &&
595 	    sti->sti_addr_size != (t_scalar_t)sizeof (struct sockaddr_in6)) {
596 		eprintso(so,
597 		    ("AF_INET6 must have sockaddr_in6 address length. Got %d\n",
598 		    sti->sti_addr_size));
599 		eprintsoline(so, EMSGSIZE);
600 		return (EMSGSIZE);
601 	}
602 
603 	dprintso(so, 1, (
604 	    "tinfo: serv %d tsdu %d, etsdu %d, addr %d, opt %d, tidu %d\n",
605 	    sti->sti_serv_type, sti->sti_tsdu_size, sti->sti_etsdu_size,
606 	    sti->sti_addr_size, sti->sti_opt_size,
607 	    sti->sti_tidu_size));
608 	dprintso(so, 1, ("tinfo: so_state %s\n",
609 	    pr_state(so->so_state, so->so_mode)));
610 	return (0);
611 }
612 
613 /*
614  * Send down T_info_req and wait for the ack.
615  * Record interesting T_info_ack values in the sonode.
616  */
617 static int
618 do_tinfo(struct sonode *so)
619 {
620 	struct T_info_req tir;
621 	mblk_t *mp;
622 	int error;
623 
624 	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
625 
626 	if (so_no_tinfo) {
627 		SOTOTPI(so)->sti_addr_size = 0;
628 		return (0);
629 	}
630 
631 	dprintso(so, 1, ("do_tinfo(%p)\n", (void *)so));
632 
633 	/* Send T_INFO_REQ */
634 	tir.PRIM_type = T_INFO_REQ;
635 	mp = soallocproto1(&tir, sizeof (tir),
636 	    sizeof (struct T_info_req) + sizeof (struct T_info_ack),
637 	    _ALLOC_INTR);
638 	if (mp == NULL) {
639 		eprintsoline(so, ENOBUFS);
640 		return (ENOBUFS);
641 	}
642 	/* T_INFO_REQ has to be M_PCPROTO */
643 	DB_TYPE(mp) = M_PCPROTO;
644 
645 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
646 	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
647 	if (error) {
648 		eprintsoline(so, error);
649 		return (error);
650 	}
651 	mutex_enter(&so->so_lock);
652 	/* Wait for T_INFO_ACK */
653 	if ((error = sowaitprim(so, T_INFO_REQ, T_INFO_ACK,
654 	    (t_uscalar_t)sizeof (struct T_info_ack), &mp, 0))) {
655 		mutex_exit(&so->so_lock);
656 		eprintsoline(so, error);
657 		return (error);
658 	}
659 
660 	ASSERT(mp);
661 	copy_tinfo(so, (struct T_info_ack *)mp->b_rptr);
662 	mutex_exit(&so->so_lock);
663 	freemsg(mp);
664 	return (check_tinfo(so));
665 }
666 
667 /*
668  * Send down T_capability_req and wait for the ack.
669  * Record interesting T_capability_ack values in the sonode.
670  */
671 static int
672 do_tcapability(struct sonode *so, t_uscalar_t cap_bits1)
673 {
674 	struct T_capability_req tcr;
675 	struct T_capability_ack *tca;
676 	mblk_t *mp;
677 	int error;
678 	sotpi_info_t *sti = SOTOTPI(so);
679 
680 	ASSERT(cap_bits1 != 0);
681 	ASSERT((cap_bits1 & ~(TC1_ACCEPTOR_ID | TC1_INFO)) == 0);
682 	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
683 
684 	if (sti->sti_provinfo->tpi_capability == PI_NO)
685 		return (do_tinfo(so));
686 
687 	if (so_no_tinfo) {
688 		sti->sti_addr_size = 0;
689 		if ((cap_bits1 &= ~TC1_INFO) == 0)
690 			return (0);
691 	}
692 
693 	dprintso(so, 1, ("do_tcapability(%p)\n", (void *)so));
694 
695 	/* Send T_CAPABILITY_REQ */
696 	tcr.PRIM_type = T_CAPABILITY_REQ;
697 	tcr.CAP_bits1 = cap_bits1;
698 	mp = soallocproto1(&tcr, sizeof (tcr),
699 	    sizeof (struct T_capability_req) + sizeof (struct T_capability_ack),
700 	    _ALLOC_INTR);
701 	if (mp == NULL) {
702 		eprintsoline(so, ENOBUFS);
703 		return (ENOBUFS);
704 	}
705 	/* T_CAPABILITY_REQ should be M_PCPROTO here */
706 	DB_TYPE(mp) = M_PCPROTO;
707 
708 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
709 	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
710 	if (error) {
711 		eprintsoline(so, error);
712 		return (error);
713 	}
714 	mutex_enter(&so->so_lock);
715 	/* Wait for T_CAPABILITY_ACK */
716 	if ((error = sowaitprim(so, T_CAPABILITY_REQ, T_CAPABILITY_ACK,
717 	    (t_uscalar_t)sizeof (*tca), &mp, sock_capability_timeout * hz))) {
718 		mutex_exit(&so->so_lock);
719 		PI_PROVLOCK(sti->sti_provinfo);
720 		if (sti->sti_provinfo->tpi_capability == PI_DONTKNOW)
721 			sti->sti_provinfo->tpi_capability = PI_NO;
722 		PI_PROVUNLOCK(sti->sti_provinfo);
723 		ASSERT((so->so_mode & SM_ACCEPTOR_ID) == 0);
724 		if (cap_bits1 & TC1_INFO) {
725 			/*
726 			 * If the T_CAPABILITY_REQ timed out and then a
727 			 * T_INFO_REQ gets a protocol error, most likely
728 			 * the capability was slow (vs. unsupported). Return
729 			 * ENOSR for this case as a best guess.
730 			 */
731 			if (error == ETIME) {
732 				return ((error = do_tinfo(so)) == EPROTO ?
733 				    ENOSR : error);
734 			}
735 			return (do_tinfo(so));
736 		}
737 		return (0);
738 	}
739 
740 	ASSERT(mp);
741 	tca = (struct T_capability_ack *)mp->b_rptr;
742 
743 	ASSERT((cap_bits1 & TC1_INFO) == (tca->CAP_bits1 & TC1_INFO));
744 	so_proc_tcapability_ack(so, tca);
745 
746 	cap_bits1 = tca->CAP_bits1;
747 
748 	mutex_exit(&so->so_lock);
749 	freemsg(mp);
750 
751 	if (cap_bits1 & TC1_INFO)
752 		return (check_tinfo(so));
753 
754 	return (0);
755 }
756 
757 /*
758  * Process a T_CAPABILITY_ACK
759  */
760 void
761 so_proc_tcapability_ack(struct sonode *so, struct T_capability_ack *tca)
762 {
763 	sotpi_info_t *sti = SOTOTPI(so);
764 
765 	if (sti->sti_provinfo->tpi_capability == PI_DONTKNOW) {
766 		PI_PROVLOCK(sti->sti_provinfo);
767 		sti->sti_provinfo->tpi_capability = PI_YES;
768 		PI_PROVUNLOCK(sti->sti_provinfo);
769 	}
770 
771 	if (tca->CAP_bits1 & TC1_ACCEPTOR_ID) {
772 		sti->sti_acceptor_id = tca->ACCEPTOR_id;
773 		so->so_mode |= SM_ACCEPTOR_ID;
774 	}
775 
776 	if (tca->CAP_bits1 & TC1_INFO)
777 		copy_tinfo(so, &tca->INFO_ack);
778 }
779 
780 /*
781  * Retrieve socket error, clear error if not peek.
782  */
783 int
784 sogeterr(struct sonode *so, boolean_t clear_err)
785 {
786 	int error;
787 
788 	ASSERT(MUTEX_HELD(&so->so_lock));
789 
790 	error = so->so_error;
791 	if (clear_err)
792 		so->so_error = 0;
793 
794 	return (error);
795 }
796 
797 /*
798  * This routine is registered with the stream head to retrieve read
799  * side errors.
800  * It does not clear the socket error for a peeking read side operation.
801  * It the error is to be cleared it sets *clearerr.
802  */
803 int
804 sogetrderr(vnode_t *vp, int ispeek, int *clearerr)
805 {
806 	struct sonode *so = VTOSO(vp);
807 	int error;
808 
809 	mutex_enter(&so->so_lock);
810 	if (ispeek) {
811 		error = so->so_error;
812 		*clearerr = 0;
813 	} else {
814 		error = so->so_error;
815 		so->so_error = 0;
816 		*clearerr = 1;
817 	}
818 	mutex_exit(&so->so_lock);
819 	return (error);
820 }
821 
822 /*
823  * This routine is registered with the stream head to retrieve write
824  * side errors.
825  * It does not clear the socket error for a peeking read side operation.
826  * It the error is to be cleared it sets *clearerr.
827  */
828 int
829 sogetwrerr(vnode_t *vp, int ispeek, int *clearerr)
830 {
831 	struct sonode *so = VTOSO(vp);
832 	int error;
833 
834 	mutex_enter(&so->so_lock);
835 	if (so->so_state & SS_CANTSENDMORE) {
836 		error = EPIPE;
837 		*clearerr = 0;
838 	} else {
839 		error = so->so_error;
840 		if (ispeek) {
841 			*clearerr = 0;
842 		} else {
843 			so->so_error = 0;
844 			*clearerr = 1;
845 		}
846 	}
847 	mutex_exit(&so->so_lock);
848 	return (error);
849 }
850 
851 /*
852  * Set a nonpersistent read and write error on the socket.
853  * Used when there is a T_uderror_ind for a connected socket.
854  * The caller also needs to call strsetrerror and strsetwerror
855  * after dropping the lock.
856  */
857 void
858 soseterror(struct sonode *so, int error)
859 {
860 	ASSERT(error != 0);
861 
862 	ASSERT(MUTEX_HELD(&so->so_lock));
863 	so->so_error = (ushort_t)error;
864 }
865 
866 void
867 soisconnecting(struct sonode *so)
868 {
869 	ASSERT(MUTEX_HELD(&so->so_lock));
870 	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
871 	so->so_state |= SS_ISCONNECTING;
872 	cv_broadcast(&so->so_state_cv);
873 }
874 
875 void
876 soisconnected(struct sonode *so)
877 {
878 	ASSERT(MUTEX_HELD(&so->so_lock));
879 	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING);
880 	so->so_state |= SS_ISCONNECTED;
881 	cv_broadcast(&so->so_state_cv);
882 }
883 
884 /*
885  * The caller also needs to call strsetrerror, strsetwerror and strseteof.
886  */
887 void
888 soisdisconnected(struct sonode *so, int error)
889 {
890 	ASSERT(MUTEX_HELD(&so->so_lock));
891 	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
892 	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE);
893 	so->so_error = (ushort_t)error;
894 	if (so->so_peercred != NULL) {
895 		crfree(so->so_peercred);
896 		so->so_peercred = NULL;
897 	}
898 	cv_broadcast(&so->so_state_cv);
899 }
900 
901 /*
902  * For connected AF_UNIX SOCK_DGRAM sockets when the peer closes.
903  * Does not affect write side.
904  * The caller also has to call strsetrerror.
905  */
906 static void
907 sobreakconn(struct sonode *so, int error)
908 {
909 	ASSERT(MUTEX_HELD(&so->so_lock));
910 	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
911 	so->so_error = (ushort_t)error;
912 	cv_broadcast(&so->so_state_cv);
913 }
914 
915 /*
916  * Can no longer send.
917  * Caller must also call strsetwerror.
918  *
919  * We mark the peer address as no longer valid for getpeername, but
920  * leave it around for so_unix_close to notify the peer (that
921  * transport has no addressing held at that layer).
922  */
923 void
924 socantsendmore(struct sonode *so)
925 {
926 	ASSERT(MUTEX_HELD(&so->so_lock));
927 	so->so_state |= SS_CANTSENDMORE;
928 	cv_broadcast(&so->so_state_cv);
929 }
930 
931 /*
932  * The caller must call strseteof(,1) as well as this routine
933  * to change the socket state.
934  */
935 void
936 socantrcvmore(struct sonode *so)
937 {
938 	ASSERT(MUTEX_HELD(&so->so_lock));
939 	so->so_state |= SS_CANTRCVMORE;
940 	cv_broadcast(&so->so_state_cv);
941 }
942 
943 /*
944  * The caller has sent down a "request_prim" primitive and wants to wait for
945  * an ack ("ack_prim") or an T_ERROR_ACK for it.
946  * The specified "ack_prim" can be a T_OK_ACK.
947  *
948  * Assumes that all the TPI acks are M_PCPROTO messages.
949  *
950  * Note that the socket is single-threaded (using so_lock_single)
951  * for all operations that generate TPI ack messages. Since
952  * only TPI ack messages are M_PCPROTO we should never receive
953  * anything except either the ack we are expecting or a T_ERROR_ACK
954  * for the same primitive.
955  */
956 int
957 sowaitprim(struct sonode *so, t_scalar_t request_prim, t_scalar_t ack_prim,
958 	    t_uscalar_t min_size, mblk_t **mpp, clock_t wait)
959 {
960 	mblk_t *mp;
961 	union T_primitives *tpr;
962 	int error;
963 
964 	dprintso(so, 1, ("sowaitprim(%p, %d, %d, %d, %p, %lu)\n",
965 	    (void *)so, request_prim, ack_prim, min_size, (void *)mpp, wait));
966 
967 	ASSERT(MUTEX_HELD(&so->so_lock));
968 
969 	error = sowaitack(so, &mp, wait);
970 	if (error)
971 		return (error);
972 
973 	dprintso(so, 1, ("got msg %p\n", (void *)mp));
974 	if (DB_TYPE(mp) != M_PCPROTO ||
975 	    MBLKL(mp) < sizeof (tpr->type)) {
976 		freemsg(mp);
977 		eprintsoline(so, EPROTO);
978 		return (EPROTO);
979 	}
980 	tpr = (union T_primitives *)mp->b_rptr;
981 	/*
982 	 * Did we get the primitive that we were asking for?
983 	 * For T_OK_ACK we also check that it matches the request primitive.
984 	 */
985 	if (tpr->type == ack_prim &&
986 	    (ack_prim != T_OK_ACK ||
987 	    tpr->ok_ack.CORRECT_prim == request_prim)) {
988 		if (MBLKL(mp) >= (ssize_t)min_size) {
989 			/* Found what we are looking for */
990 			*mpp = mp;
991 			return (0);
992 		}
993 		/* Too short */
994 		freemsg(mp);
995 		eprintsoline(so, EPROTO);
996 		return (EPROTO);
997 	}
998 
999 	if (tpr->type == T_ERROR_ACK &&
1000 	    tpr->error_ack.ERROR_prim == request_prim) {
1001 		/* Error to the primitive we were looking for */
1002 		if (tpr->error_ack.TLI_error == TSYSERR) {
1003 			error = tpr->error_ack.UNIX_error;
1004 		} else {
1005 			error = proto_tlitosyserr(tpr->error_ack.TLI_error);
1006 		}
1007 		dprintso(so, 0, ("error_ack for %d: %d/%d ->%d\n",
1008 		    tpr->error_ack.ERROR_prim, tpr->error_ack.TLI_error,
1009 		    tpr->error_ack.UNIX_error, error));
1010 		freemsg(mp);
1011 		return (error);
1012 	}
1013 	/*
1014 	 * Wrong primitive or T_ERROR_ACK for the wrong primitive
1015 	 */
1016 #ifdef DEBUG
1017 	if (tpr->type == T_ERROR_ACK) {
1018 		dprintso(so, 0, ("error_ack for %d: %d/%d\n",
1019 		    tpr->error_ack.ERROR_prim, tpr->error_ack.TLI_error,
1020 		    tpr->error_ack.UNIX_error));
1021 	} else if (tpr->type == T_OK_ACK) {
1022 		dprintso(so, 0, ("ok_ack for %d, expected %d for %d\n",
1023 		    tpr->ok_ack.CORRECT_prim, ack_prim, request_prim));
1024 	} else {
1025 		dprintso(so, 0,
1026 		    ("unexpected primitive %d, expected %d for %d\n",
1027 		    tpr->type, ack_prim, request_prim));
1028 	}
1029 #endif /* DEBUG */
1030 
1031 	freemsg(mp);
1032 	eprintsoline(so, EPROTO);
1033 	return (EPROTO);
1034 }
1035 
1036 /*
1037  * Wait for a T_OK_ACK for the specified primitive.
1038  */
1039 int
1040 sowaitokack(struct sonode *so, t_scalar_t request_prim)
1041 {
1042 	mblk_t *mp;
1043 	int error;
1044 
1045 	error = sowaitprim(so, request_prim, T_OK_ACK,
1046 	    (t_uscalar_t)sizeof (struct T_ok_ack), &mp, 0);
1047 	if (error)
1048 		return (error);
1049 	freemsg(mp);
1050 	return (0);
1051 }
1052 
1053 /*
1054  * Queue a received TPI ack message on sti_ack_mp.
1055  */
1056 void
1057 soqueueack(struct sonode *so, mblk_t *mp)
1058 {
1059 	sotpi_info_t *sti = SOTOTPI(so);
1060 
1061 	if (DB_TYPE(mp) != M_PCPROTO) {
1062 		zcmn_err(getzoneid(), CE_WARN,
1063 		    "sockfs: received unexpected M_PROTO TPI ack. Prim %d\n",
1064 		    *(t_scalar_t *)mp->b_rptr);
1065 		freemsg(mp);
1066 		return;
1067 	}
1068 
1069 	mutex_enter(&so->so_lock);
1070 	if (sti->sti_ack_mp != NULL) {
1071 		dprintso(so, 1, ("sti_ack_mp already set\n"));
1072 		freemsg(sti->sti_ack_mp);
1073 		sti->sti_ack_mp = NULL;
1074 	}
1075 	sti->sti_ack_mp = mp;
1076 	cv_broadcast(&sti->sti_ack_cv);
1077 	mutex_exit(&so->so_lock);
1078 }
1079 
1080 /*
1081  * Wait for a TPI ack ignoring signals and errors.
      *
      * The caller must hold so_lock (asserted).  Sleeps on sti_ack_cv until
      * sti_ack_mp is non-NULL, then hands that message back through *mpp
      * and clears sti_ack_mp.
      *
      * wait: 0 means sleep without a time limit (cv_wait).  A non-zero
      * value is passed to time_to_wait() to build the deadline given to
      * cv_timedwait().
      * NOTE(review): the deadline is recomputed on every pass of the loop,
      * so a wakeup that finds sti_ack_mp still NULL restarts the full wait.
      *
      * Returns 0 with *mpp set, or ETIME (leaving *mpp untouched) when
      * cv_timedwait() returns -1.
1082  */
1083 int
1084 sowaitack(struct sonode *so, mblk_t **mpp, clock_t wait)
1085 {
1086 	sotpi_info_t *sti = SOTOTPI(so);
1087 
1088 	ASSERT(MUTEX_HELD(&so->so_lock));
1089 
1090 	while (sti->sti_ack_mp == NULL) {
1091 #ifdef SOCK_TEST
     		/* Test builds: impose sock_test_timelimit on unbounded waits. */
1092 		if (wait == 0 && sock_test_timelimit != 0)
1093 			wait = sock_test_timelimit;
1094 #endif
1095 		if (wait != 0) {
1096 			/*
1097 			 * Only wait for the time limit.
1098 			 */
1099 			clock_t now;
1100 
1101 			time_to_wait(&now, wait);
1102 			if (cv_timedwait(&sti->sti_ack_cv, &so->so_lock,
1103 			    now) == -1) {
1104 				eprintsoline(so, ETIME);
1105 				return (ETIME);
1106 			}
1107 		}
1108 		else
1109 			cv_wait(&sti->sti_ack_cv, &so->so_lock);
1110 	}
1111 	*mpp = sti->sti_ack_mp;
1112 #ifdef DEBUG
     	/* Sanity-check that what was queued is one of the known ack types. */
1113 	{
1114 		union T_primitives *tpr;
1115 		mblk_t *mp = *mpp;
1116 
1117 		tpr = (union T_primitives *)mp->b_rptr;
1118 		ASSERT(DB_TYPE(mp) == M_PCPROTO);
1119 		ASSERT(tpr->type == T_OK_ACK ||
1120 		    tpr->type == T_ERROR_ACK ||
1121 		    tpr->type == T_BIND_ACK ||
1122 		    tpr->type == T_CAPABILITY_ACK ||
1123 		    tpr->type == T_INFO_ACK ||
1124 		    tpr->type == T_OPTMGMT_ACK);
1125 	}
1126 #endif /* DEBUG */
     	/* The message now belongs to the caller. */
1127 	sti->sti_ack_mp = NULL;
1128 	return (0);
1129 }
1130 
1131 /*
1132  * Queue a received T_CONN_IND message on sti_conn_ind_head/tail.
      *
      * Only M_PROTO messages are accepted; a message of any other type is
      * reported with zcmn_err() and freed.  Otherwise so_lock is taken here,
      * mp is appended to the tail of the list (linked through b_next) and
      * one waiter on so_acceptq_cv is signalled.
      *
      * mp is always consumed: it is either freed or linked onto the list.
1133  */
1134 void
1135 soqueueconnind(struct sonode *so, mblk_t *mp)
1136 {
1137 	sotpi_info_t *sti = SOTOTPI(so);
1138 
1139 	if (DB_TYPE(mp) != M_PROTO) {
1140 		zcmn_err(getzoneid(), CE_WARN,
1141 		    "sockfs: received unexpected M_PCPROTO T_CONN_IND\n");
1142 		freemsg(mp);
1143 		return;
1144 	}
1145 
1146 	mutex_enter(&so->so_lock);
1147 	ASSERT(mp->b_next == NULL);
1148 	if (sti->sti_conn_ind_head == NULL) {
     		/* Empty list: mp becomes both head and (below) tail. */
1149 		sti->sti_conn_ind_head = mp;
1150 	} else {
1151 		ASSERT(sti->sti_conn_ind_tail->b_next == NULL);
1152 		sti->sti_conn_ind_tail->b_next = mp;
1153 	}
1154 	sti->sti_conn_ind_tail = mp;
1155 	/* Wakeup a single consumer of the T_CONN_IND */
1156 	cv_signal(&so->so_acceptq_cv);
1157 	mutex_exit(&so->so_lock);
1158 }
1159 
1160 /*
1161  * Wait for a T_CONN_IND.
1162  * Don't wait if nonblocking.
1163  * Accept signals and socket errors.
      *
      * Must be called without so_lock held (asserted); the lock is taken
      * and released here.  On success the message at the head of the
      * sti_conn_ind list is unlinked and returned through *mpp.
      *
      * Returns:
      *	0		*mpp holds the dequeued T_CONN_IND
      *	EWOULDBLOCK	list empty and fmode has FNDELAY or FNONBLOCK
      *	EINTR		list empty and SS_CLOSING is set, or the sleep
      *			in cv_wait_sig_swap() was interrupted
      *	other		non-zero value returned by sogeterr()
      * *mpp is written only when 0 is returned.
1164  */
1165 int
1166 sowaitconnind(struct sonode *so, int fmode, mblk_t **mpp)
1167 {
1168 	mblk_t *mp;
1169 	sotpi_info_t *sti = SOTOTPI(so);
1170 	int error = 0;
1171 
1172 	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
1173 	mutex_enter(&so->so_lock);
     	/* Re-entered after every wakeup so a pending so_error is seen first. */
1174 check_error:
1175 	if (so->so_error) {
1176 		error = sogeterr(so, B_TRUE);
1177 		if (error) {
1178 			mutex_exit(&so->so_lock);
1179 			return (error);
1180 		}
1181 	}
1182 
1183 	if (sti->sti_conn_ind_head == NULL) {
1184 		if (fmode & (FNDELAY|FNONBLOCK)) {
1185 			error = EWOULDBLOCK;
1186 			goto done;
1187 		}
1188 
1189 		if (so->so_state & SS_CLOSING) {
1190 			error = EINTR;
1191 			goto done;
1192 		}
1193 
1194 		if (!cv_wait_sig_swap(&so->so_acceptq_cv, &so->so_lock)) {
1195 			error = EINTR;
1196 			goto done;
1197 		}
1198 		goto check_error;
1199 	}
     	/* Unlink the head; reset the tail when the list becomes empty. */
1200 	mp = sti->sti_conn_ind_head;
1201 	sti->sti_conn_ind_head = mp->b_next;
1202 	mp->b_next = NULL;
1203 	if (sti->sti_conn_ind_head == NULL) {
1204 		ASSERT(sti->sti_conn_ind_tail == mp);
1205 		sti->sti_conn_ind_tail = NULL;
1206 	}
1207 	*mpp = mp;
1208 done:
1209 	mutex_exit(&so->so_lock);
1210 	return (error);
1211 }
1212 
1213 /*
1214  * Flush a T_CONN_IND matching the sequence number from the list.
1215  * Return zero if found; non-zero otherwise.
1216  * This is called very infrequently thus it is ok to do a linear search.
      *
      * so_lock is taken and released here.  When a queued T_CONN_IND whose
      * SEQ_number equals seqno is found, it is unlinked from the
      * sti_conn_ind list, so_error is set to ECONNABORTED, and the message
      * is freed after the lock has been dropped.  Returns -1 when no
      * queued message matches.
1217  */
1218 int
1219 soflushconnind(struct sonode *so, t_scalar_t seqno)
1220 {
1221 	mblk_t *prevmp, *mp;
1222 	struct T_conn_ind *tci;
1223 	sotpi_info_t *sti = SOTOTPI(so);
1224 
1225 	mutex_enter(&so->so_lock);
     	/* prevmp trails mp so the match can be unlinked from the list. */
1226 	for (prevmp = NULL, mp = sti->sti_conn_ind_head; mp != NULL;
1227 	    prevmp = mp, mp = mp->b_next) {
1228 		tci = (struct T_conn_ind *)mp->b_rptr;
1229 		if (tci->SEQ_number == seqno) {
1230 			dprintso(so, 1,
1231 			    ("t_discon_ind: found T_CONN_IND %d\n", seqno));
1232 			/* Deleting last? */
1233 			if (sti->sti_conn_ind_tail == mp) {
1234 				sti->sti_conn_ind_tail = prevmp;
1235 			}
1236 			if (prevmp == NULL) {
1237 				/* Deleting first */
1238 				sti->sti_conn_ind_head = mp->b_next;
1239 			} else {
1240 				prevmp->b_next = mp->b_next;
1241 			}
1242 			mp->b_next = NULL;
1243 
     			/* Head and tail must be both NULL or both set. */
1244 			ASSERT((sti->sti_conn_ind_head == NULL &&
1245 			    sti->sti_conn_ind_tail == NULL) ||
1246 			    (sti->sti_conn_ind_head != NULL &&
1247 			    sti->sti_conn_ind_tail != NULL));
1248 
1249 			so->so_error = ECONNABORTED;
1250 			mutex_exit(&so->so_lock);
1251 
     			/*
     			 * mp is off the list now, so it (and tci, which
     			 * points into it) is used below without so_lock.
     			 */
1252 			/*
1253 			 * T_SSL_PROXY_CONN_IND may carry a handle for
1254 			 * an SSL context, and needs to be released.
1255 			 */
1256 			if ((tci->PRIM_type == T_SSL_PROXY_CONN_IND) &&
1257 			    (mp->b_cont != NULL)) {
1258 				kssl_ctx_t kssl_ctx;
1259 
1260 				ASSERT(MBLKL(mp->b_cont) ==
1261 				    sizeof (kssl_ctx_t));
1262 				kssl_ctx = *((kssl_ctx_t *)mp->b_cont->b_rptr);
1263 				kssl_release_ctx(kssl_ctx);
1264 			}
1265 			freemsg(mp);
1266 			return (0);
1267 		}
1268 	}
1269 	mutex_exit(&so->so_lock);
1270 	dprintso(so, 1,	("t_discon_ind: NOT found T_CONN_IND %d\n", seqno));
1271 	return (-1);
1272 }
1273 
1274 /*
1275  * Wait until the socket is connected or there is an error.
1276  * fmode should contain any nonblocking flags. nosig should be
1277  * set if the caller does not want the wait to be interrupted by a signal.
      *
      * The caller must hold so_lock (asserted).  The wait lasts while the
      * socket is SS_ISCONNECTING but not SS_ISCONNECTED and so_error is 0.
      *
      * Returns:
      *	0		SS_ISCONNECTED is set and no error is pending
      *	EINPROGRESS	still connecting and fmode has FNDELAY or FNONBLOCK
      *	EINTR		still connecting and SS_CLOSING is set, or (nosig
      *			clear) the sleep was interrupted
      *	ECONNREFUSED	the wait ended with neither an error nor
      *			SS_ISCONNECTED
      *	other		value returned by sogeterr() for a pending so_error
1278  */
1279 int
1280 sowaitconnected(struct sonode *so, int fmode, int nosig)
1281 {
1282 	int error;
1283 
1284 	ASSERT(MUTEX_HELD(&so->so_lock));
1285 
1286 	while ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) ==
1287 	    SS_ISCONNECTING && so->so_error == 0) {
1288 
1289 		dprintso(so, 1, ("waiting for SS_ISCONNECTED on %p\n",
1290 		    (void *)so));
1291 		if (fmode & (FNDELAY|FNONBLOCK))
1292 			return (EINPROGRESS);
1293 
1294 		if (so->so_state & SS_CLOSING)
1295 			return (EINTR);
1296 
1297 		if (nosig)
1298 			cv_wait(&so->so_state_cv, &so->so_lock);
1299 		else if (!cv_wait_sig_swap(&so->so_state_cv, &so->so_lock)) {
1300 			/*
1301 			 * Return EINTR and let the application use
1302 			 * nonblocking techniques for detecting when
1303 			 * the connection has been established.
1304 			 */
1305 			return (EINTR);
1306 		}
1307 		dprintso(so, 1, ("awoken on %p\n", (void *)so));
1308 	}
1309 
1310 	if (so->so_error != 0) {
1311 		error = sogeterr(so, B_TRUE);
1312 		ASSERT(error != 0);
1313 		dprintso(so, 1, ("sowaitconnected: error %d\n", error));
1314 		return (error);
1315 	}
1316 	if (!(so->so_state & SS_ISCONNECTED)) {
1317 		/*
1318 		 * Could have received a T_ORDREL_IND or a T_DISCON_IND with
1319 		 * zero errno. Or another thread could have consumed so_error
1320 		 * e.g. by calling read.
1321 		 */
1322 		error = ECONNREFUSED;
1323 		dprintso(so, 1, ("sowaitconnected: error %d\n", error));
1324 		return (error);
1325 	}
1326 	return (0);
1327 }
1328 
1329 
1330 /*
1331  * Handle the signal generation aspect of urgent data.
      *
      * The caller must hold so_lock (asserted).  *signals and *pollwakeups
      * are only ever OR-ed into, never cleared.
      *
      * If sti_oobsigcnt is already ahead of sti_oobcnt, nothing is counted
      * and S_RDBAND/POLLRDBAND are added only when extrasig is non-zero.
      * Otherwise sti_oobsigcnt is incremented, SS_OOBPEND is set, the
      * SS_HAVEOOBDATA and SS_HADOOBDATA flags are cleared, any message on
      * so_oobmsg is freed, and S_RDBAND/POLLRDBAND are added.
1332  */
1333 static void
1334 so_oob_sig(struct sonode *so, int extrasig,
1335     strsigset_t *signals, strpollset_t *pollwakeups)
1336 {
1337 	sotpi_info_t *sti = SOTOTPI(so);
1338 
1339 	ASSERT(MUTEX_HELD(&so->so_lock));
1340 
1341 	ASSERT(so_verify_oobstate(so));
1342 	ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt);
1343 	if (sti->sti_oobsigcnt > sti->sti_oobcnt) {
1344 		/*
1345 		 * Signal has already been generated once for this
1346 		 * urgent "event". However, since TCP can receive updated
1347 		 * urgent pointers we still generate a signal.
     		 * (The signal is added only when the caller passed a
     		 * non-zero extrasig.)
1348 		 */
1349 		ASSERT(so->so_state & SS_OOBPEND);
1350 		if (extrasig) {
1351 			*signals |= S_RDBAND;
1352 			*pollwakeups |= POLLRDBAND;
1353 		}
1354 		return;
1355 	}
1356 
1357 	sti->sti_oobsigcnt++;
1358 	ASSERT(sti->sti_oobsigcnt > 0);	/* Wraparound */
1359 	ASSERT(sti->sti_oobsigcnt > sti->sti_oobcnt);
1360 
1361 	/*
1362 	 * Record (for select/poll) that urgent data is pending.
1363 	 */
1364 	so->so_state |= SS_OOBPEND;
1365 	/*
1366 	 * New urgent data on the way so forget about any old
1367 	 * urgent data.
1368 	 */
1369 	so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);
1370 	if (so->so_oobmsg != NULL) {
1371 		dprintso(so, 1, ("sock: discarding old oob\n"));
1372 		freemsg(so->so_oobmsg);
1373 		so->so_oobmsg = NULL;
1374 	}
1375 	*signals |= S_RDBAND;
1376 	*pollwakeups |= POLLRDBAND;
1377 	ASSERT(so_verify_oobstate(so));
1378 }
1379 
1380 /*
1381  * Handle the processing of the T_EXDATA_IND with urgent data.
1382  * Returns the T_EXDATA_IND if it should be queued on the read queue.
      *
      * The caller must hold so_lock (asserted).  Increments sti_oobcnt and
      * sets MSGMARK on mp.  As written, mp is always returned; the signals
      * and pollwakeups arguments are not used (hence ARGSUSED2).
1383  */
1384 /* ARGSUSED2 */
1385 static mblk_t *
1386 so_oob_exdata(struct sonode *so, mblk_t *mp,
1387 	strsigset_t *signals, strpollset_t *pollwakeups)
1388 {
1389 	sotpi_info_t *sti = SOTOTPI(so);
1390 
1391 	ASSERT(MUTEX_HELD(&so->so_lock));
1392 
1393 	ASSERT(so_verify_oobstate(so));
1394 
     	/* The signal for this urgent data must already have been counted. */
1395 	ASSERT(sti->sti_oobsigcnt > sti->sti_oobcnt);
1396 
1397 	sti->sti_oobcnt++;
1398 	ASSERT(sti->sti_oobcnt > 0);	/* wraparound? */
1399 	ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt);
1400 
1401 	/*
1402 	 * Set MSGMARK for SIOCATMARK.
1403 	 */
1404 	mp->b_flag |= MSGMARK;
1405 
1406 	ASSERT(so_verify_oobstate(so));
1407 	return (mp);
1408 }
1409 
1410 /*
1411  * Handle the processing of the actual urgent data.
1412  * Returns the data mblk if it should be queued on the read queue.
      *
      * The caller must hold so_lock (asserted) and so_oobmsg must be NULL
      * on entry (asserted).
      *
      * With SO_OOBINLINE set, mp is returned unchanged and normal-read
      * wakeups/signals are added to *pollwakeups and *signals.  Without it,
      * mp is stored in so_oobmsg, SS_HAVEOOBDATA is set, POLLRDBAND is
      * added to *pollwakeups, and NULL is returned.
1413  */
1414 static mblk_t *
1415 so_oob_data(struct sonode *so, mblk_t *mp,
1416 	strsigset_t *signals, strpollset_t *pollwakeups)
1417 {
1418 	sotpi_info_t *sti = SOTOTPI(so);
1419 
1420 	ASSERT(MUTEX_HELD(&so->so_lock));
1421 
1422 	ASSERT(so_verify_oobstate(so));
1423 
1424 	ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt);
1425 	ASSERT(mp != NULL);
1426 	/*
1427 	 * For OOBINLINE we keep the data in the T_EXDATA_IND.
1428 	 * Otherwise we store it in so_oobmsg.
1429 	 */
1430 	ASSERT(so->so_oobmsg == NULL);
1431 	if (so->so_options & SO_OOBINLINE) {
1432 		*pollwakeups |= POLLIN | POLLRDNORM | POLLRDBAND;
1433 		*signals |= S_INPUT | S_RDNORM;
1434 	} else {
1435 		*pollwakeups |= POLLRDBAND;
1436 		so->so_state |= SS_HAVEOOBDATA;
1437 		so->so_oobmsg = mp;
     		/* The message now lives on so_oobmsg; nothing to queue. */
1438 		mp = NULL;
1439 	}
1440 	ASSERT(so_verify_oobstate(so));
1441 	return (mp);
1442 }
1443 
1444 /*
1445  * Caller must hold the mutex.
1446  * For delayed processing, save the T_DISCON_IND received
1447  * from below on sti_discon_ind_mp.
1448  * When the message is processed the framework will call:
1449  *      (*func)(so, mp);
      *
      * The callback pointer is carried in the message's b_prev field until
      * so_drain_discon_ind() retrieves it.  mp is always consumed: it is
      * either saved on sti_discon_ind_mp or, when another T_DISCON_IND is
      * already saved or SOASYNC_UNBIND is set, freed after a warning.
1450  */
1451 static void
1452 so_save_discon_ind(struct sonode *so,
1453 	mblk_t *mp,
1454 	void (*func)(struct sonode *so, mblk_t *))
1455 {
1456 	sotpi_info_t *sti = SOTOTPI(so);
1457 
1458 	ASSERT(MUTEX_HELD(&so->so_lock));
1459 
1460 	/*
1461 	 * Discard new T_DISCON_IND if we have already received another.
1462 	 * Currently the earlier message can either be on sti_discon_ind_mp
1463 	 * or being processed.
1464 	 */
1465 	if (sti->sti_discon_ind_mp != NULL || (so->so_flag & SOASYNC_UNBIND)) {
1466 		zcmn_err(getzoneid(), CE_WARN,
1467 		    "sockfs: received unexpected additional T_DISCON_IND\n");
1468 		freemsg(mp);
1469 		return;
1470 	}
     	/* Stash the callback in b_prev; it is cleared again before use. */
1471 	mp->b_prev = (mblk_t *)func;
1472 	mp->b_next = NULL;
1473 	sti->sti_discon_ind_mp = mp;
1474 }
1475 
1476 /*
1477  * Caller must hold the mutex and make sure that either SOLOCKED
1478  * or SOASYNC_UNBIND is set. Called from so_unlock_single().
1479  * Perform delayed processing of T_DISCON_IND message on sti_discon_ind_mp.
1480  * Need to ensure that strsock_proto() will not end up sleeping for
1481  * SOASYNC_UNBIND, while executing this function.
      *
      * If a message is saved on sti_discon_ind_mp, it is detached, the
      * callback stored in its b_prev field is recovered, and that callback
      * is invoked as (*func)(so, bp).  Does nothing when no message is
      * saved.
1482  */
1483 void
1484 so_drain_discon_ind(struct sonode *so)
1485 {
1486 	mblk_t	*bp;
1487 	void (*func)(struct sonode *so, mblk_t *);
1488 	sotpi_info_t *sti = SOTOTPI(so);
1489 
1490 	ASSERT(MUTEX_HELD(&so->so_lock));
1491 	ASSERT(so->so_flag & (SOLOCKED|SOASYNC_UNBIND));
1492 
1493 	/* Process T_DISCON_IND on sti_discon_ind_mp */
1494 	if ((bp = sti->sti_discon_ind_mp) != NULL) {
1495 		sti->sti_discon_ind_mp = NULL;
     		/* b_prev was borrowed by so_save_discon_ind() for func. */
1496 		func = (void (*)())bp->b_prev;
1497 		bp->b_prev = NULL;
1498 
1499 		/*
1500 		 * This (*func) is supposed to generate a message downstream
1501 		 * and we need to have a flag set until the corresponding
1502 		 * upstream message reaches stream head.
1503 		 * When processing T_DISCON_IND in strsock_discon_ind
1504 		 * we hold SOASYNC_UNBIND when sending T_UNBIND_REQ down and
1505 		 * drop the flag after we get the ACK in strsock_proto.
1506 		 */
1507 		(void) (*func)(so, bp);
1508 	}
1509 }
1510 
1511 /*
1512  * Caller must hold the mutex.
1513  * Remove the T_DISCON_IND on sti_discon_ind_mp.
      *
      * The saved message, if any, is detached and freed without invoking
      * the callback stored in its b_prev field.  Does nothing when no
      * message is saved.
1514  */
1515 void
1516 so_flush_discon_ind(struct sonode *so)
1517 {
1518 	mblk_t	*bp;
1519 	sotpi_info_t *sti = SOTOTPI(so);
1520 
1521 	ASSERT(MUTEX_HELD(&so->so_lock));
1522 
1523 	/*
1524 	 * Remove T_DISCON_IND mblk at sti_discon_ind_mp.
1525 	 */
1526 	if ((bp = sti->sti_discon_ind_mp) != NULL) {
1527 		sti->sti_discon_ind_mp = NULL;
     		/* b_prev holds a function pointer, not an mblk; clear it. */
1528 		bp->b_prev = NULL;
1529 		freemsg(bp);
1530 	}
1531 }
1532 
1533 /*
1534  * Caller must hold the mutex.
1535  *
1536  * This function is used to process the T_DISCON_IND message. It does
1537  * immediate processing when called from strsock_proto and delayed
1538  * processing of discon_ind saved on sti_discon_ind_mp when called from
1539  * so_drain_discon_ind. When a T_DISCON_IND message is saved in
1540  * sti_discon_ind_mp for delayed processing, this function is registered
1541  * as the callback function to process the message.
1542  *
1543  * SOASYNC_UNBIND should be held in this function, during the non-blocking
1544  * unbind operation, and should be released only after we receive the ACK
1545  * in strsock_proto, for the T_UNBIND_REQ sent here. Since SOLOCKED is not set,
1546  * no TPI messages would be sent down at this time. This is to prevent M_FLUSH
1547  * sent from either this function or tcp_unbind(), flushing away any TPI
1548  * message that is being sent down and stays in a lower module's queue.
1549  *
1550  * This function drops so_lock and grabs it again.
      *
      * Summary of the steps performed:
      *   1. Mark the socket disconnected with DISCON_reason and invalidate
      *      the cached local and foreign addresses.
      *   2. If the socket is bound and the preallocated sti_unbind_mp is
      *      available, set SOASYNC_UNBIND, clear SS_ISBOUND and
      *      SS_ACCEPTCONN, drop so_lock, send M_FLUSH(FLUSHRW) and then
      *      the T_UNBIND_REQ downstream.  Otherwise just drop so_lock.
      *   3. Update the stream head read/write error and EOF state, free
      *      discon_mp, wake pollers and sleeping writers, send S_OUTPUT
      *      signals if requested, and re-take so_lock.
      *
      * discon_mp is always freed by this function.
1551  */
1552 static void
1553 strsock_discon_ind(struct sonode *so, mblk_t *discon_mp)
1554 {
1555 	struct vnode *vp;
1556 	struct stdata *stp;
1557 	union T_primitives *tpr;
1558 	struct T_unbind_req *ubr;
1559 	mblk_t *mp;
1560 	int error;
1561 	sotpi_info_t *sti = SOTOTPI(so);
1562 
1563 	ASSERT(MUTEX_HELD(&so->so_lock));
1564 	ASSERT(discon_mp);
1565 	ASSERT(discon_mp->b_rptr);
1566 
     	/* tpr points into discon_mp; it is used until discon_mp is freed. */
1567 	tpr = (union T_primitives *)discon_mp->b_rptr;
1568 	ASSERT(tpr->type == T_DISCON_IND);
1569 
1570 	vp = SOTOV(so);
1571 	stp = vp->v_stream;
1572 	ASSERT(stp);
1573 
1574 	/*
1575 	 * Not a listener
1576 	 */
1577 	ASSERT((so->so_state & SS_ACCEPTCONN) == 0);
1578 
1579 	/*
1580 	 * This assumes that the name space for DISCON_reason
1581 	 * is the errno name space.
1582 	 */
1583 	soisdisconnected(so, tpr->discon_ind.DISCON_reason);
1584 	sti->sti_laddr_valid = 0;
1585 	sti->sti_faddr_valid = 0;
1586 
1587 	/*
1588 	 * Unbind with the transport without blocking.
1589 	 * If we've already received a T_DISCON_IND do not unbind.
1590 	 *
1591 	 * If there is no preallocated unbind message, we have already
1592 	 * unbound with the transport
1593 	 *
1594 	 * If the socket is not bound, no need to unbind.
     	 *
     	 * Every branch below releases so_lock; it is re-acquired at the
     	 * end of the function.
1595 	 */
1596 	mp = sti->sti_unbind_mp;
1597 	if (mp == NULL) {
1598 		ASSERT(!(so->so_state & SS_ISBOUND));
1599 		mutex_exit(&so->so_lock);
1600 	} else if (!(so->so_state & SS_ISBOUND))  {
     		/* Not bound: sti_unbind_mp is left in place for later use. */
1601 		mutex_exit(&so->so_lock);
1602 	} else {
     		/* Take ownership of the preallocated unbind message. */
1603 		sti->sti_unbind_mp = NULL;
1604 
1605 		/*
1606 		 * Is another T_DISCON_IND being processed.
1607 		 */
1608 		ASSERT((so->so_flag & SOASYNC_UNBIND) == 0);
1609 
1610 		/*
1611 		 * Make strsock_proto ignore T_OK_ACK and T_ERROR_ACK for
1612 		 * this unbind. Set SOASYNC_UNBIND. This should be cleared
1613 		 * only after we receive the ACK in strsock_proto.
1614 		 */
1615 		so->so_flag |= SOASYNC_UNBIND;
1616 		ASSERT(!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)));
1617 		so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN);
1618 		sti->sti_laddr_valid = 0;
1619 		mutex_exit(&so->so_lock);
1620 
1621 		/*
1622 		 * Send down T_UNBIND_REQ ignoring flow control.
1623 		 * XXX Assumes that MSG_IGNFLOW implies that this thread
1624 		 * does not run service procedures.
1625 		 */
1626 		ASSERT(DB_TYPE(mp) == M_PROTO);
     		/* Fill in the request in the preallocated buffer. */
1627 		ubr = (struct T_unbind_req *)mp->b_rptr;
1628 		mp->b_wptr += sizeof (*ubr);
1629 		ubr->PRIM_type = T_UNBIND_REQ;
1630 
1631 		/*
1632 		 * Flush the read and write side (except stream head read queue)
1633 		 * and send down T_UNBIND_REQ.
1634 		 */
1635 		(void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW);
1636 		error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
1637 		    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
1638 		/* LINTED - warning: statement has no consequent: if */
1639 		if (error) {
1640 			eprintsoline(so, error);
1641 		}
1642 	}
1643 
     	/* so_lock is not held from here until the final mutex_enter(). */
1644 	if (tpr->discon_ind.DISCON_reason != 0)
1645 		strsetrerror(SOTOV(so), 0, 0, sogetrderr);
1646 	strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
1647 	strseteof(SOTOV(so), 1);
1648 	/*
1649 	 * strseteof takes care of read side wakeups,
1650 	 * pollwakeups, and signals.
1651 	 */
1652 	dprintso(so, 1, ("T_DISCON_IND: error %d\n", so->so_error));
1653 	freemsg(discon_mp);
1654 
1655 
     	/* Write-side notification: pollers, sleeping writers, SIGPOLL. */
1656 	pollwakeup(&stp->sd_pollist, POLLOUT);
1657 	mutex_enter(&stp->sd_lock);
1658 
1659 	/*
1660 	 * Wake sleeping write
1661 	 */
1662 	if (stp->sd_flag & WSLEEP) {
1663 		stp->sd_flag &= ~WSLEEP;
1664 		cv_broadcast(&stp->sd_wrq->q_wait);
1665 	}
1666 
1667 	/*
1668 	 * strsendsig can handle multiple signals with a
1669 	 * single call.  Send SIGPOLL for S_OUTPUT event.
1670 	 */
1671 	if (stp->sd_sigflags & S_OUTPUT)
1672 		strsendsig(stp->sd_siglist, S_OUTPUT, 0, 0);
1673 
1674 	mutex_exit(&stp->sd_lock);
     	/* Restore the caller's locking state. */
1675 	mutex_enter(&so->so_lock);
1676 }
1677 
1678 /*
1679  * This routine is registered with the stream head to receive M_PROTO
1680  * and M_PCPROTO messages.
1681  *
1682  * Returns NULL if the message was consumed.
1683  * Returns an mblk to make that mblk be processed (and queued) by the stream
1684  * head.
1685  *
1686  * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and
1687  * *pollwakeups) for the stream head to take action on. Note that since
1688  * sockets always deliver SIGIO for every new piece of data this routine
1689  * never sets *firstmsgsigs; any signals are returned in *allmsgsigs.
1690  *
1691  * This routine handles all data related TPI messages independent of
1692  * the type of the socket i.e. it doesn't care if a T_UNITDATA_IND message
1693  * arrives on a SOCK_STREAM.
1694  */
1695 static mblk_t *
1696 strsock_proto(vnode_t *vp, mblk_t *mp,
1697 		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
1698 		strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
1699 {
1700 	union T_primitives *tpr;
1701 	struct sonode *so;
1702 	sotpi_info_t *sti;
1703 
1704 	so = VTOSO(vp);
1705 	sti = SOTOTPI(so);
1706 
1707 	dprintso(so, 1, ("strsock_proto(%p, %p)\n", (void *)vp, (void *)mp));
1708 
1709 	/* Set default return values */
1710 	*firstmsgsigs = *wakeups = *allmsgsigs = *pollwakeups = 0;
1711 
1712 	ASSERT(DB_TYPE(mp) == M_PROTO ||
1713 	    DB_TYPE(mp) == M_PCPROTO);
1714 
1715 	if (MBLKL(mp) < sizeof (tpr->type)) {
1716 		/* The message is too short to even contain the primitive */
1717 		zcmn_err(getzoneid(), CE_WARN,
1718 		    "sockfs: Too short TPI message received. Len = %ld\n",
1719 		    (ptrdiff_t)(MBLKL(mp)));
1720 		freemsg(mp);
1721 		return (NULL);
1722 	}
1723 	if (!__TPI_PRIM_ISALIGNED(mp->b_rptr)) {
1724 		/* The read pointer is not aligned correctly for TPI */
1725 		zcmn_err(getzoneid(), CE_WARN,
1726 		    "sockfs: Unaligned TPI message received. rptr = %p\n",
1727 		    (void *)mp->b_rptr);
1728 		freemsg(mp);
1729 		return (NULL);
1730 	}
1731 	tpr = (union T_primitives *)mp->b_rptr;
1732 	dprintso(so, 1, ("strsock_proto: primitive %d\n", tpr->type));
1733 
1734 	switch (tpr->type) {
1735 
1736 	case T_DATA_IND:
1737 		if (MBLKL(mp) < sizeof (struct T_data_ind)) {
1738 			zcmn_err(getzoneid(), CE_WARN,
1739 			    "sockfs: Too short T_DATA_IND. Len = %ld\n",
1740 			    (ptrdiff_t)(MBLKL(mp)));
1741 			freemsg(mp);
1742 			return (NULL);
1743 		}
1744 		/*
1745 		 * Ignore zero-length T_DATA_IND messages. These might be
1746 		 * generated by some transports.
1747 		 * This is needed to prevent read (which skips the M_PROTO
1748 		 * part) to unexpectedly return 0 (or return EWOULDBLOCK
1749 		 * on a non-blocking socket after select/poll has indicated
1750 		 * that data is available).
1751 		 */
1752 		if (msgdsize(mp->b_cont) == 0) {
1753 			dprintso(so, 0,
1754 			    ("strsock_proto: zero length T_DATA_IND\n"));
1755 			freemsg(mp);
1756 			return (NULL);
1757 		}
1758 		*allmsgsigs = S_INPUT | S_RDNORM;
1759 		*pollwakeups = POLLIN | POLLRDNORM;
1760 		*wakeups = RSLEEP;
1761 		return (mp);
1762 
1763 	case T_UNITDATA_IND: {
1764 		struct T_unitdata_ind	*tudi = &tpr->unitdata_ind;
1765 		void			*addr;
1766 		t_uscalar_t		addrlen;
1767 
1768 		if (MBLKL(mp) < sizeof (struct T_unitdata_ind)) {
1769 			zcmn_err(getzoneid(), CE_WARN,
1770 			    "sockfs: Too short T_UNITDATA_IND. Len = %ld\n",
1771 			    (ptrdiff_t)(MBLKL(mp)));
1772 			freemsg(mp);
1773 			return (NULL);
1774 		}
1775 
1776 		/* Is this not a connected datagram socket? */
1777 		if ((so->so_mode & SM_CONNREQUIRED) ||
1778 		    !(so->so_state & SS_ISCONNECTED)) {
1779 			/*
1780 			 * Not a connected datagram socket. Look for
1781 			 * the SO_UNIX_CLOSE option. If such an option is found
1782 			 * discard the message (since it has no meaning
1783 			 * unless connected).
1784 			 */
1785 			if (so->so_family == AF_UNIX && msgdsize(mp) == 0 &&
1786 			    tudi->OPT_length != 0) {
1787 				void *opt;
1788 				t_uscalar_t optlen = tudi->OPT_length;
1789 
1790 				opt = sogetoff(mp, tudi->OPT_offset,
1791 				    optlen, __TPI_ALIGN_SIZE);
1792 				if (opt == NULL) {
1793 					/* The len/off falls outside mp */
1794 					freemsg(mp);
1795 					mutex_enter(&so->so_lock);
1796 					soseterror(so, EPROTO);
1797 					mutex_exit(&so->so_lock);
1798 					zcmn_err(getzoneid(), CE_WARN,
1799 					    "sockfs: T_unidata_ind with "
1800 					    "invalid optlen/offset %u/%d\n",
1801 					    optlen, tudi->OPT_offset);
1802 					return (NULL);
1803 				}
1804 				if (so_getopt_unix_close(opt, optlen)) {
1805 					freemsg(mp);
1806 					return (NULL);
1807 				}
1808 			}
1809 			*allmsgsigs = S_INPUT | S_RDNORM;
1810 			*pollwakeups = POLLIN | POLLRDNORM;
1811 			*wakeups = RSLEEP;
1812 			if (audit_active)
1813 				audit_sock(T_UNITDATA_IND, strvp2wq(vp),
1814 				    mp, 0);
1815 			return (mp);
1816 		}
1817 
1818 		/*
1819 		 * A connected datagram socket. For AF_INET{,6} we verify that
1820 		 * the source address matches the "connected to" address.
1821 		 * The semantics of AF_UNIX sockets is to not verify
1822 		 * the source address.
1823 		 * Note that this source address verification is transport
1824 		 * specific. Thus the real fix would be to extend TPI
1825 		 * to allow T_CONN_REQ messages to be sent to connectionless
1826 		 * transport providers and always let the transport provider
1827 		 * do whatever filtering is needed.
1828 		 *
1829 		 * The verification/filtering semantics for transports
1830 		 * other than AF_INET and AF_UNIX are unknown. The choice
1831 		 * would be to either filter using bcmp or let all messages
1832 		 * get through. This code does not filter other address
1833 		 * families since this at least allows the application to
1834 		 * work around any missing filtering.
1835 		 *
1836 		 * XXX Should we move filtering to UDP/ICMP???
1837 		 * That would require passing e.g. a T_DISCON_REQ to UDP
1838 		 * when the socket becomes unconnected.
1839 		 */
1840 		addrlen = tudi->SRC_length;
1841 		/*
1842 		 * The alignment restriction is really too strict but
1843 		 * we want enough alignment to inspect the fields of
1844 		 * a sockaddr_in.
1845 		 */
1846 		addr = sogetoff(mp, tudi->SRC_offset, addrlen,
1847 		    __TPI_ALIGN_SIZE);
1848 		if (addr == NULL) {
1849 			freemsg(mp);
1850 			mutex_enter(&so->so_lock);
1851 			soseterror(so, EPROTO);
1852 			mutex_exit(&so->so_lock);
1853 			zcmn_err(getzoneid(), CE_WARN,
1854 			    "sockfs: T_unidata_ind with invalid "
1855 			    "addrlen/offset %u/%d\n",
1856 			    addrlen, tudi->SRC_offset);
1857 			return (NULL);
1858 		}
1859 
1860 		if (so->so_family == AF_INET) {
1861 			/*
1862 			 * For AF_INET we allow wildcarding both sin_addr
1863 			 * and sin_port.
1864 			 */
1865 			struct sockaddr_in *faddr, *sin;
1866 
1867 			/* Prevent sti_faddr_sa from changing while accessed */
1868 			mutex_enter(&so->so_lock);
1869 			ASSERT(sti->sti_faddr_len ==
1870 			    (socklen_t)sizeof (struct sockaddr_in));
1871 			faddr = (struct sockaddr_in *)sti->sti_faddr_sa;
1872 			sin = (struct sockaddr_in *)addr;
1873 			if (addrlen !=
1874 			    (t_uscalar_t)sizeof (struct sockaddr_in) ||
1875 			    (sin->sin_addr.s_addr != faddr->sin_addr.s_addr &&
1876 			    faddr->sin_addr.s_addr != INADDR_ANY) ||
1877 			    (so->so_type != SOCK_RAW &&
1878 			    sin->sin_port != faddr->sin_port &&
1879 			    faddr->sin_port != 0)) {
1880 #ifdef DEBUG
1881 				dprintso(so, 0,
1882 				    ("sockfs: T_UNITDATA_IND mismatch: %s",
1883 				    pr_addr(so->so_family,
1884 				    (struct sockaddr *)addr, addrlen)));
1885 				dprintso(so, 0, (" - %s\n",
1886 				    pr_addr(so->so_family, sti->sti_faddr_sa,
1887 				    (t_uscalar_t)sti->sti_faddr_len)));
1888 #endif /* DEBUG */
1889 				mutex_exit(&so->so_lock);
1890 				freemsg(mp);
1891 				return (NULL);
1892 			}
1893 			mutex_exit(&so->so_lock);
1894 		} else if (so->so_family == AF_INET6) {
1895 			/*
1896 			 * For AF_INET6 we allow wildcarding both sin6_addr
1897 			 * and sin6_port.
1898 			 */
1899 			struct sockaddr_in6 *faddr6, *sin6;
1900 			static struct in6_addr zeroes; /* inits to all zeros */
1901 
1902 			/* Prevent sti_faddr_sa from changing while accessed */
1903 			mutex_enter(&so->so_lock);
1904 			ASSERT(sti->sti_faddr_len ==
1905 			    (socklen_t)sizeof (struct sockaddr_in6));
1906 			faddr6 = (struct sockaddr_in6 *)sti->sti_faddr_sa;
1907 			sin6 = (struct sockaddr_in6 *)addr;
1908 			/* XXX could we get a mapped address ::ffff:0.0.0.0 ? */
1909 			if (addrlen !=
1910 			    (t_uscalar_t)sizeof (struct sockaddr_in6) ||
1911 			    (!IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
1912 			    &faddr6->sin6_addr) &&
1913 			    !IN6_ARE_ADDR_EQUAL(&faddr6->sin6_addr, &zeroes)) ||
1914 			    (so->so_type != SOCK_RAW &&
1915 			    sin6->sin6_port != faddr6->sin6_port &&
1916 			    faddr6->sin6_port != 0)) {
1917 #ifdef DEBUG
1918 				dprintso(so, 0,
1919 				    ("sockfs: T_UNITDATA_IND mismatch: %s",
1920 				    pr_addr(so->so_family,
1921 				    (struct sockaddr *)addr, addrlen)));
1922 				dprintso(so, 0, (" - %s\n",
1923 				    pr_addr(so->so_family, sti->sti_faddr_sa,
1924 				    (t_uscalar_t)sti->sti_faddr_len)));
1925 #endif /* DEBUG */
1926 				mutex_exit(&so->so_lock);
1927 				freemsg(mp);
1928 				return (NULL);
1929 			}
1930 			mutex_exit(&so->so_lock);
1931 		} else if (so->so_family == AF_UNIX &&
1932 		    msgdsize(mp->b_cont) == 0 &&
1933 		    tudi->OPT_length != 0) {
1934 			/*
1935 			 * Attempt to extract AF_UNIX
1936 			 * SO_UNIX_CLOSE indication from options.
1937 			 */
1938 			void *opt;
1939 			t_uscalar_t optlen = tudi->OPT_length;
1940 
1941 			opt = sogetoff(mp, tudi->OPT_offset,
1942 			    optlen, __TPI_ALIGN_SIZE);
1943 			if (opt == NULL) {
1944 				/* The len/off falls outside mp */
1945 				freemsg(mp);
1946 				mutex_enter(&so->so_lock);
1947 				soseterror(so, EPROTO);
1948 				mutex_exit(&so->so_lock);
1949 				zcmn_err(getzoneid(), CE_WARN,
1950 				    "sockfs: T_unidata_ind with invalid "
1951 				    "optlen/offset %u/%d\n",
1952 				    optlen, tudi->OPT_offset);
1953 				return (NULL);
1954 			}
1955 			/*
1956 			 * If we received a unix close indication mark the
1957 			 * socket and discard this message.
1958 			 */
1959 			if (so_getopt_unix_close(opt, optlen)) {
1960 				mutex_enter(&so->so_lock);
1961 				sobreakconn(so, ECONNRESET);
1962 				mutex_exit(&so->so_lock);
1963 				strsetrerror(SOTOV(so), 0, 0, sogetrderr);
1964 				freemsg(mp);
1965 				*pollwakeups = POLLIN | POLLRDNORM;
1966 				*allmsgsigs = S_INPUT | S_RDNORM;
1967 				*wakeups = RSLEEP;
1968 				return (NULL);
1969 			}
1970 		}
1971 		*allmsgsigs = S_INPUT | S_RDNORM;
1972 		*pollwakeups = POLLIN | POLLRDNORM;
1973 		*wakeups = RSLEEP;
1974 		return (mp);
1975 	}
1976 
1977 	case T_OPTDATA_IND: {
1978 		struct T_optdata_ind	*tdi = &tpr->optdata_ind;
1979 
1980 		if (MBLKL(mp) < sizeof (struct T_optdata_ind)) {
1981 			zcmn_err(getzoneid(), CE_WARN,
1982 			    "sockfs: Too short T_OPTDATA_IND. Len = %ld\n",
1983 			    (ptrdiff_t)(MBLKL(mp)));
1984 			freemsg(mp);
1985 			return (NULL);
1986 		}
1987 		/*
1988 		 * Allow zero-length messages carrying options.
1989 		 * This is used when carrying the SO_UNIX_CLOSE option.
1990 		 */
1991 		if (so->so_family == AF_UNIX && msgdsize(mp->b_cont) == 0 &&
1992 		    tdi->OPT_length != 0) {
1993 			/*
1994 			 * Attempt to extract AF_UNIX close indication
1995 			 * from the options. Ignore any other options -
1996 			 * those are handled once the message is removed
1997 			 * from the queue.
1998 			 * The close indication message should not carry data.
1999 			 */
2000 			void *opt;
2001 			t_uscalar_t optlen = tdi->OPT_length;
2002 
2003 			opt = sogetoff(mp, tdi->OPT_offset,
2004 			    optlen, __TPI_ALIGN_SIZE);
2005 			if (opt == NULL) {
2006 				/* The len/off falls outside mp */
2007 				freemsg(mp);
2008 				mutex_enter(&so->so_lock);
2009 				soseterror(so, EPROTO);
2010 				mutex_exit(&so->so_lock);
2011 				zcmn_err(getzoneid(), CE_WARN,
2012 				    "sockfs: T_optdata_ind with invalid "
2013 				    "optlen/offset %u/%d\n",
2014 				    optlen, tdi->OPT_offset);
2015 				return (NULL);
2016 			}
2017 			/*
2018 			 * If we received a close indication mark the
2019 			 * socket and discard this message.
2020 			 */
2021 			if (so_getopt_unix_close(opt, optlen)) {
2022 				mutex_enter(&so->so_lock);
2023 				socantsendmore(so);
2024 				sti->sti_faddr_valid = 0;
2025 				mutex_exit(&so->so_lock);
2026 				strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2027 				freemsg(mp);
2028 				return (NULL);
2029 			}
2030 		}
2031 		*allmsgsigs = S_INPUT | S_RDNORM;
2032 		*pollwakeups = POLLIN | POLLRDNORM;
2033 		*wakeups = RSLEEP;
2034 		return (mp);
2035 	}
2036 
2037 	case T_EXDATA_IND: {
2038 		mblk_t		*mctl, *mdata;
2039 		mblk_t *lbp;
2040 		union T_primitives *tprp;
2041 		struct stdata   *stp;
2042 		queue_t *qp;
2043 
2044 		if (MBLKL(mp) < sizeof (struct T_exdata_ind)) {
2045 			zcmn_err(getzoneid(), CE_WARN,
2046 			    "sockfs: Too short T_EXDATA_IND. Len = %ld\n",
2047 			    (ptrdiff_t)(MBLKL(mp)));
2048 			freemsg(mp);
2049 			return (NULL);
2050 		}
2051 		/*
2052 		 * Ignore zero-length T_EXDATA_IND messages. These might be
2053 		 * generated by some transports.
2054 		 *
2055 		 * This is needed to prevent read (which skips the M_PROTO
2056 		 * part) to unexpectedly return 0 (or return EWOULDBLOCK
2057 		 * on a non-blocking socket after select/poll has indicated
2058 		 * that data is available).
2059 		 */
2060 		dprintso(so, 1,
2061 		    ("T_EXDATA_IND(%p): counts %d/%d state %s\n",
2062 		    (void *)vp, sti->sti_oobsigcnt, sti->sti_oobcnt,
2063 		    pr_state(so->so_state, so->so_mode)));
2064 
2065 		if (msgdsize(mp->b_cont) == 0) {
2066 			dprintso(so, 0,
2067 			    ("strsock_proto: zero length T_EXDATA_IND\n"));
2068 			freemsg(mp);
2069 			return (NULL);
2070 		}
2071 
2072 		/*
2073 		 * Split into the T_EXDATA_IND and the M_DATA part.
2074 		 * We process these three pieces separately:
2075 		 *	signal generation
2076 		 *	handling T_EXDATA_IND
2077 		 *	handling M_DATA component
2078 		 */
2079 		mctl = mp;
2080 		mdata = mctl->b_cont;
2081 		mctl->b_cont = NULL;
2082 		mutex_enter(&so->so_lock);
2083 		so_oob_sig(so, 0, allmsgsigs, pollwakeups);
2084 		mctl = so_oob_exdata(so, mctl, allmsgsigs, pollwakeups);
2085 		mdata = so_oob_data(so, mdata, allmsgsigs, pollwakeups);
2086 
2087 		stp = vp->v_stream;
2088 		ASSERT(stp != NULL);
2089 		qp = _RD(stp->sd_wrq);
2090 
2091 		mutex_enter(QLOCK(qp));
2092 		lbp = qp->q_last;
2093 
2094 		/*
2095 		 * We want to avoid queueing up a string of T_EXDATA_IND
2096 		 * messages with no intervening data messages at the stream
2097 		 * head. These messages contribute to the total message
2098 		 * count. Eventually this can lead to STREAMS flow control
2099 		 * and also cause TCP to advertise a zero window condition
2100 		 * to the peer. This can happen in the degenerate case where
2101 		 * the sender and receiver exchange only OOB data. The sender
2102 		 * only sends messages with MSG_OOB flag and the receiver
2103 		 * receives only MSG_OOB messages and does not use SO_OOBINLINE.
2104 		 * An example of this scenario has been reported in applications
2105 		 * that use OOB data to exchange heart beats. Flow control
2106 		 * relief will never happen if the application only reads OOB
2107 		 * data which is done directly by sorecvoob() and the
2108 		 * T_EXDATA_IND messages at the streamhead won't be consumed.
2109 		 * Note that there is no correctness issue in compressing the
2110 		 * string of T_EXDATA_IND messages into a single T_EXDATA_IND
2111 		 * message. A single read that does not specify MSG_OOB will
2112 		 * read across all the marks in a loop in sotpi_recvmsg().
2113 		 * Each mark is individually distinguishable only if the
2114 		 * T_EXDATA_IND messages are separated by data messages.
2115 		 */
2116 		if ((qp->q_first != NULL) && (DB_TYPE(lbp) == M_PROTO)) {
2117 			tprp = (union T_primitives *)lbp->b_rptr;
2118 			if ((tprp->type == T_EXDATA_IND) &&
2119 			    !(so->so_options & SO_OOBINLINE)) {
2120 
2121 				/*
2122 				 * free the new M_PROTO message
2123 				 */
2124 				freemsg(mctl);
2125 
2126 				/*
2127 				 * adjust the OOB count and OOB	signal count
2128 				 * just incremented for the new OOB data.
2129 				 */
2130 				sti->sti_oobcnt--;
2131 				sti->sti_oobsigcnt--;
2132 				mutex_exit(QLOCK(qp));
2133 				mutex_exit(&so->so_lock);
2134 				return (NULL);
2135 			}
2136 		}
2137 		mutex_exit(QLOCK(qp));
2138 
2139 		/*
2140 		 * Pass the T_EXDATA_IND and the M_DATA back separately
2141 		 * by using b_next linkage. (The stream head will queue any
2142 		 * b_next linked messages separately.) This is needed
2143 		 * since MSGMARK applies to the last byte of the message
2144 		 * hence we can not have any M_DATA component attached
2145 		 * to the marked T_EXDATA_IND. Note that the stream head
2146 		 * will not consolidate M_DATA messages onto an MSGMARK'ed
2147 		 * message in order to preserve the constraint that
2148 		 * the T_EXDATA_IND always is a separate message.
2149 		 */
2150 		ASSERT(mctl != NULL);
2151 		mctl->b_next = mdata;
2152 		mp = mctl;
2153 #ifdef DEBUG
2154 		if (mdata == NULL) {
2155 			dprintso(so, 1,
2156 			    ("after outofline T_EXDATA_IND(%p): "
2157 			    "counts %d/%d  poll 0x%x sig 0x%x state %s\n",
2158 			    (void *)vp, sti->sti_oobsigcnt,
2159 			    sti->sti_oobcnt, *pollwakeups, *allmsgsigs,
2160 			    pr_state(so->so_state, so->so_mode)));
2161 		} else {
2162 			dprintso(so, 1,
2163 			    ("after inline T_EXDATA_IND(%p): "
2164 			    "counts %d/%d  poll 0x%x sig 0x%x state %s\n",
2165 			    (void *)vp, sti->sti_oobsigcnt,
2166 			    sti->sti_oobcnt, *pollwakeups, *allmsgsigs,
2167 			    pr_state(so->so_state, so->so_mode)));
2168 		}
2169 #endif /* DEBUG */
2170 		mutex_exit(&so->so_lock);
2171 		*wakeups = RSLEEP;
2172 		return (mp);
2173 	}
2174 
2175 	case T_CONN_CON: {
2176 		struct T_conn_con	*conn_con;
2177 		void			*addr;
2178 		t_uscalar_t		addrlen;
2179 
2180 		/*
2181 		 * Verify the state, update the state to ISCONNECTED,
2182 		 * record the potentially new address in the message,
2183 		 * and drop the message.
2184 		 */
2185 		if (MBLKL(mp) < sizeof (struct T_conn_con)) {
2186 			zcmn_err(getzoneid(), CE_WARN,
2187 			    "sockfs: Too short T_CONN_CON. Len = %ld\n",
2188 			    (ptrdiff_t)(MBLKL(mp)));
2189 			freemsg(mp);
2190 			return (NULL);
2191 		}
2192 
2193 		mutex_enter(&so->so_lock);
2194 		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) !=
2195 		    SS_ISCONNECTING) {
2196 			mutex_exit(&so->so_lock);
2197 			dprintso(so, 1,
2198 			    ("T_CONN_CON: state %x\n", so->so_state));
2199 			freemsg(mp);
2200 			return (NULL);
2201 		}
2202 
2203 		conn_con = &tpr->conn_con;
2204 		addrlen = conn_con->RES_length;
2205 		/*
2206 		 * Allow the address to be of different size than sent down
2207 		 * in the T_CONN_REQ as long as it doesn't exceed the maxlen.
2208 		 * For AF_UNIX require the identical length.
2209 		 */
2210 		if (so->so_family == AF_UNIX ?
2211 		    addrlen != (t_uscalar_t)sizeof (sti->sti_ux_laddr) :
2212 		    addrlen > (t_uscalar_t)sti->sti_faddr_maxlen) {
2213 			zcmn_err(getzoneid(), CE_WARN,
2214 			    "sockfs: T_conn_con with different "
2215 			    "length %u/%d\n",
2216 			    addrlen, conn_con->RES_length);
2217 			soisdisconnected(so, EPROTO);
2218 			sti->sti_laddr_valid = 0;
2219 			sti->sti_faddr_valid = 0;
2220 			mutex_exit(&so->so_lock);
2221 			strsetrerror(SOTOV(so), 0, 0, sogetrderr);
2222 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2223 			strseteof(SOTOV(so), 1);
2224 			freemsg(mp);
2225 			/*
2226 			 * strseteof takes care of read side wakeups,
2227 			 * pollwakeups, and signals.
2228 			 */
2229 			*wakeups = WSLEEP;
2230 			*allmsgsigs = S_OUTPUT;
2231 			*pollwakeups = POLLOUT;
2232 			return (NULL);
2233 		}
2234 		addr = sogetoff(mp, conn_con->RES_offset, addrlen, 1);
2235 		if (addr == NULL) {
2236 			zcmn_err(getzoneid(), CE_WARN,
2237 			    "sockfs: T_conn_con with invalid "
2238 			    "addrlen/offset %u/%d\n",
2239 			    addrlen, conn_con->RES_offset);
2240 			mutex_exit(&so->so_lock);
2241 			strsetrerror(SOTOV(so), 0, 0, sogetrderr);
2242 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2243 			strseteof(SOTOV(so), 1);
2244 			freemsg(mp);
2245 			/*
2246 			 * strseteof takes care of read side wakeups,
2247 			 * pollwakeups, and signals.
2248 			 */
2249 			*wakeups = WSLEEP;
2250 			*allmsgsigs = S_OUTPUT;
2251 			*pollwakeups = POLLOUT;
2252 			return (NULL);
2253 		}
2254 
2255 		/*
2256 		 * Save for getpeername.
2257 		 */
2258 		if (so->so_family != AF_UNIX) {
2259 			sti->sti_faddr_len = (socklen_t)addrlen;
2260 			ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen);
2261 			bcopy(addr, sti->sti_faddr_sa, addrlen);
2262 			sti->sti_faddr_valid = 1;
2263 		}
2264 
2265 		if (so->so_peercred != NULL)
2266 			crfree(so->so_peercred);
2267 		so->so_peercred = DB_CRED(mp);
2268 		so->so_cpid = DB_CPID(mp);
2269 		if (so->so_peercred != NULL)
2270 			crhold(so->so_peercred);
2271 
2272 		/* Wakeup anybody sleeping in sowaitconnected */
2273 		soisconnected(so);
2274 		mutex_exit(&so->so_lock);
2275 
2276 		/*
2277 		 * The socket is now available for sending data.
2278 		 */
2279 		*wakeups = WSLEEP;
2280 		*allmsgsigs = S_OUTPUT;
2281 		*pollwakeups = POLLOUT;
2282 		freemsg(mp);
2283 		return (NULL);
2284 	}
2285 
2286 	/*
2287 	 * Extra processing in case of an SSL proxy, before queuing or
2288 	 * forwarding to the fallback endpoint
2289 	 */
2290 	case T_SSL_PROXY_CONN_IND:
2291 	case T_CONN_IND:
2292 		/*
2293 		 * Verify the min size and queue the message on
2294 		 * the sti_conn_ind_head/tail list.
2295 		 */
2296 		if (MBLKL(mp) < sizeof (struct T_conn_ind)) {
2297 			zcmn_err(getzoneid(), CE_WARN,
2298 			    "sockfs: Too short T_CONN_IND. Len = %ld\n",
2299 			    (ptrdiff_t)(MBLKL(mp)));
2300 			freemsg(mp);
2301 			return (NULL);
2302 		}
2303 
2304 		if (audit_active)
2305 			audit_sock(T_CONN_IND, strvp2wq(vp), mp, 0);
2306 		if (!(so->so_state & SS_ACCEPTCONN)) {
2307 			zcmn_err(getzoneid(), CE_WARN,
2308 			    "sockfs: T_conn_ind on non-listening socket\n");
2309 			freemsg(mp);
2310 			return (NULL);
2311 		}
2312 
2313 		if (tpr->type == T_SSL_PROXY_CONN_IND && mp->b_cont == NULL) {
2314 			/* No context: need to fall back */
2315 			struct sonode *fbso;
2316 			stdata_t *fbstp;
2317 
2318 			tpr->type = T_CONN_IND;
2319 
2320 			fbso = kssl_find_fallback(sti->sti_kssl_ent);
2321 
2322 			/*
2323 			 * No fallback: the remote will timeout and
2324 			 * disconnect.
2325 			 */
2326 			if (fbso == NULL) {
2327 				freemsg(mp);
2328 				return (NULL);
2329 			}
2330 			fbstp = SOTOV(fbso)->v_stream;
2331 			qreply(fbstp->sd_wrq->q_next, mp);
2332 			return (NULL);
2333 		}
2334 		soqueueconnind(so, mp);
2335 		*allmsgsigs = S_INPUT | S_RDNORM;
2336 		*pollwakeups = POLLIN | POLLRDNORM;
2337 		*wakeups = RSLEEP;
2338 		return (NULL);
2339 
2340 	case T_ORDREL_IND:
2341 		if (MBLKL(mp) < sizeof (struct T_ordrel_ind)) {
2342 			zcmn_err(getzoneid(), CE_WARN,
2343 			    "sockfs: Too short T_ORDREL_IND. Len = %ld\n",
2344 			    (ptrdiff_t)(MBLKL(mp)));
2345 			freemsg(mp);
2346 			return (NULL);
2347 		}
2348 
2349 		/*
2350 		 * Some providers send this when not fully connected.
2351 		 * SunLink X.25 needs to retrieve disconnect reason after
2352 		 * disconnect for compatibility. It uses T_ORDREL_IND
2353 		 * instead of T_DISCON_IND so that it may use the
2354 		 * endpoint after a connect failure to retrieve the
2355 		 * reason using an ioctl. Thus we explicitly clear
2356 		 * SS_ISCONNECTING here for SunLink X.25.
2357 		 * This is a needed TPI violation.
2358 		 */
2359 		mutex_enter(&so->so_lock);
2360 		so->so_state &= ~SS_ISCONNECTING;
2361 		socantrcvmore(so);
2362 		mutex_exit(&so->so_lock);
2363 		strseteof(SOTOV(so), 1);
2364 		/*
2365 		 * strseteof takes care of read side wakeups,
2366 		 * pollwakeups, and signals.
2367 		 */
2368 		freemsg(mp);
2369 		return (NULL);
2370 
2371 	case T_DISCON_IND:
2372 		if (MBLKL(mp) < sizeof (struct T_discon_ind)) {
2373 			zcmn_err(getzoneid(), CE_WARN,
2374 			    "sockfs: Too short T_DISCON_IND. Len = %ld\n",
2375 			    (ptrdiff_t)(MBLKL(mp)));
2376 			freemsg(mp);
2377 			return (NULL);
2378 		}
2379 		if (so->so_state & SS_ACCEPTCONN) {
2380 			/*
2381 			 * This is a listener. Look for a queued T_CONN_IND
2382 			 * with a matching sequence number and remove it
2383 			 * from the list.
2384 			 * It is normal to not find the sequence number since
2385 			 * the soaccept might have already dequeued it
2386 			 * (in which case the T_CONN_RES will fail with
2387 			 * TBADSEQ).
2388 			 */
2389 			(void) soflushconnind(so, tpr->discon_ind.SEQ_number);
2390 			freemsg(mp);
2391 			return (0);
2392 		}
2393 
2394 		/*
2395 		 * Not a listener
2396 		 *
2397 		 * If SS_CANTRCVMORE for AF_UNIX ignore the discon_reason.
2398 		 * Such a discon_ind appears when the peer has first done
2399 		 * a shutdown() followed by a close() in which case we just
2400 		 * want to record socantsendmore.
2401 		 * In this case sockfs first receives a T_ORDREL_IND followed
2402 		 * by a T_DISCON_IND.
2403 		 * Note that for other transports (e.g. TCP) we need to handle
2404 		 * the discon_ind in this case since it signals an error.
2405 		 */
2406 		mutex_enter(&so->so_lock);
2407 		if ((so->so_state & SS_CANTRCVMORE) &&
2408 		    (so->so_family == AF_UNIX)) {
2409 			socantsendmore(so);
2410 			sti->sti_faddr_valid = 0;
2411 			mutex_exit(&so->so_lock);
2412 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2413 			dprintso(so, 1,
2414 			    ("T_DISCON_IND: error %d\n", so->so_error));
2415 			freemsg(mp);
2416 			/*
2417 			 * Set these variables for caller to process them.
2418 			 * For the else part where T_DISCON_IND is processed,
2419 			 * this will be done in the function being called
2420 			 * (strsock_discon_ind())
2421 			 */
2422 			*wakeups = WSLEEP;
2423 			*allmsgsigs = S_OUTPUT;
2424 			*pollwakeups = POLLOUT;
2425 		} else if (so->so_flag & (SOASYNC_UNBIND | SOLOCKED)) {
2426 			/*
2427 			 * Deferred processing of T_DISCON_IND
2428 			 */
2429 			so_save_discon_ind(so, mp, strsock_discon_ind);
2430 			mutex_exit(&so->so_lock);
2431 		} else {
2432 			/*
2433 			 * Process T_DISCON_IND now
2434 			 */
2435 			(void) strsock_discon_ind(so, mp);
2436 			mutex_exit(&so->so_lock);
2437 		}
2438 		return (NULL);
2439 
2440 	case T_UDERROR_IND: {
2441 		struct T_uderror_ind	*tudi = &tpr->uderror_ind;
2442 		void			*addr;
2443 		t_uscalar_t		addrlen;
2444 		int			error;
2445 
2446 		dprintso(so, 0,
2447 		    ("T_UDERROR_IND: error %d\n", tudi->ERROR_type));
2448 
2449 		if (MBLKL(mp) < sizeof (struct T_uderror_ind)) {
2450 			zcmn_err(getzoneid(), CE_WARN,
2451 			    "sockfs: Too short T_UDERROR_IND. Len = %ld\n",
2452 			    (ptrdiff_t)(MBLKL(mp)));
2453 			freemsg(mp);
2454 			return (NULL);
2455 		}
2456 		/* Ignore on connection-oriented transports */
2457 		if (so->so_mode & SM_CONNREQUIRED) {
2458 			freemsg(mp);
2459 			eprintsoline(so, 0);
2460 			zcmn_err(getzoneid(), CE_WARN,
2461 			    "sockfs: T_uderror_ind on connection-oriented "
2462 			    "transport\n");
2463 			return (NULL);
2464 		}
2465 		addrlen = tudi->DEST_length;
2466 		addr = sogetoff(mp, tudi->DEST_offset, addrlen, 1);
2467 		if (addr == NULL) {
2468 			zcmn_err(getzoneid(), CE_WARN,
2469 			    "sockfs: T_uderror_ind with invalid "
2470 			    "addrlen/offset %u/%d\n",
2471 			    addrlen, tudi->DEST_offset);
2472 			freemsg(mp);
2473 			return (NULL);
2474 		}
2475 
2476 		/* Verify source address for connected socket. */
2477 		mutex_enter(&so->so_lock);
2478 		if (so->so_state & SS_ISCONNECTED) {
2479 			void *faddr;
2480 			t_uscalar_t faddr_len;
2481 			boolean_t match = B_FALSE;
2482 
2483 			switch (so->so_family) {
2484 			case AF_INET: {
2485 				/* Compare just IP address and port */
2486 				struct sockaddr_in *sin1, *sin2;
2487 
2488 				sin1 = (struct sockaddr_in *)sti->sti_faddr_sa;
2489 				sin2 = (struct sockaddr_in *)addr;
2490 				if (addrlen == sizeof (struct sockaddr_in) &&
2491 				    sin1->sin_port == sin2->sin_port &&
2492 				    sin1->sin_addr.s_addr ==
2493 				    sin2->sin_addr.s_addr)
2494 					match = B_TRUE;
2495 				break;
2496 			}
2497 			case AF_INET6: {
2498 				/* Compare just IP address and port. Not flow */
2499 				struct sockaddr_in6 *sin1, *sin2;
2500 
2501 				sin1 = (struct sockaddr_in6 *)sti->sti_faddr_sa;
2502 				sin2 = (struct sockaddr_in6 *)addr;
2503 				if (addrlen == sizeof (struct sockaddr_in6) &&
2504 				    sin1->sin6_port == sin2->sin6_port &&
2505 				    IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr,
2506 				    &sin2->sin6_addr))
2507 					match = B_TRUE;
2508 				break;
2509 			}
2510 			case AF_UNIX:
2511 				faddr = &sti->sti_ux_faddr;
2512 				faddr_len =
2513 				    (t_uscalar_t)sizeof (sti->sti_ux_faddr);
2514 				if (faddr_len == addrlen &&
2515 				    bcmp(addr, faddr, addrlen) == 0)
2516 					match = B_TRUE;
2517 				break;
2518 			default:
2519 				faddr = sti->sti_faddr_sa;
2520 				faddr_len = (t_uscalar_t)sti->sti_faddr_len;
2521 				if (faddr_len == addrlen &&
2522 				    bcmp(addr, faddr, addrlen) == 0)
2523 					match = B_TRUE;
2524 				break;
2525 			}
2526 
2527 			if (!match) {
2528 #ifdef DEBUG
2529 				dprintso(so, 0,
2530 				    ("sockfs: T_UDERR_IND mismatch: %s - ",
2531 				    pr_addr(so->so_family,
2532 				    (struct sockaddr *)addr, addrlen)));
2533 				dprintso(so, 0, ("%s\n",
2534 				    pr_addr(so->so_family, sti->sti_faddr_sa,
2535 				    sti->sti_faddr_len)));
2536 #endif /* DEBUG */
2537 				mutex_exit(&so->so_lock);
2538 				freemsg(mp);
2539 				return (NULL);
2540 			}
2541 			/*
2542 			 * Make the write error nonpersistent. If the error
2543 			 * is zero we use ECONNRESET.
2544 			 * This assumes that the name space for ERROR_type
2545 			 * is the errno name space.
2546 			 */
2547 			if (tudi->ERROR_type != 0)
2548 				error = tudi->ERROR_type;
2549 			else
2550 				error = ECONNRESET;
2551 
2552 			soseterror(so, error);
2553 			mutex_exit(&so->so_lock);
2554 			strsetrerror(SOTOV(so), 0, 0, sogetrderr);
2555 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2556 			*wakeups = RSLEEP | WSLEEP;
2557 			*allmsgsigs = S_INPUT | S_RDNORM | S_OUTPUT;
2558 			*pollwakeups = POLLIN | POLLRDNORM | POLLOUT;
2559 			freemsg(mp);
2560 			return (NULL);
2561 		}
2562 		/*
2563 		 * If the application asked for delayed errors
2564 		 * record the T_UDERROR_IND sti_eaddr_mp and the reason in
2565 		 * sti_delayed_error for delayed error posting. If the reason
2566 		 * is zero use ECONNRESET.
2567 		 * Note that delayed error indications do not make sense for
2568 		 * AF_UNIX sockets since sendto checks that the destination
2569 		 * address is valid at the time of the sendto.
2570 		 */
2571 		if (!(so->so_options & SO_DGRAM_ERRIND)) {
2572 			mutex_exit(&so->so_lock);
2573 			freemsg(mp);
2574 			return (NULL);
2575 		}
2576 		if (sti->sti_eaddr_mp != NULL)
2577 			freemsg(sti->sti_eaddr_mp);
2578 
2579 		sti->sti_eaddr_mp = mp;
2580 		if (tudi->ERROR_type != 0)
2581 			error = tudi->ERROR_type;
2582 		else
2583 			error = ECONNRESET;
2584 		sti->sti_delayed_error = (ushort_t)error;
2585 		mutex_exit(&so->so_lock);
2586 		return (NULL);
2587 	}
2588 
2589 	case T_ERROR_ACK:
2590 		dprintso(so, 0,
2591 		    ("strsock_proto: T_ERROR_ACK for %d, error %d/%d\n",
2592 		    tpr->error_ack.ERROR_prim,
2593 		    tpr->error_ack.TLI_error,
2594 		    tpr->error_ack.UNIX_error));
2595 
2596 		if (MBLKL(mp) < sizeof (struct T_error_ack)) {
2597 			zcmn_err(getzoneid(), CE_WARN,
2598 			    "sockfs: Too short T_ERROR_ACK. Len = %ld\n",
2599 			    (ptrdiff_t)(MBLKL(mp)));
2600 			freemsg(mp);
2601 			return (NULL);
2602 		}
2603 		/*
2604 		 * Check if we were waiting for the async message
2605 		 */
2606 		mutex_enter(&so->so_lock);
2607 		if ((so->so_flag & SOASYNC_UNBIND) &&
2608 		    tpr->error_ack.ERROR_prim == T_UNBIND_REQ) {
2609 			so_unlock_single(so, SOASYNC_UNBIND);
2610 			mutex_exit(&so->so_lock);
2611 			freemsg(mp);
2612 			return (NULL);
2613 		}
2614 		mutex_exit(&so->so_lock);
2615 		soqueueack(so, mp);
2616 		return (NULL);
2617 
2618 	case T_OK_ACK:
2619 		if (MBLKL(mp) < sizeof (struct T_ok_ack)) {
2620 			zcmn_err(getzoneid(), CE_WARN,
2621 			    "sockfs: Too short T_OK_ACK. Len = %ld\n",
2622 			    (ptrdiff_t)(MBLKL(mp)));
2623 			freemsg(mp);
2624 			return (NULL);
2625 		}
2626 		/*
2627 		 * Check if we were waiting for the async message
2628 		 */
2629 		mutex_enter(&so->so_lock);
2630 		if ((so->so_flag & SOASYNC_UNBIND) &&
2631 		    tpr->ok_ack.CORRECT_prim == T_UNBIND_REQ) {
2632 			dprintso(so, 1,
2633 			    ("strsock_proto: T_OK_ACK async unbind\n"));
2634 			so_unlock_single(so, SOASYNC_UNBIND);
2635 			mutex_exit(&so->so_lock);
2636 			freemsg(mp);
2637 			return (NULL);
2638 		}
2639 		mutex_exit(&so->so_lock);
2640 		soqueueack(so, mp);
2641 		return (NULL);
2642 
2643 	case T_INFO_ACK:
2644 		if (MBLKL(mp) < sizeof (struct T_info_ack)) {
2645 			zcmn_err(getzoneid(), CE_WARN,
2646 			    "sockfs: Too short T_INFO_ACK. Len = %ld\n",
2647 			    (ptrdiff_t)(MBLKL(mp)));
2648 			freemsg(mp);
2649 			return (NULL);
2650 		}
2651 		soqueueack(so, mp);
2652 		return (NULL);
2653 
2654 	case T_CAPABILITY_ACK:
2655 		/*
2656 		 * A T_capability_ack need only be large enough to hold
2657 		 * the PRIM_type and CAP_bits1 fields; checking for anything
2658 		 * larger might reject a correct response from an older
2659 		 * provider.
2660 		 */
2661 		if (MBLKL(mp) < 2 * sizeof (t_uscalar_t)) {
2662 			zcmn_err(getzoneid(), CE_WARN,
2663 			    "sockfs: Too short T_CAPABILITY_ACK. Len = %ld\n",
2664 			    (ptrdiff_t)(MBLKL(mp)));
2665 			freemsg(mp);
2666 			return (NULL);
2667 		}
2668 		soqueueack(so, mp);
2669 		return (NULL);
2670 
2671 	case T_BIND_ACK:
2672 		if (MBLKL(mp) < sizeof (struct T_bind_ack)) {
2673 			zcmn_err(getzoneid(), CE_WARN,
2674 			    "sockfs: Too short T_BIND_ACK. Len = %ld\n",
2675 			    (ptrdiff_t)(MBLKL(mp)));
2676 			freemsg(mp);
2677 			return (NULL);
2678 		}
2679 		soqueueack(so, mp);
2680 		return (NULL);
2681 
2682 	case T_OPTMGMT_ACK:
2683 		if (MBLKL(mp) < sizeof (struct T_optmgmt_ack)) {
2684 			zcmn_err(getzoneid(), CE_WARN,
2685 			    "sockfs: Too short T_OPTMGMT_ACK. Len = %ld\n",
2686 			    (ptrdiff_t)(MBLKL(mp)));
2687 			freemsg(mp);
2688 			return (NULL);
2689 		}
2690 		soqueueack(so, mp);
2691 		return (NULL);
2692 	default:
2693 #ifdef DEBUG
2694 		zcmn_err(getzoneid(), CE_WARN,
2695 		    "sockfs: unknown TPI primitive %d received\n",
2696 		    tpr->type);
2697 #endif /* DEBUG */
2698 		freemsg(mp);
2699 		return (NULL);
2700 	}
2701 }
2702 
2703 /*
2704  * This routine is registered with the stream head to receive other
2705  * (non-data, and non-proto) messages.
2706  *
2707  * Returns NULL if the message was consumed.
2708  * Returns an mblk to make that mblk be processed by the stream head.
2709  *
2710  * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and
2711  * *pollwakeups) for the stream head to take action on.
2712  */
2713 static mblk_t *
2714 strsock_misc(vnode_t *vp, mblk_t *mp,
2715 		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
2716 		strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
2717 {
2718 	struct sonode *so;
2719 	sotpi_info_t *sti;
2720 
2721 	so = VTOSO(vp);
2722 	sti = SOTOTPI(so);
2723 
2724 	dprintso(so, 1, ("strsock_misc(%p, %p, 0x%x)\n",
2725 	    (void *)vp, (void *)mp, DB_TYPE(mp)));
2726 
2727 	/* Set default return values */
2728 	*wakeups = *allmsgsigs = *firstmsgsigs = *pollwakeups = 0;
2729 
2730 	switch (DB_TYPE(mp)) {
2731 	case M_PCSIG:
2732 		/*
2733 		 * This assumes that an M_PCSIG for the urgent data arrives
2734 		 * before the corresponding T_EXDATA_IND.
2735 		 *
2736 		 * Note: Just like in SunOS 4.X and 4.4BSD a poll will be
2737 		 * awoken before the urgent data shows up.
2738 		 * For OOBINLINE this can result in select returning
2739 		 * only exceptions as opposed to except|read.
2740 		 */
2741 		if (*mp->b_rptr == SIGURG) {
2742 			mutex_enter(&so->so_lock);
2743 			dprintso(so, 1,
2744 			    ("SIGURG(%p): counts %d/%d state %s\n",
2745 			    (void *)vp, sti->sti_oobsigcnt, sti->sti_oobcnt,
2746 			    pr_state(so->so_state, so->so_mode)));
2747 			so_oob_sig(so, 1, allmsgsigs, pollwakeups);
2748 			dprintso(so, 1,
2749 			    ("after SIGURG(%p): counts %d/%d "
2750 			    " poll 0x%x sig 0x%x state %s\n",
2751 			    (void *)vp, sti->sti_oobsigcnt, sti->sti_oobcnt,
2752 			    *pollwakeups, *allmsgsigs,
2753 			    pr_state(so->so_state, so->so_mode)));
2754 			mutex_exit(&so->so_lock);
2755 		}
2756 		freemsg(mp);
2757 		return (NULL);
2758 
2759 	case M_SIG:
2760 	case M_HANGUP:
2761 	case M_UNHANGUP:
2762 	case M_ERROR:
2763 		/* M_ERRORs etc are ignored */
2764 		freemsg(mp);
2765 		return (NULL);
2766 
2767 	case M_FLUSH:
2768 		/*
2769 		 * Do not flush read queue. If the M_FLUSH
2770 		 * arrives because of an impending T_discon_ind
2771 		 * we still have to keep any queued data - this is part of
2772 		 * socket semantics.
2773 		 */
2774 		if (*mp->b_rptr & FLUSHW) {
2775 			*mp->b_rptr &= ~FLUSHR;
2776 			return (mp);
2777 		}
2778 		freemsg(mp);
2779 		return (NULL);
2780 
2781 	default:
2782 		return (mp);
2783 	}
2784 }
2785 
2786 
2787 /* Register to receive signals for certain events */
2788 int
2789 so_set_asyncsigs(vnode_t *vp, pid_t pgrp, int events, int mode, cred_t *cr)
2790 {
2791 	struct strsigset ss;
2792 	int32_t rval;
2793 
2794 	/*
2795 	 * Note that SOLOCKED will be set except for the call from soaccept().
2796 	 */
2797 	ASSERT(!mutex_owned(&VTOSO(vp)->so_lock));
2798 	ss.ss_pid = pgrp;
2799 	ss.ss_events = events;
2800 	return (strioctl(vp, I_ESETSIG, (intptr_t)&ss, mode, K_TO_K, cr,
2801 	    &rval));
2802 }
2803 
2804 
2805 /* Register for events matching the SS_ASYNC flag */
2806 int
2807 so_set_events(struct sonode *so, vnode_t *vp, cred_t *cr)
2808 {
2809 	int events = so->so_state & SS_ASYNC ?
2810 	    S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT :
2811 	    S_RDBAND | S_BANDURG;
2812 
2813 	return (so_set_asyncsigs(vp, so->so_pgrp, events, 0, cr));
2814 }
2815 
2816 
2817 /* Change the SS_ASYNC flag, and update signal delivery if needed */
2818 int
2819 so_flip_async(struct sonode *so, vnode_t *vp, int mode, cred_t *cr)
2820 {
2821 	ASSERT(mutex_owned(&so->so_lock));
2822 	if (so->so_pgrp != 0) {
2823 		int error;
2824 		int events = so->so_state & SS_ASYNC ?		/* Old flag */
2825 		    S_RDBAND | S_BANDURG :			/* New sigs */
2826 		    S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT;
2827 
2828 		so_lock_single(so);
2829 		mutex_exit(&so->so_lock);
2830 
2831 		error = so_set_asyncsigs(vp, so->so_pgrp, events, mode, cr);
2832 
2833 		mutex_enter(&so->so_lock);
2834 		so_unlock_single(so, SOLOCKED);
2835 		if (error)
2836 			return (error);
2837 	}
2838 	so->so_state ^= SS_ASYNC;
2839 	return (0);
2840 }
2841 
2842 /*
2843  * Set new pid/pgrp for SIGPOLL (or SIGIO for FIOASYNC mode), replacing
2844  * any existing one.  If passed zero, just clear the existing one.
2845  */
2846 int
2847 so_set_siggrp(struct sonode *so, vnode_t *vp, pid_t pgrp, int mode, cred_t *cr)
2848 {
2849 	int events = so->so_state & SS_ASYNC ?
2850 	    S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT :
2851 	    S_RDBAND | S_BANDURG;
2852 	int error;
2853 
2854 	ASSERT(mutex_owned(&so->so_lock));
2855 
2856 	/*
2857 	 * Change socket process (group).
2858 	 *
2859 	 * strioctl (via so_set_asyncsigs) will perform permission check and
2860 	 * also keep a PID_HOLD to prevent the pid from being reused.
2861 	 */
2862 	so_lock_single(so);
2863 	mutex_exit(&so->so_lock);
2864 
2865 	if (pgrp != 0) {
2866 		dprintso(so, 1, ("setown: adding pgrp %d ev 0x%x\n",
2867 		    pgrp, events));
2868 		error = so_set_asyncsigs(vp, pgrp, events, mode, cr);
2869 		if (error != 0) {
2870 			eprintsoline(so, error);
2871 			goto bad;
2872 		}
2873 	}
2874 	/* Remove the previously registered process/group */
2875 	if (so->so_pgrp != 0) {
2876 		dprintso(so, 1, ("setown: removing pgrp %d\n", so->so_pgrp));
2877 		error = so_set_asyncsigs(vp, so->so_pgrp, 0, mode, cr);
2878 		if (error != 0) {
2879 			eprintsoline(so, error);
2880 			error = 0;
2881 		}
2882 	}
2883 	mutex_enter(&so->so_lock);
2884 	so_unlock_single(so, SOLOCKED);
2885 	so->so_pgrp = pgrp;
2886 	return (0);
2887 bad:
2888 	mutex_enter(&so->so_lock);
2889 	so_unlock_single(so, SOLOCKED);
2890 	return (error);
2891 }
2892 
2893 /*
2894  * Wrapper for getmsg. If the socket has been converted to a stream
2895  * pass the request to the stream head.
2896  */
2897 int
2898 sock_getmsg(
2899 	struct vnode *vp,
2900 	struct strbuf *mctl,
2901 	struct strbuf *mdata,
2902 	uchar_t *prip,
2903 	int *flagsp,
2904 	int fmode,
2905 	rval_t *rvp
2906 )
2907 {
2908 	struct sonode *so;
2909 
2910 	ASSERT(vp->v_type == VSOCK);
2911 	/*
2912 	 * Use the stream head to find the real socket vnode.
2913 	 * This is needed when namefs sits above sockfs.  Some
2914 	 * sockets (like SCTP) are not streams.
2915 	 */
2916 	if (!vp->v_stream) {
2917 		return (ENOSTR);
2918 	}
2919 	ASSERT(vp->v_stream->sd_vnode);
2920 	vp = vp->v_stream->sd_vnode;
2921 	ASSERT(vn_matchops(vp, socket_vnodeops));
2922 	so = VTOSO(vp);
2923 
2924 	dprintso(so, 1, ("sock_getmsg(%p) %s\n",
2925 	    (void *)so, pr_state(so->so_state, so->so_mode)));
2926 
2927 	if (so->so_version == SOV_STREAM) {
2928 		/* The imaginary "sockmod" has been popped - act as a stream */
2929 		return (strgetmsg(vp, mctl, mdata, prip, flagsp, fmode, rvp));
2930 	}
2931 	eprintsoline(so, ENOSTR);
2932 	return (ENOSTR);
2933 }
2934 
2935 /*
2936  * Wrapper for putmsg. If the socket has been converted to a stream
2937  * pass the request to the stream head.
2938  *
2939  * Note that a while a regular socket (SOV_SOCKSTREAM) does support the
2940  * streams ioctl set it does not support putmsg and getmsg.
2941  * Allowing putmsg would prevent sockfs from tracking the state of
2942  * the socket/transport and would also invalidate the locking in sockfs.
2943  */
2944 int
2945 sock_putmsg(
2946 	struct vnode *vp,
2947 	struct strbuf *mctl,
2948 	struct strbuf *mdata,
2949 	uchar_t pri,
2950 	int flag,
2951 	int fmode
2952 )
2953 {
2954 	struct sonode *so;
2955 
2956 	ASSERT(vp->v_type == VSOCK);
2957 	/*
2958 	 * Use the stream head to find the real socket vnode.
2959 	 * This is needed when namefs sits above sockfs.
2960 	 */
2961 	if (!vp->v_stream) {
2962 		return (ENOSTR);
2963 	}
2964 	ASSERT(vp->v_stream->sd_vnode);
2965 	vp = vp->v_stream->sd_vnode;
2966 	ASSERT(vn_matchops(vp, socket_vnodeops));
2967 	so = VTOSO(vp);
2968 
2969 	dprintso(so, 1, ("sock_putmsg(%p) %s\n",
2970 	    (void *)so, pr_state(so->so_state, so->so_mode)));
2971 
2972 	if (so->so_version == SOV_STREAM) {
2973 		/* The imaginary "sockmod" has been popped - act as a stream */
2974 		return (strputmsg(vp, mctl, mdata, pri, flag, fmode));
2975 	}
2976 	eprintsoline(so, ENOSTR);
2977 	return (ENOSTR);
2978 }
2979 
2980 /*
2981  * Special function called only from f_getfl().
2982  * Returns FASYNC if the SS_ASYNC flag is set on a socket, else 0.
2983  * No locks are acquired here, so it is safe to use while uf_lock is held.
2984  * This exists solely for BSD fcntl() FASYNC compatibility.
2985  */
2986 int
2987 sock_getfasync(vnode_t *vp)
2988 {
2989 	struct sonode *so;
2990 
2991 	ASSERT(vp->v_type == VSOCK);
2992 	/*
2993 	 * For stream model, v_stream is used; For non-stream, v_stream always
2994 	 * equals NULL
2995 	 */
2996 	if (vp->v_stream != NULL)
2997 		so = VTOSO(vp->v_stream->sd_vnode);
2998 	else
2999 		so = VTOSO(vp);
3000 
3001 	if (so->so_version == SOV_STREAM || !(so->so_state & SS_ASYNC))
3002 		return (0);
3003 
3004 	return (FASYNC);
3005 }
3006 
3007 /*
3008  * Sockfs sodirect STREAMS read put procedure. Called from sodirect enable
3009  * transport driver/module with an mblk_t chain.
3010  *
3011  * Note, we in-line putq() for the fast-path cases of q is empty, q_last and
3012  * bp are of type M_DATA. All other cases we call putq().
3013  *
3014  * On success a zero will be return, else an errno will be returned.
3015  */
3016 int
3017 sodput(sodirect_t *sodp, mblk_t *bp)
3018 {
3019 	queue_t		*q = sodp->sod_q;
3020 	struct stdata	*stp = (struct stdata *)q->q_ptr;
3021 	mblk_t		*nbp;
3022 	mblk_t		*last = q->q_last;
3023 	int		bytecnt = 0;
3024 	int		mblkcnt = 0;
3025 
3026 
3027 	ASSERT(MUTEX_HELD(sodp->sod_lockp));
3028 
3029 	if (stp->sd_flag == STREOF) {
3030 		do {
3031 			if ((nbp = bp->b_next) != NULL)
3032 				bp->b_next = NULL;
3033 			freemsg(bp);
3034 		} while ((bp = nbp) != NULL);
3035 
3036 		return (0);
3037 	}
3038 
3039 	mutex_enter(QLOCK(q));
3040 	if (q->q_first == NULL) {
3041 		/* Q empty, really fast fast-path */
3042 		bp->b_prev = NULL;
3043 		bp->b_next = NULL;
3044 		q->q_first = bp;
3045 		q->q_last = bp;
3046 
3047 	} else if (last->b_datap->db_type == M_DATA &&
3048 	    bp->b_datap->db_type == M_DATA) {
3049 		/*
3050 		 * Last mblk_t chain and bp are both type M_DATA so
3051 		 * in-line putq() here, if the DBLK_UIOA state match
3052 		 * add bp to the end of the current last chain, else
3053 		 * start a new last chain with bp.
3054 		 */
3055 		if ((last->b_datap->db_flags & DBLK_UIOA) ==
3056 		    (bp->b_datap->db_flags & DBLK_UIOA)) {
3057 			/* Added to end */
3058 			while ((nbp = last->b_cont) != NULL)
3059 				last = nbp;
3060 			last->b_cont = bp;
3061 		} else {
3062 			/* New last */
3063 			ASSERT((bp->b_datap->db_flags & DBLK_UIOA) == 0 ||
3064 			    msgdsize(bp) == sodp->sod_uioa.uioa_mbytes);
3065 			last->b_next = bp;
3066 			bp->b_next = NULL;
3067 			bp->b_prev = last;
3068 			q->q_last = bp;
3069 		}
3070 	} else {
3071 		/*
3072 		 * Can't use q_last so just call putq().
3073 		 */
3074 		mutex_exit(QLOCK(q));
3075 
3076 		ASSERT((bp->b_datap->db_flags & DBLK_UIOA) == 0 ||
3077 		    msgdsize(bp) == sodp->sod_uioa.uioa_mbytes);
3078 		(void) putq(q, bp);
3079 		return (0);
3080 	}
3081 
3082 	/* Count bytes and mblk_t's */
3083 	do {
3084 		bytecnt += MBLKL(bp);
3085 		mblkcnt++;
3086 	} while ((bp = bp->b_cont) != NULL);
3087 	q->q_count += bytecnt;
3088 	q->q_mblkcnt += mblkcnt;
3089 
3090 	/* Check for QFULL */
3091 	if (q->q_count >= q->q_hiwat + sodp->sod_want ||
3092 	    q->q_mblkcnt >= q->q_hiwat) {
3093 		q->q_flag |= QFULL;
3094 	}
3095 
3096 	mutex_exit(QLOCK(q));
3097 	return (0);
3098 }
3099 
3100 /*
3101  * Sockfs sodirect read wakeup. Called from a sodirect enabled transport
3102  * driver/module to indicate that read-side data is available.
3103  *
3104  * On return the sodirect_t.lock mutex will be exited so this must be the
3105  * last sodirect_t call to guarantee atomic access of *sodp.
3106  */
3107 void
3108 sodwakeup(sodirect_t *sodp)
3109 {
3110 	queue_t		*q = sodp->sod_q;
3111 	struct stdata	*stp = (struct stdata *)q->q_ptr;
3112 
3113 	ASSERT(MUTEX_HELD(sodp->sod_lockp));
3114 
3115 	if (stp->sd_flag & RSLEEP) {
3116 		stp->sd_flag &= ~RSLEEP;
3117 		cv_broadcast(&q->q_wait);
3118 	}
3119 
3120 	if (stp->sd_rput_opt & SR_POLLIN) {
3121 		stp->sd_rput_opt &= ~SR_POLLIN;
3122 		mutex_exit(sodp->sod_lockp);
3123 		pollwakeup(&stp->sd_pollist, POLLIN | POLLRDNORM);
3124 	} else
3125 		mutex_exit(sodp->sod_lockp);
3126 }
3127