xref: /illumos-gate/usr/src/uts/common/fs/sockfs/socktpi.c (revision 52aec5b9758f6352670ab269980b437a987f4822)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2015, Joyent, Inc.
 * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
 */
277c478bd9Sstevel@tonic-gate 
287c478bd9Sstevel@tonic-gate #include <sys/types.h>
297c478bd9Sstevel@tonic-gate #include <sys/t_lock.h>
307c478bd9Sstevel@tonic-gate #include <sys/param.h>
317c478bd9Sstevel@tonic-gate #include <sys/systm.h>
327c478bd9Sstevel@tonic-gate #include <sys/buf.h>
337c478bd9Sstevel@tonic-gate #include <sys/conf.h>
347c478bd9Sstevel@tonic-gate #include <sys/cred.h>
357c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
360f1702c5SYu Xiangning #include <sys/kmem_impl.h>
377c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
387c478bd9Sstevel@tonic-gate #include <sys/vfs.h>
397c478bd9Sstevel@tonic-gate #include <sys/vnode.h>
407c478bd9Sstevel@tonic-gate #include <sys/debug.h>
417c478bd9Sstevel@tonic-gate #include <sys/errno.h>
427c478bd9Sstevel@tonic-gate #include <sys/time.h>
437c478bd9Sstevel@tonic-gate #include <sys/file.h>
447c478bd9Sstevel@tonic-gate #include <sys/open.h>
457c478bd9Sstevel@tonic-gate #include <sys/user.h>
467c478bd9Sstevel@tonic-gate #include <sys/termios.h>
477c478bd9Sstevel@tonic-gate #include <sys/stream.h>
487c478bd9Sstevel@tonic-gate #include <sys/strsubr.h>
497c478bd9Sstevel@tonic-gate #include <sys/strsun.h>
500f1702c5SYu Xiangning #include <sys/suntpi.h>
517c478bd9Sstevel@tonic-gate #include <sys/ddi.h>
527c478bd9Sstevel@tonic-gate #include <sys/esunddi.h>
537c478bd9Sstevel@tonic-gate #include <sys/flock.h>
547c478bd9Sstevel@tonic-gate #include <sys/modctl.h>
557c478bd9Sstevel@tonic-gate #include <sys/vtrace.h>
567c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
577c478bd9Sstevel@tonic-gate #include <sys/pathname.h>
587c478bd9Sstevel@tonic-gate 
597c478bd9Sstevel@tonic-gate #include <sys/socket.h>
607c478bd9Sstevel@tonic-gate #include <sys/socketvar.h>
61ff550d0eSmasputra #include <sys/sockio.h>
627c478bd9Sstevel@tonic-gate #include <netinet/in.h>
637c478bd9Sstevel@tonic-gate #include <sys/un.h>
647c478bd9Sstevel@tonic-gate #include <sys/strsun.h>
657c478bd9Sstevel@tonic-gate 
667c478bd9Sstevel@tonic-gate #include <sys/tiuser.h>
677c478bd9Sstevel@tonic-gate #define	_SUN_TPI_VERSION	2
687c478bd9Sstevel@tonic-gate #include <sys/tihdr.h>
697c478bd9Sstevel@tonic-gate #include <sys/timod.h>		/* TI_GETMYNAME, TI_GETPEERNAME */
707c478bd9Sstevel@tonic-gate 
717c478bd9Sstevel@tonic-gate #include <c2/audit.h>
727c478bd9Sstevel@tonic-gate 
737c478bd9Sstevel@tonic-gate #include <inet/common.h>
747c478bd9Sstevel@tonic-gate #include <inet/ip.h>
757c478bd9Sstevel@tonic-gate #include <inet/ip6.h>
767c478bd9Sstevel@tonic-gate #include <inet/tcp.h>
77ff550d0eSmasputra #include <inet/udp_impl.h>
787c478bd9Sstevel@tonic-gate 
797c478bd9Sstevel@tonic-gate #include <sys/zone.h>
807c478bd9Sstevel@tonic-gate 
812c9e429eSbrutus #include <fs/sockfs/nl7c.h>
822c9e429eSbrutus #include <fs/sockfs/nl7curi.h>
832c9e429eSbrutus 
840f1702c5SYu Xiangning #include <fs/sockfs/sockcommon.h>
850f1702c5SYu Xiangning #include <fs/sockfs/socktpi.h>
860f1702c5SYu Xiangning #include <fs/sockfs/socktpi_impl.h>
870f1702c5SYu Xiangning 
/*
 * Possible failures when memory can't be allocated. The documented behavior:
 *
 *		5.5:			4.X:		XNET:
 * accept:	ENOMEM/ENOSR/EINTR	- (EINTR)	ENOMEM/ENOBUFS/ENOSR/
 *							EINTR
 *	(4.X does not document EINTR but returns it)
 * bind:	ENOSR			-		ENOBUFS/ENOSR
 * connect:	EINTR			EINTR		ENOBUFS/ENOSR/EINTR
 * getpeername:	ENOMEM/ENOSR		ENOBUFS (-)	ENOBUFS/ENOSR
 * getsockname:	ENOMEM/ENOSR		ENOBUFS (-)	ENOBUFS/ENOSR
 *	(4.X getpeername and getsockname do not fail in practice)
 * getsockopt:	ENOMEM/ENOSR		-		ENOBUFS/ENOSR
 * listen:	-			-		ENOBUFS
 * recv:	ENOMEM/ENOSR/EINTR	EINTR		ENOBUFS/ENOMEM/ENOSR/
 *							EINTR
 * send:	ENOMEM/ENOSR/EINTR	ENOBUFS/EINTR	ENOBUFS/ENOMEM/ENOSR/
 *							EINTR
 * setsockopt:	ENOMEM/ENOSR		-		ENOBUFS/ENOMEM/ENOSR
 * shutdown:	ENOMEM/ENOSR		-		ENOBUFS/ENOSR
 * socket:	ENOMEM/ENOSR		ENOBUFS		ENOBUFS/ENOMEM/ENOSR
 * socketpair:	ENOMEM/ENOSR		-		ENOBUFS/ENOMEM/ENOSR
 *
 * Resolution. When allocation fails:
 *	recv: return EINTR
 *	send: return EINTR
 *	connect, accept: EINTR
 *	bind, listen, shutdown (unbind, unix_close, disconnect): sleep
 *	socket, socketpair: ENOBUFS
 *	getpeername, getsockname: sleep
 *	getsockopt, setsockopt: sleep
 */
1207c478bd9Sstevel@tonic-gate 
#ifdef SOCK_TEST
/*
 * Variables that make sockfs do something other than the standard TPI
 * for the AF_INET transports.
 *
 * solisten_tpi_tcp:
 *	TCP can handle a O_T_BIND_REQ with an increased backlog even though
 *	the transport is already bound. This is needed to avoid losing the
 *	port number should listen() do a T_UNBIND_REQ followed by a
 *	O_T_BIND_REQ.
 *
 * soconnect_tpi_udp:
 *	UDP and ICMP can handle a T_CONN_REQ.
 *	This is needed to make the sequence of connect(), getsockname()
 *	return the local IP address used to send packets to the connected to
 *	destination.
 *
 * soconnect_tpi_tcp:
 *	TCP can handle a T_CONN_REQ without seeing a O_T_BIND_REQ.
 *	Set this to non-zero to send TPI conformant messages to TCP in this
 *	respect. This is a performance optimization.
 *
 * soaccept_tpi_tcp:
 *	TCP can handle a T_CONN_REQ without the acceptor being bound.
 *	This is a performance optimization that has been picked up in XTI.
 *
 * soaccept_tpi_multioptions:
 *	When inheriting SOL_SOCKET options from the listener to the accepting
 *	socket send them as a single message for AF_INET{,6}.
 */
int solisten_tpi_tcp = 0;
int soconnect_tpi_udp = 0;
int soconnect_tpi_tcp = 0;
int soaccept_tpi_tcp = 0;
int soaccept_tpi_multioptions = 1;
#else /* SOCK_TEST */
/*
 * Without SOCK_TEST the same names are compile-time constants carrying the
 * default values used for the variables above.
 */
#define	soconnect_tpi_tcp	0
#define	soconnect_tpi_udp	0
#define	solisten_tpi_tcp	0
#define	soaccept_tpi_tcp	0
#define	soaccept_tpi_multioptions	1
#endif /* SOCK_TEST */
1637c478bd9Sstevel@tonic-gate 
1647c478bd9Sstevel@tonic-gate #ifdef SOCK_TEST
1657c478bd9Sstevel@tonic-gate extern int do_useracc;
1667c478bd9Sstevel@tonic-gate extern clock_t sock_test_timelimit;
1677c478bd9Sstevel@tonic-gate #endif /* SOCK_TEST */
1687c478bd9Sstevel@tonic-gate 
169d28d4716SJerry Jelinek extern uint32_t ucredsize;
170d28d4716SJerry Jelinek 
/*
 * Some X/Open added checks might have to be backed out to keep SunOS 4.X
 * applications working. Turn on this flag to disable these checks.
 *
 * NOTE(review): xnet_check_print and xnet_truncate_print are not referenced
 * in this part of the file; from their names they look like diagnostic
 * print switches for the same checks -- confirm against their users.
 */
int xnet_skip_checks = 0;
int xnet_check_print = 0;
int xnet_truncate_print = 0;
1787c478bd9Sstevel@tonic-gate 
1790f1702c5SYu Xiangning static void sotpi_destroy(struct sonode *);
1800f1702c5SYu Xiangning static struct sonode *sotpi_create(struct sockparams *, int, int, int, int,
1810f1702c5SYu Xiangning     int, int *, cred_t *cr);
1820f1702c5SYu Xiangning 
1830f1702c5SYu Xiangning static boolean_t	sotpi_info_create(struct sonode *, int);
1840f1702c5SYu Xiangning static void		sotpi_info_init(struct sonode *);
1850f1702c5SYu Xiangning static void		sotpi_info_fini(struct sonode *);
1860f1702c5SYu Xiangning static void		sotpi_info_destroy(struct sonode *);
1870f1702c5SYu Xiangning 
1880f1702c5SYu Xiangning /*
1890f1702c5SYu Xiangning  * Do direct function call to the transport layer below; this would
1900f1702c5SYu Xiangning  * also allow the transport to utilize read-side synchronous stream
1910f1702c5SYu Xiangning  * interface if necessary.  This is a /etc/system tunable that must
1920f1702c5SYu Xiangning  * not be modified on a running system.  By default this is enabled
1930f1702c5SYu Xiangning  * for performance reasons and may be disabled for debugging purposes.
1940f1702c5SYu Xiangning  */
1950f1702c5SYu Xiangning boolean_t socktpi_direct = B_TRUE;
1960f1702c5SYu Xiangning 
1970f1702c5SYu Xiangning static struct kmem_cache *socktpi_cache, *socktpi_unix_cache;
1980f1702c5SYu Xiangning 
1997c478bd9Sstevel@tonic-gate extern	void sigintr(k_sigset_t *, int);
2007c478bd9Sstevel@tonic-gate extern	void sigunintr(k_sigset_t *);
2017c478bd9Sstevel@tonic-gate 
2027c478bd9Sstevel@tonic-gate static int	sotpi_unbind(struct sonode *, int);
2037c478bd9Sstevel@tonic-gate 
2047c478bd9Sstevel@tonic-gate /* TPI sockfs sonode operations */
2050f1702c5SYu Xiangning int		sotpi_init(struct sonode *, struct sonode *, struct cred *,
2067c478bd9Sstevel@tonic-gate 		    int);
2070f1702c5SYu Xiangning static int	sotpi_accept(struct sonode *, int, struct cred *,
2080f1702c5SYu Xiangning 		    struct sonode **);
2090f1702c5SYu Xiangning static int	sotpi_bind(struct sonode *, struct sockaddr *, socklen_t,
2100f1702c5SYu Xiangning 		    int, struct cred *);
2110f1702c5SYu Xiangning static int	sotpi_listen(struct sonode *, int, struct cred *);
2123e95bd4aSAnders Persson static int	sotpi_connect(struct sonode *, struct sockaddr *,
2130f1702c5SYu Xiangning 		    socklen_t, int, int, struct cred *);
2140f1702c5SYu Xiangning extern int	sotpi_recvmsg(struct sonode *, struct nmsghdr *,
2150f1702c5SYu Xiangning 		    struct uio *, struct cred *);
2167c478bd9Sstevel@tonic-gate static int	sotpi_sendmsg(struct sonode *, struct nmsghdr *,
2170f1702c5SYu Xiangning 		    struct uio *, struct cred *);
2180f1702c5SYu Xiangning static int	sotpi_sendmblk(struct sonode *, struct nmsghdr *, int,
2190f1702c5SYu Xiangning 		    struct cred *, mblk_t **);
220ff550d0eSmasputra static int	sosend_dgramcmsg(struct sonode *, struct sockaddr *, socklen_t,
221ff550d0eSmasputra 		    struct uio *, void *, t_uscalar_t, int);
222ff550d0eSmasputra static int	sodgram_direct(struct sonode *, struct sockaddr *,
223ff550d0eSmasputra 		    socklen_t, struct uio *, int);
2240f1702c5SYu Xiangning extern int	sotpi_getpeername(struct sonode *, struct sockaddr *,
2250f1702c5SYu Xiangning 		    socklen_t *, boolean_t, struct cred *);
2260f1702c5SYu Xiangning static int	sotpi_getsockname(struct sonode *, struct sockaddr *,
2270f1702c5SYu Xiangning 		    socklen_t *, struct cred *);
2280f1702c5SYu Xiangning static int	sotpi_shutdown(struct sonode *, int, struct cred *);
2290f1702c5SYu Xiangning extern int	sotpi_getsockopt(struct sonode *, int, int, void *,
2300f1702c5SYu Xiangning 		    socklen_t *, int, struct cred *);
2310f1702c5SYu Xiangning extern int	sotpi_setsockopt(struct sonode *, int, int, const void *,
2320f1702c5SYu Xiangning 		    socklen_t, struct cred *);
2330f1702c5SYu Xiangning static int	sotpi_ioctl(struct sonode *, int, intptr_t, int, struct cred *,
2340f1702c5SYu Xiangning 		    int32_t *);
2350f1702c5SYu Xiangning static int	socktpi_plumbioctl(struct vnode *, int, intptr_t, int,
2360f1702c5SYu Xiangning 		    struct cred *, int32_t *);
2370f1702c5SYu Xiangning static int	sotpi_poll(struct sonode *, short, int, short *,
2380f1702c5SYu Xiangning 		    struct pollhead **);
2390f1702c5SYu Xiangning static int	sotpi_close(struct sonode *, int, struct cred *);
2400f1702c5SYu Xiangning 
2410f1702c5SYu Xiangning static int	i_sotpi_info_constructor(sotpi_info_t *);
2420f1702c5SYu Xiangning static void	i_sotpi_info_destructor(sotpi_info_t *);
2437c478bd9Sstevel@tonic-gate 
2447c478bd9Sstevel@tonic-gate sonodeops_t sotpi_sonodeops = {
2450f1702c5SYu Xiangning 	sotpi_init,		/* sop_init		*/
2467c478bd9Sstevel@tonic-gate 	sotpi_accept,		/* sop_accept		*/
2477c478bd9Sstevel@tonic-gate 	sotpi_bind,		/* sop_bind		*/
2487c478bd9Sstevel@tonic-gate 	sotpi_listen,		/* sop_listen		*/
2497c478bd9Sstevel@tonic-gate 	sotpi_connect,		/* sop_connect		*/
2507c478bd9Sstevel@tonic-gate 	sotpi_recvmsg,		/* sop_recvmsg		*/
2517c478bd9Sstevel@tonic-gate 	sotpi_sendmsg,		/* sop_sendmsg		*/
2520f1702c5SYu Xiangning 	sotpi_sendmblk,		/* sop_sendmblk		*/
2537c478bd9Sstevel@tonic-gate 	sotpi_getpeername,	/* sop_getpeername	*/
2547c478bd9Sstevel@tonic-gate 	sotpi_getsockname,	/* sop_getsockname	*/
2557c478bd9Sstevel@tonic-gate 	sotpi_shutdown,		/* sop_shutdown		*/
2567c478bd9Sstevel@tonic-gate 	sotpi_getsockopt,	/* sop_getsockopt	*/
2570f1702c5SYu Xiangning 	sotpi_setsockopt,	/* sop_setsockopt	*/
2580f1702c5SYu Xiangning 	sotpi_ioctl,		/* sop_ioctl		*/
2590f1702c5SYu Xiangning 	sotpi_poll,		/* sop_poll		*/
2600f1702c5SYu Xiangning 	sotpi_close,		/* sop_close		*/
2617c478bd9Sstevel@tonic-gate };
2627c478bd9Sstevel@tonic-gate 
2637c478bd9Sstevel@tonic-gate /*
2640f1702c5SYu Xiangning  * Return a TPI socket vnode.
2650f1702c5SYu Xiangning  *
2660f1702c5SYu Xiangning  * Note that sockets assume that the driver will clone (either itself
2670f1702c5SYu Xiangning  * or by using the clone driver) i.e. a socket() call will always
2680f1702c5SYu Xiangning  * result in a new vnode being created.
2690f1702c5SYu Xiangning  */
2700f1702c5SYu Xiangning 
2710f1702c5SYu Xiangning /*
2727c478bd9Sstevel@tonic-gate  * Common create code for socket and accept. If tso is set the values
2737c478bd9Sstevel@tonic-gate  * from that node is used instead of issuing a T_INFO_REQ.
2747c478bd9Sstevel@tonic-gate  */
2750f1702c5SYu Xiangning 
2760f1702c5SYu Xiangning /* ARGSUSED */
2770f1702c5SYu Xiangning static struct sonode *
2780f1702c5SYu Xiangning sotpi_create(struct sockparams *sp, int family, int type, int protocol,
2790f1702c5SYu Xiangning     int version, int sflags, int *errorp, cred_t *cr)
2807c478bd9Sstevel@tonic-gate {
2817c478bd9Sstevel@tonic-gate 	struct sonode	*so;
2820f1702c5SYu Xiangning 	kmem_cache_t	*cp;
2830f1702c5SYu Xiangning 	int		sfamily = family;
2847c478bd9Sstevel@tonic-gate 
2850f1702c5SYu Xiangning 	ASSERT(sp->sp_sdev_info.sd_vnode != NULL);
2867c478bd9Sstevel@tonic-gate 
2870f1702c5SYu Xiangning 	if (family == AF_NCA) {
2880f1702c5SYu Xiangning 		/*
2890f1702c5SYu Xiangning 		 * The request is for an NCA socket so for NL7C use the
2900f1702c5SYu Xiangning 		 * INET domain instead and mark NL7C_AF_NCA below.
2910f1702c5SYu Xiangning 		 */
2920f1702c5SYu Xiangning 		family = AF_INET;
2930f1702c5SYu Xiangning 		/*
2940f1702c5SYu Xiangning 		 * NL7C is not supported in the non-global zone,
2950f1702c5SYu Xiangning 		 * we enforce this restriction here.
2960f1702c5SYu Xiangning 		 */
2970f1702c5SYu Xiangning 		if (getzoneid() != GLOBAL_ZONEID) {
2980f1702c5SYu Xiangning 			*errorp = ENOTSUP;
2990f1702c5SYu Xiangning 			return (NULL);
3000f1702c5SYu Xiangning 		}
3010f1702c5SYu Xiangning 	}
302ff550d0eSmasputra 
3030f1702c5SYu Xiangning 	/*
3040f1702c5SYu Xiangning 	 * to be compatible with old tpi socket implementation ignore
3050f1702c5SYu Xiangning 	 * sleep flag (sflags) passed in
3060f1702c5SYu Xiangning 	 */
3070f1702c5SYu Xiangning 	cp = (family == AF_UNIX) ? socktpi_unix_cache : socktpi_cache;
3080f1702c5SYu Xiangning 	so = kmem_cache_alloc(cp, KM_SLEEP);
3090f1702c5SYu Xiangning 	if (so == NULL) {
3100f1702c5SYu Xiangning 		*errorp = ENOMEM;
3110f1702c5SYu Xiangning 		return (NULL);
3120f1702c5SYu Xiangning 	}
3130f1702c5SYu Xiangning 
3140f1702c5SYu Xiangning 	sonode_init(so, sp, family, type, protocol, &sotpi_sonodeops);
3150f1702c5SYu Xiangning 	sotpi_info_init(so);
3160f1702c5SYu Xiangning 
3170f1702c5SYu Xiangning 	if (sfamily == AF_NCA) {
3180f1702c5SYu Xiangning 		SOTOTPI(so)->sti_nl7c_flags = NL7C_AF_NCA;
3190f1702c5SYu Xiangning 	}
3200f1702c5SYu Xiangning 
3210f1702c5SYu Xiangning 	if (version == SOV_DEFAULT)
3220f1702c5SYu Xiangning 		version = so_default_version;
3230f1702c5SYu Xiangning 
3240f1702c5SYu Xiangning 	so->so_version = (short)version;
3250f1702c5SYu Xiangning 	*errorp = 0;
3260f1702c5SYu Xiangning 
3270f1702c5SYu Xiangning 	return (so);
3280f1702c5SYu Xiangning }
3290f1702c5SYu Xiangning 
3300f1702c5SYu Xiangning static void
3310f1702c5SYu Xiangning sotpi_destroy(struct sonode *so)
3320f1702c5SYu Xiangning {
3330f1702c5SYu Xiangning 	kmem_cache_t *cp;
3340f1702c5SYu Xiangning 	struct sockparams *origsp;
3350f1702c5SYu Xiangning 
3360f1702c5SYu Xiangning 	/*
3370f1702c5SYu Xiangning 	 * If there is a new dealloc function (ie. smod_destroy_func),
3380f1702c5SYu Xiangning 	 * then it should check the correctness of the ops.
3390f1702c5SYu Xiangning 	 */
3400f1702c5SYu Xiangning 
3410f1702c5SYu Xiangning 	ASSERT(so->so_ops == &sotpi_sonodeops);
3420f1702c5SYu Xiangning 
3430f1702c5SYu Xiangning 	origsp = SOTOTPI(so)->sti_orig_sp;
3440f1702c5SYu Xiangning 
3450f1702c5SYu Xiangning 	sotpi_info_fini(so);
3460f1702c5SYu Xiangning 
3470f1702c5SYu Xiangning 	if (so->so_state & SS_FALLBACK_COMP) {
3480f1702c5SYu Xiangning 		/*
3490f1702c5SYu Xiangning 		 * A fallback happend, which means that a sotpi_info_t struct
3500f1702c5SYu Xiangning 		 * was allocated (as opposed to being allocated from the TPI
3510f1702c5SYu Xiangning 		 * sonode cache. Therefore we explicitly free the struct
3520f1702c5SYu Xiangning 		 * here.
3530f1702c5SYu Xiangning 		 */
3540f1702c5SYu Xiangning 		sotpi_info_destroy(so);
3550f1702c5SYu Xiangning 		ASSERT(origsp != NULL);
3560f1702c5SYu Xiangning 
3570f1702c5SYu Xiangning 		origsp->sp_smod_info->smod_sock_destroy_func(so);
3580f1702c5SYu Xiangning 		SOCKPARAMS_DEC_REF(origsp);
3590f1702c5SYu Xiangning 	} else {
3600f1702c5SYu Xiangning 		sonode_fini(so);
3610f1702c5SYu Xiangning 		cp = (so->so_family == AF_UNIX) ? socktpi_unix_cache :
3620f1702c5SYu Xiangning 		    socktpi_cache;
3630f1702c5SYu Xiangning 		kmem_cache_free(cp, so);
3640f1702c5SYu Xiangning 	}
3650f1702c5SYu Xiangning }
3660f1702c5SYu Xiangning 
3670f1702c5SYu Xiangning /* ARGSUSED1 */
3680f1702c5SYu Xiangning int
3690f1702c5SYu Xiangning sotpi_init(struct sonode *so, struct sonode *tso, struct cred *cr, int flags)
3700f1702c5SYu Xiangning {
3710f1702c5SYu Xiangning 	major_t maj;
3720f1702c5SYu Xiangning 	dev_t newdev;
3730f1702c5SYu Xiangning 	struct vnode *vp;
3740f1702c5SYu Xiangning 	int error = 0;
3750f1702c5SYu Xiangning 	struct stdata *stp;
3760f1702c5SYu Xiangning 
3770f1702c5SYu Xiangning 	sotpi_info_t *sti = SOTOTPI(so);
3780f1702c5SYu Xiangning 
3790f1702c5SYu Xiangning 	dprint(1, ("sotpi_init()\n"));
3800f1702c5SYu Xiangning 
3810f1702c5SYu Xiangning 	/*
3820f1702c5SYu Xiangning 	 * over write the sleep flag passed in but that is ok
3830f1702c5SYu Xiangning 	 * as tpi socket does not honor sleep flag.
3840f1702c5SYu Xiangning 	 */
3850f1702c5SYu Xiangning 	flags |= FREAD|FWRITE;
3860f1702c5SYu Xiangning 
3870f1702c5SYu Xiangning 	/*
3880f1702c5SYu Xiangning 	 * Record in so_flag that it is a clone.
3890f1702c5SYu Xiangning 	 */
3900f1702c5SYu Xiangning 	if (getmajor(sti->sti_dev) == clone_major)
3910f1702c5SYu Xiangning 		so->so_flag |= SOCLONE;
3920f1702c5SYu Xiangning 
3930f1702c5SYu Xiangning 	if ((so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM) &&
3940f1702c5SYu Xiangning 	    (so->so_family == AF_INET || so->so_family == AF_INET6) &&
3950f1702c5SYu Xiangning 	    (so->so_protocol == IPPROTO_TCP || so->so_protocol == IPPROTO_UDP ||
3960f1702c5SYu Xiangning 	    so->so_protocol == IPPROTO_IP)) {
397ff550d0eSmasputra 		/* Tell tcp or udp that it's talking to sockets */
3987c478bd9Sstevel@tonic-gate 		flags |= SO_SOCKSTR;
399ff550d0eSmasputra 
400ff550d0eSmasputra 		/*
401ff550d0eSmasputra 		 * Here we indicate to socktpi_open() our attempt to
402ff550d0eSmasputra 		 * make direct calls between sockfs and transport.
403ff550d0eSmasputra 		 * The final decision is left to socktpi_open().
404ff550d0eSmasputra 		 */
4050f1702c5SYu Xiangning 		sti->sti_direct = 1;
406ff550d0eSmasputra 
407ff550d0eSmasputra 		ASSERT(so->so_type != SOCK_DGRAM || tso == NULL);
408ff550d0eSmasputra 		if (so->so_type == SOCK_STREAM && tso != NULL) {
4090f1702c5SYu Xiangning 			if (SOTOTPI(tso)->sti_direct) {
410ff550d0eSmasputra 				/*
4110f1702c5SYu Xiangning 				 * Inherit sti_direct from listener and pass
412ff550d0eSmasputra 				 * SO_ACCEPTOR open flag to tcp, indicating
413ff550d0eSmasputra 				 * that this is an accept fast-path instance.
414ff550d0eSmasputra 				 */
415ff550d0eSmasputra 				flags |= SO_ACCEPTOR;
416ff550d0eSmasputra 			} else {
417ff550d0eSmasputra 				/*
4180f1702c5SYu Xiangning 				 * sti_direct is not set on listener, meaning
419ff550d0eSmasputra 				 * that the listener has been converted from
420ff550d0eSmasputra 				 * a socket to a stream.  Ensure that the
421ff550d0eSmasputra 				 * acceptor inherits these settings.
422ff550d0eSmasputra 				 */
4230f1702c5SYu Xiangning 				sti->sti_direct = 0;
424ff550d0eSmasputra 				flags &= ~SO_SOCKSTR;
425ff550d0eSmasputra 			}
4267c478bd9Sstevel@tonic-gate 		}
4277c478bd9Sstevel@tonic-gate 	}
4287c478bd9Sstevel@tonic-gate 
4297c478bd9Sstevel@tonic-gate 	/*
4307c478bd9Sstevel@tonic-gate 	 * Tell local transport that it is talking to sockets.
4317c478bd9Sstevel@tonic-gate 	 */
4327c478bd9Sstevel@tonic-gate 	if (so->so_family == AF_UNIX) {
4337c478bd9Sstevel@tonic-gate 		flags |= SO_SOCKSTR;
4347c478bd9Sstevel@tonic-gate 	}
4357c478bd9Sstevel@tonic-gate 
4360f1702c5SYu Xiangning 	vp = SOTOV(so);
4370f1702c5SYu Xiangning 	newdev = vp->v_rdev;
4380f1702c5SYu Xiangning 	maj = getmajor(newdev);
4390f1702c5SYu Xiangning 	ASSERT(STREAMSTAB(maj));
440655a2e99Skais 
4410f1702c5SYu Xiangning 	error = stropen(vp, &newdev, flags, cr);
4420f1702c5SYu Xiangning 
4430f1702c5SYu Xiangning 	stp = vp->v_stream;
4440f1702c5SYu Xiangning 	if (error == 0) {
4450f1702c5SYu Xiangning 		if (so->so_flag & SOCLONE)
4460f1702c5SYu Xiangning 			ASSERT(newdev != vp->v_rdev);
4470f1702c5SYu Xiangning 		mutex_enter(&so->so_lock);
4480f1702c5SYu Xiangning 		sti->sti_dev = newdev;
4490f1702c5SYu Xiangning 		vp->v_rdev = newdev;
4500f1702c5SYu Xiangning 		mutex_exit(&so->so_lock);
4510f1702c5SYu Xiangning 
4520f1702c5SYu Xiangning 		if (stp->sd_flag & STRISTTY) {
4530f1702c5SYu Xiangning 			/*
4540f1702c5SYu Xiangning 			 * this is a post SVR4 tty driver - a socket can not
4550f1702c5SYu Xiangning 			 * be a controlling terminal. Fail the open.
4560f1702c5SYu Xiangning 			 */
4570f1702c5SYu Xiangning 			(void) sotpi_close(so, flags, cr);
4580f1702c5SYu Xiangning 			return (ENOTTY);	/* XXX */
4597c478bd9Sstevel@tonic-gate 		}
4607c478bd9Sstevel@tonic-gate 
4610f1702c5SYu Xiangning 		ASSERT(stp->sd_wrq != NULL);
4620f1702c5SYu Xiangning 		sti->sti_provinfo = tpi_findprov(stp->sd_wrq);
4630f1702c5SYu Xiangning 
4640f1702c5SYu Xiangning 		/*
4650f1702c5SYu Xiangning 		 * If caller is interested in doing direct function call
4660f1702c5SYu Xiangning 		 * interface to/from transport module, probe the module
4670f1702c5SYu Xiangning 		 * directly beneath the streamhead to see if it qualifies.
4680f1702c5SYu Xiangning 		 *
4690f1702c5SYu Xiangning 		 * We turn off the direct interface when qualifications fail.
4700f1702c5SYu Xiangning 		 * In the acceptor case, we simply turn off the sti_direct
4710f1702c5SYu Xiangning 		 * flag on the socket. We do the fallback after the accept
4720f1702c5SYu Xiangning 		 * has completed, before the new socket is returned to the
4730f1702c5SYu Xiangning 		 * application.
4740f1702c5SYu Xiangning 		 */
4750f1702c5SYu Xiangning 		if (sti->sti_direct) {
4760f1702c5SYu Xiangning 			queue_t *tq = stp->sd_wrq->q_next;
4770f1702c5SYu Xiangning 
4780f1702c5SYu Xiangning 			/*
4790f1702c5SYu Xiangning 			 * sti_direct is currently supported and tested
4800f1702c5SYu Xiangning 			 * only for tcp/udp; this is the main reason to
4810f1702c5SYu Xiangning 			 * have the following assertions.
4820f1702c5SYu Xiangning 			 */
4830f1702c5SYu Xiangning 			ASSERT(so->so_family == AF_INET ||
4840f1702c5SYu Xiangning 			    so->so_family == AF_INET6);
4850f1702c5SYu Xiangning 			ASSERT(so->so_protocol == IPPROTO_UDP ||
4860f1702c5SYu Xiangning 			    so->so_protocol == IPPROTO_TCP ||
4870f1702c5SYu Xiangning 			    so->so_protocol == IPPROTO_IP);
4880f1702c5SYu Xiangning 			ASSERT(so->so_type == SOCK_DGRAM ||
4890f1702c5SYu Xiangning 			    so->so_type == SOCK_STREAM);
4900f1702c5SYu Xiangning 
4910f1702c5SYu Xiangning 			/*
4920f1702c5SYu Xiangning 			 * Abort direct call interface if the module directly
4930f1702c5SYu Xiangning 			 * underneath the stream head is not defined with the
4940f1702c5SYu Xiangning 			 * _D_DIRECT flag.  This could happen in the tcp or
4950f1702c5SYu Xiangning 			 * udp case, when some other module is autopushed
4960f1702c5SYu Xiangning 			 * above it, or for some reasons the expected module
4970f1702c5SYu Xiangning 			 * isn't purely D_MP (which is the main requirement).
4980f1702c5SYu Xiangning 			 */
4990f1702c5SYu Xiangning 			if (!socktpi_direct || !(tq->q_flag & _QDIRECT) ||
5000f1702c5SYu Xiangning 			    !(_OTHERQ(tq)->q_flag & _QDIRECT)) {
5010f1702c5SYu Xiangning 				int rval;
5020f1702c5SYu Xiangning 
5030f1702c5SYu Xiangning 				/* Continue on without direct calls */
5040f1702c5SYu Xiangning 				sti->sti_direct = 0;
5050f1702c5SYu Xiangning 
5060f1702c5SYu Xiangning 				/*
5070f1702c5SYu Xiangning 				 * Cannot issue ioctl on fallback socket since
5080f1702c5SYu Xiangning 				 * there is no conn associated with the queue.
5090f1702c5SYu Xiangning 				 * The fallback downcall will notify the proto
5100f1702c5SYu Xiangning 				 * of the change.
5110f1702c5SYu Xiangning 				 */
5120f1702c5SYu Xiangning 				if (!(flags & SO_ACCEPTOR) &&
5130f1702c5SYu Xiangning 				    !(flags & SO_FALLBACK)) {
5140f1702c5SYu Xiangning 					if ((error = strioctl(vp,
5150f1702c5SYu Xiangning 					    _SIOCSOCKFALLBACK, 0, 0, K_TO_K,
5160f1702c5SYu Xiangning 					    cr, &rval)) != 0) {
5170f1702c5SYu Xiangning 						(void) sotpi_close(so, flags,
5180f1702c5SYu Xiangning 						    cr);
5190f1702c5SYu Xiangning 						return (error);
5200f1702c5SYu Xiangning 					}
5210f1702c5SYu Xiangning 				}
5220f1702c5SYu Xiangning 			}
5230f1702c5SYu Xiangning 		}
5240f1702c5SYu Xiangning 
5250f1702c5SYu Xiangning 		if (flags & SO_FALLBACK) {
5260f1702c5SYu Xiangning 			/*
5270f1702c5SYu Xiangning 			 * The stream created does not have a conn.
5280f1702c5SYu Xiangning 			 * do stream set up after conn has been assigned
5290f1702c5SYu Xiangning 			 */
5300f1702c5SYu Xiangning 			return (error);
5310f1702c5SYu Xiangning 		}
5327c478bd9Sstevel@tonic-gate 		if (error = so_strinit(so, tso)) {
5330f1702c5SYu Xiangning 			(void) sotpi_close(so, flags, cr);
5340f1702c5SYu Xiangning 			return (error);
5357c478bd9Sstevel@tonic-gate 		}
5367c478bd9Sstevel@tonic-gate 
537acb55917SPatrick Mooney 		/* Enable sendfile() on AF_UNIX streams */
538acb55917SPatrick Mooney 		if (so->so_family == AF_UNIX && so->so_type == SOCK_STREAM) {
539acb55917SPatrick Mooney 			mutex_enter(&so->so_lock);
540acb55917SPatrick Mooney 			so->so_mode |= SM_SENDFILESUPP;
541acb55917SPatrick Mooney 			mutex_exit(&so->so_lock);
542acb55917SPatrick Mooney 		}
543acb55917SPatrick Mooney 
5440f1702c5SYu Xiangning 		/* Wildcard */
5450f1702c5SYu Xiangning 		if (so->so_protocol != so->so_sockparams->sp_protocol) {
5460f1702c5SYu Xiangning 			int protocol = so->so_protocol;
5470f1702c5SYu Xiangning 			/*
5480f1702c5SYu Xiangning 			 * Issue SO_PROTOTYPE setsockopt.
5490f1702c5SYu Xiangning 			 */
5500f1702c5SYu Xiangning 			error = sotpi_setsockopt(so, SOL_SOCKET, SO_PROTOTYPE,
5510f1702c5SYu Xiangning 			    &protocol, (t_uscalar_t)sizeof (protocol), cr);
5520f1702c5SYu Xiangning 			if (error != 0) {
5530f1702c5SYu Xiangning 				(void) sotpi_close(so, flags, cr);
5540f1702c5SYu Xiangning 				/*
5550f1702c5SYu Xiangning 				 * Setsockopt often fails with ENOPROTOOPT but
5560f1702c5SYu Xiangning 				 * socket() should fail with
5570f1702c5SYu Xiangning 				 * EPROTONOSUPPORT/EPROTOTYPE.
5580f1702c5SYu Xiangning 				 */
5590f1702c5SYu Xiangning 				return (EPROTONOSUPPORT);
5600f1702c5SYu Xiangning 			}
5610f1702c5SYu Xiangning 		}
5627c478bd9Sstevel@tonic-gate 
5630f1702c5SYu Xiangning 	} else {
5640f1702c5SYu Xiangning 		/*
5650f1702c5SYu Xiangning 		 * While the same socket can not be reopened (unlike specfs)
5660f1702c5SYu Xiangning 		 * the stream head sets STREOPENFAIL when the autopush fails.
5670f1702c5SYu Xiangning 		 */
5680f1702c5SYu Xiangning 		if ((stp != NULL) &&
5690f1702c5SYu Xiangning 		    (stp->sd_flag & STREOPENFAIL)) {
5700f1702c5SYu Xiangning 			/*
5710f1702c5SYu Xiangning 			 * Open failed part way through.
5720f1702c5SYu Xiangning 			 */
5730f1702c5SYu Xiangning 			mutex_enter(&stp->sd_lock);
5740f1702c5SYu Xiangning 			stp->sd_flag &= ~STREOPENFAIL;
5750f1702c5SYu Xiangning 			mutex_exit(&stp->sd_lock);
5760f1702c5SYu Xiangning 			(void) sotpi_close(so, flags, cr);
5770f1702c5SYu Xiangning 			return (error);
5780f1702c5SYu Xiangning 			/*NOTREACHED*/
5790f1702c5SYu Xiangning 		}
5800f1702c5SYu Xiangning 		ASSERT(stp == NULL);
5810f1702c5SYu Xiangning 	}
5820f1702c5SYu Xiangning 	TRACE_4(TR_FAC_SOCKFS, TR_SOCKFS_OPEN,
5830f1702c5SYu Xiangning 	    "sockfs open:maj %d vp %p so %p error %d",
5840f1702c5SYu Xiangning 	    maj, vp, so, error);
5850f1702c5SYu Xiangning 	return (error);
5867c478bd9Sstevel@tonic-gate }
5877c478bd9Sstevel@tonic-gate 
5887c478bd9Sstevel@tonic-gate /*
5897c478bd9Sstevel@tonic-gate  * Bind the socket to an unspecified address in sockfs only.
5907c478bd9Sstevel@tonic-gate  * Used for TCP/UDP transports where we know that the O_T_BIND_REQ isn't
5917c478bd9Sstevel@tonic-gate  * required in all cases.
5927c478bd9Sstevel@tonic-gate  */
5937c478bd9Sstevel@tonic-gate static void
5947c478bd9Sstevel@tonic-gate so_automatic_bind(struct sonode *so)
5957c478bd9Sstevel@tonic-gate {
5960f1702c5SYu Xiangning 	sotpi_info_t *sti = SOTOTPI(so);
5977c478bd9Sstevel@tonic-gate 	ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6);
5987c478bd9Sstevel@tonic-gate 
5997c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&so->so_lock));
6007c478bd9Sstevel@tonic-gate 	ASSERT(!(so->so_state & SS_ISBOUND));
6010f1702c5SYu Xiangning 	ASSERT(sti->sti_unbind_mp);
6027c478bd9Sstevel@tonic-gate 
6030f1702c5SYu Xiangning 	ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
6040f1702c5SYu Xiangning 	bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
6050f1702c5SYu Xiangning 	sti->sti_laddr_sa->sa_family = so->so_family;
6067c478bd9Sstevel@tonic-gate 	so->so_state |= SS_ISBOUND;
6077c478bd9Sstevel@tonic-gate }
6087c478bd9Sstevel@tonic-gate 
6097c478bd9Sstevel@tonic-gate 
6107c478bd9Sstevel@tonic-gate /*
6117c478bd9Sstevel@tonic-gate  * bind the socket.
6127c478bd9Sstevel@tonic-gate  *
6137c478bd9Sstevel@tonic-gate  * If the socket is already bound and none of _SOBIND_SOCKBSD or _SOBIND_XPG4_2
6147c478bd9Sstevel@tonic-gate  * are passed in we allow rebinding. Note that for backwards compatibility
6157c478bd9Sstevel@tonic-gate  * even "svr4" sockets pass in _SOBIND_SOCKBSD/SOV_SOCKBSD to sobind/bind.
6167c478bd9Sstevel@tonic-gate  * Thus the rebinding code is currently not executed.
6177c478bd9Sstevel@tonic-gate  *
6187c478bd9Sstevel@tonic-gate  * The constraints for rebinding are:
6197c478bd9Sstevel@tonic-gate  * - it is a SOCK_DGRAM, or
6207c478bd9Sstevel@tonic-gate  * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected
6217c478bd9Sstevel@tonic-gate  *   and no listen() has been done.
 * This rebinding code was added based on some language in the XNET book
 * about not returning EINVAL if the protocol allows rebinding. However,
 * this language is not present in the POSIX socket draft. Thus maybe the
 * rebinding logic should be deleted from the source.
6267c478bd9Sstevel@tonic-gate  *
6277c478bd9Sstevel@tonic-gate  * A null "name" can be used to unbind the socket if:
6287c478bd9Sstevel@tonic-gate  * - it is a SOCK_DGRAM, or
6297c478bd9Sstevel@tonic-gate  * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected
6307c478bd9Sstevel@tonic-gate  *   and no listen() has been done.
6317c478bd9Sstevel@tonic-gate  */
6320f1702c5SYu Xiangning /* ARGSUSED */
6337c478bd9Sstevel@tonic-gate static int
6347c478bd9Sstevel@tonic-gate sotpi_bindlisten(struct sonode *so, struct sockaddr *name,
6350f1702c5SYu Xiangning     socklen_t namelen, int backlog, int flags, struct cred *cr)
6367c478bd9Sstevel@tonic-gate {
6377c478bd9Sstevel@tonic-gate 	struct T_bind_req	bind_req;
6387c478bd9Sstevel@tonic-gate 	struct T_bind_ack	*bind_ack;
6397c478bd9Sstevel@tonic-gate 	int			error = 0;
6407c478bd9Sstevel@tonic-gate 	mblk_t			*mp;
6417c478bd9Sstevel@tonic-gate 	void			*addr;
6427c478bd9Sstevel@tonic-gate 	t_uscalar_t		addrlen;
6437c478bd9Sstevel@tonic-gate 	int			unbind_on_err = 1;
6447c478bd9Sstevel@tonic-gate 	boolean_t		clear_acceptconn_on_err = B_FALSE;
6457c478bd9Sstevel@tonic-gate 	boolean_t		restore_backlog_on_err = B_FALSE;
6467c478bd9Sstevel@tonic-gate 	int			save_so_backlog;
6477c478bd9Sstevel@tonic-gate 	t_scalar_t		PRIM_type = O_T_BIND_REQ;
6487c478bd9Sstevel@tonic-gate 	boolean_t		tcp_udp_xport;
6497c478bd9Sstevel@tonic-gate 	void			*nl7c = NULL;
6500f1702c5SYu Xiangning 	sotpi_info_t		*sti = SOTOTPI(so);
6517c478bd9Sstevel@tonic-gate 
6527c478bd9Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n",
653903a11ebSrh87107 	    (void *)so, (void *)name, namelen, backlog, flags,
6547c478bd9Sstevel@tonic-gate 	    pr_state(so->so_state, so->so_mode)));
6557c478bd9Sstevel@tonic-gate 
6567c478bd9Sstevel@tonic-gate 	tcp_udp_xport = so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM;
6577c478bd9Sstevel@tonic-gate 
6587c478bd9Sstevel@tonic-gate 	if (!(flags & _SOBIND_LOCK_HELD)) {
6597c478bd9Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
6607c478bd9Sstevel@tonic-gate 		so_lock_single(so);	/* Set SOLOCKED */
6617c478bd9Sstevel@tonic-gate 	} else {
6627c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(&so->so_lock));
6637c478bd9Sstevel@tonic-gate 		ASSERT(so->so_flag & SOLOCKED);
6647c478bd9Sstevel@tonic-gate 	}
6657c478bd9Sstevel@tonic-gate 
6667c478bd9Sstevel@tonic-gate 	/*
6677c478bd9Sstevel@tonic-gate 	 * Make sure that there is a preallocated unbind_req message
6687c478bd9Sstevel@tonic-gate 	 * before binding. This message allocated when the socket is
6697c478bd9Sstevel@tonic-gate 	 * created  but it might be have been consumed.
6707c478bd9Sstevel@tonic-gate 	 */
6710f1702c5SYu Xiangning 	if (sti->sti_unbind_mp == NULL) {
6727c478bd9Sstevel@tonic-gate 		dprintso(so, 1, ("sobind: allocating unbind_req\n"));
6737c478bd9Sstevel@tonic-gate 		/* NOTE: holding so_lock while sleeping */
6740f1702c5SYu Xiangning 		sti->sti_unbind_mp =
675de8c4a14SErik Nordmark 		    soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP,
676de8c4a14SErik Nordmark 		    cr);
6777c478bd9Sstevel@tonic-gate 	}
6787c478bd9Sstevel@tonic-gate 
6797c478bd9Sstevel@tonic-gate 	if (flags & _SOBIND_REBIND) {
6807c478bd9Sstevel@tonic-gate 		/*
6817c478bd9Sstevel@tonic-gate 		 * Called from solisten after doing an sotpi_unbind() or
6827c478bd9Sstevel@tonic-gate 		 * potentially without the unbind (latter for AF_INET{,6}).
6837c478bd9Sstevel@tonic-gate 		 */
6847c478bd9Sstevel@tonic-gate 		ASSERT(name == NULL && namelen == 0);
6857c478bd9Sstevel@tonic-gate 
6867c478bd9Sstevel@tonic-gate 		if (so->so_family == AF_UNIX) {
6870f1702c5SYu Xiangning 			ASSERT(sti->sti_ux_bound_vp);
6880f1702c5SYu Xiangning 			addr = &sti->sti_ux_laddr;
6890f1702c5SYu Xiangning 			addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr);
690fc80c0dfSnordmark 			dprintso(so, 1, ("sobind rebind UNIX: addrlen %d, "
691fc80c0dfSnordmark 			    "addr 0x%p, vp %p\n",
6927c478bd9Sstevel@tonic-gate 			    addrlen,
693903a11ebSrh87107 			    (void *)((struct so_ux_addr *)addr)->soua_vp,
6940f1702c5SYu Xiangning 			    (void *)sti->sti_ux_bound_vp));
6957c478bd9Sstevel@tonic-gate 		} else {
6960f1702c5SYu Xiangning 			addr = sti->sti_laddr_sa;
6970f1702c5SYu Xiangning 			addrlen = (t_uscalar_t)sti->sti_laddr_len;
6987c478bd9Sstevel@tonic-gate 		}
6997c478bd9Sstevel@tonic-gate 	} else if (flags & _SOBIND_UNSPEC) {
7007c478bd9Sstevel@tonic-gate 		ASSERT(name == NULL && namelen == 0);
7017c478bd9Sstevel@tonic-gate 
7027c478bd9Sstevel@tonic-gate 		/*
7037c478bd9Sstevel@tonic-gate 		 * The caller checked SS_ISBOUND but not necessarily
7047c478bd9Sstevel@tonic-gate 		 * under so_lock
7057c478bd9Sstevel@tonic-gate 		 */
7067c478bd9Sstevel@tonic-gate 		if (so->so_state & SS_ISBOUND) {
7077c478bd9Sstevel@tonic-gate 			/* No error */
7087c478bd9Sstevel@tonic-gate 			goto done;
7097c478bd9Sstevel@tonic-gate 		}
7107c478bd9Sstevel@tonic-gate 
7117c478bd9Sstevel@tonic-gate 		/* Set an initial local address */
7127c478bd9Sstevel@tonic-gate 		switch (so->so_family) {
7137c478bd9Sstevel@tonic-gate 		case AF_UNIX:
7147c478bd9Sstevel@tonic-gate 			/*
7157c478bd9Sstevel@tonic-gate 			 * Use an address with same size as struct sockaddr
7167c478bd9Sstevel@tonic-gate 			 * just like BSD.
7177c478bd9Sstevel@tonic-gate 			 */
7180f1702c5SYu Xiangning 			sti->sti_laddr_len =
7197c478bd9Sstevel@tonic-gate 			    (socklen_t)sizeof (struct sockaddr);
7200f1702c5SYu Xiangning 			ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
7210f1702c5SYu Xiangning 			bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
7220f1702c5SYu Xiangning 			sti->sti_laddr_sa->sa_family = so->so_family;
7237c478bd9Sstevel@tonic-gate 
7247c478bd9Sstevel@tonic-gate 			/*
7257c478bd9Sstevel@tonic-gate 			 * Pass down an address with the implicit bind
7267c478bd9Sstevel@tonic-gate 			 * magic number and the rest all zeros.
7277c478bd9Sstevel@tonic-gate 			 * The transport will return a unique address.
7287c478bd9Sstevel@tonic-gate 			 */
7290f1702c5SYu Xiangning 			sti->sti_ux_laddr.soua_vp = NULL;
7300f1702c5SYu Xiangning 			sti->sti_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT;
7310f1702c5SYu Xiangning 			addr = &sti->sti_ux_laddr;
7320f1702c5SYu Xiangning 			addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr);
7337c478bd9Sstevel@tonic-gate 			break;
7347c478bd9Sstevel@tonic-gate 
7357c478bd9Sstevel@tonic-gate 		case AF_INET:
7367c478bd9Sstevel@tonic-gate 		case AF_INET6:
7377c478bd9Sstevel@tonic-gate 			/*
7387c478bd9Sstevel@tonic-gate 			 * An unspecified bind in TPI has a NULL address.
7397c478bd9Sstevel@tonic-gate 			 * Set the address in sockfs to have the sa_family.
7407c478bd9Sstevel@tonic-gate 			 */
7410f1702c5SYu Xiangning 			sti->sti_laddr_len = (so->so_family == AF_INET) ?
7427c478bd9Sstevel@tonic-gate 			    (socklen_t)sizeof (sin_t) :
7437c478bd9Sstevel@tonic-gate 			    (socklen_t)sizeof (sin6_t);
7440f1702c5SYu Xiangning 			ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
7450f1702c5SYu Xiangning 			bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
7460f1702c5SYu Xiangning 			sti->sti_laddr_sa->sa_family = so->so_family;
7477c478bd9Sstevel@tonic-gate 			addr = NULL;
7487c478bd9Sstevel@tonic-gate 			addrlen = 0;
7497c478bd9Sstevel@tonic-gate 			break;
7507c478bd9Sstevel@tonic-gate 
7517c478bd9Sstevel@tonic-gate 		default:
7527c478bd9Sstevel@tonic-gate 			/*
7537c478bd9Sstevel@tonic-gate 			 * An unspecified bind in TPI has a NULL address.
7547c478bd9Sstevel@tonic-gate 			 * Set the address in sockfs to be zero length.
7557c478bd9Sstevel@tonic-gate 			 *
7567c478bd9Sstevel@tonic-gate 			 * Can not assume there is a sa_family for all
7577c478bd9Sstevel@tonic-gate 			 * protocol families. For example, AF_X25 does not
7587c478bd9Sstevel@tonic-gate 			 * have a family field.
7597c478bd9Sstevel@tonic-gate 			 */
7600f1702c5SYu Xiangning 			bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
7610f1702c5SYu Xiangning 			sti->sti_laddr_len = 0;	/* XXX correct? */
7627c478bd9Sstevel@tonic-gate 			addr = NULL;
7637c478bd9Sstevel@tonic-gate 			addrlen = 0;
7647c478bd9Sstevel@tonic-gate 			break;
7657c478bd9Sstevel@tonic-gate 		}
7667c478bd9Sstevel@tonic-gate 
7677c478bd9Sstevel@tonic-gate 	} else {
7687c478bd9Sstevel@tonic-gate 		if (so->so_state & SS_ISBOUND) {
7697c478bd9Sstevel@tonic-gate 			/*
7707c478bd9Sstevel@tonic-gate 			 * If it is ok to rebind the socket, first unbind
7717c478bd9Sstevel@tonic-gate 			 * with the transport. A rebind to the NULL address
7727c478bd9Sstevel@tonic-gate 			 * is interpreted as an unbind.
7737c478bd9Sstevel@tonic-gate 			 * Note that a bind to NULL in BSD does unbind the
7747c478bd9Sstevel@tonic-gate 			 * socket but it fails with EINVAL.
7757c478bd9Sstevel@tonic-gate 			 * Note that regular sockets set SOV_SOCKBSD i.e.
7767c478bd9Sstevel@tonic-gate 			 * _SOBIND_SOCKBSD gets set here hence no type of
7777c478bd9Sstevel@tonic-gate 			 * socket does currently allow rebinding.
7787c478bd9Sstevel@tonic-gate 			 *
7797c478bd9Sstevel@tonic-gate 			 * If the name is NULL just do an unbind.
7807c478bd9Sstevel@tonic-gate 			 */
7817c478bd9Sstevel@tonic-gate 			if (flags & (_SOBIND_SOCKBSD|_SOBIND_XPG4_2) &&
7827c478bd9Sstevel@tonic-gate 			    name != NULL) {
7837c478bd9Sstevel@tonic-gate 				error = EINVAL;
7847c478bd9Sstevel@tonic-gate 				unbind_on_err = 0;
7857c478bd9Sstevel@tonic-gate 				eprintsoline(so, error);
7867c478bd9Sstevel@tonic-gate 				goto done;
7877c478bd9Sstevel@tonic-gate 			}
7887c478bd9Sstevel@tonic-gate 			if ((so->so_mode & SM_CONNREQUIRED) &&
7897c478bd9Sstevel@tonic-gate 			    (so->so_state & SS_CANTREBIND)) {
7907c478bd9Sstevel@tonic-gate 				error = EINVAL;
7917c478bd9Sstevel@tonic-gate 				unbind_on_err = 0;
7927c478bd9Sstevel@tonic-gate 				eprintsoline(so, error);
7937c478bd9Sstevel@tonic-gate 				goto done;
7947c478bd9Sstevel@tonic-gate 			}
7957c478bd9Sstevel@tonic-gate 			error = sotpi_unbind(so, 0);
7967c478bd9Sstevel@tonic-gate 			if (error) {
7977c478bd9Sstevel@tonic-gate 				eprintsoline(so, error);
7987c478bd9Sstevel@tonic-gate 				goto done;
7997c478bd9Sstevel@tonic-gate 			}
8007c478bd9Sstevel@tonic-gate 			ASSERT(!(so->so_state & SS_ISBOUND));
8017c478bd9Sstevel@tonic-gate 			if (name == NULL) {
8027c478bd9Sstevel@tonic-gate 				so->so_state &=
8037c478bd9Sstevel@tonic-gate 				    ~(SS_ISCONNECTED|SS_ISCONNECTING);
8047c478bd9Sstevel@tonic-gate 				goto done;
8057c478bd9Sstevel@tonic-gate 			}
8067c478bd9Sstevel@tonic-gate 		}
8070f1702c5SYu Xiangning 
8087c478bd9Sstevel@tonic-gate 		/* X/Open requires this check */
8097c478bd9Sstevel@tonic-gate 		if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
8107c478bd9Sstevel@tonic-gate 			if (xnet_check_print) {
8117c478bd9Sstevel@tonic-gate 				printf("sockfs: X/Open bind state check "
8127c478bd9Sstevel@tonic-gate 				    "caused EINVAL\n");
8137c478bd9Sstevel@tonic-gate 			}
8147c478bd9Sstevel@tonic-gate 			error = EINVAL;
8157c478bd9Sstevel@tonic-gate 			goto done;
8167c478bd9Sstevel@tonic-gate 		}
8177c478bd9Sstevel@tonic-gate 
8187c478bd9Sstevel@tonic-gate 		switch (so->so_family) {
8197c478bd9Sstevel@tonic-gate 		case AF_UNIX:
8207c478bd9Sstevel@tonic-gate 			/*
8217c478bd9Sstevel@tonic-gate 			 * All AF_UNIX addresses are nul terminated
8227c478bd9Sstevel@tonic-gate 			 * when copied (copyin_name) in so the minimum
8237c478bd9Sstevel@tonic-gate 			 * length is 3 bytes.
8247c478bd9Sstevel@tonic-gate 			 */
8257c478bd9Sstevel@tonic-gate 			if (name == NULL ||
8267c478bd9Sstevel@tonic-gate 			    (ssize_t)namelen <= sizeof (short) + 1) {
8277c478bd9Sstevel@tonic-gate 				error = EISDIR;
8287c478bd9Sstevel@tonic-gate 				eprintsoline(so, error);
8297c478bd9Sstevel@tonic-gate 				goto done;
8307c478bd9Sstevel@tonic-gate 			}
8317c478bd9Sstevel@tonic-gate 			/*
8327c478bd9Sstevel@tonic-gate 			 * Verify so_family matches the bound family.
8337c478bd9Sstevel@tonic-gate 			 * BSD does not check this for AF_UNIX resulting
8347c478bd9Sstevel@tonic-gate 			 * in funny mknods.
8357c478bd9Sstevel@tonic-gate 			 */
8367c478bd9Sstevel@tonic-gate 			if (name->sa_family != so->so_family) {
8377c478bd9Sstevel@tonic-gate 				error = EAFNOSUPPORT;
8387c478bd9Sstevel@tonic-gate 				goto done;
8397c478bd9Sstevel@tonic-gate 			}
8407c478bd9Sstevel@tonic-gate 			break;
8417c478bd9Sstevel@tonic-gate 		case AF_INET:
8427c478bd9Sstevel@tonic-gate 			if (name == NULL) {
8437c478bd9Sstevel@tonic-gate 				error = EINVAL;
8447c478bd9Sstevel@tonic-gate 				eprintsoline(so, error);
8457c478bd9Sstevel@tonic-gate 				goto done;
8467c478bd9Sstevel@tonic-gate 			}
8477c478bd9Sstevel@tonic-gate 			if ((size_t)namelen != sizeof (sin_t)) {
8487c478bd9Sstevel@tonic-gate 				error = name->sa_family != so->so_family ?
8497c478bd9Sstevel@tonic-gate 				    EAFNOSUPPORT : EINVAL;
8507c478bd9Sstevel@tonic-gate 				eprintsoline(so, error);
8517c478bd9Sstevel@tonic-gate 				goto done;
8527c478bd9Sstevel@tonic-gate 			}
8537c478bd9Sstevel@tonic-gate 			if ((flags & _SOBIND_XPG4_2) &&
8547c478bd9Sstevel@tonic-gate 			    (name->sa_family != so->so_family)) {
8557c478bd9Sstevel@tonic-gate 				/*
8567c478bd9Sstevel@tonic-gate 				 * This check has to be made for X/Open
8577c478bd9Sstevel@tonic-gate 				 * sockets however application failures have
8587c478bd9Sstevel@tonic-gate 				 * been observed when it is applied to
8597c478bd9Sstevel@tonic-gate 				 * all sockets.
8607c478bd9Sstevel@tonic-gate 				 */
8617c478bd9Sstevel@tonic-gate 				error = EAFNOSUPPORT;
8627c478bd9Sstevel@tonic-gate 				eprintsoline(so, error);
8637c478bd9Sstevel@tonic-gate 				goto done;
8647c478bd9Sstevel@tonic-gate 			}
8657c478bd9Sstevel@tonic-gate 			/*
8667c478bd9Sstevel@tonic-gate 			 * Force a zero sa_family to match so_family.
8677c478bd9Sstevel@tonic-gate 			 *
8687c478bd9Sstevel@tonic-gate 			 * Some programs like inetd(1M) don't set the
8697c478bd9Sstevel@tonic-gate 			 * family field. Other programs leave
8707c478bd9Sstevel@tonic-gate 			 * sin_family set to garbage - SunOS 4.X does
8717c478bd9Sstevel@tonic-gate 			 * not check the family field on a bind.
8727c478bd9Sstevel@tonic-gate 			 * We use the family field that
8737c478bd9Sstevel@tonic-gate 			 * was passed in to the socket() call.
8747c478bd9Sstevel@tonic-gate 			 */
8757c478bd9Sstevel@tonic-gate 			name->sa_family = so->so_family;
8767c478bd9Sstevel@tonic-gate 			break;
8777c478bd9Sstevel@tonic-gate 
8787c478bd9Sstevel@tonic-gate 		case AF_INET6: {
8797c478bd9Sstevel@tonic-gate #ifdef DEBUG
8807c478bd9Sstevel@tonic-gate 			sin6_t *sin6 = (sin6_t *)name;
8817c478bd9Sstevel@tonic-gate #endif /* DEBUG */
8827c478bd9Sstevel@tonic-gate 
8837c478bd9Sstevel@tonic-gate 			if (name == NULL) {
8847c478bd9Sstevel@tonic-gate 				error = EINVAL;
8857c478bd9Sstevel@tonic-gate 				eprintsoline(so, error);
8867c478bd9Sstevel@tonic-gate 				goto done;
8877c478bd9Sstevel@tonic-gate 			}
8887c478bd9Sstevel@tonic-gate 			if ((size_t)namelen != sizeof (sin6_t)) {
8897c478bd9Sstevel@tonic-gate 				error = name->sa_family != so->so_family ?
8907c478bd9Sstevel@tonic-gate 				    EAFNOSUPPORT : EINVAL;
8917c478bd9Sstevel@tonic-gate 				eprintsoline(so, error);
8927c478bd9Sstevel@tonic-gate 				goto done;
8937c478bd9Sstevel@tonic-gate 			}
8947c478bd9Sstevel@tonic-gate 			if (name->sa_family != so->so_family) {
8957c478bd9Sstevel@tonic-gate 				/*
8967c478bd9Sstevel@tonic-gate 				 * With IPv6 we require the family to match
8977c478bd9Sstevel@tonic-gate 				 * unlike in IPv4.
8987c478bd9Sstevel@tonic-gate 				 */
8997c478bd9Sstevel@tonic-gate 				error = EAFNOSUPPORT;
9007c478bd9Sstevel@tonic-gate 				eprintsoline(so, error);
9017c478bd9Sstevel@tonic-gate 				goto done;
9027c478bd9Sstevel@tonic-gate 			}
9037c478bd9Sstevel@tonic-gate #ifdef DEBUG
9047c478bd9Sstevel@tonic-gate 			/*
9057c478bd9Sstevel@tonic-gate 			 * Verify that apps don't forget to clear
9067c478bd9Sstevel@tonic-gate 			 * sin6_scope_id etc
9077c478bd9Sstevel@tonic-gate 			 */
9087c478bd9Sstevel@tonic-gate 			if (sin6->sin6_scope_id != 0 &&
9097c478bd9Sstevel@tonic-gate 			    !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
9102caf0dcdSrshoaib 				zcmn_err(getzoneid(), CE_WARN,
9117c478bd9Sstevel@tonic-gate 				    "bind with uninitialized sin6_scope_id "
9127c478bd9Sstevel@tonic-gate 				    "(%d) on socket. Pid = %d\n",
9137c478bd9Sstevel@tonic-gate 				    (int)sin6->sin6_scope_id,
9147c478bd9Sstevel@tonic-gate 				    (int)curproc->p_pid);
9157c478bd9Sstevel@tonic-gate 			}
9167c478bd9Sstevel@tonic-gate 			if (sin6->__sin6_src_id != 0) {
9172caf0dcdSrshoaib 				zcmn_err(getzoneid(), CE_WARN,
9187c478bd9Sstevel@tonic-gate 				    "bind with uninitialized __sin6_src_id "
9197c478bd9Sstevel@tonic-gate 				    "(%d) on socket. Pid = %d\n",
9207c478bd9Sstevel@tonic-gate 				    (int)sin6->__sin6_src_id,
9217c478bd9Sstevel@tonic-gate 				    (int)curproc->p_pid);
9227c478bd9Sstevel@tonic-gate 			}
9237c478bd9Sstevel@tonic-gate #endif /* DEBUG */
9247c478bd9Sstevel@tonic-gate 			break;
9257c478bd9Sstevel@tonic-gate 		}
9267c478bd9Sstevel@tonic-gate 		default:
9277c478bd9Sstevel@tonic-gate 			/*
9287c478bd9Sstevel@tonic-gate 			 * Don't do any length or sa_family check to allow
9297c478bd9Sstevel@tonic-gate 			 * non-sockaddr style addresses.
9307c478bd9Sstevel@tonic-gate 			 */
9317c478bd9Sstevel@tonic-gate 			if (name == NULL) {
9327c478bd9Sstevel@tonic-gate 				error = EINVAL;
9337c478bd9Sstevel@tonic-gate 				eprintsoline(so, error);
9347c478bd9Sstevel@tonic-gate 				goto done;
9357c478bd9Sstevel@tonic-gate 			}
9367c478bd9Sstevel@tonic-gate 			break;
9377c478bd9Sstevel@tonic-gate 		}
9387c478bd9Sstevel@tonic-gate 
9390f1702c5SYu Xiangning 		if (namelen > (t_uscalar_t)sti->sti_laddr_maxlen) {
9407c478bd9Sstevel@tonic-gate 			error = ENAMETOOLONG;
9417c478bd9Sstevel@tonic-gate 			eprintsoline(so, error);
9427c478bd9Sstevel@tonic-gate 			goto done;
9437c478bd9Sstevel@tonic-gate 		}
9447c478bd9Sstevel@tonic-gate 		/*
9457c478bd9Sstevel@tonic-gate 		 * Save local address.
9467c478bd9Sstevel@tonic-gate 		 */
9470f1702c5SYu Xiangning 		sti->sti_laddr_len = (socklen_t)namelen;
9480f1702c5SYu Xiangning 		ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
9490f1702c5SYu Xiangning 		bcopy(name, sti->sti_laddr_sa, namelen);
9507c478bd9Sstevel@tonic-gate 
9510f1702c5SYu Xiangning 		addr = sti->sti_laddr_sa;
9520f1702c5SYu Xiangning 		addrlen = (t_uscalar_t)sti->sti_laddr_len;
9537c478bd9Sstevel@tonic-gate 		switch (so->so_family) {
9547c478bd9Sstevel@tonic-gate 		case AF_INET6:
9557c478bd9Sstevel@tonic-gate 		case AF_INET:
9567c478bd9Sstevel@tonic-gate 			break;
9577c478bd9Sstevel@tonic-gate 		case AF_UNIX: {
9587c478bd9Sstevel@tonic-gate 			struct sockaddr_un *soun =
9590f1702c5SYu Xiangning 			    (struct sockaddr_un *)sti->sti_laddr_sa;
96092f45f6dSRic Aleshire 			struct vnode *vp, *rvp;
9617c478bd9Sstevel@tonic-gate 			struct vattr vattr;
9627c478bd9Sstevel@tonic-gate 
9630f1702c5SYu Xiangning 			ASSERT(sti->sti_ux_bound_vp == NULL);
9647c478bd9Sstevel@tonic-gate 			/*
9657c478bd9Sstevel@tonic-gate 			 * Create vnode for the specified path name.
9660f1702c5SYu Xiangning 			 * Keep vnode held with a reference in sti_ux_bound_vp.
9677c478bd9Sstevel@tonic-gate 			 * Use the vnode pointer as the address used in the
9687c478bd9Sstevel@tonic-gate 			 * bind with the transport.
9697c478bd9Sstevel@tonic-gate 			 *
9707c478bd9Sstevel@tonic-gate 			 * Use the same mode as in BSD. In particular this does
9717c478bd9Sstevel@tonic-gate 			 * not observe the umask.
9727c478bd9Sstevel@tonic-gate 			 */
9737c478bd9Sstevel@tonic-gate 			/* MAXPATHLEN + soun_family + nul termination */
9740f1702c5SYu Xiangning 			if (sti->sti_laddr_len >
9757c478bd9Sstevel@tonic-gate 			    (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) {
9767c478bd9Sstevel@tonic-gate 				error = ENAMETOOLONG;
9777c478bd9Sstevel@tonic-gate 				eprintsoline(so, error);
9787c478bd9Sstevel@tonic-gate 				goto done;
9797c478bd9Sstevel@tonic-gate 			}
9807c478bd9Sstevel@tonic-gate 			vattr.va_type = VSOCK;
981ae115bc7Smrj 			vattr.va_mode = 0777 & ~PTOU(curproc)->u_cmask;
9827c478bd9Sstevel@tonic-gate 			vattr.va_mask = AT_TYPE|AT_MODE;
9837c478bd9Sstevel@tonic-gate 			/* NOTE: holding so_lock */
9847c478bd9Sstevel@tonic-gate 			error = vn_create(soun->sun_path, UIO_SYSSPACE, &vattr,
9857c478bd9Sstevel@tonic-gate 			    EXCL, 0, &vp, CRMKNOD, 0, 0);
9867c478bd9Sstevel@tonic-gate 			if (error) {
9877c478bd9Sstevel@tonic-gate 				if (error == EEXIST)
9887c478bd9Sstevel@tonic-gate 					error = EADDRINUSE;
9897c478bd9Sstevel@tonic-gate 				eprintsoline(so, error);
9907c478bd9Sstevel@tonic-gate 				goto done;
9917c478bd9Sstevel@tonic-gate 			}
9927c478bd9Sstevel@tonic-gate 			/*
9937c478bd9Sstevel@tonic-gate 			 * Establish pointer from the underlying filesystem
9947c478bd9Sstevel@tonic-gate 			 * vnode to the socket node.
9950f1702c5SYu Xiangning 			 * sti_ux_bound_vp and v_stream->sd_vnode form the
9967c478bd9Sstevel@tonic-gate 			 * cross-linkage between the underlying filesystem
9977c478bd9Sstevel@tonic-gate 			 * node and the socket node.
9987c478bd9Sstevel@tonic-gate 			 */
99992f45f6dSRic Aleshire 
100092f45f6dSRic Aleshire 			if ((VOP_REALVP(vp, &rvp, NULL) == 0) && (vp != rvp)) {
100192f45f6dSRic Aleshire 				VN_HOLD(rvp);
100292f45f6dSRic Aleshire 				VN_RELE(vp);
100392f45f6dSRic Aleshire 				vp = rvp;
100492f45f6dSRic Aleshire 			}
100592f45f6dSRic Aleshire 
10067c478bd9Sstevel@tonic-gate 			ASSERT(SOTOV(so)->v_stream);
10077c478bd9Sstevel@tonic-gate 			mutex_enter(&vp->v_lock);
10087c478bd9Sstevel@tonic-gate 			vp->v_stream = SOTOV(so)->v_stream;
10090f1702c5SYu Xiangning 			sti->sti_ux_bound_vp = vp;
10107c478bd9Sstevel@tonic-gate 			mutex_exit(&vp->v_lock);
10117c478bd9Sstevel@tonic-gate 
10127c478bd9Sstevel@tonic-gate 			/*
10137c478bd9Sstevel@tonic-gate 			 * Use the vnode pointer value as a unique address
10147c478bd9Sstevel@tonic-gate 			 * (together with the magic number to avoid conflicts
10157c478bd9Sstevel@tonic-gate 			 * with implicit binds) in the transport provider.
10167c478bd9Sstevel@tonic-gate 			 */
10170f1702c5SYu Xiangning 			sti->sti_ux_laddr.soua_vp =
10180f1702c5SYu Xiangning 			    (void *)sti->sti_ux_bound_vp;
10190f1702c5SYu Xiangning 			sti->sti_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT;
10200f1702c5SYu Xiangning 			addr = &sti->sti_ux_laddr;
10210f1702c5SYu Xiangning 			addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr);
10227c478bd9Sstevel@tonic-gate 			dprintso(so, 1, ("sobind UNIX: addrlen %d, addr %p\n",
10237c478bd9Sstevel@tonic-gate 			    addrlen,
10240f1702c5SYu Xiangning 			    (void *)((struct so_ux_addr *)addr)->soua_vp));
10257c478bd9Sstevel@tonic-gate 			break;
10267c478bd9Sstevel@tonic-gate 		}
10277c478bd9Sstevel@tonic-gate 		} /* end switch (so->so_family) */
10287c478bd9Sstevel@tonic-gate 	}
10297c478bd9Sstevel@tonic-gate 
10307c478bd9Sstevel@tonic-gate 	/*
10317c478bd9Sstevel@tonic-gate 	 * set SS_ACCEPTCONN before sending down O_T_BIND_REQ since
10327c478bd9Sstevel@tonic-gate 	 * the transport can start passing up T_CONN_IND messages
10337c478bd9Sstevel@tonic-gate 	 * as soon as it receives the bind req and strsock_proto()
10347c478bd9Sstevel@tonic-gate 	 * insists that SS_ACCEPTCONN is set when processing T_CONN_INDs.
10357c478bd9Sstevel@tonic-gate 	 */
10367c478bd9Sstevel@tonic-gate 	if (flags & _SOBIND_LISTEN) {
10377c478bd9Sstevel@tonic-gate 		if ((so->so_state & SS_ACCEPTCONN) == 0)
10387c478bd9Sstevel@tonic-gate 			clear_acceptconn_on_err = B_TRUE;
10397c478bd9Sstevel@tonic-gate 		save_so_backlog = so->so_backlog;
10407c478bd9Sstevel@tonic-gate 		restore_backlog_on_err = B_TRUE;
10417c478bd9Sstevel@tonic-gate 		so->so_state |= SS_ACCEPTCONN;
10427c478bd9Sstevel@tonic-gate 		so->so_backlog = backlog;
10437c478bd9Sstevel@tonic-gate 	}
10447c478bd9Sstevel@tonic-gate 
10457c478bd9Sstevel@tonic-gate 	/*
10467c478bd9Sstevel@tonic-gate 	 * If NL7C addr(s) have been configured check for addr/port match,
10477c478bd9Sstevel@tonic-gate 	 * or if an implicit NL7C socket via AF_NCA mark socket as NL7C.
10487c478bd9Sstevel@tonic-gate 	 *
10497c478bd9Sstevel@tonic-gate 	 * NL7C supports the TCP transport only so check AF_INET and AF_INET6
10507c478bd9Sstevel@tonic-gate 	 * family sockets only. If match mark as such.
10517c478bd9Sstevel@tonic-gate 	 */
10522c9e429eSbrutus 	if (nl7c_enabled && ((addr != NULL &&
10537c478bd9Sstevel@tonic-gate 	    (so->so_family == AF_INET || so->so_family == AF_INET6) &&
10547c478bd9Sstevel@tonic-gate 	    (nl7c = nl7c_lookup_addr(addr, addrlen))) ||
10550f1702c5SYu Xiangning 	    sti->sti_nl7c_flags == NL7C_AF_NCA)) {
10567c478bd9Sstevel@tonic-gate 		/*
10577c478bd9Sstevel@tonic-gate 		 * NL7C is not supported in non-global zones,
10587c478bd9Sstevel@tonic-gate 		 * we enforce this restriction here.
10597c478bd9Sstevel@tonic-gate 		 */
10607c478bd9Sstevel@tonic-gate 		if (so->so_zoneid == GLOBAL_ZONEID) {
10617c478bd9Sstevel@tonic-gate 			/* An NL7C socket, mark it */
10620f1702c5SYu Xiangning 			sti->sti_nl7c_flags |= NL7C_ENABLED;
10632c9e429eSbrutus 			if (nl7c == NULL) {
10642c9e429eSbrutus 				/*
10652c9e429eSbrutus 				 * Was an AF_NCA bind() so add it to the
10662c9e429eSbrutus 				 * addr list for reporting purposes.
10672c9e429eSbrutus 				 */
10682c9e429eSbrutus 				nl7c = nl7c_add_addr(addr, addrlen);
10692c9e429eSbrutus 			}
10707c478bd9Sstevel@tonic-gate 		} else
10717c478bd9Sstevel@tonic-gate 			nl7c = NULL;
10727c478bd9Sstevel@tonic-gate 	}
10730f1702c5SYu Xiangning 
10747c478bd9Sstevel@tonic-gate 	/*
10757c478bd9Sstevel@tonic-gate 	 * We send a T_BIND_REQ for TCP/UDP since we know it supports it,
10767c478bd9Sstevel@tonic-gate 	 * for other transports we will send in a O_T_BIND_REQ.
10777c478bd9Sstevel@tonic-gate 	 */
10787c478bd9Sstevel@tonic-gate 	if (tcp_udp_xport &&
10797c478bd9Sstevel@tonic-gate 	    (so->so_family == AF_INET || so->so_family == AF_INET6))
10807c478bd9Sstevel@tonic-gate 		PRIM_type = T_BIND_REQ;
10817c478bd9Sstevel@tonic-gate 
10827c478bd9Sstevel@tonic-gate 	bind_req.PRIM_type = PRIM_type;
10837c478bd9Sstevel@tonic-gate 	bind_req.ADDR_length = addrlen;
10847c478bd9Sstevel@tonic-gate 	bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req);
10857c478bd9Sstevel@tonic-gate 	bind_req.CONIND_number = backlog;
10867c478bd9Sstevel@tonic-gate 	/* NOTE: holding so_lock while sleeping */
10877c478bd9Sstevel@tonic-gate 	mp = soallocproto2(&bind_req, sizeof (bind_req),
1088de8c4a14SErik Nordmark 	    addr, addrlen, 0, _ALLOC_SLEEP, cr);
10890f1702c5SYu Xiangning 	sti->sti_laddr_valid = 0;
1090c28749e9Skais 
10910f1702c5SYu Xiangning 	/* Done using sti_laddr_sa - can drop the lock */
10927c478bd9Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
10937c478bd9Sstevel@tonic-gate 
10947c478bd9Sstevel@tonic-gate 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
10957c478bd9Sstevel@tonic-gate 	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
10967c478bd9Sstevel@tonic-gate 	if (error) {
10977c478bd9Sstevel@tonic-gate 		eprintsoline(so, error);
10987c478bd9Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
10997c478bd9Sstevel@tonic-gate 		goto done;
11007c478bd9Sstevel@tonic-gate 	}
11017c478bd9Sstevel@tonic-gate 
11027c478bd9Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
11037c478bd9Sstevel@tonic-gate 	error = sowaitprim(so, PRIM_type, T_BIND_ACK,
11047c478bd9Sstevel@tonic-gate 	    (t_uscalar_t)sizeof (*bind_ack), &mp, 0);
11057c478bd9Sstevel@tonic-gate 	if (error) {
11067c478bd9Sstevel@tonic-gate 		eprintsoline(so, error);
11077c478bd9Sstevel@tonic-gate 		goto done;
11087c478bd9Sstevel@tonic-gate 	}
11097c478bd9Sstevel@tonic-gate 	ASSERT(mp);
11107c478bd9Sstevel@tonic-gate 	/*
11117c478bd9Sstevel@tonic-gate 	 * Even if some TPI message (e.g. T_DISCON_IND) was received in
11127c478bd9Sstevel@tonic-gate 	 * strsock_proto while the lock was dropped above, the bind
11137c478bd9Sstevel@tonic-gate 	 * is allowed to complete.
11147c478bd9Sstevel@tonic-gate 	 */
11157c478bd9Sstevel@tonic-gate 
11167c478bd9Sstevel@tonic-gate 	/* Mark as bound. This will be undone if we detect errors below. */
11177c478bd9Sstevel@tonic-gate 	if (flags & _SOBIND_NOXLATE) {
11187c478bd9Sstevel@tonic-gate 		ASSERT(so->so_family == AF_UNIX);
11190f1702c5SYu Xiangning 		sti->sti_faddr_noxlate = 1;
11207c478bd9Sstevel@tonic-gate 	}
11217c478bd9Sstevel@tonic-gate 	ASSERT(!(so->so_state & SS_ISBOUND) || (flags & _SOBIND_REBIND));
11227c478bd9Sstevel@tonic-gate 	so->so_state |= SS_ISBOUND;
11230f1702c5SYu Xiangning 	ASSERT(sti->sti_unbind_mp);
11247c478bd9Sstevel@tonic-gate 
11257c478bd9Sstevel@tonic-gate 	/* note that we've already set SS_ACCEPTCONN above */
11267c478bd9Sstevel@tonic-gate 
11277c478bd9Sstevel@tonic-gate 	/*
11287c478bd9Sstevel@tonic-gate 	 * Recompute addrlen - an unspecied bind sent down an
11297c478bd9Sstevel@tonic-gate 	 * address of length zero but we expect the appropriate length
11307c478bd9Sstevel@tonic-gate 	 * in return.
11317c478bd9Sstevel@tonic-gate 	 */
11327c478bd9Sstevel@tonic-gate 	addrlen = (t_uscalar_t)(so->so_family == AF_UNIX ?
11330f1702c5SYu Xiangning 	    sizeof (sti->sti_ux_laddr) : sti->sti_laddr_len);
11347c478bd9Sstevel@tonic-gate 
11357c478bd9Sstevel@tonic-gate 	bind_ack = (struct T_bind_ack *)mp->b_rptr;
11367c478bd9Sstevel@tonic-gate 	/*
11377c478bd9Sstevel@tonic-gate 	 * The alignment restriction is really too strict but
11387c478bd9Sstevel@tonic-gate 	 * we want enough alignment to inspect the fields of
11397c478bd9Sstevel@tonic-gate 	 * a sockaddr_in.
11407c478bd9Sstevel@tonic-gate 	 */
11417c478bd9Sstevel@tonic-gate 	addr = sogetoff(mp, bind_ack->ADDR_offset,
11427c478bd9Sstevel@tonic-gate 	    bind_ack->ADDR_length,
11437c478bd9Sstevel@tonic-gate 	    __TPI_ALIGN_SIZE);
11447c478bd9Sstevel@tonic-gate 	if (addr == NULL) {
11457c478bd9Sstevel@tonic-gate 		freemsg(mp);
11467c478bd9Sstevel@tonic-gate 		error = EPROTO;
11477c478bd9Sstevel@tonic-gate 		eprintsoline(so, error);
11487c478bd9Sstevel@tonic-gate 		goto done;
11497c478bd9Sstevel@tonic-gate 	}
11507c478bd9Sstevel@tonic-gate 	if (!(flags & _SOBIND_UNSPEC)) {
11517c478bd9Sstevel@tonic-gate 		/*
11527c478bd9Sstevel@tonic-gate 		 * Verify that the transport didn't return something we
11537c478bd9Sstevel@tonic-gate 		 * did not want e.g. an address other than what we asked for.
11547c478bd9Sstevel@tonic-gate 		 *
11557c478bd9Sstevel@tonic-gate 		 * NOTE: These checks would go away if/when we switch to
11567c478bd9Sstevel@tonic-gate 		 * using the new TPI (in which the transport would fail
11577c478bd9Sstevel@tonic-gate 		 * the request instead of assigning a different address).
11587c478bd9Sstevel@tonic-gate 		 *
11597c478bd9Sstevel@tonic-gate 		 * NOTE2: For protocols that we don't know (i.e. any
11607c478bd9Sstevel@tonic-gate 		 * other than AF_INET6, AF_INET and AF_UNIX), we
11617c478bd9Sstevel@tonic-gate 		 * cannot know if the transport should be expected to
11627c478bd9Sstevel@tonic-gate 		 * return the same address as that requested.
11637c478bd9Sstevel@tonic-gate 		 *
11647c478bd9Sstevel@tonic-gate 		 * NOTE3: For AF_INET and AF_INET6, TCP/UDP, we send
11657c478bd9Sstevel@tonic-gate 		 * down a T_BIND_REQ. We use O_T_BIND_REQ for others.
11667c478bd9Sstevel@tonic-gate 		 *
11677c478bd9Sstevel@tonic-gate 		 * For example, in the case of netatalk it may be
11687c478bd9Sstevel@tonic-gate 		 * inappropriate for the transport to return the
11697c478bd9Sstevel@tonic-gate 		 * requested address (as it may have allocated a local
11707c478bd9Sstevel@tonic-gate 		 * port number in behaviour similar to that of an
11717c478bd9Sstevel@tonic-gate 		 * AF_INET bind request with a port number of zero).
11727c478bd9Sstevel@tonic-gate 		 *
11737c478bd9Sstevel@tonic-gate 		 * Given the definition of O_T_BIND_REQ, where the
11747c478bd9Sstevel@tonic-gate 		 * transport may bind to an address other than the
11757c478bd9Sstevel@tonic-gate 		 * requested address, it's not possible to determine
11767c478bd9Sstevel@tonic-gate 		 * whether a returned address that differs from the
11777c478bd9Sstevel@tonic-gate 		 * requested address is a reason to fail (because the
11787c478bd9Sstevel@tonic-gate 		 * requested address was not available) or succeed
11797c478bd9Sstevel@tonic-gate 		 * (because the transport allocated an appropriate
11807c478bd9Sstevel@tonic-gate 		 * address and/or port).
11817c478bd9Sstevel@tonic-gate 		 *
11827c478bd9Sstevel@tonic-gate 		 * sockfs currently requires that the transport return
11837c478bd9Sstevel@tonic-gate 		 * the requested address in the T_BIND_ACK, unless
11847c478bd9Sstevel@tonic-gate 		 * there is code here to allow for any discrepancy.
11857c478bd9Sstevel@tonic-gate 		 * Such code exists for AF_INET and AF_INET6.
11867c478bd9Sstevel@tonic-gate 		 *
11877c478bd9Sstevel@tonic-gate 		 * Netatalk chooses to return the requested address
11887c478bd9Sstevel@tonic-gate 		 * rather than the (correct) allocated address.  This
11897c478bd9Sstevel@tonic-gate 		 * means that netatalk violates the TPI specification
11907c478bd9Sstevel@tonic-gate 		 * (and would not function correctly if used from a
11917c478bd9Sstevel@tonic-gate 		 * TLI application), but it does mean that it works
11927c478bd9Sstevel@tonic-gate 		 * with sockfs.
11937c478bd9Sstevel@tonic-gate 		 *
11947c478bd9Sstevel@tonic-gate 		 * As noted above, using the newer XTI bind primitive
11957c478bd9Sstevel@tonic-gate 		 * (T_BIND_REQ) in preference to O_T_BIND_REQ would
11967c478bd9Sstevel@tonic-gate 		 * allow sockfs to be more sure about whether or not
11977c478bd9Sstevel@tonic-gate 		 * the bind request had succeeded (as transports are
11987c478bd9Sstevel@tonic-gate 		 * not permitted to bind to a different address than
11997c478bd9Sstevel@tonic-gate 		 * that requested - they must return failure).
12007c478bd9Sstevel@tonic-gate 		 * Unfortunately, support for T_BIND_REQ may not be
12017c478bd9Sstevel@tonic-gate 		 * present in all transport implementations (netatalk,
12027c478bd9Sstevel@tonic-gate 		 * for example, doesn't have it), making the
12037c478bd9Sstevel@tonic-gate 		 * transition difficult.
12047c478bd9Sstevel@tonic-gate 		 */
12057c478bd9Sstevel@tonic-gate 		if (bind_ack->ADDR_length != addrlen) {
12067c478bd9Sstevel@tonic-gate 			/* Assumes that the requested address was in use */
12077c478bd9Sstevel@tonic-gate 			freemsg(mp);
12087c478bd9Sstevel@tonic-gate 			error = EADDRINUSE;
12097c478bd9Sstevel@tonic-gate 			eprintsoline(so, error);
12107c478bd9Sstevel@tonic-gate 			goto done;
12117c478bd9Sstevel@tonic-gate 		}
12127c478bd9Sstevel@tonic-gate 
12137c478bd9Sstevel@tonic-gate 		switch (so->so_family) {
12147c478bd9Sstevel@tonic-gate 		case AF_INET6:
12157c478bd9Sstevel@tonic-gate 		case AF_INET: {
12167c478bd9Sstevel@tonic-gate 			sin_t *rname, *aname;
12177c478bd9Sstevel@tonic-gate 
12187c478bd9Sstevel@tonic-gate 			rname = (sin_t *)addr;
12190f1702c5SYu Xiangning 			aname = (sin_t *)sti->sti_laddr_sa;
12207c478bd9Sstevel@tonic-gate 
12217c478bd9Sstevel@tonic-gate 			/*
12227c478bd9Sstevel@tonic-gate 			 * Take advantage of the alignment
12237c478bd9Sstevel@tonic-gate 			 * of sin_port and sin6_port which fall
12247c478bd9Sstevel@tonic-gate 			 * in the same place in their data structures.
12257c478bd9Sstevel@tonic-gate 			 * Just use sin_port for either address family.
12267c478bd9Sstevel@tonic-gate 			 *
12277c478bd9Sstevel@tonic-gate 			 * This may become a problem if (heaven forbid)
12287c478bd9Sstevel@tonic-gate 			 * there's a separate ipv6port_reserved... :-P
12297c478bd9Sstevel@tonic-gate 			 *
12307c478bd9Sstevel@tonic-gate 			 * Binding to port 0 has the semantics of letting
12317c478bd9Sstevel@tonic-gate 			 * the transport bind to any port.
12327c478bd9Sstevel@tonic-gate 			 *
12337c478bd9Sstevel@tonic-gate 			 * If the transport is TCP or UDP since we had sent
12347c478bd9Sstevel@tonic-gate 			 * a T_BIND_REQ we would not get a port other than
12357c478bd9Sstevel@tonic-gate 			 * what we asked for.
12367c478bd9Sstevel@tonic-gate 			 */
12377c478bd9Sstevel@tonic-gate 			if (tcp_udp_xport) {
12387c478bd9Sstevel@tonic-gate 				/*
12397c478bd9Sstevel@tonic-gate 				 * Pick up the new port number if we bound to
12407c478bd9Sstevel@tonic-gate 				 * port 0.
12417c478bd9Sstevel@tonic-gate 				 */
12427c478bd9Sstevel@tonic-gate 				if (aname->sin_port == 0)
12437c478bd9Sstevel@tonic-gate 					aname->sin_port = rname->sin_port;
12440f1702c5SYu Xiangning 				sti->sti_laddr_valid = 1;
12457c478bd9Sstevel@tonic-gate 				break;
12467c478bd9Sstevel@tonic-gate 			}
12477c478bd9Sstevel@tonic-gate 			if (aname->sin_port != 0 &&
12487c478bd9Sstevel@tonic-gate 			    aname->sin_port != rname->sin_port) {
12497c478bd9Sstevel@tonic-gate 				freemsg(mp);
12507c478bd9Sstevel@tonic-gate 				error = EADDRINUSE;
12517c478bd9Sstevel@tonic-gate 				eprintsoline(so, error);
12527c478bd9Sstevel@tonic-gate 				goto done;
12537c478bd9Sstevel@tonic-gate 			}
12547c478bd9Sstevel@tonic-gate 			/*
12557c478bd9Sstevel@tonic-gate 			 * Pick up the new port number if we bound to port 0.
12567c478bd9Sstevel@tonic-gate 			 */
12577c478bd9Sstevel@tonic-gate 			aname->sin_port = rname->sin_port;
12587c478bd9Sstevel@tonic-gate 
12597c478bd9Sstevel@tonic-gate 			/*
12607c478bd9Sstevel@tonic-gate 			 * Unfortunately, addresses aren't _quite_ the same.
12617c478bd9Sstevel@tonic-gate 			 */
12627c478bd9Sstevel@tonic-gate 			if (so->so_family == AF_INET) {
12637c478bd9Sstevel@tonic-gate 				if (aname->sin_addr.s_addr !=
12647c478bd9Sstevel@tonic-gate 				    rname->sin_addr.s_addr) {
12657c478bd9Sstevel@tonic-gate 					freemsg(mp);
12667c478bd9Sstevel@tonic-gate 					error = EADDRNOTAVAIL;
12677c478bd9Sstevel@tonic-gate 					eprintsoline(so, error);
12687c478bd9Sstevel@tonic-gate 					goto done;
12697c478bd9Sstevel@tonic-gate 				}
12707c478bd9Sstevel@tonic-gate 			} else {
12717c478bd9Sstevel@tonic-gate 				sin6_t *rname6 = (sin6_t *)rname;
12727c478bd9Sstevel@tonic-gate 				sin6_t *aname6 = (sin6_t *)aname;
12737c478bd9Sstevel@tonic-gate 
12747c478bd9Sstevel@tonic-gate 				if (!IN6_ARE_ADDR_EQUAL(&aname6->sin6_addr,
12757c478bd9Sstevel@tonic-gate 				    &rname6->sin6_addr)) {
12767c478bd9Sstevel@tonic-gate 					freemsg(mp);
12777c478bd9Sstevel@tonic-gate 					error = EADDRNOTAVAIL;
12787c478bd9Sstevel@tonic-gate 					eprintsoline(so, error);
12797c478bd9Sstevel@tonic-gate 					goto done;
12807c478bd9Sstevel@tonic-gate 				}
12817c478bd9Sstevel@tonic-gate 			}
12827c478bd9Sstevel@tonic-gate 			break;
12837c478bd9Sstevel@tonic-gate 		}
12847c478bd9Sstevel@tonic-gate 		case AF_UNIX:
12850f1702c5SYu Xiangning 			if (bcmp(addr, &sti->sti_ux_laddr, addrlen) != 0) {
12867c478bd9Sstevel@tonic-gate 				freemsg(mp);
12877c478bd9Sstevel@tonic-gate 				error = EADDRINUSE;
12887c478bd9Sstevel@tonic-gate 				eprintsoline(so, error);
12897c478bd9Sstevel@tonic-gate 				eprintso(so,
12907c478bd9Sstevel@tonic-gate 				    ("addrlen %d, addr 0x%x, vp %p\n",
12917c478bd9Sstevel@tonic-gate 				    addrlen, *((int *)addr),
12920f1702c5SYu Xiangning 				    (void *)sti->sti_ux_bound_vp));
12937c478bd9Sstevel@tonic-gate 				goto done;
12947c478bd9Sstevel@tonic-gate 			}
12950f1702c5SYu Xiangning 			sti->sti_laddr_valid = 1;
12967c478bd9Sstevel@tonic-gate 			break;
12977c478bd9Sstevel@tonic-gate 		default:
12987c478bd9Sstevel@tonic-gate 			/*
12997c478bd9Sstevel@tonic-gate 			 * NOTE: This assumes that addresses can be
13007c478bd9Sstevel@tonic-gate 			 * byte-compared for equivalence.
13017c478bd9Sstevel@tonic-gate 			 */
13020f1702c5SYu Xiangning 			if (bcmp(addr, sti->sti_laddr_sa, addrlen) != 0) {
13037c478bd9Sstevel@tonic-gate 				freemsg(mp);
13047c478bd9Sstevel@tonic-gate 				error = EADDRINUSE;
13057c478bd9Sstevel@tonic-gate 				eprintsoline(so, error);
13067c478bd9Sstevel@tonic-gate 				goto done;
13077c478bd9Sstevel@tonic-gate 			}
13087c478bd9Sstevel@tonic-gate 			/*
13090f1702c5SYu Xiangning 			 * Don't mark sti_laddr_valid, as we cannot be
13107c478bd9Sstevel@tonic-gate 			 * sure that the returned address is the real
13117c478bd9Sstevel@tonic-gate 			 * bound address when talking to an unknown
13127c478bd9Sstevel@tonic-gate 			 * transport.
13137c478bd9Sstevel@tonic-gate 			 */
13147c478bd9Sstevel@tonic-gate 			break;
13157c478bd9Sstevel@tonic-gate 		}
13167c478bd9Sstevel@tonic-gate 	} else {
13177c478bd9Sstevel@tonic-gate 		/*
13187c478bd9Sstevel@tonic-gate 		 * Save the returned address for getsockname.
13197c478bd9Sstevel@tonic-gate 		 * Needed for unspecific bind unless transport supports
13207c478bd9Sstevel@tonic-gate 		 * the TI_GETMYNAME ioctl.
13217c478bd9Sstevel@tonic-gate 		 * Do this for AF_INET{,6} even though they do, as
13227c478bd9Sstevel@tonic-gate 		 * caching info here is much better performance than
13237c478bd9Sstevel@tonic-gate 		 * a TPI/STREAMS trip to the transport for getsockname.
13247c478bd9Sstevel@tonic-gate 		 * Any which can't for some reason _must_ _not_ set
13250f1702c5SYu Xiangning 		 * sti_laddr_valid here for the caching version of
13260f1702c5SYu Xiangning 		 * getsockname to not break;
13277c478bd9Sstevel@tonic-gate 		 */
13287c478bd9Sstevel@tonic-gate 		switch (so->so_family) {
13297c478bd9Sstevel@tonic-gate 		case AF_UNIX:
13307c478bd9Sstevel@tonic-gate 			/*
13317c478bd9Sstevel@tonic-gate 			 * Record the address bound with the transport
13327c478bd9Sstevel@tonic-gate 			 * for use by socketpair.
13337c478bd9Sstevel@tonic-gate 			 */
13340f1702c5SYu Xiangning 			bcopy(addr, &sti->sti_ux_laddr, addrlen);
13350f1702c5SYu Xiangning 			sti->sti_laddr_valid = 1;
13367c478bd9Sstevel@tonic-gate 			break;
13377c478bd9Sstevel@tonic-gate 		case AF_INET:
13387c478bd9Sstevel@tonic-gate 		case AF_INET6:
13390f1702c5SYu Xiangning 			ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
13400f1702c5SYu Xiangning 			bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len);
13410f1702c5SYu Xiangning 			sti->sti_laddr_valid = 1;
13427c478bd9Sstevel@tonic-gate 			break;
13437c478bd9Sstevel@tonic-gate 		default:
13447c478bd9Sstevel@tonic-gate 			/*
13450f1702c5SYu Xiangning 			 * Don't mark sti_laddr_valid, as we cannot be
13467c478bd9Sstevel@tonic-gate 			 * sure that the returned address is the real
13477c478bd9Sstevel@tonic-gate 			 * bound address when talking to an unknown
13487c478bd9Sstevel@tonic-gate 			 * transport.
13497c478bd9Sstevel@tonic-gate 			 */
13507c478bd9Sstevel@tonic-gate 			break;
13517c478bd9Sstevel@tonic-gate 		}
13527c478bd9Sstevel@tonic-gate 	}
13537c478bd9Sstevel@tonic-gate 
13547c478bd9Sstevel@tonic-gate 	if (nl7c != NULL) {
13552c9e429eSbrutus 		/* Register listen()er sonode pointer with NL7C */
13562c9e429eSbrutus 		nl7c_listener_addr(nl7c, so);
13577c478bd9Sstevel@tonic-gate 	}
13587c478bd9Sstevel@tonic-gate 
13597c478bd9Sstevel@tonic-gate 	freemsg(mp);
13607c478bd9Sstevel@tonic-gate 
13617c478bd9Sstevel@tonic-gate done:
13627c478bd9Sstevel@tonic-gate 	if (error) {
13637c478bd9Sstevel@tonic-gate 		/* reset state & backlog to values held on entry */
13647c478bd9Sstevel@tonic-gate 		if (clear_acceptconn_on_err == B_TRUE)
13657c478bd9Sstevel@tonic-gate 			so->so_state &= ~SS_ACCEPTCONN;
13667c478bd9Sstevel@tonic-gate 		if (restore_backlog_on_err == B_TRUE)
13677c478bd9Sstevel@tonic-gate 			so->so_backlog = save_so_backlog;
13687c478bd9Sstevel@tonic-gate 
13697c478bd9Sstevel@tonic-gate 		if (unbind_on_err && so->so_state & SS_ISBOUND) {
13707c478bd9Sstevel@tonic-gate 			int err;
13717c478bd9Sstevel@tonic-gate 
13727c478bd9Sstevel@tonic-gate 			err = sotpi_unbind(so, 0);
13737c478bd9Sstevel@tonic-gate 			/* LINTED - statement has no consequent: if */
13747c478bd9Sstevel@tonic-gate 			if (err) {
13757c478bd9Sstevel@tonic-gate 				eprintsoline(so, error);
13767c478bd9Sstevel@tonic-gate 			} else {
13777c478bd9Sstevel@tonic-gate 				ASSERT(!(so->so_state & SS_ISBOUND));
13787c478bd9Sstevel@tonic-gate 			}
13797c478bd9Sstevel@tonic-gate 		}
13807c478bd9Sstevel@tonic-gate 	}
13817c478bd9Sstevel@tonic-gate 	if (!(flags & _SOBIND_LOCK_HELD)) {
13827c478bd9Sstevel@tonic-gate 		so_unlock_single(so, SOLOCKED);
13837c478bd9Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
13847c478bd9Sstevel@tonic-gate 	} else {
13857c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(&so->so_lock));
13867c478bd9Sstevel@tonic-gate 		ASSERT(so->so_flag & SOLOCKED);
13877c478bd9Sstevel@tonic-gate 	}
13887c478bd9Sstevel@tonic-gate 	return (error);
13897c478bd9Sstevel@tonic-gate }
13907c478bd9Sstevel@tonic-gate 
13917c478bd9Sstevel@tonic-gate /* bind the socket */
1392ff550d0eSmasputra static int
13937c478bd9Sstevel@tonic-gate sotpi_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
13940f1702c5SYu Xiangning     int flags, struct cred *cr)
13957c478bd9Sstevel@tonic-gate {
13967c478bd9Sstevel@tonic-gate 	if ((flags & _SOBIND_SOCKETPAIR) == 0)
13970f1702c5SYu Xiangning 		return (sotpi_bindlisten(so, name, namelen, 0, flags, cr));
13987c478bd9Sstevel@tonic-gate 
13997c478bd9Sstevel@tonic-gate 	flags &= ~_SOBIND_SOCKETPAIR;
14000f1702c5SYu Xiangning 	return (sotpi_bindlisten(so, name, namelen, 1, flags, cr));
14017c478bd9Sstevel@tonic-gate }
14027c478bd9Sstevel@tonic-gate 
14037c478bd9Sstevel@tonic-gate /*
14047c478bd9Sstevel@tonic-gate  * Unbind a socket - used when bind() fails, when bind() specifies a NULL
14057c478bd9Sstevel@tonic-gate  * address, or when listen needs to unbind and bind.
14067c478bd9Sstevel@tonic-gate  * If the _SOUNBIND_REBIND flag is specified the addresses are retained
14077c478bd9Sstevel@tonic-gate  * so that a sobind can pick them up.
14087c478bd9Sstevel@tonic-gate  */
14097c478bd9Sstevel@tonic-gate static int
14107c478bd9Sstevel@tonic-gate sotpi_unbind(struct sonode *so, int flags)
14117c478bd9Sstevel@tonic-gate {
14127c478bd9Sstevel@tonic-gate 	struct T_unbind_req	unbind_req;
14137c478bd9Sstevel@tonic-gate 	int			error = 0;
14147c478bd9Sstevel@tonic-gate 	mblk_t			*mp;
14150f1702c5SYu Xiangning 	sotpi_info_t		*sti = SOTOTPI(so);
14167c478bd9Sstevel@tonic-gate 
14177c478bd9Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n",
1418903a11ebSrh87107 	    (void *)so, flags, pr_state(so->so_state, so->so_mode)));
14197c478bd9Sstevel@tonic-gate 
14207c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&so->so_lock));
14217c478bd9Sstevel@tonic-gate 	ASSERT(so->so_flag & SOLOCKED);
14227c478bd9Sstevel@tonic-gate 
14237c478bd9Sstevel@tonic-gate 	if (!(so->so_state & SS_ISBOUND)) {
14247c478bd9Sstevel@tonic-gate 		error = EINVAL;
14257c478bd9Sstevel@tonic-gate 		eprintsoline(so, error);
14267c478bd9Sstevel@tonic-gate 		goto done;
14277c478bd9Sstevel@tonic-gate 	}
14287c478bd9Sstevel@tonic-gate 
14297c478bd9Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
14307c478bd9Sstevel@tonic-gate 
14317c478bd9Sstevel@tonic-gate 	/*
14327c478bd9Sstevel@tonic-gate 	 * Flush the read and write side (except stream head read queue)
14337c478bd9Sstevel@tonic-gate 	 * and send down T_UNBIND_REQ.
14347c478bd9Sstevel@tonic-gate 	 */
14357c478bd9Sstevel@tonic-gate 	(void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW);
14367c478bd9Sstevel@tonic-gate 
14377c478bd9Sstevel@tonic-gate 	unbind_req.PRIM_type = T_UNBIND_REQ;
14387c478bd9Sstevel@tonic-gate 	mp = soallocproto1(&unbind_req, sizeof (unbind_req),
1439de8c4a14SErik Nordmark 	    0, _ALLOC_SLEEP, CRED());
14407c478bd9Sstevel@tonic-gate 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
14417c478bd9Sstevel@tonic-gate 	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
14427c478bd9Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
14437c478bd9Sstevel@tonic-gate 	if (error) {
14447c478bd9Sstevel@tonic-gate 		eprintsoline(so, error);
14457c478bd9Sstevel@tonic-gate 		goto done;
14467c478bd9Sstevel@tonic-gate 	}
14477c478bd9Sstevel@tonic-gate 
14487c478bd9Sstevel@tonic-gate 	error = sowaitokack(so, T_UNBIND_REQ);
14497c478bd9Sstevel@tonic-gate 	if (error) {
14507c478bd9Sstevel@tonic-gate 		eprintsoline(so, error);
14517c478bd9Sstevel@tonic-gate 		goto done;
14527c478bd9Sstevel@tonic-gate 	}
14537c478bd9Sstevel@tonic-gate 
14547c478bd9Sstevel@tonic-gate 	/*
14557c478bd9Sstevel@tonic-gate 	 * Even if some TPI message (e.g. T_DISCON_IND) was received in
14567c478bd9Sstevel@tonic-gate 	 * strsock_proto while the lock was dropped above, the unbind
14577c478bd9Sstevel@tonic-gate 	 * is allowed to complete.
14587c478bd9Sstevel@tonic-gate 	 */
14597c478bd9Sstevel@tonic-gate 	if (!(flags & _SOUNBIND_REBIND)) {
14607c478bd9Sstevel@tonic-gate 		/*
14617c478bd9Sstevel@tonic-gate 		 * Clear out bound address.
14627c478bd9Sstevel@tonic-gate 		 */
14637c478bd9Sstevel@tonic-gate 		vnode_t *vp;
14647c478bd9Sstevel@tonic-gate 
14650f1702c5SYu Xiangning 		if ((vp = sti->sti_ux_bound_vp) != NULL) {
14660f1702c5SYu Xiangning 			sti->sti_ux_bound_vp = NULL;
14677c478bd9Sstevel@tonic-gate 			vn_rele_stream(vp);
14687c478bd9Sstevel@tonic-gate 		}
14697c478bd9Sstevel@tonic-gate 		/* Clear out address */
14700f1702c5SYu Xiangning 		sti->sti_laddr_len = 0;
14717c478bd9Sstevel@tonic-gate 	}
14720f1702c5SYu Xiangning 	so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN);
14730f1702c5SYu Xiangning 	sti->sti_laddr_valid = 0;
14742c9e429eSbrutus 
14757c478bd9Sstevel@tonic-gate done:
1476c28749e9Skais 
14777c478bd9Sstevel@tonic-gate 	/* If the caller held the lock don't release it here */
14787c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&so->so_lock));
14797c478bd9Sstevel@tonic-gate 	ASSERT(so->so_flag & SOLOCKED);
14807c478bd9Sstevel@tonic-gate 
14817c478bd9Sstevel@tonic-gate 	return (error);
14827c478bd9Sstevel@tonic-gate }
14837c478bd9Sstevel@tonic-gate 
14847c478bd9Sstevel@tonic-gate /*
14857c478bd9Sstevel@tonic-gate  * listen on the socket.
14867c478bd9Sstevel@tonic-gate  * For TPI conforming transports this has to first unbind with the transport
14877c478bd9Sstevel@tonic-gate  * and then bind again using the new backlog.
14887c478bd9Sstevel@tonic-gate  */
14890f1702c5SYu Xiangning /* ARGSUSED */
14907c478bd9Sstevel@tonic-gate int
14910f1702c5SYu Xiangning sotpi_listen(struct sonode *so, int backlog, struct cred *cr)
14927c478bd9Sstevel@tonic-gate {
14937c478bd9Sstevel@tonic-gate 	int		error = 0;
14940f1702c5SYu Xiangning 	sotpi_info_t	*sti = SOTOTPI(so);
14957c478bd9Sstevel@tonic-gate 
14967c478bd9Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n",
1497903a11ebSrh87107 	    (void *)so, backlog, pr_state(so->so_state, so->so_mode)));
14987c478bd9Sstevel@tonic-gate 
14990f1702c5SYu Xiangning 	if (sti->sti_serv_type == T_CLTS)
15007c478bd9Sstevel@tonic-gate 		return (EOPNOTSUPP);
15017c478bd9Sstevel@tonic-gate 
15027c478bd9Sstevel@tonic-gate 	/*
15037c478bd9Sstevel@tonic-gate 	 * If the socket is ready to accept connections already, then
15047c478bd9Sstevel@tonic-gate 	 * return without doing anything.  This avoids a problem where
15057c478bd9Sstevel@tonic-gate 	 * a second listen() call fails if a connection is pending and
15067c478bd9Sstevel@tonic-gate 	 * leaves the socket unbound. Only when we are not unbinding
15077c478bd9Sstevel@tonic-gate 	 * with the transport can we safely increase the backlog.
15087c478bd9Sstevel@tonic-gate 	 */
15097c478bd9Sstevel@tonic-gate 	if (so->so_state & SS_ACCEPTCONN &&
15107c478bd9Sstevel@tonic-gate 	    !((so->so_family == AF_INET || so->so_family == AF_INET6) &&
15117c478bd9Sstevel@tonic-gate 	    /*CONSTCOND*/
15127c478bd9Sstevel@tonic-gate 	    !solisten_tpi_tcp))
15137c478bd9Sstevel@tonic-gate 		return (0);
15147c478bd9Sstevel@tonic-gate 
15157c478bd9Sstevel@tonic-gate 	if (so->so_state & SS_ISCONNECTED)
15167c478bd9Sstevel@tonic-gate 		return (EINVAL);
15177c478bd9Sstevel@tonic-gate 
15187c478bd9Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
15197c478bd9Sstevel@tonic-gate 	so_lock_single(so);	/* Set SOLOCKED */
15207c478bd9Sstevel@tonic-gate 
15217c478bd9Sstevel@tonic-gate 	/*
15227c478bd9Sstevel@tonic-gate 	 * If the listen doesn't change the backlog we do nothing.
15237c478bd9Sstevel@tonic-gate 	 * This avoids an EPROTO error from the transport.
15247c478bd9Sstevel@tonic-gate 	 */
15257c478bd9Sstevel@tonic-gate 	if ((so->so_state & SS_ACCEPTCONN) &&
15267c478bd9Sstevel@tonic-gate 	    so->so_backlog == backlog)
15277c478bd9Sstevel@tonic-gate 		goto done;
15287c478bd9Sstevel@tonic-gate 
15297c478bd9Sstevel@tonic-gate 	if (!(so->so_state & SS_ISBOUND)) {
15307c478bd9Sstevel@tonic-gate 		/*
15317c478bd9Sstevel@tonic-gate 		 * Must have been explicitly bound in the UNIX domain.
15327c478bd9Sstevel@tonic-gate 		 */
15337c478bd9Sstevel@tonic-gate 		if (so->so_family == AF_UNIX) {
15347c478bd9Sstevel@tonic-gate 			error = EINVAL;
15357c478bd9Sstevel@tonic-gate 			goto done;
15367c478bd9Sstevel@tonic-gate 		}
15377c478bd9Sstevel@tonic-gate 		error = sotpi_bindlisten(so, NULL, 0, backlog,
15380f1702c5SYu Xiangning 		    _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr);
15397c478bd9Sstevel@tonic-gate 	} else if (backlog > 0) {
15407c478bd9Sstevel@tonic-gate 		/*
15417c478bd9Sstevel@tonic-gate 		 * AF_INET{,6} hack to avoid losing the port.
15427c478bd9Sstevel@tonic-gate 		 * Assumes that all AF_INET{,6} transports can handle a
15437c478bd9Sstevel@tonic-gate 		 * O_T_BIND_REQ with a non-zero CONIND_number when the TPI
15447c478bd9Sstevel@tonic-gate 		 * has already bound thus it is possible to avoid the unbind.
15457c478bd9Sstevel@tonic-gate 		 */
15467c478bd9Sstevel@tonic-gate 		if (!((so->so_family == AF_INET || so->so_family == AF_INET6) &&
15477c478bd9Sstevel@tonic-gate 		    /*CONSTCOND*/
15487c478bd9Sstevel@tonic-gate 		    !solisten_tpi_tcp)) {
15497c478bd9Sstevel@tonic-gate 			error = sotpi_unbind(so, _SOUNBIND_REBIND);
15507c478bd9Sstevel@tonic-gate 			if (error)
15517c478bd9Sstevel@tonic-gate 				goto done;
15527c478bd9Sstevel@tonic-gate 		}
15537c478bd9Sstevel@tonic-gate 		error = sotpi_bindlisten(so, NULL, 0, backlog,
15540f1702c5SYu Xiangning 		    _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr);
15557c478bd9Sstevel@tonic-gate 	} else {
15567c478bd9Sstevel@tonic-gate 		so->so_state |= SS_ACCEPTCONN;
15577c478bd9Sstevel@tonic-gate 		so->so_backlog = backlog;
15587c478bd9Sstevel@tonic-gate 	}
15597c478bd9Sstevel@tonic-gate 	if (error)
15607c478bd9Sstevel@tonic-gate 		goto done;
15617c478bd9Sstevel@tonic-gate 	ASSERT(so->so_state & SS_ACCEPTCONN);
15627c478bd9Sstevel@tonic-gate done:
15637c478bd9Sstevel@tonic-gate 	so_unlock_single(so, SOLOCKED);
15647c478bd9Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
15657c478bd9Sstevel@tonic-gate 	return (error);
15667c478bd9Sstevel@tonic-gate }
15677c478bd9Sstevel@tonic-gate 
15687c478bd9Sstevel@tonic-gate /*
15697c478bd9Sstevel@tonic-gate  * Disconnect either a specified seqno or all (-1).
15707c478bd9Sstevel@tonic-gate  * The former is used on listening sockets only.
15717c478bd9Sstevel@tonic-gate  *
15727c478bd9Sstevel@tonic-gate  * When seqno == -1 sodisconnect could call sotpi_unbind. However,
15737c478bd9Sstevel@tonic-gate  * the current use of sodisconnect(seqno == -1) is only for shutdown
15747c478bd9Sstevel@tonic-gate  * so there is no point (and potentially incorrect) to unbind.
15757c478bd9Sstevel@tonic-gate  */
15760f1702c5SYu Xiangning static int
15777c478bd9Sstevel@tonic-gate sodisconnect(struct sonode *so, t_scalar_t seqno, int flags)
15787c478bd9Sstevel@tonic-gate {
15797c478bd9Sstevel@tonic-gate 	struct T_discon_req	discon_req;
15807c478bd9Sstevel@tonic-gate 	int			error = 0;
15817c478bd9Sstevel@tonic-gate 	mblk_t			*mp;
15827c478bd9Sstevel@tonic-gate 
15837c478bd9Sstevel@tonic-gate 	dprintso(so, 1, ("sodisconnect(%p, %d, 0x%x) %s\n",
1584903a11ebSrh87107 	    (void *)so, seqno, flags, pr_state(so->so_state, so->so_mode)));
15857c478bd9Sstevel@tonic-gate 
15867c478bd9Sstevel@tonic-gate 	if (!(flags & _SODISCONNECT_LOCK_HELD)) {
15877c478bd9Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
15887c478bd9Sstevel@tonic-gate 		so_lock_single(so);	/* Set SOLOCKED */
15897c478bd9Sstevel@tonic-gate 	} else {
15907c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(&so->so_lock));
15917c478bd9Sstevel@tonic-gate 		ASSERT(so->so_flag & SOLOCKED);
15927c478bd9Sstevel@tonic-gate 	}
15937c478bd9Sstevel@tonic-gate 
15947c478bd9Sstevel@tonic-gate 	if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ACCEPTCONN))) {
15957c478bd9Sstevel@tonic-gate 		error = EINVAL;
15967c478bd9Sstevel@tonic-gate 		eprintsoline(so, error);
15977c478bd9Sstevel@tonic-gate 		goto done;
15987c478bd9Sstevel@tonic-gate 	}
15997c478bd9Sstevel@tonic-gate 
16007c478bd9Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
16017c478bd9Sstevel@tonic-gate 	/*
16027c478bd9Sstevel@tonic-gate 	 * Flush the write side (unless this is a listener)
16037c478bd9Sstevel@tonic-gate 	 * and then send down a T_DISCON_REQ.
16047c478bd9Sstevel@tonic-gate 	 * (Don't flush on listener since it could flush {O_}T_CONN_RES
16057c478bd9Sstevel@tonic-gate 	 * and other messages.)
16067c478bd9Sstevel@tonic-gate 	 */
16077c478bd9Sstevel@tonic-gate 	if (!(so->so_state & SS_ACCEPTCONN))
16087c478bd9Sstevel@tonic-gate 		(void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHW);
16097c478bd9Sstevel@tonic-gate 
16107c478bd9Sstevel@tonic-gate 	discon_req.PRIM_type = T_DISCON_REQ;
16117c478bd9Sstevel@tonic-gate 	discon_req.SEQ_number = seqno;
16127c478bd9Sstevel@tonic-gate 	mp = soallocproto1(&discon_req, sizeof (discon_req),
1613de8c4a14SErik Nordmark 	    0, _ALLOC_SLEEP, CRED());
16147c478bd9Sstevel@tonic-gate 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
16157c478bd9Sstevel@tonic-gate 	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
16167c478bd9Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
16177c478bd9Sstevel@tonic-gate 	if (error) {
16187c478bd9Sstevel@tonic-gate 		eprintsoline(so, error);
16197c478bd9Sstevel@tonic-gate 		goto done;
16207c478bd9Sstevel@tonic-gate 	}
16217c478bd9Sstevel@tonic-gate 
16227c478bd9Sstevel@tonic-gate 	error = sowaitokack(so, T_DISCON_REQ);
16237c478bd9Sstevel@tonic-gate 	if (error) {
16247c478bd9Sstevel@tonic-gate 		eprintsoline(so, error);
16257c478bd9Sstevel@tonic-gate 		goto done;
16267c478bd9Sstevel@tonic-gate 	}
16277c478bd9Sstevel@tonic-gate 	/*
16287c478bd9Sstevel@tonic-gate 	 * Even if some TPI message (e.g. T_DISCON_IND) was received in
16297c478bd9Sstevel@tonic-gate 	 * strsock_proto while the lock was dropped above, the disconnect
16307c478bd9Sstevel@tonic-gate 	 * is allowed to complete. However, it is not possible to
16317c478bd9Sstevel@tonic-gate 	 * assert that SS_ISCONNECTED|SS_ISCONNECTING are set.
16327c478bd9Sstevel@tonic-gate 	 */
16330f1702c5SYu Xiangning 	so->so_state &= ~(SS_ISCONNECTED|SS_ISCONNECTING);
16340f1702c5SYu Xiangning 	SOTOTPI(so)->sti_laddr_valid = 0;
16350f1702c5SYu Xiangning 	SOTOTPI(so)->sti_faddr_valid = 0;
16367c478bd9Sstevel@tonic-gate done:
16377c478bd9Sstevel@tonic-gate 	if (!(flags & _SODISCONNECT_LOCK_HELD)) {
16387c478bd9Sstevel@tonic-gate 		so_unlock_single(so, SOLOCKED);
16397c478bd9Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
16407c478bd9Sstevel@tonic-gate 	} else {
16417c478bd9Sstevel@tonic-gate 		/* If the caller held the lock don't release it here */
16427c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(&so->so_lock));
16437c478bd9Sstevel@tonic-gate 		ASSERT(so->so_flag & SOLOCKED);
16447c478bd9Sstevel@tonic-gate 	}
16457c478bd9Sstevel@tonic-gate 	return (error);
16467c478bd9Sstevel@tonic-gate }
16477c478bd9Sstevel@tonic-gate 
16480f1702c5SYu Xiangning /* ARGSUSED */
16497c478bd9Sstevel@tonic-gate int
16500f1702c5SYu Xiangning sotpi_accept(struct sonode *so, int fflag, struct cred *cr,
16510f1702c5SYu Xiangning     struct sonode **nsop)
16527c478bd9Sstevel@tonic-gate {
16537c478bd9Sstevel@tonic-gate 	struct T_conn_ind	*conn_ind;
16547c478bd9Sstevel@tonic-gate 	struct T_conn_res	*conn_res;
16557c478bd9Sstevel@tonic-gate 	int			error = 0;
1656dd49f125SAnders Persson 	mblk_t			*mp, *ack_mp;
16577c478bd9Sstevel@tonic-gate 	struct sonode		*nso;
16587c478bd9Sstevel@tonic-gate 	vnode_t			*nvp;
16597c478bd9Sstevel@tonic-gate 	void			*src;
16607c478bd9Sstevel@tonic-gate 	t_uscalar_t		srclen;
16617c478bd9Sstevel@tonic-gate 	void			*opt;
16627c478bd9Sstevel@tonic-gate 	t_uscalar_t		optlen;
16637c478bd9Sstevel@tonic-gate 	t_scalar_t		PRIM_type;
16647c478bd9Sstevel@tonic-gate 	t_scalar_t		SEQ_number;
1665188658baSja97890 	size_t			sinlen;
16660f1702c5SYu Xiangning 	sotpi_info_t		*sti = SOTOTPI(so);
16670f1702c5SYu Xiangning 	sotpi_info_t		*nsti;
16687c478bd9Sstevel@tonic-gate 
16697c478bd9Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n",
1670903a11ebSrh87107 	    (void *)so, fflag, (void *)nsop,
1671903a11ebSrh87107 	    pr_state(so->so_state, so->so_mode)));
16727c478bd9Sstevel@tonic-gate 
16737c478bd9Sstevel@tonic-gate 	/*
16747c478bd9Sstevel@tonic-gate 	 * Defer single-threading the accepting socket until
16757c478bd9Sstevel@tonic-gate 	 * the T_CONN_IND has been received and parsed and the
16767c478bd9Sstevel@tonic-gate 	 * new sonode has been opened.
16777c478bd9Sstevel@tonic-gate 	 */
16787c478bd9Sstevel@tonic-gate 
16797c478bd9Sstevel@tonic-gate 	/* Check that we are not already connected */
16807c478bd9Sstevel@tonic-gate 	if ((so->so_state & SS_ACCEPTCONN) == 0)
16817c478bd9Sstevel@tonic-gate 		goto conn_bad;
16827c478bd9Sstevel@tonic-gate again:
16837c478bd9Sstevel@tonic-gate 	if ((error = sowaitconnind(so, fflag, &mp)) != 0)
16847c478bd9Sstevel@tonic-gate 		goto e_bad;
16857c478bd9Sstevel@tonic-gate 
16860f1702c5SYu Xiangning 	ASSERT(mp != NULL);
16877c478bd9Sstevel@tonic-gate 	conn_ind = (struct T_conn_ind *)mp->b_rptr;
1688c28749e9Skais 
16897c478bd9Sstevel@tonic-gate 	/*
16907c478bd9Sstevel@tonic-gate 	 * Save SEQ_number for error paths.
16917c478bd9Sstevel@tonic-gate 	 */
16927c478bd9Sstevel@tonic-gate 	SEQ_number = conn_ind->SEQ_number;
16937c478bd9Sstevel@tonic-gate 
16947c478bd9Sstevel@tonic-gate 	srclen = conn_ind->SRC_length;
16957c478bd9Sstevel@tonic-gate 	src = sogetoff(mp, conn_ind->SRC_offset, srclen, 1);
16967c478bd9Sstevel@tonic-gate 	if (src == NULL) {
16977c478bd9Sstevel@tonic-gate 		error = EPROTO;
16987c478bd9Sstevel@tonic-gate 		freemsg(mp);
16997c478bd9Sstevel@tonic-gate 		eprintsoline(so, error);
17007c478bd9Sstevel@tonic-gate 		goto disconnect_unlocked;
17017c478bd9Sstevel@tonic-gate 	}
17027c478bd9Sstevel@tonic-gate 	optlen = conn_ind->OPT_length;
17037c478bd9Sstevel@tonic-gate 	switch (so->so_family) {
17047c478bd9Sstevel@tonic-gate 	case AF_INET:
17057c478bd9Sstevel@tonic-gate 	case AF_INET6:
17060f1702c5SYu Xiangning 		if ((optlen == sizeof (intptr_t)) && (sti->sti_direct != 0)) {
17077c478bd9Sstevel@tonic-gate 			bcopy(mp->b_rptr + conn_ind->OPT_offset,
17087c478bd9Sstevel@tonic-gate 			    &opt, conn_ind->OPT_length);
17097c478bd9Sstevel@tonic-gate 		} else {
17107c478bd9Sstevel@tonic-gate 			/*
17117c478bd9Sstevel@tonic-gate 			 * The transport (in this case TCP) hasn't sent up
17127c478bd9Sstevel@tonic-gate 			 * a pointer to an instance for the accept fast-path.
17137c478bd9Sstevel@tonic-gate 			 * Disable fast-path completely because the call to
17147c478bd9Sstevel@tonic-gate 			 * sotpi_create() below would otherwise create an
17157c478bd9Sstevel@tonic-gate 			 * incomplete TCP instance, which would lead to
17167c478bd9Sstevel@tonic-gate 			 * problems when sockfs sends a normal T_CONN_RES
17177c478bd9Sstevel@tonic-gate 			 * message down the new stream.
17187c478bd9Sstevel@tonic-gate 			 */
17190f1702c5SYu Xiangning 			if (sti->sti_direct) {
1720ff550d0eSmasputra 				int rval;
1721ff550d0eSmasputra 				/*
1722ff550d0eSmasputra 				 * For consistency we inform tcp to disable
1723ff550d0eSmasputra 				 * direct interface on the listener, though
1724ff550d0eSmasputra 				 * we can certainly live without doing this
1725ff550d0eSmasputra 				 * because no data will ever travel upstream
1726ff550d0eSmasputra 				 * on the listening socket.
1727ff550d0eSmasputra 				 */
17280f1702c5SYu Xiangning 				sti->sti_direct = 0;
1729ff550d0eSmasputra 				(void) strioctl(SOTOV(so), _SIOCSOCKFALLBACK,
1730de8c4a14SErik Nordmark 				    0, 0, K_TO_K, cr, &rval);
1731ff550d0eSmasputra 			}
17327c478bd9Sstevel@tonic-gate 			opt = NULL;
17337c478bd9Sstevel@tonic-gate 			optlen = 0;
17347c478bd9Sstevel@tonic-gate 		}
17357c478bd9Sstevel@tonic-gate 		break;
17367c478bd9Sstevel@tonic-gate 	case AF_UNIX:
17377c478bd9Sstevel@tonic-gate 	default:
17387c478bd9Sstevel@tonic-gate 		if (optlen != 0) {
17397c478bd9Sstevel@tonic-gate 			opt = sogetoff(mp, conn_ind->OPT_offset, optlen,
17407c478bd9Sstevel@tonic-gate 			    __TPI_ALIGN_SIZE);
17417c478bd9Sstevel@tonic-gate 			if (opt == NULL) {
17427c478bd9Sstevel@tonic-gate 				error = EPROTO;
17437c478bd9Sstevel@tonic-gate 				freemsg(mp);
17447c478bd9Sstevel@tonic-gate 				eprintsoline(so, error);
17457c478bd9Sstevel@tonic-gate 				goto disconnect_unlocked;
17467c478bd9Sstevel@tonic-gate 			}
17477c478bd9Sstevel@tonic-gate 		}
17487c478bd9Sstevel@tonic-gate 		if (so->so_family == AF_UNIX) {
17490f1702c5SYu Xiangning 			if (!sti->sti_faddr_noxlate) {
17507c478bd9Sstevel@tonic-gate 				src = NULL;
17517c478bd9Sstevel@tonic-gate 				srclen = 0;
17527c478bd9Sstevel@tonic-gate 			}
17537c478bd9Sstevel@tonic-gate 			/* Extract src address from options */
17547c478bd9Sstevel@tonic-gate 			if (optlen != 0)
17557c478bd9Sstevel@tonic-gate 				so_getopt_srcaddr(opt, optlen, &src, &srclen);
17567c478bd9Sstevel@tonic-gate 		}
17577c478bd9Sstevel@tonic-gate 		break;
17587c478bd9Sstevel@tonic-gate 	}
17597c478bd9Sstevel@tonic-gate 
17607c478bd9Sstevel@tonic-gate 	/*
17617c478bd9Sstevel@tonic-gate 	 * Create the new socket.
17627c478bd9Sstevel@tonic-gate 	 */
17630f1702c5SYu Xiangning 	nso = socket_newconn(so, NULL, NULL, SOCKET_SLEEP, &error);
17647c478bd9Sstevel@tonic-gate 	if (nso == NULL) {
17657c478bd9Sstevel@tonic-gate 		ASSERT(error != 0);
17667c478bd9Sstevel@tonic-gate 		/*
17677c478bd9Sstevel@tonic-gate 		 * Accept can not fail with ENOBUFS. sotpi_create
17687c478bd9Sstevel@tonic-gate 		 * sleeps waiting for memory until a signal is caught
17697c478bd9Sstevel@tonic-gate 		 * so return EINTR.
17707c478bd9Sstevel@tonic-gate 		 */
17717c478bd9Sstevel@tonic-gate 		freemsg(mp);
17727c478bd9Sstevel@tonic-gate 		if (error == ENOBUFS)
17737c478bd9Sstevel@tonic-gate 			error = EINTR;
17747c478bd9Sstevel@tonic-gate 		goto e_disc_unl;
17757c478bd9Sstevel@tonic-gate 	}
17767c478bd9Sstevel@tonic-gate 	nvp = SOTOV(nso);
17770f1702c5SYu Xiangning 	nsti = SOTOTPI(nso);
17787c478bd9Sstevel@tonic-gate 
17797c478bd9Sstevel@tonic-gate #ifdef DEBUG
17807c478bd9Sstevel@tonic-gate 	/*
17817c478bd9Sstevel@tonic-gate 	 * SO_DEBUG is used to trigger the dprint* and eprint* macros thus
17827c478bd9Sstevel@tonic-gate 	 * it's inherited early to allow debugging of the accept code itself.
17837c478bd9Sstevel@tonic-gate 	 */
17847c478bd9Sstevel@tonic-gate 	nso->so_options |= so->so_options & SO_DEBUG;
17857c478bd9Sstevel@tonic-gate #endif /* DEBUG */
17867c478bd9Sstevel@tonic-gate 
17877c478bd9Sstevel@tonic-gate 	/*
17887c478bd9Sstevel@tonic-gate 	 * Save the SRC address from the T_CONN_IND
17897c478bd9Sstevel@tonic-gate 	 * for getpeername to work on AF_UNIX and on transports that do not
17907c478bd9Sstevel@tonic-gate 	 * support TI_GETPEERNAME.
17917c478bd9Sstevel@tonic-gate 	 *
17927c478bd9Sstevel@tonic-gate 	 * NOTE: AF_UNIX NUL termination is ensured by the sender's
17937c478bd9Sstevel@tonic-gate 	 * copyin_name().
17947c478bd9Sstevel@tonic-gate 	 */
17950f1702c5SYu Xiangning 	if (srclen > (t_uscalar_t)nsti->sti_faddr_maxlen) {
17967c478bd9Sstevel@tonic-gate 		error = EINVAL;
17977c478bd9Sstevel@tonic-gate 		freemsg(mp);
17987c478bd9Sstevel@tonic-gate 		eprintsoline(so, error);
17997c478bd9Sstevel@tonic-gate 		goto disconnect_vp_unlocked;
18007c478bd9Sstevel@tonic-gate 	}
18010f1702c5SYu Xiangning 	nsti->sti_faddr_len = (socklen_t)srclen;
18020f1702c5SYu Xiangning 	ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen);
18030f1702c5SYu Xiangning 	bcopy(src, nsti->sti_faddr_sa, srclen);
18040f1702c5SYu Xiangning 	nsti->sti_faddr_valid = 1;
18057c478bd9Sstevel@tonic-gate 
1806de8c4a14SErik Nordmark 	/*
1807de8c4a14SErik Nordmark 	 * Record so_peercred and so_cpid from a cred in the T_CONN_IND.
1808de8c4a14SErik Nordmark 	 */
18097c478bd9Sstevel@tonic-gate 	if ((DB_REF(mp) > 1) || MBLKSIZE(mp) <
18107c478bd9Sstevel@tonic-gate 	    (sizeof (struct T_conn_res) + sizeof (intptr_t))) {
18117c478bd9Sstevel@tonic-gate 		cred_t	*cr;
1812de8c4a14SErik Nordmark 		pid_t	cpid;
18137c478bd9Sstevel@tonic-gate 
1814de8c4a14SErik Nordmark 		cr = msg_getcred(mp, &cpid);
1815de8c4a14SErik Nordmark 		if (cr != NULL) {
18167c478bd9Sstevel@tonic-gate 			crhold(cr);
18177c478bd9Sstevel@tonic-gate 			nso->so_peercred = cr;
1818de8c4a14SErik Nordmark 			nso->so_cpid = cpid;
18197c478bd9Sstevel@tonic-gate 		}
18207c478bd9Sstevel@tonic-gate 		freemsg(mp);
18217c478bd9Sstevel@tonic-gate 
18227c478bd9Sstevel@tonic-gate 		mp = soallocproto1(NULL, sizeof (struct T_conn_res) +
1823d4f98ef9SAnders Persson 		    sizeof (intptr_t), 0, _ALLOC_INTR, cr);
18247c478bd9Sstevel@tonic-gate 		if (mp == NULL) {
18257c478bd9Sstevel@tonic-gate 			/*
18267c478bd9Sstevel@tonic-gate 			 * Accept can not fail with ENOBUFS.
18277c478bd9Sstevel@tonic-gate 			 * A signal was caught so return EINTR.
18287c478bd9Sstevel@tonic-gate 			 */
18297c478bd9Sstevel@tonic-gate 			error = EINTR;
18307c478bd9Sstevel@tonic-gate 			eprintsoline(so, error);
18317c478bd9Sstevel@tonic-gate 			goto disconnect_vp_unlocked;
18327c478bd9Sstevel@tonic-gate 		}
18337c478bd9Sstevel@tonic-gate 		conn_res = (struct T_conn_res *)mp->b_rptr;
18347c478bd9Sstevel@tonic-gate 	} else {
1835de8c4a14SErik Nordmark 		/*
1836de8c4a14SErik Nordmark 		 * For efficiency reasons we use msg_extractcred; no crhold
1837de8c4a14SErik Nordmark 		 * needed since db_credp is cleared (i.e., we move the cred
1838de8c4a14SErik Nordmark 		 * from the message to so_peercred).
1839de8c4a14SErik Nordmark 		 */
1840de8c4a14SErik Nordmark 		nso->so_peercred = msg_extractcred(mp, &nso->so_cpid);
18417c478bd9Sstevel@tonic-gate 
18427c478bd9Sstevel@tonic-gate 		mp->b_rptr = DB_BASE(mp);
18437c478bd9Sstevel@tonic-gate 		conn_res = (struct T_conn_res *)mp->b_rptr;
18447c478bd9Sstevel@tonic-gate 		mp->b_wptr = mp->b_rptr + sizeof (struct T_conn_res);
1845d4f98ef9SAnders Persson 
1846d4f98ef9SAnders Persson 		mblk_setcred(mp, cr, curproc->p_pid);
18477c478bd9Sstevel@tonic-gate 	}
18487c478bd9Sstevel@tonic-gate 
18497c478bd9Sstevel@tonic-gate 	/*
18507c478bd9Sstevel@tonic-gate 	 * New socket must be bound at least in sockfs and, except for AF_INET,
18517c478bd9Sstevel@tonic-gate 	 * (or AF_INET6) it also has to be bound in the transport provider.
1852188658baSja97890 	 * We set the local address in the sonode from the T_OK_ACK of the
1853188658baSja97890 	 * T_CONN_RES. For this reason the address we bind to here isn't
1854188658baSja97890 	 * important.
18557c478bd9Sstevel@tonic-gate 	 */
18567c478bd9Sstevel@tonic-gate 	if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) &&
18577c478bd9Sstevel@tonic-gate 	    /*CONSTCOND*/
18587c478bd9Sstevel@tonic-gate 	    nso->so_type == SOCK_STREAM && !soaccept_tpi_tcp) {
18597c478bd9Sstevel@tonic-gate 		/*
18607c478bd9Sstevel@tonic-gate 		 * Optimization for AF_INET{,6} transports
18617c478bd9Sstevel@tonic-gate 		 * that can handle a T_CONN_RES without being bound.
18627c478bd9Sstevel@tonic-gate 		 */
18637c478bd9Sstevel@tonic-gate 		mutex_enter(&nso->so_lock);
18647c478bd9Sstevel@tonic-gate 		so_automatic_bind(nso);
18657c478bd9Sstevel@tonic-gate 		mutex_exit(&nso->so_lock);
18667c478bd9Sstevel@tonic-gate 	} else {
18677c478bd9Sstevel@tonic-gate 		/* Perform NULL bind with the transport provider. */
18680f1702c5SYu Xiangning 		if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC,
18690f1702c5SYu Xiangning 		    cr)) != 0) {
18707c478bd9Sstevel@tonic-gate 			ASSERT(error != ENOBUFS);
18717c478bd9Sstevel@tonic-gate 			freemsg(mp);
18727c478bd9Sstevel@tonic-gate 			eprintsoline(nso, error);
18737c478bd9Sstevel@tonic-gate 			goto disconnect_vp_unlocked;
18747c478bd9Sstevel@tonic-gate 		}
18757c478bd9Sstevel@tonic-gate 	}
18767c478bd9Sstevel@tonic-gate 
18777c478bd9Sstevel@tonic-gate 	/*
18787c478bd9Sstevel@tonic-gate 	 * Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES
18797c478bd9Sstevel@tonic-gate 	 * so that any data arriving on the new socket will cause the
18807c478bd9Sstevel@tonic-gate 	 * appropriate signals to be delivered for the new socket.
18817c478bd9Sstevel@tonic-gate 	 *
18827c478bd9Sstevel@tonic-gate 	 * No other thread (except strsock_proto and strsock_misc)
18837c478bd9Sstevel@tonic-gate 	 * can access the new socket thus we relax the locking.
18847c478bd9Sstevel@tonic-gate 	 */
18857c478bd9Sstevel@tonic-gate 	nso->so_pgrp = so->so_pgrp;
18860f1702c5SYu Xiangning 	nso->so_state |= so->so_state & SS_ASYNC;
18870f1702c5SYu Xiangning 	nsti->sti_faddr_noxlate = sti->sti_faddr_noxlate;
18887c478bd9Sstevel@tonic-gate 
18897c478bd9Sstevel@tonic-gate 	if (nso->so_pgrp != 0) {
1890de8c4a14SErik Nordmark 		if ((error = so_set_events(nso, nvp, cr)) != 0) {
18917c478bd9Sstevel@tonic-gate 			eprintsoline(nso, error);
18927c478bd9Sstevel@tonic-gate 			error = 0;
18937c478bd9Sstevel@tonic-gate 			nso->so_pgrp = 0;
18947c478bd9Sstevel@tonic-gate 		}
18957c478bd9Sstevel@tonic-gate 	}
18967c478bd9Sstevel@tonic-gate 
18977c478bd9Sstevel@tonic-gate 	/*
18987c478bd9Sstevel@tonic-gate 	 * Make note of the socket level options. TCP and IP level options
18997c478bd9Sstevel@tonic-gate 	 * are already inherited. We could do all this after accept is
19007c478bd9Sstevel@tonic-gate 	 * successful but doing it here simplifies code and no harm done
19017c478bd9Sstevel@tonic-gate 	 * for error case.
19027c478bd9Sstevel@tonic-gate 	 */
19037c478bd9Sstevel@tonic-gate 	nso->so_options = so->so_options & (SO_DEBUG|SO_REUSEADDR|SO_KEEPALIVE|
19047c478bd9Sstevel@tonic-gate 	    SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK|
19057c478bd9Sstevel@tonic-gate 	    SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER);
19067c478bd9Sstevel@tonic-gate 	nso->so_sndbuf = so->so_sndbuf;
19077c478bd9Sstevel@tonic-gate 	nso->so_rcvbuf = so->so_rcvbuf;
19087c478bd9Sstevel@tonic-gate 	if (nso->so_options & SO_LINGER)
19097c478bd9Sstevel@tonic-gate 		nso->so_linger = so->so_linger;
19107c478bd9Sstevel@tonic-gate 
19110f1702c5SYu Xiangning 	/*
19120f1702c5SYu Xiangning 	 * Note that the following sti_direct code path should be
19130f1702c5SYu Xiangning 	 * removed once we are confident that the direct sockets
19140f1702c5SYu Xiangning 	 * do not result in any degradation.
19150f1702c5SYu Xiangning 	 */
19160f1702c5SYu Xiangning 	if (sti->sti_direct) {
19177c478bd9Sstevel@tonic-gate 
19187c478bd9Sstevel@tonic-gate 		ASSERT(opt != NULL);
19197c478bd9Sstevel@tonic-gate 
19207c478bd9Sstevel@tonic-gate 		conn_res->OPT_length = optlen;
19217c478bd9Sstevel@tonic-gate 		conn_res->OPT_offset = MBLKL(mp);
19227c478bd9Sstevel@tonic-gate 		bcopy(&opt, mp->b_wptr, optlen);
19237c478bd9Sstevel@tonic-gate 		mp->b_wptr += optlen;
19247c478bd9Sstevel@tonic-gate 		conn_res->PRIM_type = T_CONN_RES;
19257c478bd9Sstevel@tonic-gate 		conn_res->ACCEPTOR_id = 0;
19267c478bd9Sstevel@tonic-gate 		PRIM_type = T_CONN_RES;
19277c478bd9Sstevel@tonic-gate 
19287c478bd9Sstevel@tonic-gate 		/* Send down the T_CONN_RES on acceptor STREAM */
19297c478bd9Sstevel@tonic-gate 		error = kstrputmsg(SOTOV(nso), mp, NULL,
19307c478bd9Sstevel@tonic-gate 		    0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
19317c478bd9Sstevel@tonic-gate 		if (error) {
19327c478bd9Sstevel@tonic-gate 			mutex_enter(&so->so_lock);
19337c478bd9Sstevel@tonic-gate 			so_lock_single(so);
19347c478bd9Sstevel@tonic-gate 			eprintsoline(so, error);
19357c478bd9Sstevel@tonic-gate 			goto disconnect_vp;
19367c478bd9Sstevel@tonic-gate 		}
19377c478bd9Sstevel@tonic-gate 		mutex_enter(&nso->so_lock);
19387c478bd9Sstevel@tonic-gate 		error = sowaitprim(nso, T_CONN_RES, T_OK_ACK,
19397c478bd9Sstevel@tonic-gate 		    (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0);
19407c478bd9Sstevel@tonic-gate 		if (error) {
19417c478bd9Sstevel@tonic-gate 			mutex_exit(&nso->so_lock);
19427c478bd9Sstevel@tonic-gate 			mutex_enter(&so->so_lock);
19437c478bd9Sstevel@tonic-gate 			so_lock_single(so);
19447c478bd9Sstevel@tonic-gate 			eprintsoline(so, error);
19457c478bd9Sstevel@tonic-gate 			goto disconnect_vp;
19467c478bd9Sstevel@tonic-gate 		}
19477c478bd9Sstevel@tonic-gate 		if (nso->so_family == AF_INET) {
19487c478bd9Sstevel@tonic-gate 			sin_t *sin;
19497c478bd9Sstevel@tonic-gate 
19507c478bd9Sstevel@tonic-gate 			sin = (sin_t *)(ack_mp->b_rptr +
19517c478bd9Sstevel@tonic-gate 			    sizeof (struct T_ok_ack));
19520f1702c5SYu Xiangning 			bcopy(sin, nsti->sti_laddr_sa, sizeof (sin_t));
19530f1702c5SYu Xiangning 			nsti->sti_laddr_len = sizeof (sin_t);
19547c478bd9Sstevel@tonic-gate 		} else {
19557c478bd9Sstevel@tonic-gate 			sin6_t *sin6;
19567c478bd9Sstevel@tonic-gate 
19577c478bd9Sstevel@tonic-gate 			sin6 = (sin6_t *)(ack_mp->b_rptr +
19587c478bd9Sstevel@tonic-gate 			    sizeof (struct T_ok_ack));
19590f1702c5SYu Xiangning 			bcopy(sin6, nsti->sti_laddr_sa, sizeof (sin6_t));
19600f1702c5SYu Xiangning 			nsti->sti_laddr_len = sizeof (sin6_t);
19617c478bd9Sstevel@tonic-gate 		}
19627c478bd9Sstevel@tonic-gate 		freemsg(ack_mp);
19637c478bd9Sstevel@tonic-gate 
19640f1702c5SYu Xiangning 		nso->so_state |= SS_ISCONNECTED;
19650f1702c5SYu Xiangning 		nso->so_proto_handle = (sock_lower_handle_t)opt;
19660f1702c5SYu Xiangning 		nsti->sti_laddr_valid = 1;
19677c478bd9Sstevel@tonic-gate 
19680f1702c5SYu Xiangning 		if (sti->sti_nl7c_flags & NL7C_ENABLED) {
19697c478bd9Sstevel@tonic-gate 			/*
19702c9e429eSbrutus 			 * A NL7C marked listen()er so the new socket
19712c9e429eSbrutus 			 * inherits the listen()er's NL7C state, except
19722c9e429eSbrutus 			 * for NL7C_POLLIN.
19737c478bd9Sstevel@tonic-gate 			 *
19742c9e429eSbrutus 			 * Only call NL7C to process the new socket if
19752c9e429eSbrutus 			 * the listen socket allows blocking i/o.
19767c478bd9Sstevel@tonic-gate 			 */
19770f1702c5SYu Xiangning 			nsti->sti_nl7c_flags =
19780f1702c5SYu Xiangning 			    sti->sti_nl7c_flags & (~NL7C_POLLIN);
19792c9e429eSbrutus 			if (so->so_state & (SS_NONBLOCK|SS_NDELAY)) {
19802c9e429eSbrutus 				/*
19812c9e429eSbrutus 				 * Nonblocking accept(): just mark the new
19822c9e429eSbrutus 				 * socket NL7C_SOPERSIST to defer processing
19832c9e429eSbrutus 				 * to the read-side syscall (e.g. read).
19842c9e429eSbrutus 				 */
19850f1702c5SYu Xiangning 				nsti->sti_nl7c_flags |= NL7C_SOPERSIST;
19862c9e429eSbrutus 			} else if (nl7c_process(nso, B_FALSE)) {
19877c478bd9Sstevel@tonic-gate 				/*
19887c478bd9Sstevel@tonic-gate 				 * NL7C has completed processing on the
19897c478bd9Sstevel@tonic-gate 				 * socket, close the socket and back to
19907c478bd9Sstevel@tonic-gate 				 * the top to await the next T_CONN_IND.
19917c478bd9Sstevel@tonic-gate 				 */
19927c478bd9Sstevel@tonic-gate 				mutex_exit(&nso->so_lock);
19937c478bd9Sstevel@tonic-gate 				(void) VOP_CLOSE(nvp, 0, 1, (offset_t)0,
1994de8c4a14SErik Nordmark 				    cr, NULL);
19957c478bd9Sstevel@tonic-gate 				VN_RELE(nvp);
19967c478bd9Sstevel@tonic-gate 				goto again;
19977c478bd9Sstevel@tonic-gate 			}
19987c478bd9Sstevel@tonic-gate 			/* Pass the new socket out */
19997c478bd9Sstevel@tonic-gate 		}
20007c478bd9Sstevel@tonic-gate 
20017c478bd9Sstevel@tonic-gate 		mutex_exit(&nso->so_lock);
20027c478bd9Sstevel@tonic-gate 
20037c478bd9Sstevel@tonic-gate 		/*
20047d6c035bSja97890 		 * It's possible, through the use of autopush for example,
20050f1702c5SYu Xiangning 		 * that the acceptor stream may not support sti_direct
20060f1702c5SYu Xiangning 		 * semantics. If the new socket does not support sti_direct
20077d6c035bSja97890 		 * we issue a _SIOCSOCKFALLBACK to inform the transport
20087d6c035bSja97890 		 * as we would in the I_PUSH case.
20097d6c035bSja97890 		 */
20100f1702c5SYu Xiangning 		if (nsti->sti_direct == 0) {
20117d6c035bSja97890 			int	rval;
20127d6c035bSja97890 
20137d6c035bSja97890 			if ((error = strioctl(SOTOV(nso), _SIOCSOCKFALLBACK,
2014de8c4a14SErik Nordmark 			    0, 0, K_TO_K, cr, &rval)) != 0) {
20157d6c035bSja97890 				mutex_enter(&so->so_lock);
20167d6c035bSja97890 				so_lock_single(so);
20177d6c035bSja97890 				eprintsoline(so, error);
20187d6c035bSja97890 				goto disconnect_vp;
20197d6c035bSja97890 			}
20207d6c035bSja97890 		}
20217d6c035bSja97890 
20227d6c035bSja97890 		/*
20237c478bd9Sstevel@tonic-gate 		 * Pass out new socket.
20247c478bd9Sstevel@tonic-gate 		 */
20257c478bd9Sstevel@tonic-gate 		if (nsop != NULL)
20267c478bd9Sstevel@tonic-gate 			*nsop = nso;
20277c478bd9Sstevel@tonic-gate 
20287c478bd9Sstevel@tonic-gate 		return (0);
20297c478bd9Sstevel@tonic-gate 	}
20307c478bd9Sstevel@tonic-gate 
20317c478bd9Sstevel@tonic-gate 	/*
20327c478bd9Sstevel@tonic-gate 	 * This is the non-performance case for sockets (e.g. AF_UNIX sockets)
20337c478bd9Sstevel@tonic-gate 	 * which don't support the FireEngine accept fast-path. It is also
20347c478bd9Sstevel@tonic-gate 	 * used when the virtual "sockmod" has been I_POP'd and I_PUSH'd
20357c478bd9Sstevel@tonic-gate 	 * again. Neither sockfs nor TCP attempt to find out if some other
20367c478bd9Sstevel@tonic-gate 	 * random module has been inserted in between (in which case we
20377c478bd9Sstevel@tonic-gate 	 * should follow TLI accept behaviour). We blindly assume the worst
20387c478bd9Sstevel@tonic-gate 	 * case and revert back to old behaviour i.e. TCP will not send us
20397c478bd9Sstevel@tonic-gate 	 * any option (eager) and the accept should happen on the listener
20407c478bd9Sstevel@tonic-gate 	 * queue. Any queued T_conn_ind have already got their options removed
20417c478bd9Sstevel@tonic-gate 	 * by so_sock2_stream() when "sockmod" was I_POP'd.
20427c478bd9Sstevel@tonic-gate 	 */
20437c478bd9Sstevel@tonic-gate 	/*
20447c478bd9Sstevel@tonic-gate 	 * Fill in the {O_}T_CONN_RES before getting SOLOCKED.
20457c478bd9Sstevel@tonic-gate 	 */
20467c478bd9Sstevel@tonic-gate 	if ((nso->so_mode & SM_ACCEPTOR_ID) == 0) {
20477c478bd9Sstevel@tonic-gate #ifdef	_ILP32
20487c478bd9Sstevel@tonic-gate 		queue_t	*q;
20497c478bd9Sstevel@tonic-gate 
20507c478bd9Sstevel@tonic-gate 		/*
20517c478bd9Sstevel@tonic-gate 		 * Find read queue in driver
20527c478bd9Sstevel@tonic-gate 		 * Can safely do this since we "own" nso/nvp.
20537c478bd9Sstevel@tonic-gate 		 */
20547c478bd9Sstevel@tonic-gate 		q = strvp2wq(nvp)->q_next;
20557c478bd9Sstevel@tonic-gate 		while (SAMESTR(q))
20567c478bd9Sstevel@tonic-gate 			q = q->q_next;
20577c478bd9Sstevel@tonic-gate 		q = RD(q);
20587c478bd9Sstevel@tonic-gate 		conn_res->ACCEPTOR_id = (t_uscalar_t)q;
20597c478bd9Sstevel@tonic-gate #else
20607c478bd9Sstevel@tonic-gate 		conn_res->ACCEPTOR_id = (t_uscalar_t)getminor(nvp->v_rdev);
20617c478bd9Sstevel@tonic-gate #endif	/* _ILP32 */
20627c478bd9Sstevel@tonic-gate 		conn_res->PRIM_type = O_T_CONN_RES;
20637c478bd9Sstevel@tonic-gate 		PRIM_type = O_T_CONN_RES;
20647c478bd9Sstevel@tonic-gate 	} else {
20650f1702c5SYu Xiangning 		conn_res->ACCEPTOR_id = nsti->sti_acceptor_id;
20667c478bd9Sstevel@tonic-gate 		conn_res->PRIM_type = T_CONN_RES;
20677c478bd9Sstevel@tonic-gate 		PRIM_type = T_CONN_RES;
20687c478bd9Sstevel@tonic-gate 	}
20697c478bd9Sstevel@tonic-gate 	conn_res->SEQ_number = SEQ_number;
20707c478bd9Sstevel@tonic-gate 	conn_res->OPT_length = 0;
20717c478bd9Sstevel@tonic-gate 	conn_res->OPT_offset = 0;
20727c478bd9Sstevel@tonic-gate 
20737c478bd9Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
20747c478bd9Sstevel@tonic-gate 	so_lock_single(so);	/* Set SOLOCKED */
20757c478bd9Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
20767c478bd9Sstevel@tonic-gate 
20777c478bd9Sstevel@tonic-gate 	error = kstrputmsg(SOTOV(so), mp, NULL,
20787c478bd9Sstevel@tonic-gate 	    0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
20797c478bd9Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
20807c478bd9Sstevel@tonic-gate 	if (error) {
20817c478bd9Sstevel@tonic-gate 		eprintsoline(so, error);
20827c478bd9Sstevel@tonic-gate 		goto disconnect_vp;
20837c478bd9Sstevel@tonic-gate 	}
2084188658baSja97890 	error = sowaitprim(so, PRIM_type, T_OK_ACK,
2085188658baSja97890 	    (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0);
20867c478bd9Sstevel@tonic-gate 	if (error) {
20877c478bd9Sstevel@tonic-gate 		eprintsoline(so, error);
20887c478bd9Sstevel@tonic-gate 		goto disconnect_vp;
20897c478bd9Sstevel@tonic-gate 	}
2090881776cfSGeorge Shepherd 	mutex_exit(&so->so_lock);
2091188658baSja97890 	/*
2092188658baSja97890 	 * If there is a sin/sin6 appended onto the T_OK_ACK use
2093188658baSja97890 	 * that to set the local address. If this is not present
2094188658baSja97890 	 * then we zero out the address and don't set the
20950f1702c5SYu Xiangning 	 * sti_laddr_valid bit. For AF_UNIX endpoints we copy over
20966e81d8daSja97890 	 * the pathname from the listening socket.
2097881776cfSGeorge Shepherd 	 * In the case where this is TCP or an AF_UNIX socket the
2098881776cfSGeorge Shepherd 	 * client side may have queued data or a T_ORDREL in the
2099881776cfSGeorge Shepherd 	 * transport. Having now sent the T_CONN_RES we may receive
2100881776cfSGeorge Shepherd 	 * those queued messages at any time. Hold the acceptor
2101881776cfSGeorge Shepherd 	 * so_lock until its state and laddr are finalized.
2102188658baSja97890 	 */
2103881776cfSGeorge Shepherd 	mutex_enter(&nso->so_lock);
2104188658baSja97890 	sinlen = (nso->so_family == AF_INET) ? sizeof (sin_t) : sizeof (sin6_t);
2105188658baSja97890 	if ((nso->so_family == AF_INET) || (nso->so_family == AF_INET6) &&
2106188658baSja97890 	    MBLKL(ack_mp) == (sizeof (struct T_ok_ack) + sinlen)) {
2107188658baSja97890 		ack_mp->b_rptr += sizeof (struct T_ok_ack);
21080f1702c5SYu Xiangning 		bcopy(ack_mp->b_rptr, nsti->sti_laddr_sa, sinlen);
21090f1702c5SYu Xiangning 		nsti->sti_laddr_len = sinlen;
21100f1702c5SYu Xiangning 		nsti->sti_laddr_valid = 1;
21116e81d8daSja97890 	} else if (nso->so_family == AF_UNIX) {
21126e81d8daSja97890 		ASSERT(so->so_family == AF_UNIX);
21130f1702c5SYu Xiangning 		nsti->sti_laddr_len = sti->sti_laddr_len;
21140f1702c5SYu Xiangning 		ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen);
21150f1702c5SYu Xiangning 		bcopy(sti->sti_laddr_sa, nsti->sti_laddr_sa,
21160f1702c5SYu Xiangning 		    nsti->sti_laddr_len);
21170f1702c5SYu Xiangning 		nsti->sti_laddr_valid = 1;
2118188658baSja97890 	} else {
21190f1702c5SYu Xiangning 		nsti->sti_laddr_len = sti->sti_laddr_len;
21200f1702c5SYu Xiangning 		ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen);
21210f1702c5SYu Xiangning 		bzero(nsti->sti_laddr_sa, nsti->sti_addr_size);
21220f1702c5SYu Xiangning 		nsti->sti_laddr_sa->sa_family = nso->so_family;
2123188658baSja97890 	}
2124881776cfSGeorge Shepherd 	nso->so_state |= SS_ISCONNECTED;
2125881776cfSGeorge Shepherd 	mutex_exit(&nso->so_lock);
2126881776cfSGeorge Shepherd 
2127188658baSja97890 	freemsg(ack_mp);
2128188658baSja97890 
2129881776cfSGeorge Shepherd 	mutex_enter(&so->so_lock);
21307c478bd9Sstevel@tonic-gate 	so_unlock_single(so, SOLOCKED);
21317c478bd9Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
21327c478bd9Sstevel@tonic-gate 
21337c478bd9Sstevel@tonic-gate 	/*
21347c478bd9Sstevel@tonic-gate 	 * Pass out new socket.
21357c478bd9Sstevel@tonic-gate 	 */
21367c478bd9Sstevel@tonic-gate 	if (nsop != NULL)
21377c478bd9Sstevel@tonic-gate 		*nsop = nso;
21387c478bd9Sstevel@tonic-gate 
21397c478bd9Sstevel@tonic-gate 	return (0);
21407c478bd9Sstevel@tonic-gate 
21417c478bd9Sstevel@tonic-gate 
21427c478bd9Sstevel@tonic-gate eproto_disc_unl:
21437c478bd9Sstevel@tonic-gate 	error = EPROTO;
21447c478bd9Sstevel@tonic-gate e_disc_unl:
21457c478bd9Sstevel@tonic-gate 	eprintsoline(so, error);
21467c478bd9Sstevel@tonic-gate 	goto disconnect_unlocked;
21477c478bd9Sstevel@tonic-gate 
21487c478bd9Sstevel@tonic-gate pr_disc_vp_unl:
21497c478bd9Sstevel@tonic-gate 	eprintsoline(so, error);
21507c478bd9Sstevel@tonic-gate disconnect_vp_unlocked:
2151de8c4a14SErik Nordmark 	(void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL);
21527c478bd9Sstevel@tonic-gate 	VN_RELE(nvp);
21537c478bd9Sstevel@tonic-gate disconnect_unlocked:
21547c478bd9Sstevel@tonic-gate 	(void) sodisconnect(so, SEQ_number, 0);
21557c478bd9Sstevel@tonic-gate 	return (error);
21567c478bd9Sstevel@tonic-gate 
21577c478bd9Sstevel@tonic-gate pr_disc_vp:
21587c478bd9Sstevel@tonic-gate 	eprintsoline(so, error);
21597c478bd9Sstevel@tonic-gate disconnect_vp:
21607c478bd9Sstevel@tonic-gate 	(void) sodisconnect(so, SEQ_number, _SODISCONNECT_LOCK_HELD);
21617c478bd9Sstevel@tonic-gate 	so_unlock_single(so, SOLOCKED);
21627c478bd9Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
2163de8c4a14SErik Nordmark 	(void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL);
21647c478bd9Sstevel@tonic-gate 	VN_RELE(nvp);
21657c478bd9Sstevel@tonic-gate 	return (error);
21667c478bd9Sstevel@tonic-gate 
21677c478bd9Sstevel@tonic-gate conn_bad:	/* Note: SunOS 4/BSD unconditionally returns EINVAL here */
21687c478bd9Sstevel@tonic-gate 	error = (so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW)
21697c478bd9Sstevel@tonic-gate 	    ? EOPNOTSUPP : EINVAL;
21707c478bd9Sstevel@tonic-gate e_bad:
21717c478bd9Sstevel@tonic-gate 	eprintsoline(so, error);
21727c478bd9Sstevel@tonic-gate 	return (error);
21737c478bd9Sstevel@tonic-gate }
21747c478bd9Sstevel@tonic-gate 
21757c478bd9Sstevel@tonic-gate /*
21767c478bd9Sstevel@tonic-gate  * connect a socket.
21777c478bd9Sstevel@tonic-gate  *
21787c478bd9Sstevel@tonic-gate  * Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to
21797c478bd9Sstevel@tonic-gate  * unconnect (by specifying a null address).
21807c478bd9Sstevel@tonic-gate  */
21817c478bd9Sstevel@tonic-gate int
21827c478bd9Sstevel@tonic-gate sotpi_connect(struct sonode *so,
21833e95bd4aSAnders Persson     struct sockaddr *name,
21847c478bd9Sstevel@tonic-gate     socklen_t namelen,
21857c478bd9Sstevel@tonic-gate     int fflag,
21860f1702c5SYu Xiangning     int flags,
21870f1702c5SYu Xiangning     struct cred *cr)
21887c478bd9Sstevel@tonic-gate {
21897c478bd9Sstevel@tonic-gate 	struct T_conn_req	conn_req;
21907c478bd9Sstevel@tonic-gate 	int			error = 0;
21917c478bd9Sstevel@tonic-gate 	mblk_t			*mp;
21927c478bd9Sstevel@tonic-gate 	void			*src;
21937c478bd9Sstevel@tonic-gate 	socklen_t		srclen;
21947c478bd9Sstevel@tonic-gate 	void			*addr;
21957c478bd9Sstevel@tonic-gate 	socklen_t		addrlen;
21967c478bd9Sstevel@tonic-gate 	boolean_t		need_unlock;
21970f1702c5SYu Xiangning 	sotpi_info_t		*sti = SOTOTPI(so);
21987c478bd9Sstevel@tonic-gate 
21997c478bd9Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n",
2200903a11ebSrh87107 	    (void *)so, (void *)name, namelen, fflag, flags,
22017c478bd9Sstevel@tonic-gate 	    pr_state(so->so_state, so->so_mode)));
22027c478bd9Sstevel@tonic-gate 
22037c478bd9Sstevel@tonic-gate 	/*
22047c478bd9Sstevel@tonic-gate 	 * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to
22057c478bd9Sstevel@tonic-gate 	 * avoid sleeping for memory with SOLOCKED held.
22060f1702c5SYu Xiangning 	 * We know that the T_CONN_REQ can't be larger than 2 * sti_faddr_maxlen
22077c478bd9Sstevel@tonic-gate 	 * + sizeof (struct T_opthdr).
22087c478bd9Sstevel@tonic-gate 	 * (the AF_UNIX so_ux_addr_xlate() does not make the address
22090f1702c5SYu Xiangning 	 * exceed sti_faddr_maxlen).
22107c478bd9Sstevel@tonic-gate 	 */
22117c478bd9Sstevel@tonic-gate 	mp = soallocproto(sizeof (struct T_conn_req) +
2212de8c4a14SErik Nordmark 	    2 * sti->sti_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR,
2213de8c4a14SErik Nordmark 	    cr);
22147c478bd9Sstevel@tonic-gate 	if (mp == NULL) {
22157c478bd9Sstevel@tonic-gate 		/*
22167c478bd9Sstevel@tonic-gate 		 * Connect can not fail with ENOBUFS. A signal was
22177c478bd9Sstevel@tonic-gate 		 * caught so return EINTR.
22187c478bd9Sstevel@tonic-gate 		 */
22197c478bd9Sstevel@tonic-gate 		error = EINTR;
22207c478bd9Sstevel@tonic-gate 		eprintsoline(so, error);
22217c478bd9Sstevel@tonic-gate 		return (error);
22227c478bd9Sstevel@tonic-gate 	}
22237c478bd9Sstevel@tonic-gate 
22247c478bd9Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
22257c478bd9Sstevel@tonic-gate 	/*
2226ba3431dfSjprakash 	 * Make sure there is a preallocated T_unbind_req message
2227ba3431dfSjprakash 	 * before any binding. This message is allocated when the
2228ba3431dfSjprakash 	 * socket is created. Since another thread can consume
2229ba3431dfSjprakash 	 * so_unbind_mp by the time we return from so_lock_single(),
2230ba3431dfSjprakash 	 * we should check the availability of so_unbind_mp after
2231ba3431dfSjprakash 	 * we return from so_lock_single().
22327c478bd9Sstevel@tonic-gate 	 */
2233ba3431dfSjprakash 
2234ba3431dfSjprakash 	so_lock_single(so);	/* Set SOLOCKED */
2235ba3431dfSjprakash 	need_unlock = B_TRUE;
2236ba3431dfSjprakash 
22370f1702c5SYu Xiangning 	if (sti->sti_unbind_mp == NULL) {
22387c478bd9Sstevel@tonic-gate 		dprintso(so, 1, ("sotpi_connect: allocating unbind_req\n"));
22397c478bd9Sstevel@tonic-gate 		/* NOTE: holding so_lock while sleeping */
22400f1702c5SYu Xiangning 		sti->sti_unbind_mp =
2241de8c4a14SErik Nordmark 		    soallocproto(sizeof (struct T_unbind_req), _ALLOC_INTR, cr);
22420f1702c5SYu Xiangning 		if (sti->sti_unbind_mp == NULL) {
22437c478bd9Sstevel@tonic-gate 			error = EINTR;
22447c478bd9Sstevel@tonic-gate 			goto done;
22457c478bd9Sstevel@tonic-gate 		}
22467c478bd9Sstevel@tonic-gate 	}
22477c478bd9Sstevel@tonic-gate 
22487c478bd9Sstevel@tonic-gate 	/*
22497c478bd9Sstevel@tonic-gate 	 * Can't have done a listen before connecting.
22507c478bd9Sstevel@tonic-gate 	 */
22517c478bd9Sstevel@tonic-gate 	if (so->so_state & SS_ACCEPTCONN) {
22527c478bd9Sstevel@tonic-gate 		error = EOPNOTSUPP;
22537c478bd9Sstevel@tonic-gate 		goto done;
22547c478bd9Sstevel@tonic-gate 	}
22557c478bd9Sstevel@tonic-gate 
22567c478bd9Sstevel@tonic-gate 	/*
22577c478bd9Sstevel@tonic-gate 	 * Must be bound with the transport
22587c478bd9Sstevel@tonic-gate 	 */
22597c478bd9Sstevel@tonic-gate 	if (!(so->so_state & SS_ISBOUND)) {
22607c478bd9Sstevel@tonic-gate 		if ((so->so_family == AF_INET || so->so_family == AF_INET6) &&
22617c478bd9Sstevel@tonic-gate 		    /*CONSTCOND*/
22627c478bd9Sstevel@tonic-gate 		    so->so_type == SOCK_STREAM && !soconnect_tpi_tcp) {
22637c478bd9Sstevel@tonic-gate 			/*
22647c478bd9Sstevel@tonic-gate 			 * Optimization for AF_INET{,6} transports
22657c478bd9Sstevel@tonic-gate 			 * that can handle a T_CONN_REQ without being bound.
22667c478bd9Sstevel@tonic-gate 			 */
22677c478bd9Sstevel@tonic-gate 			so_automatic_bind(so);
22687c478bd9Sstevel@tonic-gate 		} else {
22697c478bd9Sstevel@tonic-gate 			error = sotpi_bind(so, NULL, 0,
22700f1702c5SYu Xiangning 			    _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr);
22717c478bd9Sstevel@tonic-gate 			if (error)
22727c478bd9Sstevel@tonic-gate 				goto done;
22737c478bd9Sstevel@tonic-gate 		}
22747c478bd9Sstevel@tonic-gate 		ASSERT(so->so_state & SS_ISBOUND);
22757c478bd9Sstevel@tonic-gate 		flags |= _SOCONNECT_DID_BIND;
22767c478bd9Sstevel@tonic-gate 	}
22777c478bd9Sstevel@tonic-gate 
22787c478bd9Sstevel@tonic-gate 	/*
22797c478bd9Sstevel@tonic-gate 	 * Handle a connect to a name parameter of type AF_UNSPEC like a
22807c478bd9Sstevel@tonic-gate 	 * connect to a null address. This is the portable method to
22817c478bd9Sstevel@tonic-gate 	 * unconnect a socket.
22827c478bd9Sstevel@tonic-gate 	 */
22837c478bd9Sstevel@tonic-gate 	if ((namelen >= sizeof (sa_family_t)) &&
22847c478bd9Sstevel@tonic-gate 	    (name->sa_family == AF_UNSPEC)) {
22857c478bd9Sstevel@tonic-gate 		name = NULL;
22867c478bd9Sstevel@tonic-gate 		namelen = 0;
22877c478bd9Sstevel@tonic-gate 	}
22887c478bd9Sstevel@tonic-gate 
22897c478bd9Sstevel@tonic-gate 	/*
22907c478bd9Sstevel@tonic-gate 	 * Check that we are not already connected.
22917c478bd9Sstevel@tonic-gate 	 * A connection-oriented socket cannot be reconnected.
22927c478bd9Sstevel@tonic-gate 	 * A connected connection-less socket can be
22937c478bd9Sstevel@tonic-gate 	 * - connected to a different address by a subsequent connect
22947c478bd9Sstevel@tonic-gate 	 * - "unconnected" by a connect to the NULL address
22957c478bd9Sstevel@tonic-gate 	 */
22967c478bd9Sstevel@tonic-gate 	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) {
22977c478bd9Sstevel@tonic-gate 		ASSERT(!(flags & _SOCONNECT_DID_BIND));
22987c478bd9Sstevel@tonic-gate 		if (so->so_mode & SM_CONNREQUIRED) {
22997c478bd9Sstevel@tonic-gate 			/* Connection-oriented socket */
23007c478bd9Sstevel@tonic-gate 			error = so->so_state & SS_ISCONNECTED ?
23017c478bd9Sstevel@tonic-gate 			    EISCONN : EALREADY;
23027c478bd9Sstevel@tonic-gate 			goto done;
23037c478bd9Sstevel@tonic-gate 		}
23047c478bd9Sstevel@tonic-gate 		/* Connection-less socket */
23057c478bd9Sstevel@tonic-gate 		if (name == NULL) {
23067c478bd9Sstevel@tonic-gate 			/*
23077c478bd9Sstevel@tonic-gate 			 * Remove the connected state and clear SO_DGRAM_ERRIND
23087c478bd9Sstevel@tonic-gate 			 * since it was set when the socket was connected.
23097c478bd9Sstevel@tonic-gate 			 * If this is UDP also send down a T_DISCON_REQ.
23107c478bd9Sstevel@tonic-gate 			 */
23117c478bd9Sstevel@tonic-gate 			int val;
23127c478bd9Sstevel@tonic-gate 
23137c478bd9Sstevel@tonic-gate 			if ((so->so_family == AF_INET ||
23147c478bd9Sstevel@tonic-gate 			    so->so_family == AF_INET6) &&
23157c478bd9Sstevel@tonic-gate 			    (so->so_type == SOCK_DGRAM ||
23167c478bd9Sstevel@tonic-gate 			    so->so_type == SOCK_RAW) &&
23177c478bd9Sstevel@tonic-gate 			    /*CONSTCOND*/
23187c478bd9Sstevel@tonic-gate 			    !soconnect_tpi_udp) {
23197c478bd9Sstevel@tonic-gate 				/* XXX What about implicitly unbinding here? */
23207c478bd9Sstevel@tonic-gate 				error = sodisconnect(so, -1,
23217c478bd9Sstevel@tonic-gate 				    _SODISCONNECT_LOCK_HELD);
23227c478bd9Sstevel@tonic-gate 			} else {
23237c478bd9Sstevel@tonic-gate 				so->so_state &=
23240f1702c5SYu Xiangning 				    ~(SS_ISCONNECTED | SS_ISCONNECTING);
23250f1702c5SYu Xiangning 				sti->sti_faddr_valid = 0;
23260f1702c5SYu Xiangning 				sti->sti_faddr_len = 0;
23277c478bd9Sstevel@tonic-gate 			}
23287c478bd9Sstevel@tonic-gate 
23290f1702c5SYu Xiangning 			/* Remove SOLOCKED since setsockopt will grab it */
23307c478bd9Sstevel@tonic-gate 			so_unlock_single(so, SOLOCKED);
23317c478bd9Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
23327c478bd9Sstevel@tonic-gate 
23337c478bd9Sstevel@tonic-gate 			val = 0;
23340f1702c5SYu Xiangning 			(void) sotpi_setsockopt(so, SOL_SOCKET,
23350f1702c5SYu Xiangning 			    SO_DGRAM_ERRIND, &val, (t_uscalar_t)sizeof (val),
23360f1702c5SYu Xiangning 			    cr);
23377c478bd9Sstevel@tonic-gate 
23387c478bd9Sstevel@tonic-gate 			mutex_enter(&so->so_lock);
23397c478bd9Sstevel@tonic-gate 			so_lock_single(so);	/* Set SOLOCKED */
23407c478bd9Sstevel@tonic-gate 			goto done;
23417c478bd9Sstevel@tonic-gate 		}
23427c478bd9Sstevel@tonic-gate 	}
23437c478bd9Sstevel@tonic-gate 	ASSERT(so->so_state & SS_ISBOUND);
23447c478bd9Sstevel@tonic-gate 
23457c478bd9Sstevel@tonic-gate 	if (name == NULL || namelen == 0) {
23467c478bd9Sstevel@tonic-gate 		error = EINVAL;
23477c478bd9Sstevel@tonic-gate 		goto done;
23487c478bd9Sstevel@tonic-gate 	}
23497c478bd9Sstevel@tonic-gate 	/*
23500f1702c5SYu Xiangning 	 * Mark the socket if sti_faddr_sa represents the transport level
23517c478bd9Sstevel@tonic-gate 	 * address.
23527c478bd9Sstevel@tonic-gate 	 */
23537c478bd9Sstevel@tonic-gate 	if (flags & _SOCONNECT_NOXLATE) {
23547c478bd9Sstevel@tonic-gate 		struct sockaddr_ux	*soaddr_ux;
23557c478bd9Sstevel@tonic-gate 
23567c478bd9Sstevel@tonic-gate 		ASSERT(so->so_family == AF_UNIX);
23577c478bd9Sstevel@tonic-gate 		if (namelen != sizeof (struct sockaddr_ux)) {
23587c478bd9Sstevel@tonic-gate 			error = EINVAL;
23597c478bd9Sstevel@tonic-gate 			goto done;
23607c478bd9Sstevel@tonic-gate 		}
23617c478bd9Sstevel@tonic-gate 		soaddr_ux = (struct sockaddr_ux *)name;
23627c478bd9Sstevel@tonic-gate 		name = (struct sockaddr *)&soaddr_ux->sou_addr;
23637c478bd9Sstevel@tonic-gate 		namelen = sizeof (soaddr_ux->sou_addr);
23640f1702c5SYu Xiangning 		sti->sti_faddr_noxlate = 1;
23657c478bd9Sstevel@tonic-gate 	}
23667c478bd9Sstevel@tonic-gate 
23677c478bd9Sstevel@tonic-gate 	/*
23687c478bd9Sstevel@tonic-gate 	 * Length and family checks.
23697c478bd9Sstevel@tonic-gate 	 */
23707c478bd9Sstevel@tonic-gate 	error = so_addr_verify(so, name, namelen);
23717c478bd9Sstevel@tonic-gate 	if (error)
23727c478bd9Sstevel@tonic-gate 		goto bad;
23737c478bd9Sstevel@tonic-gate 
23747c478bd9Sstevel@tonic-gate 	/*
23757c478bd9Sstevel@tonic-gate 	 * Save foreign address. Needed for AF_UNIX as well as
23767c478bd9Sstevel@tonic-gate 	 * transport providers that do not support TI_GETPEERNAME.
23777c478bd9Sstevel@tonic-gate 	 * Also used for cached foreign address for TCP and UDP.
23787c478bd9Sstevel@tonic-gate 	 */
23790f1702c5SYu Xiangning 	if (namelen > (t_uscalar_t)sti->sti_faddr_maxlen) {
23807c478bd9Sstevel@tonic-gate 		error = EINVAL;
23817c478bd9Sstevel@tonic-gate 		goto done;
23827c478bd9Sstevel@tonic-gate 	}
23830f1702c5SYu Xiangning 	sti->sti_faddr_len = (socklen_t)namelen;
23840f1702c5SYu Xiangning 	ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen);
23850f1702c5SYu Xiangning 	bcopy(name, sti->sti_faddr_sa, namelen);
23860f1702c5SYu Xiangning 	sti->sti_faddr_valid = 1;
23877c478bd9Sstevel@tonic-gate 
23887c478bd9Sstevel@tonic-gate 	if (so->so_family == AF_UNIX) {
23890f1702c5SYu Xiangning 		if (sti->sti_faddr_noxlate) {
23907c478bd9Sstevel@tonic-gate 			/*
2391b521c41bSGordon Ross 			 * sti_faddr is a transport-level address, so
2392b521c41bSGordon Ross 			 * don't pass it as an option.  Do save it in
2393b521c41bSGordon Ross 			 * sti_ux_faddr, used for connected DG send.
23947c478bd9Sstevel@tonic-gate 			 */
23957c478bd9Sstevel@tonic-gate 			src = NULL;
23967c478bd9Sstevel@tonic-gate 			srclen = 0;
2397b521c41bSGordon Ross 			addr = sti->sti_faddr_sa;
2398b521c41bSGordon Ross 			addrlen = (t_uscalar_t)sti->sti_faddr_len;
2399b521c41bSGordon Ross 			bcopy(addr, &sti->sti_ux_faddr,
2400b521c41bSGordon Ross 			    sizeof (sti->sti_ux_faddr));
24017c478bd9Sstevel@tonic-gate 		} else {
24027c478bd9Sstevel@tonic-gate 			/*
24037c478bd9Sstevel@tonic-gate 			 * Pass the sockaddr_un source address as an option
24047c478bd9Sstevel@tonic-gate 			 * and translate the remote address.
24050f1702c5SYu Xiangning 			 * Holding so_lock thus sti_laddr_sa can not change.
24067c478bd9Sstevel@tonic-gate 			 */
24070f1702c5SYu Xiangning 			src = sti->sti_laddr_sa;
24080f1702c5SYu Xiangning 			srclen = (t_uscalar_t)sti->sti_laddr_len;
24097c478bd9Sstevel@tonic-gate 			dprintso(so, 1,
24107c478bd9Sstevel@tonic-gate 			    ("sotpi_connect UNIX: srclen %d, src %p\n",
24117c478bd9Sstevel@tonic-gate 			    srclen, src));
2412f012ee0cSGordon Ross 			/*
2413f012ee0cSGordon Ross 			 * Translate the destination address into our
2414f012ee0cSGordon Ross 			 * internal form, and save it in sti_ux_faddr.
2415f012ee0cSGordon Ross 			 * After this call, addr==&sti->sti_ux_taddr,
2416f012ee0cSGordon Ross 			 * and we copy that to sti->sti_ux_faddr so
2417f012ee0cSGordon Ross 			 * we save the connected peer address.
2418f012ee0cSGordon Ross 			 */
24197c478bd9Sstevel@tonic-gate 			error = so_ux_addr_xlate(so,
24200f1702c5SYu Xiangning 			    sti->sti_faddr_sa, (socklen_t)sti->sti_faddr_len,
24217c478bd9Sstevel@tonic-gate 			    (flags & _SOCONNECT_XPG4_2),
24227c478bd9Sstevel@tonic-gate 			    &addr, &addrlen);
24237c478bd9Sstevel@tonic-gate 			if (error)
24247c478bd9Sstevel@tonic-gate 				goto bad;
2425f012ee0cSGordon Ross 			bcopy(&sti->sti_ux_taddr, &sti->sti_ux_faddr,
2426f012ee0cSGordon Ross 			    sizeof (sti->sti_ux_faddr));
24277c478bd9Sstevel@tonic-gate 		}
24287c478bd9Sstevel@tonic-gate 	} else {
24290f1702c5SYu Xiangning 		addr = sti->sti_faddr_sa;
24300f1702c5SYu Xiangning 		addrlen = (t_uscalar_t)sti->sti_faddr_len;
24317c478bd9Sstevel@tonic-gate 		src = NULL;
24327c478bd9Sstevel@tonic-gate 		srclen = 0;
24337c478bd9Sstevel@tonic-gate 	}
24347c478bd9Sstevel@tonic-gate 	/*
24357c478bd9Sstevel@tonic-gate 	 * When connecting a datagram socket we issue the SO_DGRAM_ERRIND
24367c478bd9Sstevel@tonic-gate 	 * option which asks the transport provider to send T_UDERR_IND
24377c478bd9Sstevel@tonic-gate 	 * messages. These T_UDERR_IND messages are used to return connected
24387c478bd9Sstevel@tonic-gate 	 * style errors (e.g. ECONNRESET) for connected datagram sockets.
24397c478bd9Sstevel@tonic-gate 	 *
24407c478bd9Sstevel@tonic-gate 	 * In addition, for UDP (and SOCK_RAW AF_INET{,6} sockets)
24417c478bd9Sstevel@tonic-gate 	 * we send down a T_CONN_REQ. This is needed to let the
24427c478bd9Sstevel@tonic-gate 	 * transport assign a local address that is consistent with
24437c478bd9Sstevel@tonic-gate 	 * the remote address. Applications depend on a getsockname()
24447c478bd9Sstevel@tonic-gate 	 * after a connect() to retrieve the "source" IP address for
24457c478bd9Sstevel@tonic-gate 	 * the connected socket.  Invalidate the cached local address
24467c478bd9Sstevel@tonic-gate 	 * to force getsockname() to enquire of the transport.
24477c478bd9Sstevel@tonic-gate 	 */
24487c478bd9Sstevel@tonic-gate 	if (!(so->so_mode & SM_CONNREQUIRED)) {
24497c478bd9Sstevel@tonic-gate 		/*
24507c478bd9Sstevel@tonic-gate 		 * Datagram socket.
24517c478bd9Sstevel@tonic-gate 		 */
24527c478bd9Sstevel@tonic-gate 		int32_t val;
24537c478bd9Sstevel@tonic-gate 
24547c478bd9Sstevel@tonic-gate 		so_unlock_single(so, SOLOCKED);
24557c478bd9Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
24567c478bd9Sstevel@tonic-gate 
24577c478bd9Sstevel@tonic-gate 		val = 1;
24587c478bd9Sstevel@tonic-gate 		(void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND,
24590f1702c5SYu Xiangning 		    &val, (t_uscalar_t)sizeof (val), cr);
24607c478bd9Sstevel@tonic-gate 
24617c478bd9Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
24627c478bd9Sstevel@tonic-gate 		so_lock_single(so);	/* Set SOLOCKED */
24637c478bd9Sstevel@tonic-gate 		if ((so->so_family != AF_INET && so->so_family != AF_INET6) ||
24647c478bd9Sstevel@tonic-gate 		    (so->so_type != SOCK_DGRAM && so->so_type != SOCK_RAW) ||
24657c478bd9Sstevel@tonic-gate 		    soconnect_tpi_udp) {
24667c478bd9Sstevel@tonic-gate 			soisconnected(so);
24677c478bd9Sstevel@tonic-gate 			goto done;
24687c478bd9Sstevel@tonic-gate 		}
24697c478bd9Sstevel@tonic-gate 		/*
24707c478bd9Sstevel@tonic-gate 		 * Send down T_CONN_REQ etc.
24717c478bd9Sstevel@tonic-gate 		 * Clear fflag to avoid returning EWOULDBLOCK.
24727c478bd9Sstevel@tonic-gate 		 */
24737c478bd9Sstevel@tonic-gate 		fflag = 0;
24747c478bd9Sstevel@tonic-gate 		ASSERT(so->so_family != AF_UNIX);
24750f1702c5SYu Xiangning 		sti->sti_laddr_valid = 0;
24760f1702c5SYu Xiangning 	} else if (sti->sti_laddr_len != 0) {
24777c478bd9Sstevel@tonic-gate 		/*
24787c478bd9Sstevel@tonic-gate 		 * If the local address or port was "any" then it may be
24797c478bd9Sstevel@tonic-gate 		 * changed by the transport as a result of the
24807c478bd9Sstevel@tonic-gate 		 * connect.  Invalidate the cached version if we have one.
24817c478bd9Sstevel@tonic-gate 		 */
24827c478bd9Sstevel@tonic-gate 		switch (so->so_family) {
24837c478bd9Sstevel@tonic-gate 		case AF_INET:
24840f1702c5SYu Xiangning 			ASSERT(sti->sti_laddr_len == (socklen_t)sizeof (sin_t));
24850f1702c5SYu Xiangning 			if (((sin_t *)sti->sti_laddr_sa)->sin_addr.s_addr ==
24867c478bd9Sstevel@tonic-gate 			    INADDR_ANY ||
24870f1702c5SYu Xiangning 			    ((sin_t *)sti->sti_laddr_sa)->sin_port == 0)
24880f1702c5SYu Xiangning 				sti->sti_laddr_valid = 0;
24897c478bd9Sstevel@tonic-gate 			break;
24907c478bd9Sstevel@tonic-gate 
24917c478bd9Sstevel@tonic-gate 		case AF_INET6:
24920f1702c5SYu Xiangning 			ASSERT(sti->sti_laddr_len ==
24930f1702c5SYu Xiangning 			    (socklen_t)sizeof (sin6_t));
24947c478bd9Sstevel@tonic-gate 			if (IN6_IS_ADDR_UNSPECIFIED(
24950f1702c5SYu Xiangning 			    &((sin6_t *)sti->sti_laddr_sa) ->sin6_addr) ||
24967c478bd9Sstevel@tonic-gate 			    IN6_IS_ADDR_V4MAPPED_ANY(
24970f1702c5SYu Xiangning 			    &((sin6_t *)sti->sti_laddr_sa)->sin6_addr) ||
24980f1702c5SYu Xiangning 			    ((sin6_t *)sti->sti_laddr_sa)->sin6_port == 0)
24990f1702c5SYu Xiangning 				sti->sti_laddr_valid = 0;
25007c478bd9Sstevel@tonic-gate 			break;
25017c478bd9Sstevel@tonic-gate 
25027c478bd9Sstevel@tonic-gate 		default:
25037c478bd9Sstevel@tonic-gate 			break;
25047c478bd9Sstevel@tonic-gate 		}
25057c478bd9Sstevel@tonic-gate 	}
25067c478bd9Sstevel@tonic-gate 
25077c478bd9Sstevel@tonic-gate 	/*
25087c478bd9Sstevel@tonic-gate 	 * Check for failure of an earlier call
25097c478bd9Sstevel@tonic-gate 	 */
25107c478bd9Sstevel@tonic-gate 	if (so->so_error != 0)
25117c478bd9Sstevel@tonic-gate 		goto so_bad;
25127c478bd9Sstevel@tonic-gate 
25137c478bd9Sstevel@tonic-gate 	/*
25147c478bd9Sstevel@tonic-gate 	 * Send down T_CONN_REQ. Message was allocated above.
25157c478bd9Sstevel@tonic-gate 	 */
25167c478bd9Sstevel@tonic-gate 	conn_req.PRIM_type = T_CONN_REQ;
25177c478bd9Sstevel@tonic-gate 	conn_req.DEST_length = addrlen;
25187c478bd9Sstevel@tonic-gate 	conn_req.DEST_offset = (t_scalar_t)sizeof (conn_req);
25197c478bd9Sstevel@tonic-gate 	if (srclen == 0) {
25207c478bd9Sstevel@tonic-gate 		conn_req.OPT_length = 0;
25217c478bd9Sstevel@tonic-gate 		conn_req.OPT_offset = 0;
25227c478bd9Sstevel@tonic-gate 		soappendmsg(mp, &conn_req, sizeof (conn_req));
25237c478bd9Sstevel@tonic-gate 		soappendmsg(mp, addr, addrlen);
25247c478bd9Sstevel@tonic-gate 	} else {
25257c478bd9Sstevel@tonic-gate 		/*
25267c478bd9Sstevel@tonic-gate 		 * There is a AF_UNIX sockaddr_un to include as a source
25277c478bd9Sstevel@tonic-gate 		 * address option.
25287c478bd9Sstevel@tonic-gate 		 */
25297c478bd9Sstevel@tonic-gate 		struct T_opthdr toh;
25307c478bd9Sstevel@tonic-gate 
25317c478bd9Sstevel@tonic-gate 		toh.level = SOL_SOCKET;
25327c478bd9Sstevel@tonic-gate 		toh.name = SO_SRCADDR;
25337c478bd9Sstevel@tonic-gate 		toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr));
25347c478bd9Sstevel@tonic-gate 		toh.status = 0;
25357c478bd9Sstevel@tonic-gate 		conn_req.OPT_length =
25367c478bd9Sstevel@tonic-gate 		    (t_scalar_t)(sizeof (toh) + _TPI_ALIGN_TOPT(srclen));
25377c478bd9Sstevel@tonic-gate 		conn_req.OPT_offset = (t_scalar_t)(sizeof (conn_req) +
25387c478bd9Sstevel@tonic-gate 		    _TPI_ALIGN_TOPT(addrlen));
25397c478bd9Sstevel@tonic-gate 
25407c478bd9Sstevel@tonic-gate 		soappendmsg(mp, &conn_req, sizeof (conn_req));
25417c478bd9Sstevel@tonic-gate 		soappendmsg(mp, addr, addrlen);
25427c478bd9Sstevel@tonic-gate 		mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen;
25437c478bd9Sstevel@tonic-gate 		soappendmsg(mp, &toh, sizeof (toh));
25447c478bd9Sstevel@tonic-gate 		soappendmsg(mp, src, srclen);
25457c478bd9Sstevel@tonic-gate 		mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen;
25467c478bd9Sstevel@tonic-gate 		ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
25477c478bd9Sstevel@tonic-gate 	}
25487c478bd9Sstevel@tonic-gate 	/*
25497c478bd9Sstevel@tonic-gate 	 * Set SS_ISCONNECTING before sending down the T_CONN_REQ
25507c478bd9Sstevel@tonic-gate 	 * in order to have the right state when the T_CONN_CON shows up.
25517c478bd9Sstevel@tonic-gate 	 */
25527c478bd9Sstevel@tonic-gate 	soisconnecting(so);
25537c478bd9Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
25547c478bd9Sstevel@tonic-gate 
2555005d3febSMarek Pospisil 	if (AU_AUDITING())
25567c478bd9Sstevel@tonic-gate 		audit_sock(T_CONN_REQ, strvp2wq(SOTOV(so)), mp, 0);
25577c478bd9Sstevel@tonic-gate 
25587c478bd9Sstevel@tonic-gate 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
25597c478bd9Sstevel@tonic-gate 	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
25607c478bd9Sstevel@tonic-gate 	mp = NULL;
25617c478bd9Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
25627c478bd9Sstevel@tonic-gate 	if (error != 0)
25637c478bd9Sstevel@tonic-gate 		goto bad;
25647c478bd9Sstevel@tonic-gate 
25657c478bd9Sstevel@tonic-gate 	if ((error = sowaitokack(so, T_CONN_REQ)) != 0)
25667c478bd9Sstevel@tonic-gate 		goto bad;
25677c478bd9Sstevel@tonic-gate 
25687c478bd9Sstevel@tonic-gate 	/* Allow other threads to access the socket */
25697c478bd9Sstevel@tonic-gate 	so_unlock_single(so, SOLOCKED);
25707c478bd9Sstevel@tonic-gate 	need_unlock = B_FALSE;
25717c478bd9Sstevel@tonic-gate 
25727c478bd9Sstevel@tonic-gate 	/*
25737c478bd9Sstevel@tonic-gate 	 * Wait until we get a T_CONN_CON or an error
25747c478bd9Sstevel@tonic-gate 	 */
25757c478bd9Sstevel@tonic-gate 	if ((error = sowaitconnected(so, fflag, 0)) != 0) {
25767c478bd9Sstevel@tonic-gate 		so_lock_single(so);	/* Set SOLOCKED */
25777c478bd9Sstevel@tonic-gate 		need_unlock = B_TRUE;
25787c478bd9Sstevel@tonic-gate 	}
25797c478bd9Sstevel@tonic-gate 
25807c478bd9Sstevel@tonic-gate done:
25817c478bd9Sstevel@tonic-gate 	freemsg(mp);
25827c478bd9Sstevel@tonic-gate 	switch (error) {
25837c478bd9Sstevel@tonic-gate 	case EINPROGRESS:
25847c478bd9Sstevel@tonic-gate 	case EALREADY:
25857c478bd9Sstevel@tonic-gate 	case EISCONN:
25867c478bd9Sstevel@tonic-gate 	case EINTR:
25877c478bd9Sstevel@tonic-gate 		/* Non-fatal errors */
25880f1702c5SYu Xiangning 		sti->sti_laddr_valid = 0;
25897c478bd9Sstevel@tonic-gate 		/* FALLTHRU */
25907c478bd9Sstevel@tonic-gate 	case 0:
25917c478bd9Sstevel@tonic-gate 		break;
25927c478bd9Sstevel@tonic-gate 	default:
25937c478bd9Sstevel@tonic-gate 		ASSERT(need_unlock);
25947c478bd9Sstevel@tonic-gate 		/*
25957c478bd9Sstevel@tonic-gate 		 * Fatal errors: clear SS_ISCONNECTING in case it was set,
25967c478bd9Sstevel@tonic-gate 		 * and invalidate local-address cache
25977c478bd9Sstevel@tonic-gate 		 */
25980f1702c5SYu Xiangning 		so->so_state &= ~SS_ISCONNECTING;
25990f1702c5SYu Xiangning 		sti->sti_laddr_valid = 0;
26007c478bd9Sstevel@tonic-gate 		/* A discon_ind might have already unbound us */
26017c478bd9Sstevel@tonic-gate 		if ((flags & _SOCONNECT_DID_BIND) &&
26027c478bd9Sstevel@tonic-gate 		    (so->so_state & SS_ISBOUND)) {
26037c478bd9Sstevel@tonic-gate 			int err;
26047c478bd9Sstevel@tonic-gate 
26057c478bd9Sstevel@tonic-gate 			err = sotpi_unbind(so, 0);
26067c478bd9Sstevel@tonic-gate 			/* LINTED - statement has no conseq */
26077c478bd9Sstevel@tonic-gate 			if (err) {
26087c478bd9Sstevel@tonic-gate 				eprintsoline(so, err);
26097c478bd9Sstevel@tonic-gate 			}
26107c478bd9Sstevel@tonic-gate 		}
26117c478bd9Sstevel@tonic-gate 		break;
26127c478bd9Sstevel@tonic-gate 	}
26137c478bd9Sstevel@tonic-gate 	if (need_unlock)
26147c478bd9Sstevel@tonic-gate 		so_unlock_single(so, SOLOCKED);
26157c478bd9Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
26167c478bd9Sstevel@tonic-gate 	return (error);
26177c478bd9Sstevel@tonic-gate 
26180f1702c5SYu Xiangning so_bad:	error = sogeterr(so, B_TRUE);
26197c478bd9Sstevel@tonic-gate bad:	eprintsoline(so, error);
26207c478bd9Sstevel@tonic-gate 	goto done;
26217c478bd9Sstevel@tonic-gate }
26227c478bd9Sstevel@tonic-gate 
26230f1702c5SYu Xiangning /* ARGSUSED */
26247c478bd9Sstevel@tonic-gate int
26250f1702c5SYu Xiangning sotpi_shutdown(struct sonode *so, int how, struct cred *cr)
26267c478bd9Sstevel@tonic-gate {
26277c478bd9Sstevel@tonic-gate 	struct T_ordrel_req	ordrel_req;
26287c478bd9Sstevel@tonic-gate 	mblk_t			*mp;
26297c478bd9Sstevel@tonic-gate 	uint_t			old_state, state_change;
26307c478bd9Sstevel@tonic-gate 	int			error = 0;
26310f1702c5SYu Xiangning 	sotpi_info_t		*sti = SOTOTPI(so);
26327c478bd9Sstevel@tonic-gate 
26337c478bd9Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_shutdown(%p, %d) %s\n",
2634903a11ebSrh87107 	    (void *)so, how, pr_state(so->so_state, so->so_mode)));
26357c478bd9Sstevel@tonic-gate 
26367c478bd9Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
26377c478bd9Sstevel@tonic-gate 	so_lock_single(so);	/* Set SOLOCKED */
26387c478bd9Sstevel@tonic-gate 
26397c478bd9Sstevel@tonic-gate 	/*
26407c478bd9Sstevel@tonic-gate 	 * SunOS 4.X has no check for datagram sockets.
26417c478bd9Sstevel@tonic-gate 	 * 5.X checks that it is connected (ENOTCONN)
26427c478bd9Sstevel@tonic-gate 	 * X/Open requires that we check the connected state.
26437c478bd9Sstevel@tonic-gate 	 */
26447c478bd9Sstevel@tonic-gate 	if (!(so->so_state & SS_ISCONNECTED)) {
26457c478bd9Sstevel@tonic-gate 		if (!xnet_skip_checks) {
26467c478bd9Sstevel@tonic-gate 			error = ENOTCONN;
26477c478bd9Sstevel@tonic-gate 			if (xnet_check_print) {
26487c478bd9Sstevel@tonic-gate 				printf("sockfs: X/Open shutdown check "
26497c478bd9Sstevel@tonic-gate 				    "caused ENOTCONN\n");
26507c478bd9Sstevel@tonic-gate 			}
26517c478bd9Sstevel@tonic-gate 		}
26527c478bd9Sstevel@tonic-gate 		goto done;
26537c478bd9Sstevel@tonic-gate 	}
26547c478bd9Sstevel@tonic-gate 	/*
26557c478bd9Sstevel@tonic-gate 	 * Record the current state and then perform any state changes.
26567c478bd9Sstevel@tonic-gate 	 * Then use the difference between the old and new states to
26577c478bd9Sstevel@tonic-gate 	 * determine which messages need to be sent.
26587c478bd9Sstevel@tonic-gate 	 * This prevents e.g. duplicate T_ORDREL_REQ when there are
26597c478bd9Sstevel@tonic-gate 	 * duplicate calls to shutdown().
26607c478bd9Sstevel@tonic-gate 	 */
26617c478bd9Sstevel@tonic-gate 	old_state = so->so_state;
26627c478bd9Sstevel@tonic-gate 
26637c478bd9Sstevel@tonic-gate 	switch (how) {
26647c478bd9Sstevel@tonic-gate 	case 0:
26657c478bd9Sstevel@tonic-gate 		socantrcvmore(so);
26667c478bd9Sstevel@tonic-gate 		break;
26677c478bd9Sstevel@tonic-gate 	case 1:
26687c478bd9Sstevel@tonic-gate 		socantsendmore(so);
26697c478bd9Sstevel@tonic-gate 		break;
26707c478bd9Sstevel@tonic-gate 	case 2:
26717c478bd9Sstevel@tonic-gate 		socantsendmore(so);
26727c478bd9Sstevel@tonic-gate 		socantrcvmore(so);
26737c478bd9Sstevel@tonic-gate 		break;
26747c478bd9Sstevel@tonic-gate 	default:
26757c478bd9Sstevel@tonic-gate 		error = EINVAL;
26767c478bd9Sstevel@tonic-gate 		goto done;
26777c478bd9Sstevel@tonic-gate 	}
26787c478bd9Sstevel@tonic-gate 
26797c478bd9Sstevel@tonic-gate 	/*
26807c478bd9Sstevel@tonic-gate 	 * Assumes that the SS_CANT* flags are never cleared in the above code.
26817c478bd9Sstevel@tonic-gate 	 */
26827c478bd9Sstevel@tonic-gate 	state_change = (so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) -
26837c478bd9Sstevel@tonic-gate 	    (old_state & (SS_CANTRCVMORE|SS_CANTSENDMORE));
26847c478bd9Sstevel@tonic-gate 	ASSERT((state_change & ~(SS_CANTRCVMORE|SS_CANTSENDMORE)) == 0);
26857c478bd9Sstevel@tonic-gate 
26867c478bd9Sstevel@tonic-gate 	switch (state_change) {
26877c478bd9Sstevel@tonic-gate 	case 0:
26887c478bd9Sstevel@tonic-gate 		dprintso(so, 1,
26897c478bd9Sstevel@tonic-gate 		    ("sotpi_shutdown: nothing to send in state 0x%x\n",
26907c478bd9Sstevel@tonic-gate 		    so->so_state));
26917c478bd9Sstevel@tonic-gate 		goto done;
26927c478bd9Sstevel@tonic-gate 
26937c478bd9Sstevel@tonic-gate 	case SS_CANTRCVMORE:
26947c478bd9Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
26957c478bd9Sstevel@tonic-gate 		strseteof(SOTOV(so), 1);
26967c478bd9Sstevel@tonic-gate 		/*
26977c478bd9Sstevel@tonic-gate 		 * strseteof takes care of read side wakeups,
26987c478bd9Sstevel@tonic-gate 		 * pollwakeups, and signals.
26997c478bd9Sstevel@tonic-gate 		 */
27007c478bd9Sstevel@tonic-gate 		/*
27017c478bd9Sstevel@tonic-gate 		 * Get the read lock before flushing data to avoid problems
27027c478bd9Sstevel@tonic-gate 		 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg.
27037c478bd9Sstevel@tonic-gate 		 */
27047c478bd9Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
27057c478bd9Sstevel@tonic-gate 		(void) so_lock_read(so, 0);	/* Set SOREADLOCKED */
27067c478bd9Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
27077c478bd9Sstevel@tonic-gate 
27087c478bd9Sstevel@tonic-gate 		/* Flush read side queue */
27097c478bd9Sstevel@tonic-gate 		strflushrq(SOTOV(so), FLUSHALL);
27107c478bd9Sstevel@tonic-gate 
27117c478bd9Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
27127c478bd9Sstevel@tonic-gate 		so_unlock_read(so);		/* Clear SOREADLOCKED */
27137c478bd9Sstevel@tonic-gate 		break;
27147c478bd9Sstevel@tonic-gate 
27157c478bd9Sstevel@tonic-gate 	case SS_CANTSENDMORE:
27167c478bd9Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
27177c478bd9Sstevel@tonic-gate 		strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
27187c478bd9Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
27197c478bd9Sstevel@tonic-gate 		break;
27207c478bd9Sstevel@tonic-gate 
27217c478bd9Sstevel@tonic-gate 	case SS_CANTSENDMORE|SS_CANTRCVMORE:
27227c478bd9Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
27237c478bd9Sstevel@tonic-gate 		strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
27247c478bd9Sstevel@tonic-gate 		strseteof(SOTOV(so), 1);
27257c478bd9Sstevel@tonic-gate 		/*
27267c478bd9Sstevel@tonic-gate 		 * strseteof takes care of read side wakeups,
27277c478bd9Sstevel@tonic-gate 		 * pollwakeups, and signals.
27287c478bd9Sstevel@tonic-gate 		 */
27297c478bd9Sstevel@tonic-gate 		/*
27307c478bd9Sstevel@tonic-gate 		 * Get the read lock before flushing data to avoid problems
27317c478bd9Sstevel@tonic-gate 		 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg.
27327c478bd9Sstevel@tonic-gate 		 */
27337c478bd9Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
27347c478bd9Sstevel@tonic-gate 		(void) so_lock_read(so, 0);	/* Set SOREADLOCKED */
27357c478bd9Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
27367c478bd9Sstevel@tonic-gate 
27377c478bd9Sstevel@tonic-gate 		/* Flush read side queue */
27387c478bd9Sstevel@tonic-gate 		strflushrq(SOTOV(so), FLUSHALL);
27397c478bd9Sstevel@tonic-gate 
27407c478bd9Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
27417c478bd9Sstevel@tonic-gate 		so_unlock_read(so);		/* Clear SOREADLOCKED */
27427c478bd9Sstevel@tonic-gate 		break;
27437c478bd9Sstevel@tonic-gate 	}
27447c478bd9Sstevel@tonic-gate 
27457c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&so->so_lock));
27467c478bd9Sstevel@tonic-gate 
27477c478bd9Sstevel@tonic-gate 	/*
27487c478bd9Sstevel@tonic-gate 	 * If either SS_CANTSENDMORE or SS_CANTRCVMORE or both of them
27497c478bd9Sstevel@tonic-gate 	 * was set due to this call and the new state has both of them set:
27507c478bd9Sstevel@tonic-gate 	 *	Send the AF_UNIX close indication
27517c478bd9Sstevel@tonic-gate 	 *	For T_COTS send a discon_ind
27527c478bd9Sstevel@tonic-gate 	 *
27537c478bd9Sstevel@tonic-gate 	 * If cantsend was set due to this call:
27547c478bd9Sstevel@tonic-gate 	 *	For T_COTSORD send an ordrel_ind
27557c478bd9Sstevel@tonic-gate 	 *
27567c478bd9Sstevel@tonic-gate 	 * Note that for T_CLTS there is no message sent here.
27577c478bd9Sstevel@tonic-gate 	 */
27587c478bd9Sstevel@tonic-gate 	if ((so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) ==
27597c478bd9Sstevel@tonic-gate 	    (SS_CANTRCVMORE|SS_CANTSENDMORE)) {
27607c478bd9Sstevel@tonic-gate 		/*
27617c478bd9Sstevel@tonic-gate 		 * For SunOS 4.X compatibility we tell the other end
27627c478bd9Sstevel@tonic-gate 		 * that we are unable to receive at this point.
27637c478bd9Sstevel@tonic-gate 		 */
27640f1702c5SYu Xiangning 		if (so->so_family == AF_UNIX && sti->sti_serv_type != T_CLTS)
27657c478bd9Sstevel@tonic-gate 			so_unix_close(so);
27667c478bd9Sstevel@tonic-gate 
27670f1702c5SYu Xiangning 		if (sti->sti_serv_type == T_COTS)
27687c478bd9Sstevel@tonic-gate 			error = sodisconnect(so, -1, _SODISCONNECT_LOCK_HELD);
27697c478bd9Sstevel@tonic-gate 	}
27707c478bd9Sstevel@tonic-gate 	if ((state_change & SS_CANTSENDMORE) &&
27710f1702c5SYu Xiangning 	    (sti->sti_serv_type == T_COTS_ORD)) {
27727c478bd9Sstevel@tonic-gate 		/* Send an orderly release */
27737c478bd9Sstevel@tonic-gate 		ordrel_req.PRIM_type = T_ORDREL_REQ;
27747c478bd9Sstevel@tonic-gate 
27757c478bd9Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
27767c478bd9Sstevel@tonic-gate 		mp = soallocproto1(&ordrel_req, sizeof (ordrel_req),
2777de8c4a14SErik Nordmark 		    0, _ALLOC_SLEEP, cr);
27787c478bd9Sstevel@tonic-gate 		/*
27797c478bd9Sstevel@tonic-gate 		 * Send down the T_ORDREL_REQ even if there is flow control.
27807c478bd9Sstevel@tonic-gate 		 * This prevents shutdown from blocking.
27817c478bd9Sstevel@tonic-gate 		 * Note that there is no T_OK_ACK for ordrel_req.
27827c478bd9Sstevel@tonic-gate 		 */
27837c478bd9Sstevel@tonic-gate 		error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
27847c478bd9Sstevel@tonic-gate 		    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
27857c478bd9Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
27867c478bd9Sstevel@tonic-gate 		if (error) {
27877c478bd9Sstevel@tonic-gate 			eprintsoline(so, error);
27887c478bd9Sstevel@tonic-gate 			goto done;
27897c478bd9Sstevel@tonic-gate 		}
27907c478bd9Sstevel@tonic-gate 	}
27917c478bd9Sstevel@tonic-gate 
27927c478bd9Sstevel@tonic-gate done:
27937c478bd9Sstevel@tonic-gate 	so_unlock_single(so, SOLOCKED);
27947c478bd9Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
27957c478bd9Sstevel@tonic-gate 	return (error);
27967c478bd9Sstevel@tonic-gate }
27977c478bd9Sstevel@tonic-gate 
/*
 * For any connected SOCK_STREAM/SOCK_SEQPACKET AF_UNIX socket we send
 * a zero-length T_OPTDATA_REQ with the SO_UNIX_CLOSE option to inform the peer
 * that we have closed.
 * Also, for connected AF_UNIX SOCK_DGRAM sockets we send a zero-length
 * T_UNITDATA_REQ containing the same option.
 *
 * For SOCK_DGRAM half-connections (somebody connected to this end
 * but this end is not connected) we don't know where to send any
 * SO_UNIX_CLOSE.
 *
 * We have to ignore stream head errors just in case there has been
 * a shutdown(output).
 * Ignore any flow control to try to get the message more quickly to the peer.
 * While locally ignoring flow control solves the problem when there
 * is only the loopback transport on the stream, it would not provide
 * the correct AF_UNIX socket semantics when one or more modules have
 * been pushed.
 */
28177c478bd9Sstevel@tonic-gate void
28187c478bd9Sstevel@tonic-gate so_unix_close(struct sonode *so)
28197c478bd9Sstevel@tonic-gate {
28207c478bd9Sstevel@tonic-gate 	struct T_opthdr	toh;
28217c478bd9Sstevel@tonic-gate 	mblk_t		*mp;
28220f1702c5SYu Xiangning 	sotpi_info_t	*sti = SOTOTPI(so);
28237c478bd9Sstevel@tonic-gate 
28247c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&so->so_lock));
28257c478bd9Sstevel@tonic-gate 
28267c478bd9Sstevel@tonic-gate 	ASSERT(so->so_family == AF_UNIX);
28277c478bd9Sstevel@tonic-gate 
28287c478bd9Sstevel@tonic-gate 	if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) !=
28297c478bd9Sstevel@tonic-gate 	    (SS_ISCONNECTED|SS_ISBOUND))
28307c478bd9Sstevel@tonic-gate 		return;
28317c478bd9Sstevel@tonic-gate 
28327c478bd9Sstevel@tonic-gate 	dprintso(so, 1, ("so_unix_close(%p) %s\n",
2833903a11ebSrh87107 	    (void *)so, pr_state(so->so_state, so->so_mode)));
28347c478bd9Sstevel@tonic-gate 
28357c478bd9Sstevel@tonic-gate 	toh.level = SOL_SOCKET;
28367c478bd9Sstevel@tonic-gate 	toh.name = SO_UNIX_CLOSE;
28377c478bd9Sstevel@tonic-gate 
28387c478bd9Sstevel@tonic-gate 	/* zero length + header */
28397c478bd9Sstevel@tonic-gate 	toh.len = (t_uscalar_t)sizeof (struct T_opthdr);
28407c478bd9Sstevel@tonic-gate 	toh.status = 0;
28417c478bd9Sstevel@tonic-gate 
28427c478bd9Sstevel@tonic-gate 	if (so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) {
28437c478bd9Sstevel@tonic-gate 		struct T_optdata_req tdr;
28447c478bd9Sstevel@tonic-gate 
28457c478bd9Sstevel@tonic-gate 		tdr.PRIM_type = T_OPTDATA_REQ;
28467c478bd9Sstevel@tonic-gate 		tdr.DATA_flag = 0;
28477c478bd9Sstevel@tonic-gate 
28487c478bd9Sstevel@tonic-gate 		tdr.OPT_length = (t_scalar_t)sizeof (toh);
28497c478bd9Sstevel@tonic-gate 		tdr.OPT_offset = (t_scalar_t)sizeof (tdr);
28507c478bd9Sstevel@tonic-gate 
28517c478bd9Sstevel@tonic-gate 		/* NOTE: holding so_lock while sleeping */
28527c478bd9Sstevel@tonic-gate 		mp = soallocproto2(&tdr, sizeof (tdr),
2853de8c4a14SErik Nordmark 		    &toh, sizeof (toh), 0, _ALLOC_SLEEP, CRED());
28547c478bd9Sstevel@tonic-gate 	} else {
28557c478bd9Sstevel@tonic-gate 		struct T_unitdata_req	tudr;
28567c478bd9Sstevel@tonic-gate 		void			*addr;
28577c478bd9Sstevel@tonic-gate 		socklen_t		addrlen;
28587c478bd9Sstevel@tonic-gate 		void			*src;
28597c478bd9Sstevel@tonic-gate 		socklen_t		srclen;
28607c478bd9Sstevel@tonic-gate 		struct T_opthdr		toh2;
28617c478bd9Sstevel@tonic-gate 		t_scalar_t		size;
28627c478bd9Sstevel@tonic-gate 
28637c478bd9Sstevel@tonic-gate 		/*
2864f012ee0cSGordon Ross 		 * We know this is an AF_UNIX connected DGRAM socket.
2865f012ee0cSGordon Ross 		 * We therefore already have the destination address
2866f012ee0cSGordon Ross 		 * in the internal form needed for this send.  This is
2867f012ee0cSGordon Ross 		 * similar to the sosend_dgram call later in this file
2868f012ee0cSGordon Ross 		 * when there's no user-specified destination address.
28697c478bd9Sstevel@tonic-gate 		 */
28700f1702c5SYu Xiangning 		if (sti->sti_faddr_noxlate) {
28717c478bd9Sstevel@tonic-gate 			/*
28727c478bd9Sstevel@tonic-gate 			 * Already have a transport internal address. Do not
28737c478bd9Sstevel@tonic-gate 			 * pass any (transport internal) source address.
28747c478bd9Sstevel@tonic-gate 			 */
28750f1702c5SYu Xiangning 			addr = sti->sti_faddr_sa;
28760f1702c5SYu Xiangning 			addrlen = (t_uscalar_t)sti->sti_faddr_len;
28777c478bd9Sstevel@tonic-gate 			src = NULL;
28787c478bd9Sstevel@tonic-gate 			srclen = 0;
28797c478bd9Sstevel@tonic-gate 		} else {
28807c478bd9Sstevel@tonic-gate 			/*
28817c478bd9Sstevel@tonic-gate 			 * Pass the sockaddr_un source address as an option
28827c478bd9Sstevel@tonic-gate 			 * and translate the remote address.
28830f1702c5SYu Xiangning 			 * Holding so_lock thus sti_laddr_sa can not change.
28847c478bd9Sstevel@tonic-gate 			 */
28850f1702c5SYu Xiangning 			src = sti->sti_laddr_sa;
28860f1702c5SYu Xiangning 			srclen = (socklen_t)sti->sti_laddr_len;
28877c478bd9Sstevel@tonic-gate 			dprintso(so, 1,
28887c478bd9Sstevel@tonic-gate 			    ("so_ux_close: srclen %d, src %p\n",
28897c478bd9Sstevel@tonic-gate 			    srclen, src));
2890f012ee0cSGordon Ross 			/*
2891f012ee0cSGordon Ross 			 * Use the destination address saved in connect.
2892f012ee0cSGordon Ross 			 */
2893f012ee0cSGordon Ross 			addr = &sti->sti_ux_faddr;
2894f012ee0cSGordon Ross 			addrlen = sizeof (sti->sti_ux_faddr);
28957c478bd9Sstevel@tonic-gate 		}
28967c478bd9Sstevel@tonic-gate 		tudr.PRIM_type = T_UNITDATA_REQ;
28977c478bd9Sstevel@tonic-gate 		tudr.DEST_length = addrlen;
28987c478bd9Sstevel@tonic-gate 		tudr.DEST_offset = (t_scalar_t)sizeof (tudr);
28997c478bd9Sstevel@tonic-gate 		if (srclen == 0) {
29007c478bd9Sstevel@tonic-gate 			tudr.OPT_length = (t_scalar_t)sizeof (toh);
29017c478bd9Sstevel@tonic-gate 			tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) +
29027c478bd9Sstevel@tonic-gate 			    _TPI_ALIGN_TOPT(addrlen));
29037c478bd9Sstevel@tonic-gate 
29047c478bd9Sstevel@tonic-gate 			size = tudr.OPT_offset + tudr.OPT_length;
29057c478bd9Sstevel@tonic-gate 			/* NOTE: holding so_lock while sleeping */
29067c478bd9Sstevel@tonic-gate 			mp = soallocproto2(&tudr, sizeof (tudr),
2907de8c4a14SErik Nordmark 			    addr, addrlen, size, _ALLOC_SLEEP, CRED());
29087c478bd9Sstevel@tonic-gate 			mp->b_wptr += (_TPI_ALIGN_TOPT(addrlen) - addrlen);
29097c478bd9Sstevel@tonic-gate 			soappendmsg(mp, &toh, sizeof (toh));
29107c478bd9Sstevel@tonic-gate 		} else {
29117c478bd9Sstevel@tonic-gate 			/*
29127c478bd9Sstevel@tonic-gate 			 * There is a AF_UNIX sockaddr_un to include as a
29137c478bd9Sstevel@tonic-gate 			 * source address option.
29147c478bd9Sstevel@tonic-gate 			 */
29157c478bd9Sstevel@tonic-gate 			tudr.OPT_length = (t_scalar_t)(2 * sizeof (toh) +
29167c478bd9Sstevel@tonic-gate 			    _TPI_ALIGN_TOPT(srclen));
29177c478bd9Sstevel@tonic-gate 			tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) +
29187c478bd9Sstevel@tonic-gate 			    _TPI_ALIGN_TOPT(addrlen));
29197c478bd9Sstevel@tonic-gate 
29207c478bd9Sstevel@tonic-gate 			toh2.level = SOL_SOCKET;
29217c478bd9Sstevel@tonic-gate 			toh2.name = SO_SRCADDR;
29227c478bd9Sstevel@tonic-gate 			toh2.len = (t_uscalar_t)(srclen +
29237c478bd9Sstevel@tonic-gate 			    sizeof (struct T_opthdr));
29247c478bd9Sstevel@tonic-gate 			toh2.status = 0;
29257c478bd9Sstevel@tonic-gate 
29267c478bd9Sstevel@tonic-gate 			size = tudr.OPT_offset + tudr.OPT_length;
29277c478bd9Sstevel@tonic-gate 
29287c478bd9Sstevel@tonic-gate 			/* NOTE: holding so_lock while sleeping */
29297c478bd9Sstevel@tonic-gate 			mp = soallocproto2(&tudr, sizeof (tudr),
2930de8c4a14SErik Nordmark 			    addr, addrlen, size, _ALLOC_SLEEP, CRED());
29317c478bd9Sstevel@tonic-gate 			mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen;
29327c478bd9Sstevel@tonic-gate 			soappendmsg(mp, &toh, sizeof (toh));
29337c478bd9Sstevel@tonic-gate 			soappendmsg(mp, &toh2, sizeof (toh2));
29347c478bd9Sstevel@tonic-gate 			soappendmsg(mp, src, srclen);
29357c478bd9Sstevel@tonic-gate 			mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen;
29367c478bd9Sstevel@tonic-gate 		}
29377c478bd9Sstevel@tonic-gate 		ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
29387c478bd9Sstevel@tonic-gate 	}
29397c478bd9Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
2940f012ee0cSGordon Ross 	(void) kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
29417c478bd9Sstevel@tonic-gate 	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
29427c478bd9Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
29437c478bd9Sstevel@tonic-gate }
29447c478bd9Sstevel@tonic-gate 
29457c478bd9Sstevel@tonic-gate /*
29467c478bd9Sstevel@tonic-gate  * Called by sotpi_recvmsg when reading a non-zero amount of data.
29477c478bd9Sstevel@tonic-gate  * In addition, the caller typically verifies that there is some
29487c478bd9Sstevel@tonic-gate  * potential state to clear by checking
29497c478bd9Sstevel@tonic-gate  *	if (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK))
29507c478bd9Sstevel@tonic-gate  * before calling this routine.
29517c478bd9Sstevel@tonic-gate  * Note that such a check can be made without holding so_lock since
29527c478bd9Sstevel@tonic-gate  * sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg
29530f1702c5SYu Xiangning  * decrements sti_oobsigcnt.
29547c478bd9Sstevel@tonic-gate  *
29557c478bd9Sstevel@tonic-gate  * When data is read *after* the point that all pending
29567c478bd9Sstevel@tonic-gate  * oob data has been consumed the oob indication is cleared.
29577c478bd9Sstevel@tonic-gate  *
29587c478bd9Sstevel@tonic-gate  * This logic keeps select/poll returning POLLRDBAND and
29597c478bd9Sstevel@tonic-gate  * SIOCATMARK returning true until we have read past
29607c478bd9Sstevel@tonic-gate  * the mark.
29617c478bd9Sstevel@tonic-gate  */
29627c478bd9Sstevel@tonic-gate static void
29637c478bd9Sstevel@tonic-gate sorecv_update_oobstate(struct sonode *so)
29647c478bd9Sstevel@tonic-gate {
29650f1702c5SYu Xiangning 	sotpi_info_t *sti = SOTOTPI(so);
29660f1702c5SYu Xiangning 
29677c478bd9Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
29687c478bd9Sstevel@tonic-gate 	ASSERT(so_verify_oobstate(so));
29697c478bd9Sstevel@tonic-gate 	dprintso(so, 1,
29707c478bd9Sstevel@tonic-gate 	    ("sorecv_update_oobstate: counts %d/%d state %s\n",
29710f1702c5SYu Xiangning 	    sti->sti_oobsigcnt,
29720f1702c5SYu Xiangning 	    sti->sti_oobcnt, pr_state(so->so_state, so->so_mode)));
29730f1702c5SYu Xiangning 	if (sti->sti_oobsigcnt == 0) {
29747c478bd9Sstevel@tonic-gate 		/* No more pending oob indications */
29757c478bd9Sstevel@tonic-gate 		so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK);
29767c478bd9Sstevel@tonic-gate 		freemsg(so->so_oobmsg);
29777c478bd9Sstevel@tonic-gate 		so->so_oobmsg = NULL;
29787c478bd9Sstevel@tonic-gate 	}
29797c478bd9Sstevel@tonic-gate 	ASSERT(so_verify_oobstate(so));
29807c478bd9Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
29817c478bd9Sstevel@tonic-gate }
29827c478bd9Sstevel@tonic-gate 
29837c478bd9Sstevel@tonic-gate /*
29847c478bd9Sstevel@tonic-gate  * Handle recv* calls for an so which has NL7C saved recv mblk_t(s).
29857c478bd9Sstevel@tonic-gate  */
29867c478bd9Sstevel@tonic-gate static int
29877c478bd9Sstevel@tonic-gate nl7c_sorecv(struct sonode *so, mblk_t **rmp, uio_t *uiop, rval_t *rp)
29887c478bd9Sstevel@tonic-gate {
29890f1702c5SYu Xiangning 	sotpi_info_t *sti = SOTOTPI(so);
29907c478bd9Sstevel@tonic-gate 	int	error = 0;
29917c478bd9Sstevel@tonic-gate 	mblk_t *tmp = NULL;
29927c478bd9Sstevel@tonic-gate 	mblk_t *pmp = NULL;
29930f1702c5SYu Xiangning 	mblk_t *nmp = sti->sti_nl7c_rcv_mp;
29947c478bd9Sstevel@tonic-gate 
29957c478bd9Sstevel@tonic-gate 	ASSERT(nmp != NULL);
29967c478bd9Sstevel@tonic-gate 
29977c478bd9Sstevel@tonic-gate 	while (nmp != NULL && uiop->uio_resid > 0) {
29987c478bd9Sstevel@tonic-gate 		ssize_t n;
29997c478bd9Sstevel@tonic-gate 
30007c478bd9Sstevel@tonic-gate 		if (DB_TYPE(nmp) == M_DATA) {
30017c478bd9Sstevel@tonic-gate 			/*
30027c478bd9Sstevel@tonic-gate 			 * We have some data, uiomove up to resid bytes.
30037c478bd9Sstevel@tonic-gate 			 */
30047c478bd9Sstevel@tonic-gate 			n = MIN(MBLKL(nmp), uiop->uio_resid);
30057c478bd9Sstevel@tonic-gate 			if (n > 0)
30067c478bd9Sstevel@tonic-gate 				error = uiomove(nmp->b_rptr, n, UIO_READ, uiop);
30077c478bd9Sstevel@tonic-gate 			nmp->b_rptr += n;
30087c478bd9Sstevel@tonic-gate 			if (nmp->b_rptr == nmp->b_wptr) {
30097c478bd9Sstevel@tonic-gate 				pmp = nmp;
30107c478bd9Sstevel@tonic-gate 				nmp = nmp->b_cont;
30117c478bd9Sstevel@tonic-gate 			}
30122c9e429eSbrutus 			if (error)
30132c9e429eSbrutus 				break;
30147c478bd9Sstevel@tonic-gate 		} else {
30157c478bd9Sstevel@tonic-gate 			/*
30167c478bd9Sstevel@tonic-gate 			 * We only handle data, save for caller to handle.
30177c478bd9Sstevel@tonic-gate 			 */
30187c478bd9Sstevel@tonic-gate 			if (pmp != NULL) {
30197c478bd9Sstevel@tonic-gate 				pmp->b_cont = nmp->b_cont;
30207c478bd9Sstevel@tonic-gate 			}
30217c478bd9Sstevel@tonic-gate 			nmp->b_cont = NULL;
30227c478bd9Sstevel@tonic-gate 			if (*rmp == NULL) {
30237c478bd9Sstevel@tonic-gate 				*rmp = nmp;
30247c478bd9Sstevel@tonic-gate 			} else {
30252c9e429eSbrutus 				tmp->b_cont = nmp;
30267c478bd9Sstevel@tonic-gate 			}
30277c478bd9Sstevel@tonic-gate 			nmp = nmp->b_cont;
30287c478bd9Sstevel@tonic-gate 			tmp = nmp;
30297c478bd9Sstevel@tonic-gate 		}
30307c478bd9Sstevel@tonic-gate 	}
30317c478bd9Sstevel@tonic-gate 	if (pmp != NULL) {
30327c478bd9Sstevel@tonic-gate 		/* Free any mblk_t(s) which we have consumed */
30337c478bd9Sstevel@tonic-gate 		pmp->b_cont = NULL;
30340f1702c5SYu Xiangning 		freemsg(sti->sti_nl7c_rcv_mp);
30357c478bd9Sstevel@tonic-gate 	}
30360f1702c5SYu Xiangning 	if ((sti->sti_nl7c_rcv_mp = nmp) == NULL) {
30372c9e429eSbrutus 		/* Last mblk_t so return the saved kstrgetmsg() rval/error */
30382c9e429eSbrutus 		if (error == 0) {
30390f1702c5SYu Xiangning 			rval_t	*p = (rval_t *)&sti->sti_nl7c_rcv_rval;
30402c9e429eSbrutus 
30412c9e429eSbrutus 			error = p->r_v.r_v2;
30422c9e429eSbrutus 			p->r_v.r_v2 = 0;
30432c9e429eSbrutus 		}
30440f1702c5SYu Xiangning 		rp->r_vals = sti->sti_nl7c_rcv_rval;
30450f1702c5SYu Xiangning 		sti->sti_nl7c_rcv_rval = 0;
30467c478bd9Sstevel@tonic-gate 	} else {
30477c478bd9Sstevel@tonic-gate 		/* More mblk_t(s) to process so no rval to return */
30487c478bd9Sstevel@tonic-gate 		rp->r_vals = 0;
30497c478bd9Sstevel@tonic-gate 	}
30507c478bd9Sstevel@tonic-gate 	return (error);
30517c478bd9Sstevel@tonic-gate }
30527c478bd9Sstevel@tonic-gate /*
30537c478bd9Sstevel@tonic-gate  * Receive the next message on the queue.
30547c478bd9Sstevel@tonic-gate  * If msg_controllen is non-zero when called the caller is interested in
30557c478bd9Sstevel@tonic-gate  * any received control info (options).
30567c478bd9Sstevel@tonic-gate  * If msg_namelen is non-zero when called the caller is interested in
30577c478bd9Sstevel@tonic-gate  * any received source address.
30587c478bd9Sstevel@tonic-gate  * The routine returns with msg_control and msg_name pointing to
30597c478bd9Sstevel@tonic-gate  * kmem_alloc'ed memory which the caller has to free.
30607c478bd9Sstevel@tonic-gate  */
30610f1702c5SYu Xiangning /* ARGSUSED */
30627c478bd9Sstevel@tonic-gate int
30630f1702c5SYu Xiangning sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
30640f1702c5SYu Xiangning     struct cred *cr)
30657c478bd9Sstevel@tonic-gate {
30667c478bd9Sstevel@tonic-gate 	union T_primitives	*tpr;
30677c478bd9Sstevel@tonic-gate 	mblk_t			*mp;
30687c478bd9Sstevel@tonic-gate 	uchar_t			pri;
30697c478bd9Sstevel@tonic-gate 	int			pflag, opflag;
30707c478bd9Sstevel@tonic-gate 	void			*control;
30717c478bd9Sstevel@tonic-gate 	t_uscalar_t		controllen;
30727c478bd9Sstevel@tonic-gate 	t_uscalar_t		namelen;
30737c478bd9Sstevel@tonic-gate 	int			so_state = so->so_state; /* Snapshot */
30747c478bd9Sstevel@tonic-gate 	ssize_t			saved_resid;
30757c478bd9Sstevel@tonic-gate 	rval_t			rval;
30767c478bd9Sstevel@tonic-gate 	int			flags;
30777c478bd9Sstevel@tonic-gate 	clock_t			timout;
307817169044Sbrutus 	int			error = 0;
30790f1702c5SYu Xiangning 	sotpi_info_t		*sti = SOTOTPI(so);
30807c478bd9Sstevel@tonic-gate 
30817c478bd9Sstevel@tonic-gate 	flags = msg->msg_flags;
30827c478bd9Sstevel@tonic-gate 	msg->msg_flags = 0;
30837c478bd9Sstevel@tonic-gate 
30847c478bd9Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_recvmsg(%p, %p, 0x%x) state %s err %d\n",
3085903a11ebSrh87107 	    (void *)so, (void *)msg, flags,
30867c478bd9Sstevel@tonic-gate 	    pr_state(so->so_state, so->so_mode), so->so_error));
30877c478bd9Sstevel@tonic-gate 
30880f1702c5SYu Xiangning 	if (so->so_version == SOV_STREAM) {
30890f1702c5SYu Xiangning 		so_update_attrs(so, SOACC);
30900f1702c5SYu Xiangning 		/* The imaginary "sockmod" has been popped - act as a stream */
30910f1702c5SYu Xiangning 		return (strread(SOTOV(so), uiop, cr));
30920f1702c5SYu Xiangning 	}
30930f1702c5SYu Xiangning 
30947c478bd9Sstevel@tonic-gate 	/*
30957c478bd9Sstevel@tonic-gate 	 * If we are not connected because we have never been connected
30967c478bd9Sstevel@tonic-gate 	 * we return ENOTCONN. If we have been connected (but are no longer
30977c478bd9Sstevel@tonic-gate 	 * connected) then SS_CANTRCVMORE is set and we let kstrgetmsg return
30987c478bd9Sstevel@tonic-gate 	 * the EOF.
30997c478bd9Sstevel@tonic-gate 	 *
31007c478bd9Sstevel@tonic-gate 	 * An alternative would be to post an ENOTCONN error in stream head
31017c478bd9Sstevel@tonic-gate 	 * (read+write) and clear it when we're connected. However, that error
31027c478bd9Sstevel@tonic-gate 	 * would cause incorrect poll/select behavior!
31037c478bd9Sstevel@tonic-gate 	 */
31047c478bd9Sstevel@tonic-gate 	if ((so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 &&
31057c478bd9Sstevel@tonic-gate 	    (so->so_mode & SM_CONNREQUIRED)) {
31067c478bd9Sstevel@tonic-gate 		return (ENOTCONN);
31077c478bd9Sstevel@tonic-gate 	}
31087c478bd9Sstevel@tonic-gate 
31097c478bd9Sstevel@tonic-gate 	/*
31107c478bd9Sstevel@tonic-gate 	 * Note: SunOS 4.X checks uio_resid == 0 before going to sleep (but
31117c478bd9Sstevel@tonic-gate 	 * after checking that the read queue is empty) and returns zero.
31127c478bd9Sstevel@tonic-gate 	 * This implementation will sleep (in kstrgetmsg) even if uio_resid
31137c478bd9Sstevel@tonic-gate 	 * is zero.
31147c478bd9Sstevel@tonic-gate 	 */
31157c478bd9Sstevel@tonic-gate 
31167c478bd9Sstevel@tonic-gate 	if (flags & MSG_OOB) {
31177c478bd9Sstevel@tonic-gate 		/* Check that the transport supports OOB */
31187c478bd9Sstevel@tonic-gate 		if (!(so->so_mode & SM_EXDATA))
31197c478bd9Sstevel@tonic-gate 			return (EOPNOTSUPP);
31200f1702c5SYu Xiangning 		so_update_attrs(so, SOACC);
31210f1702c5SYu Xiangning 		return (sorecvoob(so, msg, uiop, flags,
31220f1702c5SYu Xiangning 		    (so->so_options & SO_OOBINLINE)));
31237c478bd9Sstevel@tonic-gate 	}
31247c478bd9Sstevel@tonic-gate 
31250f1702c5SYu Xiangning 	so_update_attrs(so, SOACC);
31260f1702c5SYu Xiangning 
31277c478bd9Sstevel@tonic-gate 	/*
31287c478bd9Sstevel@tonic-gate 	 * Set msg_controllen and msg_namelen to zero here to make it
31297c478bd9Sstevel@tonic-gate 	 * simpler in the cases that no control or name is returned.
31307c478bd9Sstevel@tonic-gate 	 */
31317c478bd9Sstevel@tonic-gate 	controllen = msg->msg_controllen;
31327c478bd9Sstevel@tonic-gate 	namelen = msg->msg_namelen;
31337c478bd9Sstevel@tonic-gate 	msg->msg_controllen = 0;
31347c478bd9Sstevel@tonic-gate 	msg->msg_namelen = 0;
31357c478bd9Sstevel@tonic-gate 
31367c478bd9Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_recvmsg: namelen %d controllen %d\n",
31377c478bd9Sstevel@tonic-gate 	    namelen, controllen));
31387c478bd9Sstevel@tonic-gate 
31392c9e429eSbrutus 	mutex_enter(&so->so_lock);
31407c478bd9Sstevel@tonic-gate 	/*
31417c478bd9Sstevel@tonic-gate 	 * If an NL7C enabled socket and not waiting for write data.
31427c478bd9Sstevel@tonic-gate 	 */
31430f1702c5SYu Xiangning 	if ((sti->sti_nl7c_flags & (NL7C_ENABLED | NL7C_WAITWRITE)) ==
31447c478bd9Sstevel@tonic-gate 	    NL7C_ENABLED) {
31450f1702c5SYu Xiangning 		if (sti->sti_nl7c_uri) {
31462c9e429eSbrutus 			/* Close uri processing for a previous request */
31477c478bd9Sstevel@tonic-gate 			nl7c_close(so);
31487c478bd9Sstevel@tonic-gate 		}
31490f1702c5SYu Xiangning 		if ((so_state & SS_CANTRCVMORE) &&
31500f1702c5SYu Xiangning 		    sti->sti_nl7c_rcv_mp == NULL) {
31512c9e429eSbrutus 			/* Nothing to process, EOF */
31522c9e429eSbrutus 			mutex_exit(&so->so_lock);
31532c9e429eSbrutus 			return (0);
31540f1702c5SYu Xiangning 		} else if (sti->sti_nl7c_flags & NL7C_SOPERSIST) {
31552c9e429eSbrutus 			/* Persistent NL7C socket, try to process request */
31562c9e429eSbrutus 			boolean_t ret;
31572c9e429eSbrutus 
31582c9e429eSbrutus 			ret = nl7c_process(so,
31592c9e429eSbrutus 			    (so->so_state & (SS_NONBLOCK|SS_NDELAY)));
31600f1702c5SYu Xiangning 			rval.r_vals = sti->sti_nl7c_rcv_rval;
31612c9e429eSbrutus 			error = rval.r_v.r_v2;
31622c9e429eSbrutus 			if (error) {
31632c9e429eSbrutus 				/* Error of some sort, return it */
31642c9e429eSbrutus 				mutex_exit(&so->so_lock);
31652c9e429eSbrutus 				return (error);
31662c9e429eSbrutus 			}
31670f1702c5SYu Xiangning 			if (sti->sti_nl7c_flags &&
31680f1702c5SYu Xiangning 			    ! (sti->sti_nl7c_flags & NL7C_WAITWRITE)) {
31697c478bd9Sstevel@tonic-gate 				/*
31702c9e429eSbrutus 				 * Still an NL7C socket and no data
31712c9e429eSbrutus 				 * to pass up to the caller.
31722c9e429eSbrutus 				 */
31732c9e429eSbrutus 				mutex_exit(&so->so_lock);
31742c9e429eSbrutus 				if (ret) {
31752c9e429eSbrutus 					/* EOF */
31762c9e429eSbrutus 					return (0);
31772c9e429eSbrutus 				} else {
31782c9e429eSbrutus 					/* Need more data */
31792c9e429eSbrutus 					return (EAGAIN);
31802c9e429eSbrutus 				}
31812c9e429eSbrutus 			}
31822c9e429eSbrutus 		} else {
31832c9e429eSbrutus 			/*
31842c9e429eSbrutus 			 * Not persistent so no further NL7C processing.
31857c478bd9Sstevel@tonic-gate 			 */
31860f1702c5SYu Xiangning 			sti->sti_nl7c_flags = 0;
31877c478bd9Sstevel@tonic-gate 		}
31887c478bd9Sstevel@tonic-gate 	}
31897c478bd9Sstevel@tonic-gate 	/*
31907c478bd9Sstevel@tonic-gate 	 * Only one reader is allowed at any given time. This is needed
31917c478bd9Sstevel@tonic-gate 	 * for T_EXDATA handling and, in the future, MSG_WAITALL.
31927c478bd9Sstevel@tonic-gate 	 *
	 * This is slightly different than BSD behavior in that it fails with
31947c478bd9Sstevel@tonic-gate 	 * EWOULDBLOCK when using nonblocking io. In BSD the read queue access
31957c478bd9Sstevel@tonic-gate 	 * is single-threaded using sblock(), which is dropped while waiting
31967c478bd9Sstevel@tonic-gate 	 * for data to appear. The difference shows up e.g. if one
31977c478bd9Sstevel@tonic-gate 	 * file descriptor does not have O_NONBLOCK but a dup'ed file descriptor
31987c478bd9Sstevel@tonic-gate 	 * does use nonblocking io and different threads are reading each
31997c478bd9Sstevel@tonic-gate 	 * file descriptor. In BSD there would never be an EWOULDBLOCK error
32007c478bd9Sstevel@tonic-gate 	 * in this case as long as the read queue doesn't get empty.
32017c478bd9Sstevel@tonic-gate 	 * In this implementation the thread using nonblocking io can
32027c478bd9Sstevel@tonic-gate 	 * get an EWOULDBLOCK error due to the blocking thread executing
32037c478bd9Sstevel@tonic-gate 	 * e.g. in the uiomove in kstrgetmsg.
32047c478bd9Sstevel@tonic-gate 	 * This difference is not believed to be significant.
32057c478bd9Sstevel@tonic-gate 	 */
3206255daac4Sethindra 	/* Set SOREADLOCKED */
3207255daac4Sethindra 	error = so_lock_read_intr(so,
3208255daac4Sethindra 	    uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0));
32097c478bd9Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
32107c478bd9Sstevel@tonic-gate 	if (error)
32117c478bd9Sstevel@tonic-gate 		return (error);
32127c478bd9Sstevel@tonic-gate 
32137c478bd9Sstevel@tonic-gate 	/*
32147c478bd9Sstevel@tonic-gate 	 * Tell kstrgetmsg to not inspect the stream head errors until all
32157c478bd9Sstevel@tonic-gate 	 * queued data has been consumed.
32167c478bd9Sstevel@tonic-gate 	 * Use a timeout=-1 to wait forever unless MSG_DONTWAIT is set.
32177c478bd9Sstevel@tonic-gate 	 * Also, If uio_fmode indicates nonblocking kstrgetmsg will not block.
32187c478bd9Sstevel@tonic-gate 	 *
32197c478bd9Sstevel@tonic-gate 	 * MSG_WAITALL only applies to M_DATA and T_DATA_IND messages and
32207c478bd9Sstevel@tonic-gate 	 * to T_OPTDATA_IND that do not contain any user-visible control msg.
32217c478bd9Sstevel@tonic-gate 	 * Note that MSG_WAITALL set with MSG_PEEK is a noop.
32227c478bd9Sstevel@tonic-gate 	 */
32237c478bd9Sstevel@tonic-gate 	pflag = MSG_ANY | MSG_DELAYERROR;
32247c478bd9Sstevel@tonic-gate 	if (flags & MSG_PEEK) {
32257c478bd9Sstevel@tonic-gate 		pflag |= MSG_IPEEK;
32267c478bd9Sstevel@tonic-gate 		flags &= ~MSG_WAITALL;
32277c478bd9Sstevel@tonic-gate 	}
32287c478bd9Sstevel@tonic-gate 	if (so->so_mode & SM_ATOMIC)
32297c478bd9Sstevel@tonic-gate 		pflag |= MSG_DISCARDTAIL;
32307c478bd9Sstevel@tonic-gate 
32317c478bd9Sstevel@tonic-gate 	if (flags & MSG_DONTWAIT)
32327c478bd9Sstevel@tonic-gate 		timout = 0;
3233412cc9e9SGordon Ross 	else if (so->so_rcvtimeo != 0)
3234412cc9e9SGordon Ross 		timout = TICK_TO_MSEC(so->so_rcvtimeo);
32357c478bd9Sstevel@tonic-gate 	else
32367c478bd9Sstevel@tonic-gate 		timout = -1;
32377c478bd9Sstevel@tonic-gate 	opflag = pflag;
32387c478bd9Sstevel@tonic-gate retry:
32397c478bd9Sstevel@tonic-gate 	saved_resid = uiop->uio_resid;
32407c478bd9Sstevel@tonic-gate 	pri = 0;
32417c478bd9Sstevel@tonic-gate 	mp = NULL;
32420f1702c5SYu Xiangning 	if (sti->sti_nl7c_rcv_mp != NULL) {
32432c9e429eSbrutus 		/* Already kstrgetmsg()ed saved mblk(s) from NL7C */
32447c478bd9Sstevel@tonic-gate 		error = nl7c_sorecv(so, &mp, uiop, &rval);
32457c478bd9Sstevel@tonic-gate 	} else {
32467c478bd9Sstevel@tonic-gate 		error = kstrgetmsg(SOTOV(so), &mp, uiop, &pri, &pflag,
32477c478bd9Sstevel@tonic-gate 		    timout, &rval);
32487c478bd9Sstevel@tonic-gate 	}
32490f1702c5SYu Xiangning 	if (error != 0) {
32500f1702c5SYu Xiangning 		/* kstrgetmsg returns ETIME when timeout expires */
32510f1702c5SYu Xiangning 		if (error == ETIME)
32527c478bd9Sstevel@tonic-gate 			error = EWOULDBLOCK;
325317169044Sbrutus 		goto out;
32547c478bd9Sstevel@tonic-gate 	}
32557c478bd9Sstevel@tonic-gate 	/*
32567c478bd9Sstevel@tonic-gate 	 * For datagrams the MOREDATA flag is used to set MSG_TRUNC.
32577c478bd9Sstevel@tonic-gate 	 * For non-datagrams MOREDATA is used to set MSG_EOR.
32587c478bd9Sstevel@tonic-gate 	 */
32597c478bd9Sstevel@tonic-gate 	ASSERT(!(rval.r_val1 & MORECTL));
32607c478bd9Sstevel@tonic-gate 	if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC))
32617c478bd9Sstevel@tonic-gate 		msg->msg_flags |= MSG_TRUNC;
32627c478bd9Sstevel@tonic-gate 
32637c478bd9Sstevel@tonic-gate 	if (mp == NULL) {
32647c478bd9Sstevel@tonic-gate 		dprintso(so, 1, ("sotpi_recvmsg: got M_DATA\n"));
32657c478bd9Sstevel@tonic-gate 		/*
32667c478bd9Sstevel@tonic-gate 		 * 4.3BSD and 4.4BSD clears the mark when peeking across it.
32677c478bd9Sstevel@tonic-gate 		 * The draft Posix socket spec states that the mark should
32687c478bd9Sstevel@tonic-gate 		 * not be cleared when peeking. We follow the latter.
32697c478bd9Sstevel@tonic-gate 		 */
32707c478bd9Sstevel@tonic-gate 		if ((so->so_state &
32717c478bd9Sstevel@tonic-gate 		    (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) &&
32727c478bd9Sstevel@tonic-gate 		    (uiop->uio_resid != saved_resid) &&
32737c478bd9Sstevel@tonic-gate 		    !(flags & MSG_PEEK)) {
32747c478bd9Sstevel@tonic-gate 			sorecv_update_oobstate(so);
32757c478bd9Sstevel@tonic-gate 		}
32767c478bd9Sstevel@tonic-gate 
32777c478bd9Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
32787c478bd9Sstevel@tonic-gate 		/* Set MSG_EOR based on MOREDATA */
32797c478bd9Sstevel@tonic-gate 		if (!(rval.r_val1 & MOREDATA)) {
32807c478bd9Sstevel@tonic-gate 			if (so->so_state & SS_SAVEDEOR) {
32817c478bd9Sstevel@tonic-gate 				msg->msg_flags |= MSG_EOR;
32827c478bd9Sstevel@tonic-gate 				so->so_state &= ~SS_SAVEDEOR;
32837c478bd9Sstevel@tonic-gate 			}
32847c478bd9Sstevel@tonic-gate 		}
32857c478bd9Sstevel@tonic-gate 		/*
32867c478bd9Sstevel@tonic-gate 		 * If some data was received (i.e. not EOF) and the
32877c478bd9Sstevel@tonic-gate 		 * read/recv* has not been satisfied wait for some more.
32887c478bd9Sstevel@tonic-gate 		 */
32897c478bd9Sstevel@tonic-gate 		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
32907c478bd9Sstevel@tonic-gate 		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
32917c478bd9Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
32927c478bd9Sstevel@tonic-gate 			pflag = opflag | MSG_NOMARK;
32937c478bd9Sstevel@tonic-gate 			goto retry;
32947c478bd9Sstevel@tonic-gate 		}
329517169044Sbrutus 		goto out_locked;
32967c478bd9Sstevel@tonic-gate 	}
32977c478bd9Sstevel@tonic-gate 
32987c478bd9Sstevel@tonic-gate 	/* strsock_proto has already verified length and alignment */
32997c478bd9Sstevel@tonic-gate 	tpr = (union T_primitives *)mp->b_rptr;
33007c478bd9Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_recvmsg: type %d\n", tpr->type));
33017c478bd9Sstevel@tonic-gate 
33027c478bd9Sstevel@tonic-gate 	switch (tpr->type) {
33037c478bd9Sstevel@tonic-gate 	case T_DATA_IND: {
33047c478bd9Sstevel@tonic-gate 		if ((so->so_state &
33057c478bd9Sstevel@tonic-gate 		    (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) &&
33067c478bd9Sstevel@tonic-gate 		    (uiop->uio_resid != saved_resid) &&
33077c478bd9Sstevel@tonic-gate 		    !(flags & MSG_PEEK)) {
33087c478bd9Sstevel@tonic-gate 			sorecv_update_oobstate(so);
33097c478bd9Sstevel@tonic-gate 		}
33107c478bd9Sstevel@tonic-gate 
33117c478bd9Sstevel@tonic-gate 		/*
33127c478bd9Sstevel@tonic-gate 		 * Set msg_flags to MSG_EOR based on
33137c478bd9Sstevel@tonic-gate 		 * MORE_flag and MOREDATA.
33147c478bd9Sstevel@tonic-gate 		 */
33157c478bd9Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
33167c478bd9Sstevel@tonic-gate 		so->so_state &= ~SS_SAVEDEOR;
33177c478bd9Sstevel@tonic-gate 		if (!(tpr->data_ind.MORE_flag & 1)) {
33187c478bd9Sstevel@tonic-gate 			if (!(rval.r_val1 & MOREDATA))
33197c478bd9Sstevel@tonic-gate 				msg->msg_flags |= MSG_EOR;
33207c478bd9Sstevel@tonic-gate 			else
33217c478bd9Sstevel@tonic-gate 				so->so_state |= SS_SAVEDEOR;
33227c478bd9Sstevel@tonic-gate 		}
33237c478bd9Sstevel@tonic-gate 		freemsg(mp);
33247c478bd9Sstevel@tonic-gate 		/*
33257c478bd9Sstevel@tonic-gate 		 * If some data was received (i.e. not EOF) and the
33267c478bd9Sstevel@tonic-gate 		 * read/recv* has not been satisfied wait for some more.
33277c478bd9Sstevel@tonic-gate 		 */
33287c478bd9Sstevel@tonic-gate 		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
33297c478bd9Sstevel@tonic-gate 		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
33307c478bd9Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
33317c478bd9Sstevel@tonic-gate 			pflag = opflag | MSG_NOMARK;
33327c478bd9Sstevel@tonic-gate 			goto retry;
33337c478bd9Sstevel@tonic-gate 		}
333417169044Sbrutus 		goto out_locked;
33357c478bd9Sstevel@tonic-gate 	}
33367c478bd9Sstevel@tonic-gate 	case T_UNITDATA_IND: {
33377c478bd9Sstevel@tonic-gate 		void *addr;
33387c478bd9Sstevel@tonic-gate 		t_uscalar_t addrlen;
33397c478bd9Sstevel@tonic-gate 		void *abuf;
33407c478bd9Sstevel@tonic-gate 		t_uscalar_t optlen;
33417c478bd9Sstevel@tonic-gate 		void *opt;
33427c478bd9Sstevel@tonic-gate 
33437c478bd9Sstevel@tonic-gate 		if ((so->so_state &
33447c478bd9Sstevel@tonic-gate 		    (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) &&
33457c478bd9Sstevel@tonic-gate 		    (uiop->uio_resid != saved_resid) &&
33467c478bd9Sstevel@tonic-gate 		    !(flags & MSG_PEEK)) {
33477c478bd9Sstevel@tonic-gate 			sorecv_update_oobstate(so);
33487c478bd9Sstevel@tonic-gate 		}
33497c478bd9Sstevel@tonic-gate 
33507c478bd9Sstevel@tonic-gate 		if (namelen != 0) {
33517c478bd9Sstevel@tonic-gate 			/* Caller wants source address */
33527c478bd9Sstevel@tonic-gate 			addrlen = tpr->unitdata_ind.SRC_length;
33537c478bd9Sstevel@tonic-gate 			addr = sogetoff(mp,
33547c478bd9Sstevel@tonic-gate 			    tpr->unitdata_ind.SRC_offset,
33557c478bd9Sstevel@tonic-gate 			    addrlen, 1);
33567c478bd9Sstevel@tonic-gate 			if (addr == NULL) {
33577c478bd9Sstevel@tonic-gate 				freemsg(mp);
33587c478bd9Sstevel@tonic-gate 				error = EPROTO;
33597c478bd9Sstevel@tonic-gate 				eprintsoline(so, error);
336017169044Sbrutus 				goto out;
33617c478bd9Sstevel@tonic-gate 			}
33627c478bd9Sstevel@tonic-gate 			if (so->so_family == AF_UNIX) {
33637c478bd9Sstevel@tonic-gate 				/*
33647c478bd9Sstevel@tonic-gate 				 * Can not use the transport level address.
33657c478bd9Sstevel@tonic-gate 				 * If there is a SO_SRCADDR option carrying
33667c478bd9Sstevel@tonic-gate 				 * the socket level address it will be
33677c478bd9Sstevel@tonic-gate 				 * extracted below.
33687c478bd9Sstevel@tonic-gate 				 */
33697c478bd9Sstevel@tonic-gate 				addr = NULL;
33707c478bd9Sstevel@tonic-gate 				addrlen = 0;
33717c478bd9Sstevel@tonic-gate 			}
33727c478bd9Sstevel@tonic-gate 		}
33737c478bd9Sstevel@tonic-gate 		optlen = tpr->unitdata_ind.OPT_length;
33747c478bd9Sstevel@tonic-gate 		if (optlen != 0) {
33757c478bd9Sstevel@tonic-gate 			t_uscalar_t ncontrollen;
33767c478bd9Sstevel@tonic-gate 
33777c478bd9Sstevel@tonic-gate 			/*
33787c478bd9Sstevel@tonic-gate 			 * Extract any source address option.
33797c478bd9Sstevel@tonic-gate 			 * Determine how large cmsg buffer is needed.
33807c478bd9Sstevel@tonic-gate 			 */
33817c478bd9Sstevel@tonic-gate 			opt = sogetoff(mp,
33827c478bd9Sstevel@tonic-gate 			    tpr->unitdata_ind.OPT_offset,
33837c478bd9Sstevel@tonic-gate 			    optlen, __TPI_ALIGN_SIZE);
33847c478bd9Sstevel@tonic-gate 
33857c478bd9Sstevel@tonic-gate 			if (opt == NULL) {
33867c478bd9Sstevel@tonic-gate 				freemsg(mp);
33877c478bd9Sstevel@tonic-gate 				error = EPROTO;
33887c478bd9Sstevel@tonic-gate 				eprintsoline(so, error);
338917169044Sbrutus 				goto out;
33907c478bd9Sstevel@tonic-gate 			}
33917c478bd9Sstevel@tonic-gate 			if (so->so_family == AF_UNIX)
33927c478bd9Sstevel@tonic-gate 				so_getopt_srcaddr(opt, optlen, &addr, &addrlen);
33937c478bd9Sstevel@tonic-gate 			ncontrollen = so_cmsglen(mp, opt, optlen,
33947c478bd9Sstevel@tonic-gate 			    !(flags & MSG_XPG4_2));
33957c478bd9Sstevel@tonic-gate 			if (controllen != 0)
33967c478bd9Sstevel@tonic-gate 				controllen = ncontrollen;
33977c478bd9Sstevel@tonic-gate 			else if (ncontrollen != 0)
33987c478bd9Sstevel@tonic-gate 				msg->msg_flags |= MSG_CTRUNC;
33997c478bd9Sstevel@tonic-gate 		} else {
34007c478bd9Sstevel@tonic-gate 			controllen = 0;
34017c478bd9Sstevel@tonic-gate 		}
34027c478bd9Sstevel@tonic-gate 
34037c478bd9Sstevel@tonic-gate 		if (namelen != 0) {
34047c478bd9Sstevel@tonic-gate 			/*
34057c478bd9Sstevel@tonic-gate 			 * Return address to caller.
34067c478bd9Sstevel@tonic-gate 			 * Caller handles truncation if length
34077c478bd9Sstevel@tonic-gate 			 * exceeds msg_namelen.
34087c478bd9Sstevel@tonic-gate 			 * NOTE: AF_UNIX NUL termination is ensured by
34097c478bd9Sstevel@tonic-gate 			 * the sender's copyin_name().
34107c478bd9Sstevel@tonic-gate 			 */
34117c478bd9Sstevel@tonic-gate 			abuf = kmem_alloc(addrlen, KM_SLEEP);
34127c478bd9Sstevel@tonic-gate 
34137c478bd9Sstevel@tonic-gate 			bcopy(addr, abuf, addrlen);
34147c478bd9Sstevel@tonic-gate 			msg->msg_name = abuf;
34157c478bd9Sstevel@tonic-gate 			msg->msg_namelen = addrlen;
34167c478bd9Sstevel@tonic-gate 		}
34177c478bd9Sstevel@tonic-gate 
34187c478bd9Sstevel@tonic-gate 		if (controllen != 0) {
34197c478bd9Sstevel@tonic-gate 			/*
34207c478bd9Sstevel@tonic-gate 			 * Return control msg to caller.
34217c478bd9Sstevel@tonic-gate 			 * Caller handles truncation if length
34227c478bd9Sstevel@tonic-gate 			 * exceeds msg_controllen.
34237c478bd9Sstevel@tonic-gate 			 */
3424274af231Samehta 			control = kmem_zalloc(controllen, KM_SLEEP);
34257c478bd9Sstevel@tonic-gate 
34267c478bd9Sstevel@tonic-gate 			error = so_opt2cmsg(mp, opt, optlen,
34277c478bd9Sstevel@tonic-gate 			    !(flags & MSG_XPG4_2),
34287c478bd9Sstevel@tonic-gate 			    control, controllen);
34297c478bd9Sstevel@tonic-gate 			if (error) {
34307c478bd9Sstevel@tonic-gate 				freemsg(mp);
34317c478bd9Sstevel@tonic-gate 				if (msg->msg_namelen != 0)
34327c478bd9Sstevel@tonic-gate 					kmem_free(msg->msg_name,
34337c478bd9Sstevel@tonic-gate 					    msg->msg_namelen);
34347c478bd9Sstevel@tonic-gate 				kmem_free(control, controllen);
34357c478bd9Sstevel@tonic-gate 				eprintsoline(so, error);
343617169044Sbrutus 				goto out;
34377c478bd9Sstevel@tonic-gate 			}
34387c478bd9Sstevel@tonic-gate 			msg->msg_control = control;
34397c478bd9Sstevel@tonic-gate 			msg->msg_controllen = controllen;
34407c478bd9Sstevel@tonic-gate 		}
34417c478bd9Sstevel@tonic-gate 
34427c478bd9Sstevel@tonic-gate 		freemsg(mp);
344317169044Sbrutus 		goto out;
34447c478bd9Sstevel@tonic-gate 	}
34457c478bd9Sstevel@tonic-gate 	case T_OPTDATA_IND: {
34467c478bd9Sstevel@tonic-gate 		struct T_optdata_req *tdr;
34477c478bd9Sstevel@tonic-gate 		void *opt;
34487c478bd9Sstevel@tonic-gate 		t_uscalar_t optlen;
34497c478bd9Sstevel@tonic-gate 
34507c478bd9Sstevel@tonic-gate 		if ((so->so_state &
34517c478bd9Sstevel@tonic-gate 		    (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) &&
34527c478bd9Sstevel@tonic-gate 		    (uiop->uio_resid != saved_resid) &&
34537c478bd9Sstevel@tonic-gate 		    !(flags & MSG_PEEK)) {
34547c478bd9Sstevel@tonic-gate 			sorecv_update_oobstate(so);
34557c478bd9Sstevel@tonic-gate 		}
34567c478bd9Sstevel@tonic-gate 
34577c478bd9Sstevel@tonic-gate 		tdr = (struct T_optdata_req *)mp->b_rptr;
34587c478bd9Sstevel@tonic-gate 		optlen = tdr->OPT_length;
34597c478bd9Sstevel@tonic-gate 		if (optlen != 0) {
34607c478bd9Sstevel@tonic-gate 			t_uscalar_t ncontrollen;
34617c478bd9Sstevel@tonic-gate 			/*
34627c478bd9Sstevel@tonic-gate 			 * Determine how large cmsg buffer is needed.
34637c478bd9Sstevel@tonic-gate 			 */
34647c478bd9Sstevel@tonic-gate 			opt = sogetoff(mp,
34657c478bd9Sstevel@tonic-gate 			    tpr->optdata_ind.OPT_offset,
34667c478bd9Sstevel@tonic-gate 			    optlen, __TPI_ALIGN_SIZE);
34677c478bd9Sstevel@tonic-gate 
34687c478bd9Sstevel@tonic-gate 			if (opt == NULL) {
34697c478bd9Sstevel@tonic-gate 				freemsg(mp);
34707c478bd9Sstevel@tonic-gate 				error = EPROTO;
34717c478bd9Sstevel@tonic-gate 				eprintsoline(so, error);
347217169044Sbrutus 				goto out;
34737c478bd9Sstevel@tonic-gate 			}
34747c478bd9Sstevel@tonic-gate 
34757c478bd9Sstevel@tonic-gate 			ncontrollen = so_cmsglen(mp, opt, optlen,
34767c478bd9Sstevel@tonic-gate 			    !(flags & MSG_XPG4_2));
34777c478bd9Sstevel@tonic-gate 			if (controllen != 0)
34787c478bd9Sstevel@tonic-gate 				controllen = ncontrollen;
34797c478bd9Sstevel@tonic-gate 			else if (ncontrollen != 0)
34807c478bd9Sstevel@tonic-gate 				msg->msg_flags |= MSG_CTRUNC;
34817c478bd9Sstevel@tonic-gate 		} else {
34827c478bd9Sstevel@tonic-gate 			controllen = 0;
34837c478bd9Sstevel@tonic-gate 		}
34847c478bd9Sstevel@tonic-gate 
34857c478bd9Sstevel@tonic-gate 		if (controllen != 0) {
34867c478bd9Sstevel@tonic-gate 			/*
34877c478bd9Sstevel@tonic-gate 			 * Return control msg to caller.
34887c478bd9Sstevel@tonic-gate 			 * Caller handles truncation if length
34897c478bd9Sstevel@tonic-gate 			 * exceeds msg_controllen.
34907c478bd9Sstevel@tonic-gate 			 */
3491274af231Samehta 			control = kmem_zalloc(controllen, KM_SLEEP);
34927c478bd9Sstevel@tonic-gate 
34937c478bd9Sstevel@tonic-gate 			error = so_opt2cmsg(mp, opt, optlen,
34947c478bd9Sstevel@tonic-gate 			    !(flags & MSG_XPG4_2),
34957c478bd9Sstevel@tonic-gate 			    control, controllen);
34967c478bd9Sstevel@tonic-gate 			if (error) {
34977c478bd9Sstevel@tonic-gate 				freemsg(mp);
34987c478bd9Sstevel@tonic-gate 				kmem_free(control, controllen);
34997c478bd9Sstevel@tonic-gate 				eprintsoline(so, error);
350017169044Sbrutus 				goto out;
35017c478bd9Sstevel@tonic-gate 			}
35027c478bd9Sstevel@tonic-gate 			msg->msg_control = control;
35037c478bd9Sstevel@tonic-gate 			msg->msg_controllen = controllen;
35047c478bd9Sstevel@tonic-gate 		}
35057c478bd9Sstevel@tonic-gate 
35067c478bd9Sstevel@tonic-gate 		/*
35077c478bd9Sstevel@tonic-gate 		 * Set msg_flags to MSG_EOR based on
35087c478bd9Sstevel@tonic-gate 		 * DATA_flag and MOREDATA.
35097c478bd9Sstevel@tonic-gate 		 */
35107c478bd9Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
35117c478bd9Sstevel@tonic-gate 		so->so_state &= ~SS_SAVEDEOR;
35127c478bd9Sstevel@tonic-gate 		if (!(tpr->data_ind.MORE_flag & 1)) {
35137c478bd9Sstevel@tonic-gate 			if (!(rval.r_val1 & MOREDATA))
35147c478bd9Sstevel@tonic-gate 				msg->msg_flags |= MSG_EOR;
35157c478bd9Sstevel@tonic-gate 			else
35167c478bd9Sstevel@tonic-gate 				so->so_state |= SS_SAVEDEOR;
35177c478bd9Sstevel@tonic-gate 		}
35187c478bd9Sstevel@tonic-gate 		freemsg(mp);
35197c478bd9Sstevel@tonic-gate 		/*
35207c478bd9Sstevel@tonic-gate 		 * If some data was received (i.e. not EOF) and the
35217c478bd9Sstevel@tonic-gate 		 * read/recv* has not been satisfied wait for some more.
35227c478bd9Sstevel@tonic-gate 		 * Not possible to wait if control info was received.
35237c478bd9Sstevel@tonic-gate 		 */
35247c478bd9Sstevel@tonic-gate 		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
35257c478bd9Sstevel@tonic-gate 		    controllen == 0 &&
35267c478bd9Sstevel@tonic-gate 		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
35277c478bd9Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
35287c478bd9Sstevel@tonic-gate 			pflag = opflag | MSG_NOMARK;
35297c478bd9Sstevel@tonic-gate 			goto retry;
35307c478bd9Sstevel@tonic-gate 		}
353117169044Sbrutus 		goto out_locked;
35327c478bd9Sstevel@tonic-gate 	}
35337c478bd9Sstevel@tonic-gate 	case T_EXDATA_IND: {
35347c478bd9Sstevel@tonic-gate 		dprintso(so, 1,
35357c478bd9Sstevel@tonic-gate 		    ("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld "
35367c478bd9Sstevel@tonic-gate 		    "state %s\n",
35370f1702c5SYu Xiangning 		    sti->sti_oobsigcnt, sti->sti_oobcnt,
35387c478bd9Sstevel@tonic-gate 		    saved_resid - uiop->uio_resid,
35397c478bd9Sstevel@tonic-gate 		    pr_state(so->so_state, so->so_mode)));
35407c478bd9Sstevel@tonic-gate 		/*
35417c478bd9Sstevel@tonic-gate 		 * kstrgetmsg handles MSGMARK so there is nothing to
35427c478bd9Sstevel@tonic-gate 		 * inspect in the T_EXDATA_IND.
35437c478bd9Sstevel@tonic-gate 		 * strsock_proto makes the stream head queue the T_EXDATA_IND
35447c478bd9Sstevel@tonic-gate 		 * as a separate message with no M_DATA component. Furthermore,
35457c478bd9Sstevel@tonic-gate 		 * the stream head does not consolidate M_DATA messages onto
35467c478bd9Sstevel@tonic-gate 		 * an MSGMARK'ed message ensuring that the T_EXDATA_IND
35477c478bd9Sstevel@tonic-gate 		 * remains a message by itself. This is needed since MSGMARK
35487c478bd9Sstevel@tonic-gate 		 * marks both the whole message as well as the last byte
35497c478bd9Sstevel@tonic-gate 		 * of the message.
35507c478bd9Sstevel@tonic-gate 		 */
35517c478bd9Sstevel@tonic-gate 		freemsg(mp);
35527c478bd9Sstevel@tonic-gate 		ASSERT(uiop->uio_resid == saved_resid);	/* No data */
35537c478bd9Sstevel@tonic-gate 		if (flags & MSG_PEEK) {
35547c478bd9Sstevel@tonic-gate 			/*
35557c478bd9Sstevel@tonic-gate 			 * Even though we are peeking we consume the
35567c478bd9Sstevel@tonic-gate 			 * T_EXDATA_IND thereby moving the mark information
35577c478bd9Sstevel@tonic-gate 			 * to SS_RCVATMARK. Then the oob code below will
35587c478bd9Sstevel@tonic-gate 			 * retry the peeking kstrgetmsg.
35597c478bd9Sstevel@tonic-gate 			 * Note that the stream head read queue is
35607c478bd9Sstevel@tonic-gate 			 * never flushed without holding SOREADLOCKED
35617c478bd9Sstevel@tonic-gate 			 * thus the T_EXDATA_IND can not disappear
35627c478bd9Sstevel@tonic-gate 			 * underneath us.
35637c478bd9Sstevel@tonic-gate 			 */
35647c478bd9Sstevel@tonic-gate 			dprintso(so, 1,
35657c478bd9Sstevel@tonic-gate 			    ("sotpi_recvmsg: consume EXDATA_IND "
35667c478bd9Sstevel@tonic-gate 			    "counts %d/%d state %s\n",
35670f1702c5SYu Xiangning 			    sti->sti_oobsigcnt,
35680f1702c5SYu Xiangning 			    sti->sti_oobcnt,
35697c478bd9Sstevel@tonic-gate 			    pr_state(so->so_state, so->so_mode)));
35707c478bd9Sstevel@tonic-gate 
35717c478bd9Sstevel@tonic-gate 			pflag = MSG_ANY | MSG_DELAYERROR;
35727c478bd9Sstevel@tonic-gate 			if (so->so_mode & SM_ATOMIC)
35737c478bd9Sstevel@tonic-gate 				pflag |= MSG_DISCARDTAIL;
35747c478bd9Sstevel@tonic-gate 
35757c478bd9Sstevel@tonic-gate 			pri = 0;
35767c478bd9Sstevel@tonic-gate 			mp = NULL;
35777c478bd9Sstevel@tonic-gate 
35787c478bd9Sstevel@tonic-gate 			error = kstrgetmsg(SOTOV(so), &mp, uiop,
35797c478bd9Sstevel@tonic-gate 			    &pri, &pflag, (clock_t)-1, &rval);
35807c478bd9Sstevel@tonic-gate 			ASSERT(uiop->uio_resid == saved_resid);
35817c478bd9Sstevel@tonic-gate 
35827c478bd9Sstevel@tonic-gate 			if (error) {
35837c478bd9Sstevel@tonic-gate #ifdef SOCK_DEBUG
35847c478bd9Sstevel@tonic-gate 				if (error != EWOULDBLOCK && error != EINTR) {
35857c478bd9Sstevel@tonic-gate 					eprintsoline(so, error);
35867c478bd9Sstevel@tonic-gate 				}
35877c478bd9Sstevel@tonic-gate #endif /* SOCK_DEBUG */
358817169044Sbrutus 				goto out;
35897c478bd9Sstevel@tonic-gate 			}
35907c478bd9Sstevel@tonic-gate 			ASSERT(mp);
35917c478bd9Sstevel@tonic-gate 			tpr = (union T_primitives *)mp->b_rptr;
35927c478bd9Sstevel@tonic-gate 			ASSERT(tpr->type == T_EXDATA_IND);
35937c478bd9Sstevel@tonic-gate 			freemsg(mp);
35947c478bd9Sstevel@tonic-gate 		} /* end "if (flags & MSG_PEEK)" */
35957c478bd9Sstevel@tonic-gate 
35967c478bd9Sstevel@tonic-gate 		/*
35977c478bd9Sstevel@tonic-gate 		 * Decrement the number of queued and pending oob.
35987c478bd9Sstevel@tonic-gate 		 *
35997c478bd9Sstevel@tonic-gate 		 * SS_RCVATMARK is cleared when we read past a mark.
36007c478bd9Sstevel@tonic-gate 		 * SS_HAVEOOBDATA is cleared when we've read past the
36017c478bd9Sstevel@tonic-gate 		 * last mark.
36027c478bd9Sstevel@tonic-gate 		 * SS_OOBPEND is cleared if we've read past the last
36037c478bd9Sstevel@tonic-gate 		 * mark and no (new) SIGURG has been posted.
36047c478bd9Sstevel@tonic-gate 		 */
36057c478bd9Sstevel@tonic-gate 		mutex_enter(&so->so_lock);
36067c478bd9Sstevel@tonic-gate 		ASSERT(so_verify_oobstate(so));
36070f1702c5SYu Xiangning 		ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt);
36080f1702c5SYu Xiangning 		ASSERT(sti->sti_oobsigcnt > 0);
36090f1702c5SYu Xiangning 		sti->sti_oobsigcnt--;
36100f1702c5SYu Xiangning 		ASSERT(sti->sti_oobcnt > 0);
36110f1702c5SYu Xiangning 		sti->sti_oobcnt--;
36127c478bd9Sstevel@tonic-gate 		/*
36137c478bd9Sstevel@tonic-gate 		 * Since the T_EXDATA_IND has been removed from the stream
36147c478bd9Sstevel@tonic-gate 		 * head, but we have not read data past the mark,
36157c478bd9Sstevel@tonic-gate 		 * sockfs needs to track that the socket is still at the mark.
36167c478bd9Sstevel@tonic-gate 		 *
36177c478bd9Sstevel@tonic-gate 		 * Since no data was received call kstrgetmsg again to wait
36187c478bd9Sstevel@tonic-gate 		 * for data.
36197c478bd9Sstevel@tonic-gate 		 */
36207c478bd9Sstevel@tonic-gate 		so->so_state |= SS_RCVATMARK;
36217c478bd9Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
36227c478bd9Sstevel@tonic-gate 		dprintso(so, 1,
36237c478bd9Sstevel@tonic-gate 		    ("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n",
36240f1702c5SYu Xiangning 		    sti->sti_oobsigcnt, sti->sti_oobcnt,
36257c478bd9Sstevel@tonic-gate 		    pr_state(so->so_state, so->so_mode)));
36267c478bd9Sstevel@tonic-gate 		pflag = opflag;
36277c478bd9Sstevel@tonic-gate 		goto retry;
36287c478bd9Sstevel@tonic-gate 	}
36297c478bd9Sstevel@tonic-gate 	default:
36300f1702c5SYu Xiangning 		cmn_err(CE_CONT, "sotpi_recvmsg: so %p prim %d mp %p\n",
36310f1702c5SYu Xiangning 		    (void *)so, tpr->type, (void *)mp);
36327c478bd9Sstevel@tonic-gate 		ASSERT(0);
36337c478bd9Sstevel@tonic-gate 		freemsg(mp);
36347c478bd9Sstevel@tonic-gate 		error = EPROTO;
36357c478bd9Sstevel@tonic-gate 		eprintsoline(so, error);
363617169044Sbrutus 		goto out;
36377c478bd9Sstevel@tonic-gate 	}
36387c478bd9Sstevel@tonic-gate 	/* NOTREACHED */
363917169044Sbrutus out:
36407c478bd9Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
364117169044Sbrutus out_locked:
36427c478bd9Sstevel@tonic-gate 	so_unlock_read(so);	/* Clear SOREADLOCKED */
36437c478bd9Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
36447c478bd9Sstevel@tonic-gate 	return (error);
36457c478bd9Sstevel@tonic-gate }
36467c478bd9Sstevel@tonic-gate 
36477c478bd9Sstevel@tonic-gate /*
36487c478bd9Sstevel@tonic-gate  * Sending data with options on a datagram socket.
36497c478bd9Sstevel@tonic-gate  * Assumes caller has verified that SS_ISBOUND etc. are set.
3650f012ee0cSGordon Ross  *
3651f012ee0cSGordon Ross  * For AF_UNIX the destination address may be already in
3652f012ee0cSGordon Ross  * internal form, as indicated by sti->sti_faddr_noxlate
3653f012ee0cSGordon Ross  * or the MSG_SENDTO_NOXLATE flag.  Otherwise we need to
3654f012ee0cSGordon Ross  * translate the destination address to internal form.
3655f012ee0cSGordon Ross  *
3656f012ee0cSGordon Ross  * The source address is passed as an option.  If passing
3657f012ee0cSGordon Ross  * file descriptors, those are passed as file pointers in
3658f012ee0cSGordon Ross  * another option.
36597c478bd9Sstevel@tonic-gate  */
36607c478bd9Sstevel@tonic-gate static int
3661ff550d0eSmasputra sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen,
3662ff550d0eSmasputra     struct uio *uiop, void *control, t_uscalar_t controllen, int flags)
36637c478bd9Sstevel@tonic-gate {
36647c478bd9Sstevel@tonic-gate 	struct T_unitdata_req	tudr;
36657c478bd9Sstevel@tonic-gate 	mblk_t			*mp;
36667c478bd9Sstevel@tonic-gate 	int			error;
36677c478bd9Sstevel@tonic-gate 	void			*addr;
36687c478bd9Sstevel@tonic-gate 	socklen_t		addrlen;
36697c478bd9Sstevel@tonic-gate 	void			*src;
36707c478bd9Sstevel@tonic-gate 	socklen_t		srclen;
36717c478bd9Sstevel@tonic-gate 	ssize_t			len;
36727c478bd9Sstevel@tonic-gate 	int			size;
36737c478bd9Sstevel@tonic-gate 	struct T_opthdr		toh;
36747c478bd9Sstevel@tonic-gate 	struct fdbuf		*fdbuf;
36757c478bd9Sstevel@tonic-gate 	t_uscalar_t		optlen;
36767c478bd9Sstevel@tonic-gate 	void			*fds;
36777c478bd9Sstevel@tonic-gate 	int			fdlen;
36780f1702c5SYu Xiangning 	sotpi_info_t		*sti = SOTOTPI(so);
36797c478bd9Sstevel@tonic-gate 
36807c478bd9Sstevel@tonic-gate 	ASSERT(name && namelen);
36817c478bd9Sstevel@tonic-gate 	ASSERT(control && controllen);
36827c478bd9Sstevel@tonic-gate 
36837c478bd9Sstevel@tonic-gate 	len = uiop->uio_resid;
36840f1702c5SYu Xiangning 	if (len > (ssize_t)sti->sti_tidu_size) {
36857c478bd9Sstevel@tonic-gate 		return (EMSGSIZE);
36867c478bd9Sstevel@tonic-gate 	}
36877c478bd9Sstevel@tonic-gate 
3688f012ee0cSGordon Ross 	if (sti->sti_faddr_noxlate == 0 &&
3689f012ee0cSGordon Ross 	    (flags & MSG_SENDTO_NOXLATE) == 0) {
36907c478bd9Sstevel@tonic-gate 		/*
36917c478bd9Sstevel@tonic-gate 		 * Length and family checks.
3692f012ee0cSGordon Ross 		 * Don't verify internal form.
36937c478bd9Sstevel@tonic-gate 		 */
36947c478bd9Sstevel@tonic-gate 		error = so_addr_verify(so, name, namelen);
36957c478bd9Sstevel@tonic-gate 		if (error) {
36967c478bd9Sstevel@tonic-gate 			eprintsoline(so, error);
36977c478bd9Sstevel@tonic-gate 			return (error);
36987c478bd9Sstevel@tonic-gate 		}
3699f012ee0cSGordon Ross 	}
3700f012ee0cSGordon Ross 
37017c478bd9Sstevel@tonic-gate 	if (so->so_family == AF_UNIX) {
37020f1702c5SYu Xiangning 		if (sti->sti_faddr_noxlate) {
37037c478bd9Sstevel@tonic-gate 			/*
37047c478bd9Sstevel@tonic-gate 			 * Already have a transport internal address. Do not
37057c478bd9Sstevel@tonic-gate 			 * pass any (transport internal) source address.
37067c478bd9Sstevel@tonic-gate 			 */
37077c478bd9Sstevel@tonic-gate 			addr = name;
37087c478bd9Sstevel@tonic-gate 			addrlen = namelen;
37097c478bd9Sstevel@tonic-gate 			src = NULL;
37107c478bd9Sstevel@tonic-gate 			srclen = 0;
3711f012ee0cSGordon Ross 		} else if (flags & MSG_SENDTO_NOXLATE) {
3712f012ee0cSGordon Ross 			/*
3713f012ee0cSGordon Ross 			 * Have an internal form dest. address.
3714f012ee0cSGordon Ross 			 * Pass the source address as usual.
3715f012ee0cSGordon Ross 			 */
3716f012ee0cSGordon Ross 			addr = name;
3717f012ee0cSGordon Ross 			addrlen = namelen;
3718f012ee0cSGordon Ross 			src = sti->sti_laddr_sa;
3719f012ee0cSGordon Ross 			srclen = (socklen_t)sti->sti_laddr_len;
37207c478bd9Sstevel@tonic-gate 		} else {
37217c478bd9Sstevel@tonic-gate 			/*
37227c478bd9Sstevel@tonic-gate 			 * Pass the sockaddr_un source address as an option
37237c478bd9Sstevel@tonic-gate 			 * and translate the remote address.
37247c478bd9Sstevel@tonic-gate 			 *
37250f1702c5SYu Xiangning 			 * Note that this code does not prevent sti_laddr_sa
37267c478bd9Sstevel@tonic-gate 			 * from changing while it is being used. Thus
37277c478bd9Sstevel@tonic-gate 			 * if an unbind+bind occurs concurrently with this
37287c478bd9Sstevel@tonic-gate 			 * send the peer might see a partially new and a
37297c478bd9Sstevel@tonic-gate 			 * partially old "from" address.
37307c478bd9Sstevel@tonic-gate 			 */
37310f1702c5SYu Xiangning 			src = sti->sti_laddr_sa;
3732f012ee0cSGordon Ross 			srclen = (socklen_t)sti->sti_laddr_len;
37337c478bd9Sstevel@tonic-gate 			dprintso(so, 1,
37347c478bd9Sstevel@tonic-gate 			    ("sosend_dgramcmsg UNIX: srclen %d, src %p\n",
37357c478bd9Sstevel@tonic-gate 			    srclen, src));
3736f012ee0cSGordon Ross 			/*
3737f012ee0cSGordon Ross 			 * The sendmsg caller specified a destination
3738f012ee0cSGordon Ross 			 * address, which we must translate into our
3739f012ee0cSGordon Ross 			 * internal form.  addr = &sti->sti_ux_taddr
3740f012ee0cSGordon Ross 			 */
37417c478bd9Sstevel@tonic-gate 			error = so_ux_addr_xlate(so, name, namelen,
37427c478bd9Sstevel@tonic-gate 			    (flags & MSG_XPG4_2),
37437c478bd9Sstevel@tonic-gate 			    &addr, &addrlen);
37447c478bd9Sstevel@tonic-gate 			if (error) {
37457c478bd9Sstevel@tonic-gate 				eprintsoline(so, error);
37467c478bd9Sstevel@tonic-gate 				return (error);
37477c478bd9Sstevel@tonic-gate 			}
37487c478bd9Sstevel@tonic-gate 		}
37497c478bd9Sstevel@tonic-gate 	} else {
37507c478bd9Sstevel@tonic-gate 		addr = name;
37517c478bd9Sstevel@tonic-gate 		addrlen = namelen;
37527c478bd9Sstevel@tonic-gate 		src = NULL;
37537c478bd9Sstevel@tonic-gate 		srclen = 0;
37547c478bd9Sstevel@tonic-gate 	}
37557c478bd9Sstevel@tonic-gate 	optlen = so_optlen(control, controllen,
37567c478bd9Sstevel@tonic-gate 	    !(flags & MSG_XPG4_2));
37577c478bd9Sstevel@tonic-gate 	tudr.PRIM_type = T_UNITDATA_REQ;
37587c478bd9Sstevel@tonic-gate 	tudr.DEST_length = addrlen;
37597c478bd9Sstevel@tonic-gate 	tudr.DEST_offset = (t_scalar_t)sizeof (tudr);
37607c478bd9Sstevel@tonic-gate 	if (srclen != 0)
37617c478bd9Sstevel@tonic-gate 		tudr.OPT_length = (t_scalar_t)(optlen + sizeof (toh) +
37627c478bd9Sstevel@tonic-gate 		    _TPI_ALIGN_TOPT(srclen));
37637c478bd9Sstevel@tonic-gate 	else
37647c478bd9Sstevel@tonic-gate 		tudr.OPT_length = optlen;
37657c478bd9Sstevel@tonic-gate 	tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) +
37667c478bd9Sstevel@tonic-gate 	    _TPI_ALIGN_TOPT(addrlen));
37677c478bd9Sstevel@tonic-gate 
37687c478bd9Sstevel@tonic-gate 	size = tudr.OPT_offset + tudr.OPT_length;
37697c478bd9Sstevel@tonic-gate 
37707c478bd9Sstevel@tonic-gate 	/*
37717c478bd9Sstevel@tonic-gate 	 * File descriptors only when SM_FDPASSING set.
37727c478bd9Sstevel@tonic-gate 	 */
37737c478bd9Sstevel@tonic-gate 	error = so_getfdopt(control, controllen,
37747c478bd9Sstevel@tonic-gate 	    !(flags & MSG_XPG4_2), &fds, &fdlen);
37757c478bd9Sstevel@tonic-gate 	if (error)
37767c478bd9Sstevel@tonic-gate 		return (error);
37777c478bd9Sstevel@tonic-gate 	if (fdlen != -1) {
37787c478bd9Sstevel@tonic-gate 		if (!(so->so_mode & SM_FDPASSING))
37797c478bd9Sstevel@tonic-gate 			return (EOPNOTSUPP);
37807c478bd9Sstevel@tonic-gate 
37817c478bd9Sstevel@tonic-gate 		error = fdbuf_create(fds, fdlen, &fdbuf);
37827c478bd9Sstevel@tonic-gate 		if (error)
37837c478bd9Sstevel@tonic-gate 			return (error);
3784d28d4716SJerry Jelinek 
3785d28d4716SJerry Jelinek 		/*
3786d28d4716SJerry Jelinek 		 * Pre-allocate enough additional space for lower level modules
3787d28d4716SJerry Jelinek 		 * to append an option (e.g. see tl_unitdata). The following
3788d28d4716SJerry Jelinek 		 * is enough extra space for the largest option we might append.
3789d28d4716SJerry Jelinek 		 */
3790d28d4716SJerry Jelinek 		size += sizeof (struct T_opthdr) + ucredsize;
37917c478bd9Sstevel@tonic-gate 		mp = fdbuf_allocmsg(size, fdbuf);
37927c478bd9Sstevel@tonic-gate 	} else {
3793de8c4a14SErik Nordmark 		mp = soallocproto(size, _ALLOC_INTR, CRED());
37947c478bd9Sstevel@tonic-gate 		if (mp == NULL) {
37957c478bd9Sstevel@tonic-gate 			/*
37967c478bd9Sstevel@tonic-gate 			 * Caught a signal waiting for memory.
37977c478bd9Sstevel@tonic-gate 			 * Let send* return EINTR.
37987c478bd9Sstevel@tonic-gate 			 */
37997c478bd9Sstevel@tonic-gate 			return (EINTR);
38007c478bd9Sstevel@tonic-gate 		}
3801bd118333Smeem 	}
38027c478bd9Sstevel@tonic-gate 	soappendmsg(mp, &tudr, sizeof (tudr));
38037c478bd9Sstevel@tonic-gate 	soappendmsg(mp, addr, addrlen);
38047c478bd9Sstevel@tonic-gate 	mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen;
38057c478bd9Sstevel@tonic-gate 
38067c478bd9Sstevel@tonic-gate 	if (fdlen != -1) {
38077c478bd9Sstevel@tonic-gate 		ASSERT(fdbuf != NULL);
38087c478bd9Sstevel@tonic-gate 		toh.level = SOL_SOCKET;
38097c478bd9Sstevel@tonic-gate 		toh.name = SO_FILEP;
38107c478bd9Sstevel@tonic-gate 		toh.len = fdbuf->fd_size +
38117c478bd9Sstevel@tonic-gate 		    (t_uscalar_t)sizeof (struct T_opthdr);
38127c478bd9Sstevel@tonic-gate 		toh.status = 0;
38137c478bd9Sstevel@tonic-gate 		soappendmsg(mp, &toh, sizeof (toh));
38147c478bd9Sstevel@tonic-gate 		soappendmsg(mp, fdbuf, fdbuf->fd_size);
38157c478bd9Sstevel@tonic-gate 		ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr));
38167c478bd9Sstevel@tonic-gate 	}
38177c478bd9Sstevel@tonic-gate 	if (srclen != 0) {
38187c478bd9Sstevel@tonic-gate 		/*
38197c478bd9Sstevel@tonic-gate 		 * There is a AF_UNIX sockaddr_un to include as a source
38207c478bd9Sstevel@tonic-gate 		 * address option.
38217c478bd9Sstevel@tonic-gate 		 */
38227c478bd9Sstevel@tonic-gate 		toh.level = SOL_SOCKET;
38237c478bd9Sstevel@tonic-gate 		toh.name = SO_SRCADDR;
38247c478bd9Sstevel@tonic-gate 		toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr));
38257c478bd9Sstevel@tonic-gate 		toh.status = 0;
38267c478bd9Sstevel@tonic-gate 		soappendmsg(mp, &toh, sizeof (toh));
38277c478bd9Sstevel@tonic-gate 		soappendmsg(mp, src, srclen);
38287c478bd9Sstevel@tonic-gate 		mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen;
38297c478bd9Sstevel@tonic-gate 		ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr));
38307c478bd9Sstevel@tonic-gate 	}
38317c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
38327c478bd9Sstevel@tonic-gate 	so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp);
3833d28d4716SJerry Jelinek 	/*
3834d28d4716SJerry Jelinek 	 * Normally at most 3 bytes left in the message, but we might have
3835d28d4716SJerry Jelinek 	 * allowed for extra space if we're passing fd's through.
3836d28d4716SJerry Jelinek 	 */
38377c478bd9Sstevel@tonic-gate 	ASSERT(MBLKL(mp) <= (ssize_t)size);
38387c478bd9Sstevel@tonic-gate 
38397c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
3840005d3febSMarek Pospisil 	if (AU_AUDITING())
38417c478bd9Sstevel@tonic-gate 		audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0);
38427c478bd9Sstevel@tonic-gate 
38437c478bd9Sstevel@tonic-gate 	error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0);
38447c478bd9Sstevel@tonic-gate #ifdef SOCK_DEBUG
38457c478bd9Sstevel@tonic-gate 	if (error) {
38467c478bd9Sstevel@tonic-gate 		eprintsoline(so, error);
38477c478bd9Sstevel@tonic-gate 	}
38487c478bd9Sstevel@tonic-gate #endif /* SOCK_DEBUG */
38497c478bd9Sstevel@tonic-gate 	return (error);
38507c478bd9Sstevel@tonic-gate }
38517c478bd9Sstevel@tonic-gate 
38527c478bd9Sstevel@tonic-gate /*
38537c478bd9Sstevel@tonic-gate  * Sending data with options on a connected stream socket.
38547c478bd9Sstevel@tonic-gate  * Assumes caller has verified that SS_ISCONNECTED is set.
38557c478bd9Sstevel@tonic-gate  */
38567c478bd9Sstevel@tonic-gate static int
38570f1702c5SYu Xiangning sosend_svccmsg(struct sonode *so, struct uio *uiop, int more, void *control,
38580f1702c5SYu Xiangning     t_uscalar_t controllen, int flags)
38597c478bd9Sstevel@tonic-gate {
38607c478bd9Sstevel@tonic-gate 	struct T_optdata_req	tdr;
38617c478bd9Sstevel@tonic-gate 	mblk_t			*mp;
38627c478bd9Sstevel@tonic-gate 	int			error;
38637c478bd9Sstevel@tonic-gate 	ssize_t			iosize;
38647c478bd9Sstevel@tonic-gate 	int			size;
38657c478bd9Sstevel@tonic-gate 	struct fdbuf		*fdbuf;
38667c478bd9Sstevel@tonic-gate 	t_uscalar_t		optlen;
38677c478bd9Sstevel@tonic-gate 	void			*fds;
38687c478bd9Sstevel@tonic-gate 	int			fdlen;
38697c478bd9Sstevel@tonic-gate 	struct T_opthdr		toh;
38700f1702c5SYu Xiangning 	sotpi_info_t		*sti = SOTOTPI(so);
38717c478bd9Sstevel@tonic-gate 
38727c478bd9Sstevel@tonic-gate 	dprintso(so, 1,
38737c478bd9Sstevel@tonic-gate 	    ("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid));
38747c478bd9Sstevel@tonic-gate 
38757c478bd9Sstevel@tonic-gate 	/*
38767c478bd9Sstevel@tonic-gate 	 * Has to be bound and connected. However, since no locks are
38777c478bd9Sstevel@tonic-gate 	 * held the state could have changed after sotpi_sendmsg checked it
38787c478bd9Sstevel@tonic-gate 	 * thus it is not possible to ASSERT on the state.
38797c478bd9Sstevel@tonic-gate 	 */
38807c478bd9Sstevel@tonic-gate 
38817c478bd9Sstevel@tonic-gate 	/* Options on connection-oriented only when SM_OPTDATA set. */
38827c478bd9Sstevel@tonic-gate 	if (!(so->so_mode & SM_OPTDATA))
38837c478bd9Sstevel@tonic-gate 		return (EOPNOTSUPP);
38847c478bd9Sstevel@tonic-gate 
38857c478bd9Sstevel@tonic-gate 	do {
38867c478bd9Sstevel@tonic-gate 		/*
38877c478bd9Sstevel@tonic-gate 		 * Set the MORE flag if uio_resid does not fit in this
38887c478bd9Sstevel@tonic-gate 		 * message or if the caller passed in "more".
38897c478bd9Sstevel@tonic-gate 		 * Error for transports with zero tidu_size.
38907c478bd9Sstevel@tonic-gate 		 */
38917c478bd9Sstevel@tonic-gate 		tdr.PRIM_type = T_OPTDATA_REQ;
38920f1702c5SYu Xiangning 		iosize = sti->sti_tidu_size;
38937c478bd9Sstevel@tonic-gate 		if (iosize <= 0)
38947c478bd9Sstevel@tonic-gate 			return (EMSGSIZE);
38957c478bd9Sstevel@tonic-gate 		if (uiop->uio_resid > iosize) {
38967c478bd9Sstevel@tonic-gate 			tdr.DATA_flag = 1;
38977c478bd9Sstevel@tonic-gate 		} else {
38987c478bd9Sstevel@tonic-gate 			if (more)
38997c478bd9Sstevel@tonic-gate 				tdr.DATA_flag = 1;
39007c478bd9Sstevel@tonic-gate 			else
39017c478bd9Sstevel@tonic-gate 				tdr.DATA_flag = 0;
39027c478bd9Sstevel@tonic-gate 			iosize = uiop->uio_resid;
39037c478bd9Sstevel@tonic-gate 		}
39047c478bd9Sstevel@tonic-gate 		dprintso(so, 1, ("sosend_svccmsg: sending %d, %ld bytes\n",
39057c478bd9Sstevel@tonic-gate 		    tdr.DATA_flag, iosize));
39067c478bd9Sstevel@tonic-gate 
39077c478bd9Sstevel@tonic-gate 		optlen = so_optlen(control, controllen, !(flags & MSG_XPG4_2));
39087c478bd9Sstevel@tonic-gate 		tdr.OPT_length = optlen;
39097c478bd9Sstevel@tonic-gate 		tdr.OPT_offset = (t_scalar_t)sizeof (tdr);
39107c478bd9Sstevel@tonic-gate 
39117c478bd9Sstevel@tonic-gate 		size = (int)sizeof (tdr) + optlen;
39127c478bd9Sstevel@tonic-gate 		/*
39137c478bd9Sstevel@tonic-gate 		 * File descriptors only when SM_FDPASSING set.
39147c478bd9Sstevel@tonic-gate 		 */
39157c478bd9Sstevel@tonic-gate 		error = so_getfdopt(control, controllen,
39167c478bd9Sstevel@tonic-gate 		    !(flags & MSG_XPG4_2), &fds, &fdlen);
39177c478bd9Sstevel@tonic-gate 		if (error)
39187c478bd9Sstevel@tonic-gate 			return (error);
39197c478bd9Sstevel@tonic-gate 		if (fdlen != -1) {
39207c478bd9Sstevel@tonic-gate 			if (!(so->so_mode & SM_FDPASSING))
39217c478bd9Sstevel@tonic-gate 				return (EOPNOTSUPP);
39227c478bd9Sstevel@tonic-gate 
39237c478bd9Sstevel@tonic-gate 			error = fdbuf_create(fds, fdlen, &fdbuf);
39247c478bd9Sstevel@tonic-gate 			if (error)
39257c478bd9Sstevel@tonic-gate 				return (error);
3926d28d4716SJerry Jelinek 
3927d28d4716SJerry Jelinek 			/*
3928d28d4716SJerry Jelinek 			 * Pre-allocate enough additional space for lower level
3929d28d4716SJerry Jelinek 			 * modules to append an option (e.g. see tl_unitdata).
3930d28d4716SJerry Jelinek 			 * The following is enough extra space for the largest
3931d28d4716SJerry Jelinek 			 * option we might append.
3932d28d4716SJerry Jelinek 			 */
3933d28d4716SJerry Jelinek 			size += sizeof (struct T_opthdr) + ucredsize;
39347c478bd9Sstevel@tonic-gate 			mp = fdbuf_allocmsg(size, fdbuf);
39357c478bd9Sstevel@tonic-gate 		} else {
3936de8c4a14SErik Nordmark 			mp = soallocproto(size, _ALLOC_INTR, CRED());
39377c478bd9Sstevel@tonic-gate 			if (mp == NULL) {
39387c478bd9Sstevel@tonic-gate 				/*
39397c478bd9Sstevel@tonic-gate 				 * Caught a signal waiting for memory.
39407c478bd9Sstevel@tonic-gate 				 * Let send* return EINTR.
39417c478bd9Sstevel@tonic-gate 				 */
39420f1702c5SYu Xiangning 				return (EINTR);
3943bd118333Smeem 			}
39447c478bd9Sstevel@tonic-gate 		}
39457c478bd9Sstevel@tonic-gate 		soappendmsg(mp, &tdr, sizeof (tdr));
39467c478bd9Sstevel@tonic-gate 
39477c478bd9Sstevel@tonic-gate 		if (fdlen != -1) {
39487c478bd9Sstevel@tonic-gate 			ASSERT(fdbuf != NULL);
39497c478bd9Sstevel@tonic-gate 			toh.level = SOL_SOCKET;
39507c478bd9Sstevel@tonic-gate 			toh.name = SO_FILEP;
39517c478bd9Sstevel@tonic-gate 			toh.len = fdbuf->fd_size +
39527c478bd9Sstevel@tonic-gate 			    (t_uscalar_t)sizeof (struct T_opthdr);
39537c478bd9Sstevel@tonic-gate 			toh.status = 0;
39547c478bd9Sstevel@tonic-gate 			soappendmsg(mp, &toh, sizeof (toh));
39557c478bd9Sstevel@tonic-gate 			soappendmsg(mp, fdbuf, fdbuf->fd_size);
39567c478bd9Sstevel@tonic-gate 			ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr));
39577c478bd9Sstevel@tonic-gate 		}
39587c478bd9Sstevel@tonic-gate 		so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp);
3959d28d4716SJerry Jelinek 		/*
3960d28d4716SJerry Jelinek 		 * Normally at most 3 bytes left in the message, but we might
3961d28d4716SJerry Jelinek 		 * have allowed for extra space if we're passing fd's through.
3962d28d4716SJerry Jelinek 		 */
39637c478bd9Sstevel@tonic-gate 		ASSERT(MBLKL(mp) <= (ssize_t)size);
39647c478bd9Sstevel@tonic-gate 
39657c478bd9Sstevel@tonic-gate 		ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
39667c478bd9Sstevel@tonic-gate 
39677c478bd9Sstevel@tonic-gate 		error = kstrputmsg(SOTOV(so), mp, uiop, iosize,
39687c478bd9Sstevel@tonic-gate 		    0, MSG_BAND, 0);
39697c478bd9Sstevel@tonic-gate 		if (error) {
39707c478bd9Sstevel@tonic-gate 			eprintsoline(so, error);
39717c478bd9Sstevel@tonic-gate 			return (error);
39727c478bd9Sstevel@tonic-gate 		}
39737c478bd9Sstevel@tonic-gate 		control = NULL;
39747c478bd9Sstevel@tonic-gate 		if (uiop->uio_resid > 0) {
39757c478bd9Sstevel@tonic-gate 			/*
39767c478bd9Sstevel@tonic-gate 			 * Recheck for fatal errors. Fail write even though
39777c478bd9Sstevel@tonic-gate 			 * some data have been written. This is consistent
39787c478bd9Sstevel@tonic-gate 			 * with strwrite semantics and BSD sockets semantics.
39797c478bd9Sstevel@tonic-gate 			 */
39807c478bd9Sstevel@tonic-gate 			if (so->so_state & SS_CANTSENDMORE) {
39817c478bd9Sstevel@tonic-gate 				eprintsoline(so, error);
39827c478bd9Sstevel@tonic-gate 				return (EPIPE);
39837c478bd9Sstevel@tonic-gate 			}
39847c478bd9Sstevel@tonic-gate 			if (so->so_error != 0) {
39857c478bd9Sstevel@tonic-gate 				mutex_enter(&so->so_lock);
39860f1702c5SYu Xiangning 				error = sogeterr(so, B_TRUE);
39877c478bd9Sstevel@tonic-gate 				mutex_exit(&so->so_lock);
39887c478bd9Sstevel@tonic-gate 				if (error != 0) {
39897c478bd9Sstevel@tonic-gate 					eprintsoline(so, error);
39907c478bd9Sstevel@tonic-gate 					return (error);
39917c478bd9Sstevel@tonic-gate 				}
39927c478bd9Sstevel@tonic-gate 			}
39937c478bd9Sstevel@tonic-gate 		}
39947c478bd9Sstevel@tonic-gate 	} while (uiop->uio_resid > 0);
39957c478bd9Sstevel@tonic-gate 	return (0);
39967c478bd9Sstevel@tonic-gate }
39977c478bd9Sstevel@tonic-gate 
39987c478bd9Sstevel@tonic-gate /*
39997c478bd9Sstevel@tonic-gate  * Sending data on a datagram socket.
40007c478bd9Sstevel@tonic-gate  * Assumes caller has verified that SS_ISBOUND etc. are set.
40017c478bd9Sstevel@tonic-gate  *
4002f012ee0cSGordon Ross  * For AF_UNIX the destination address may be already in
4003f012ee0cSGordon Ross  * internal form, as indicated by sti->sti_faddr_noxlate
4004f012ee0cSGordon Ross  * or the MSG_SENDTO_NOXLATE flag.  Otherwise we need to
4005f012ee0cSGordon Ross  * translate the destination address to internal form.
4006f012ee0cSGordon Ross  *
4007f012ee0cSGordon Ross  * The source address is passed as an option.
40087c478bd9Sstevel@tonic-gate  */
40097c478bd9Sstevel@tonic-gate int
4010ff550d0eSmasputra sosend_dgram(struct sonode *so, struct sockaddr	*name, socklen_t namelen,
4011ff550d0eSmasputra     struct uio *uiop, int flags)
40127c478bd9Sstevel@tonic-gate {
40137c478bd9Sstevel@tonic-gate 	struct T_unitdata_req	tudr;
40147c478bd9Sstevel@tonic-gate 	mblk_t			*mp;
40157c478bd9Sstevel@tonic-gate 	int			error;
40167c478bd9Sstevel@tonic-gate 	void			*addr;
40177c478bd9Sstevel@tonic-gate 	socklen_t		addrlen;
40187c478bd9Sstevel@tonic-gate 	void			*src;
40197c478bd9Sstevel@tonic-gate 	socklen_t		srclen;
40207c478bd9Sstevel@tonic-gate 	ssize_t			len;
40210f1702c5SYu Xiangning 	sotpi_info_t		*sti = SOTOTPI(so);
40227c478bd9Sstevel@tonic-gate 
4023ff550d0eSmasputra 	ASSERT(name != NULL && namelen != 0);
40247c478bd9Sstevel@tonic-gate 
40257c478bd9Sstevel@tonic-gate 	len = uiop->uio_resid;
40260f1702c5SYu Xiangning 	if (len > sti->sti_tidu_size) {
40277c478bd9Sstevel@tonic-gate 		error = EMSGSIZE;
40287c478bd9Sstevel@tonic-gate 		goto done;
40297c478bd9Sstevel@tonic-gate 	}
40307c478bd9Sstevel@tonic-gate 
4031f012ee0cSGordon Ross 	if (sti->sti_faddr_noxlate == 0 &&
4032f012ee0cSGordon Ross 	    (flags & MSG_SENDTO_NOXLATE) == 0) {
4033f012ee0cSGordon Ross 		/*
4034f012ee0cSGordon Ross 		 * Length and family checks.
4035f012ee0cSGordon Ross 		 * Don't verify internal form.
4036f012ee0cSGordon Ross 		 */
40377c478bd9Sstevel@tonic-gate 		error = so_addr_verify(so, name, namelen);
4038ff550d0eSmasputra 		if (error != 0)
40397c478bd9Sstevel@tonic-gate 			goto done;
4040f012ee0cSGordon Ross 	}
4041ff550d0eSmasputra 
4042f012ee0cSGordon Ross 	if (sti->sti_direct)	/* Never on AF_UNIX */
4043ff550d0eSmasputra 		return (sodgram_direct(so, name, namelen, uiop, flags));
4044ff550d0eSmasputra 
40457c478bd9Sstevel@tonic-gate 	if (so->so_family == AF_UNIX) {
40460f1702c5SYu Xiangning 		if (sti->sti_faddr_noxlate) {
40477c478bd9Sstevel@tonic-gate 			/*
40487c478bd9Sstevel@tonic-gate 			 * Already have a transport internal address. Do not
40497c478bd9Sstevel@tonic-gate 			 * pass any (transport internal) source address.
40507c478bd9Sstevel@tonic-gate 			 */
40517c478bd9Sstevel@tonic-gate 			addr = name;
40527c478bd9Sstevel@tonic-gate 			addrlen = namelen;
40537c478bd9Sstevel@tonic-gate 			src = NULL;
40547c478bd9Sstevel@tonic-gate 			srclen = 0;
4055f012ee0cSGordon Ross 		} else if (flags & MSG_SENDTO_NOXLATE) {
4056f012ee0cSGordon Ross 			/*
4057f012ee0cSGordon Ross 			 * Have an internal form dest. address.
4058f012ee0cSGordon Ross 			 * Pass the source address as usual.
4059f012ee0cSGordon Ross 			 */
4060f012ee0cSGordon Ross 			addr = name;
4061f012ee0cSGordon Ross 			addrlen = namelen;
4062f012ee0cSGordon Ross 			src = sti->sti_laddr_sa;
4063f012ee0cSGordon Ross 			srclen = (socklen_t)sti->sti_laddr_len;
40647c478bd9Sstevel@tonic-gate 		} else {
40657c478bd9Sstevel@tonic-gate 			/*
40667c478bd9Sstevel@tonic-gate 			 * Pass the sockaddr_un source address as an option
40677c478bd9Sstevel@tonic-gate 			 * and translate the remote address.
40687c478bd9Sstevel@tonic-gate 			 *
40690f1702c5SYu Xiangning 			 * Note that this code does not prevent sti_laddr_sa
40707c478bd9Sstevel@tonic-gate 			 * from changing while it is being used. Thus
40717c478bd9Sstevel@tonic-gate 			 * if an unbind+bind occurs concurrently with this
40727c478bd9Sstevel@tonic-gate 			 * send the peer might see a partially new and a
40737c478bd9Sstevel@tonic-gate 			 * partially old "from" address.
40747c478bd9Sstevel@tonic-gate 			 */
40750f1702c5SYu Xiangning 			src = sti->sti_laddr_sa;
40760f1702c5SYu Xiangning 			srclen = (socklen_t)sti->sti_laddr_len;
40777c478bd9Sstevel@tonic-gate 			dprintso(so, 1,
40787c478bd9Sstevel@tonic-gate 			    ("sosend_dgram UNIX: srclen %d, src %p\n",
40797c478bd9Sstevel@tonic-gate 			    srclen, src));
4080f012ee0cSGordon Ross 			/*
4081f012ee0cSGordon Ross 			 * The sendmsg caller specified a destination
4082f012ee0cSGordon Ross 			 * address, which we must translate into our
4083f012ee0cSGordon Ross 			 * internal form.  addr = &sti->sti_ux_taddr
4084f012ee0cSGordon Ross 			 */
40857c478bd9Sstevel@tonic-gate 			error = so_ux_addr_xlate(so, name, namelen,
40867c478bd9Sstevel@tonic-gate 			    (flags & MSG_XPG4_2),
40877c478bd9Sstevel@tonic-gate 			    &addr, &addrlen);
40887c478bd9Sstevel@tonic-gate 			if (error) {
40897c478bd9Sstevel@tonic-gate 				eprintsoline(so, error);
40907c478bd9Sstevel@tonic-gate 				goto done;
40917c478bd9Sstevel@tonic-gate 			}
40927c478bd9Sstevel@tonic-gate 		}
40937c478bd9Sstevel@tonic-gate 	} else {
40947c478bd9Sstevel@tonic-gate 		addr = name;
40957c478bd9Sstevel@tonic-gate 		addrlen = namelen;
40967c478bd9Sstevel@tonic-gate 		src = NULL;
40977c478bd9Sstevel@tonic-gate 		srclen = 0;
40987c478bd9Sstevel@tonic-gate 	}
40997c478bd9Sstevel@tonic-gate 	tudr.PRIM_type = T_UNITDATA_REQ;
41007c478bd9Sstevel@tonic-gate 	tudr.DEST_length = addrlen;
41017c478bd9Sstevel@tonic-gate 	tudr.DEST_offset = (t_scalar_t)sizeof (tudr);
41027c478bd9Sstevel@tonic-gate 	if (srclen == 0) {
41037c478bd9Sstevel@tonic-gate 		tudr.OPT_length = 0;
41047c478bd9Sstevel@tonic-gate 		tudr.OPT_offset = 0;
41057c478bd9Sstevel@tonic-gate 
41067c478bd9Sstevel@tonic-gate 		mp = soallocproto2(&tudr, sizeof (tudr),
4107de8c4a14SErik Nordmark 		    addr, addrlen, 0, _ALLOC_INTR, CRED());
41087c478bd9Sstevel@tonic-gate 		if (mp == NULL) {
41097c478bd9Sstevel@tonic-gate 			/*
41107c478bd9Sstevel@tonic-gate 			 * Caught a signal waiting for memory.
41117c478bd9Sstevel@tonic-gate 			 * Let send* return EINTR.
41127c478bd9Sstevel@tonic-gate 			 */
41137c478bd9Sstevel@tonic-gate 			error = EINTR;
41147c478bd9Sstevel@tonic-gate 			goto done;
41157c478bd9Sstevel@tonic-gate 		}
41167c478bd9Sstevel@tonic-gate 	} else {
41177c478bd9Sstevel@tonic-gate 		/*
41187c478bd9Sstevel@tonic-gate 		 * There is a AF_UNIX sockaddr_un to include as a source
41197c478bd9Sstevel@tonic-gate 		 * address option.
41207c478bd9Sstevel@tonic-gate 		 */
41217c478bd9Sstevel@tonic-gate 		struct T_opthdr toh;
41227c478bd9Sstevel@tonic-gate 		ssize_t size;
41237c478bd9Sstevel@tonic-gate 
41247c478bd9Sstevel@tonic-gate 		tudr.OPT_length = (t_scalar_t)(sizeof (toh) +
41257c478bd9Sstevel@tonic-gate 		    _TPI_ALIGN_TOPT(srclen));
41267c478bd9Sstevel@tonic-gate 		tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) +
41277c478bd9Sstevel@tonic-gate 		    _TPI_ALIGN_TOPT(addrlen));
41287c478bd9Sstevel@tonic-gate 
41297c478bd9Sstevel@tonic-gate 		toh.level = SOL_SOCKET;
41307c478bd9Sstevel@tonic-gate 		toh.name = SO_SRCADDR;
41317c478bd9Sstevel@tonic-gate 		toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr));
41327c478bd9Sstevel@tonic-gate 		toh.status = 0;
41337c478bd9Sstevel@tonic-gate 
41347c478bd9Sstevel@tonic-gate 		size = tudr.OPT_offset + tudr.OPT_length;
41357c478bd9Sstevel@tonic-gate 		mp = soallocproto2(&tudr, sizeof (tudr),
4136de8c4a14SErik Nordmark 		    addr, addrlen, size, _ALLOC_INTR, CRED());
41377c478bd9Sstevel@tonic-gate 		if (mp == NULL) {
41387c478bd9Sstevel@tonic-gate 			/*
41397c478bd9Sstevel@tonic-gate 			 * Caught a signal waiting for memory.
41407c478bd9Sstevel@tonic-gate 			 * Let send* return EINTR.
41417c478bd9Sstevel@tonic-gate 			 */
41427c478bd9Sstevel@tonic-gate 			error = EINTR;
41437c478bd9Sstevel@tonic-gate 			goto done;
41447c478bd9Sstevel@tonic-gate 		}
41457c478bd9Sstevel@tonic-gate 		mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen;
41467c478bd9Sstevel@tonic-gate 		soappendmsg(mp, &toh, sizeof (toh));
41477c478bd9Sstevel@tonic-gate 		soappendmsg(mp, src, srclen);
41487c478bd9Sstevel@tonic-gate 		mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen;
41497c478bd9Sstevel@tonic-gate 		ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
41507c478bd9Sstevel@tonic-gate 	}
41517c478bd9Sstevel@tonic-gate 
4152005d3febSMarek Pospisil 	if (AU_AUDITING())
41537c478bd9Sstevel@tonic-gate 		audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0);
41547c478bd9Sstevel@tonic-gate 
41557c478bd9Sstevel@tonic-gate 	error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0);
41567c478bd9Sstevel@tonic-gate done:
41577c478bd9Sstevel@tonic-gate #ifdef SOCK_DEBUG
41587c478bd9Sstevel@tonic-gate 	if (error) {
41597c478bd9Sstevel@tonic-gate 		eprintsoline(so, error);
41607c478bd9Sstevel@tonic-gate 	}
41617c478bd9Sstevel@tonic-gate #endif /* SOCK_DEBUG */
41627c478bd9Sstevel@tonic-gate 	return (error);
41637c478bd9Sstevel@tonic-gate }
41647c478bd9Sstevel@tonic-gate 
41657c478bd9Sstevel@tonic-gate /*
41667c478bd9Sstevel@tonic-gate  * Sending data on a connected stream socket.
41677c478bd9Sstevel@tonic-gate  * Assumes caller has verified that SS_ISCONNECTED is set.
41687c478bd9Sstevel@tonic-gate  */
41697c478bd9Sstevel@tonic-gate int
41700f1702c5SYu Xiangning sosend_svc(struct sonode *so, struct uio *uiop, t_scalar_t prim, int more,
41717c478bd9Sstevel@tonic-gate     int sflag)
41727c478bd9Sstevel@tonic-gate {
41737c478bd9Sstevel@tonic-gate 	struct T_data_req	tdr;
41747c478bd9Sstevel@tonic-gate 	mblk_t			*mp;
41757c478bd9Sstevel@tonic-gate 	int			error;
41767c478bd9Sstevel@tonic-gate 	ssize_t			iosize;
41770f1702c5SYu Xiangning 	sotpi_info_t		*sti = SOTOTPI(so);
41787c478bd9Sstevel@tonic-gate 
41797c478bd9Sstevel@tonic-gate 	dprintso(so, 1,
41807c478bd9Sstevel@tonic-gate 	    ("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n",
4181903a11ebSrh87107 	    (void *)so, uiop->uio_resid, prim, sflag));
41827c478bd9Sstevel@tonic-gate 
41837c478bd9Sstevel@tonic-gate 	/*
41847c478bd9Sstevel@tonic-gate 	 * Has to be bound and connected. However, since no locks are
41857c478bd9Sstevel@tonic-gate 	 * held the state could have changed after sotpi_sendmsg checked it
41867c478bd9Sstevel@tonic-gate 	 * thus it is not possible to ASSERT on the state.
41877c478bd9Sstevel@tonic-gate 	 */
41887c478bd9Sstevel@tonic-gate 
41897c478bd9Sstevel@tonic-gate 	do {
41907c478bd9Sstevel@tonic-gate 		/*
41917c478bd9Sstevel@tonic-gate 		 * Set the MORE flag if uio_resid does not fit in this
41927c478bd9Sstevel@tonic-gate 		 * message or if the caller passed in "more".
41937c478bd9Sstevel@tonic-gate 		 * Error for transports with zero tidu_size.
41947c478bd9Sstevel@tonic-gate 		 */
41957c478bd9Sstevel@tonic-gate 		tdr.PRIM_type = prim;
41960f1702c5SYu Xiangning 		iosize = sti->sti_tidu_size;
41977c478bd9Sstevel@tonic-gate 		if (iosize <= 0)
41987c478bd9Sstevel@tonic-gate 			return (EMSGSIZE);
41997c478bd9Sstevel@tonic-gate 		if (uiop->uio_resid > iosize) {
42007c478bd9Sstevel@tonic-gate 			tdr.MORE_flag = 1;
42017c478bd9Sstevel@tonic-gate 		} else {
42027c478bd9Sstevel@tonic-gate 			if (more)
42037c478bd9Sstevel@tonic-gate 				tdr.MORE_flag = 1;
42047c478bd9Sstevel@tonic-gate 			else
42057c478bd9Sstevel@tonic-gate 				tdr.MORE_flag = 0;
42067c478bd9Sstevel@tonic-gate 			iosize = uiop->uio_resid;
42077c478bd9Sstevel@tonic-gate 		}
42087c478bd9Sstevel@tonic-gate 		dprintso(so, 1, ("sosend_svc: sending 0x%x %d, %ld bytes\n",
42097c478bd9Sstevel@tonic-gate 		    prim, tdr.MORE_flag, iosize));
4210de8c4a14SErik Nordmark 		mp = soallocproto1(&tdr, sizeof (tdr), 0, _ALLOC_INTR, CRED());
42117c478bd9Sstevel@tonic-gate 		if (mp == NULL) {
42127c478bd9Sstevel@tonic-gate 			/*
42137c478bd9Sstevel@tonic-gate 			 * Caught a signal waiting for memory.
42147c478bd9Sstevel@tonic-gate 			 * Let send* return EINTR.
42157c478bd9Sstevel@tonic-gate 			 */
42167c478bd9Sstevel@tonic-gate 			return (EINTR);
42177c478bd9Sstevel@tonic-gate 		}
42187c478bd9Sstevel@tonic-gate 
42197c478bd9Sstevel@tonic-gate 		error = kstrputmsg(SOTOV(so), mp, uiop, iosize,
42207c478bd9Sstevel@tonic-gate 		    0, sflag | MSG_BAND, 0);
42217c478bd9Sstevel@tonic-gate 		if (error) {
42227c478bd9Sstevel@tonic-gate 			eprintsoline(so, error);
42237c478bd9Sstevel@tonic-gate 			return (error);
42247c478bd9Sstevel@tonic-gate 		}
42257c478bd9Sstevel@tonic-gate 		if (uiop->uio_resid > 0) {
42267c478bd9Sstevel@tonic-gate 			/*
42277c478bd9Sstevel@tonic-gate 			 * Recheck for fatal errors. Fail write even though
42287c478bd9Sstevel@tonic-gate 			 * some data have been written. This is consistent
42297c478bd9Sstevel@tonic-gate 			 * with strwrite semantics and BSD sockets semantics.
42307c478bd9Sstevel@tonic-gate 			 */
42317c478bd9Sstevel@tonic-gate 			if (so->so_state & SS_CANTSENDMORE) {
42327c478bd9Sstevel@tonic-gate 				eprintsoline(so, error);
42337c478bd9Sstevel@tonic-gate 				return (EPIPE);
42347c478bd9Sstevel@tonic-gate 			}
42357c478bd9Sstevel@tonic-gate 			if (so->so_error != 0) {
42367c478bd9Sstevel@tonic-gate 				mutex_enter(&so->so_lock);
42370f1702c5SYu Xiangning 				error = sogeterr(so, B_TRUE);
42387c478bd9Sstevel@tonic-gate 				mutex_exit(&so->so_lock);
42397c478bd9Sstevel@tonic-gate 				if (error != 0) {
42407c478bd9Sstevel@tonic-gate 					eprintsoline(so, error);
42417c478bd9Sstevel@tonic-gate 					return (error);
42427c478bd9Sstevel@tonic-gate 				}
42437c478bd9Sstevel@tonic-gate 			}
42447c478bd9Sstevel@tonic-gate 		}
42457c478bd9Sstevel@tonic-gate 	} while (uiop->uio_resid > 0);
42467c478bd9Sstevel@tonic-gate 	return (0);
42477c478bd9Sstevel@tonic-gate }
42487c478bd9Sstevel@tonic-gate 
42497c478bd9Sstevel@tonic-gate /*
42507c478bd9Sstevel@tonic-gate  * Check the state for errors and call the appropriate send function.
42517c478bd9Sstevel@tonic-gate  *
42527c478bd9Sstevel@tonic-gate  * If MSG_DONTROUTE is set (and SO_DONTROUTE isn't already set)
42537c478bd9Sstevel@tonic-gate  * this function issues a setsockopt to toggle SO_DONTROUTE before and
42547c478bd9Sstevel@tonic-gate  * after sending the message.
4255f012ee0cSGordon Ross  *
4256f012ee0cSGordon Ross  * The caller may optionally specify a destination address, for either
4257f012ee0cSGordon Ross  * stream or datagram sockets.  This table summarizes the cases:
4258f012ee0cSGordon Ross  *
4259f012ee0cSGordon Ross  *    Socket type    Dest. given    Connected    Result
4260f012ee0cSGordon Ross  *    -----------    -----------    ---------    --------------
4261f012ee0cSGordon Ross  *    Stream         *              Yes	         send to conn. addr.
4262f012ee0cSGordon Ross  *    Stream         *              No           error ENOTCONN
4263f012ee0cSGordon Ross  *    Dgram          yes            *            send to given addr.
4264f012ee0cSGordon Ross  *    Dgram          no             yes          send to conn. addr.
4265f012ee0cSGordon Ross  *    Dgram          no             no	         error EDESTADDRREQ
4266f012ee0cSGordon Ross  *
4267f012ee0cSGordon Ross  * There are subtleties around the destination address when using
4268f012ee0cSGordon Ross  * AF_UNIX datagram sockets.  When the sendmsg call specifies the
4269f012ee0cSGordon Ross  * destination address, it's in (struct sockaddr_un) form and we
4270f012ee0cSGordon Ross  * need to translate it to our internal form (struct so_ux_addr).
4271f012ee0cSGordon Ross  *
4272f012ee0cSGordon Ross  * When the sendmsg call does not specify a destination address
4273f012ee0cSGordon Ross  * we're using the peer address saved during sotpi_connect, and
4274f012ee0cSGordon Ross  * that address is already in internal form.  In this case, the
4275f012ee0cSGordon Ross  * (internal only) flag MSG_SENDTO_NOXLATE is set in the flags
4276f012ee0cSGordon Ross  * passed to sosend_dgram or sosend_dgramcmsg to indicate that
4277f012ee0cSGordon Ross  * those functions should skip translation to internal form.
4278f012ee0cSGordon Ross  * Avoiding that translation is not only more efficient, but it's
4279f012ee0cSGordon Ross  * also necessary when a process does a connect on an AF_UNIX
4280f012ee0cSGordon Ross  * datagram socket and then drops privileges.  After the process
4281f012ee0cSGordon Ross  * has dropped privileges, it may no longer be able to look up the
4282f012ee0cSGordon Ross  * external name in the filesystem, but it should still be
4283f012ee0cSGordon Ross  * able to send messages on the connected socket by leaving the
4284f012ee0cSGordon Ross  * destination name unspecified.
4285f012ee0cSGordon Ross  *
4286f012ee0cSGordon Ross  * Yet more subtleties arise with sockets connected by socketpair(),
4287f012ee0cSGordon Ross  * which puts internal form addresses in the fields where normally
4288f012ee0cSGordon Ross  * the external form is found, and sets sti_faddr_noxlate=1, which
4289f012ee0cSGordon Ross  * (like flag MSG_SENDTO_NOXLATE) causes the sosend_dgram functions
4290f012ee0cSGordon Ross  * to skip translation of destination addresses to internal form.
4291f012ee0cSGordon Ross  * However, beware that the flag sti_faddr_noxlate=1 also triggers
4292f012ee0cSGordon Ross  * different behaviour almost everywhere AF_UNIX addresses appear.
42937c478bd9Sstevel@tonic-gate  */
42947c478bd9Sstevel@tonic-gate static int
42950f1702c5SYu Xiangning sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
42960f1702c5SYu Xiangning     struct cred *cr)
42977c478bd9Sstevel@tonic-gate {
42987c478bd9Sstevel@tonic-gate 	int		so_state;
42997c478bd9Sstevel@tonic-gate 	int		so_mode;
43007c478bd9Sstevel@tonic-gate 	int		error;
43017c478bd9Sstevel@tonic-gate 	struct sockaddr *name;
43027c478bd9Sstevel@tonic-gate 	t_uscalar_t	namelen;
43037c478bd9Sstevel@tonic-gate 	int		dontroute;
43047c478bd9Sstevel@tonic-gate 	int		flags;
43050f1702c5SYu Xiangning 	sotpi_info_t	*sti = SOTOTPI(so);
43067c478bd9Sstevel@tonic-gate 
43077c478bd9Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n",
4308903a11ebSrh87107 	    (void *)so, (void *)msg, msg->msg_flags,
43097c478bd9Sstevel@tonic-gate 	    pr_state(so->so_state, so->so_mode), so->so_error));
43107c478bd9Sstevel@tonic-gate 
43110f1702c5SYu Xiangning 	if (so->so_version == SOV_STREAM) {
43120f1702c5SYu Xiangning 		/* The imaginary "sockmod" has been popped - act as a stream */
43130f1702c5SYu Xiangning 		so_update_attrs(so, SOMOD);
43140f1702c5SYu Xiangning 		return (strwrite(SOTOV(so), uiop, cr));
43150f1702c5SYu Xiangning 	}
43160f1702c5SYu Xiangning 
43177c478bd9Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
43187c478bd9Sstevel@tonic-gate 	so_state = so->so_state;
43197c478bd9Sstevel@tonic-gate 
43207c478bd9Sstevel@tonic-gate 	if (so_state & SS_CANTSENDMORE) {
43217c478bd9Sstevel@tonic-gate 		mutex_exit(&so->so_lock);
43227c478bd9Sstevel@tonic-gate 		return (EPIPE);
43237c478bd9Sstevel@tonic-gate 	}
43247c478bd9Sstevel@tonic-gate 
43257c478bd9Sstevel@tonic-gate 	if (so->so_error != 0) {
43260f1702c5SYu Xiangning 		error = sogeterr(so, B_TRUE);
43277c478bd9Sstevel@tonic-gate 		if (error != 0) {
43287c478bd9Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
43297c478bd9Sstevel@tonic-gate 			return (error);
43307c478bd9Sstevel@tonic-gate 		}
43317c478bd9Sstevel@tonic-gate 	}
43327c478bd9Sstevel@tonic-gate 
43337c478bd9Sstevel@tonic-gate 	name = (struct sockaddr *)msg->msg_name;
43347c478bd9Sstevel@tonic-gate 	namelen = msg->msg_namelen;
4335f012ee0cSGordon Ross 	flags = msg->msg_flags;
4336f012ee0cSGordon Ross 
4337f012ee0cSGordon Ross 	/*
4338f012ee0cSGordon Ross 	 * Historically, this function does not validate the flags
4339f012ee0cSGordon Ross 	 * passed in, and any errant bits are ignored.  However,
4340f012ee0cSGordon Ross 	 * we would not want any such errant flag bits accidently
4341f012ee0cSGordon Ross 	 * being treated as one of the internal-only flags, so
4342f012ee0cSGordon Ross 	 * clear the internal-only flag bits.
4343f012ee0cSGordon Ross 	 */
4344f012ee0cSGordon Ross 	flags &= ~MSG_SENDTO_NOXLATE;
43457c478bd9Sstevel@tonic-gate 
43467c478bd9Sstevel@tonic-gate 	so_mode = so->so_mode;
43477c478bd9Sstevel@tonic-gate 
43487c478bd9Sstevel@tonic-gate 	if (name == NULL) {
43497c478bd9Sstevel@tonic-gate 		if (!(so_state & SS_ISCONNECTED)) {
43507c478bd9Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
43517c478bd9Sstevel@tonic-gate 			if (so_mode & SM_CONNREQUIRED)
43527c478bd9Sstevel@tonic-gate 				return (ENOTCONN);
43537c478bd9Sstevel@tonic-gate 			else
43547c478bd9Sstevel@tonic-gate 				return (EDESTADDRREQ);
43557c478bd9Sstevel@tonic-gate 		}
4356f012ee0cSGordon Ross 		/*
4357f012ee0cSGordon Ross 		 * This is a connected socket.
4358f012ee0cSGordon Ross 		 */
43597c478bd9Sstevel@tonic-gate 		if (so_mode & SM_CONNREQUIRED) {
4360f012ee0cSGordon Ross 			/*
4361f012ee0cSGordon Ross 			 * This is a connected STREAM socket,
4362f012ee0cSGordon Ross 			 * destination not specified.
4363f012ee0cSGordon Ross 			 */
43647c478bd9Sstevel@tonic-gate 			name = NULL;
43657c478bd9Sstevel@tonic-gate 			namelen = 0;
43667c478bd9Sstevel@tonic-gate 		} else {
43677c478bd9Sstevel@tonic-gate 			/*
4368f012ee0cSGordon Ross 			 * Datagram send on connected socket with
4369f012ee0cSGordon Ross 			 * the destination name not specified.
4370f012ee0cSGordon Ross 			 * Use the peer address from connect.
43717c478bd9Sstevel@tonic-gate 			 */
4372f012ee0cSGordon Ross 			if (so->so_family == AF_UNIX) {
4373f012ee0cSGordon Ross 				/*
4374f012ee0cSGordon Ross 				 * Use the (internal form) address saved
4375f012ee0cSGordon Ross 				 * in sotpi_connect.  See above.
4376f012ee0cSGordon Ross 				 */
4377f012ee0cSGordon Ross 				name = (void *)&sti->sti_ux_faddr;
4378f012ee0cSGordon Ross 				namelen = sizeof (sti->sti_ux_faddr);
4379f012ee0cSGordon Ross 				flags |= MSG_SENDTO_NOXLATE;
4380f012ee0cSGordon Ross 			} else {
43810f1702c5SYu Xiangning 				ASSERT(sti->sti_faddr_sa);
43820f1702c5SYu Xiangning 				name = sti->sti_faddr_sa;
43830f1702c5SYu Xiangning 				namelen = (t_uscalar_t)sti->sti_faddr_len;
43847c478bd9Sstevel@tonic-gate 			}
4385f012ee0cSGordon Ross 		}
43867c478bd9Sstevel@tonic-gate 	} else {
4387f012ee0cSGordon Ross 		/*
4388f012ee0cSGordon Ross 		 * Sendmsg specifies a destination name
4389f012ee0cSGordon Ross 		 */
43907c478bd9Sstevel@tonic-gate 		if (!(so_state & SS_ISCONNECTED) &&
43917c478bd9Sstevel@tonic-gate 		    (so_mode & SM_CONNREQUIRED)) {
4392f012ee0cSGordon Ross 			/* i.e. TCP not connected */
43937c478bd9Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
43947c478bd9Sstevel@tonic-gate 			return (ENOTCONN);
43957c478bd9Sstevel@tonic-gate 		}
43967c478bd9Sstevel@tonic-gate 		/*
43977c478bd9Sstevel@tonic-gate 		 * Ignore the address on connection-oriented sockets.
43987c478bd9Sstevel@tonic-gate 		 * Just like BSD this code does not generate an error for
43997c478bd9Sstevel@tonic-gate 		 * TCP (a CONNREQUIRED socket) when sending to an address
44007c478bd9Sstevel@tonic-gate 		 * passed in with sendto/sendmsg. Instead the data is
44017c478bd9Sstevel@tonic-gate 		 * delivered on the connection as if no address had been
44027c478bd9Sstevel@tonic-gate 		 * supplied.
44037c478bd9Sstevel@tonic-gate 		 */
44047c478bd9Sstevel@tonic-gate 		if ((so_state & SS_ISCONNECTED) &&
44057c478bd9Sstevel@tonic-gate 		    !(so_mode & SM_CONNREQUIRED)) {
44067c478bd9Sstevel@tonic-gate 			mutex_exit(&so->so_lock);
44077c478bd9Sstevel@tonic-gate 			return (EISCONN);
44087c478bd9Sstevel@tonic-gate 		}
44097c478bd9Sstevel@tonic-gate 		if (!(so_state & SS_ISBOUND)) {
44107c478bd9Sstevel@tonic-gate 			so_lock_single(so);	/* Set SOLOCKED */
44117c478bd9Sstevel@tonic-gate 			error = sotpi_bind(so, NULL, 0,
44120f1702c5SYu Xiangning 			    _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr);
44137c478bd9Sstevel@tonic-gate 			so_unlock_single(so, SOLOCKED);
44147c478bd9Sstevel@tonic-gate 			if (error) {
44157c478bd9Sstevel@tonic-gate 				mutex_exit(&so->so_lock);
44167c478bd9Sstevel@tonic-gate 				eprintsoline(so, error);
44177c478bd9Sstevel@tonic-gate 				return (error);
44187c478bd9Sstevel@tonic-gate 			}
44197c478bd9Sstevel@tonic-gate 		}
44207c478bd9Sstevel@tonic-gate 		/*
44217c478bd9Sstevel@tonic-gate 		 * Handle delayed datagram errors. These are only queued
44227c478bd9Sstevel@tonic-gate 		 * when the application sets SO_DGRAM_ERRIND.
44237c478bd9Sstevel@tonic-gate 		 * Return the error if we are sending to the address
44247c478bd9Sstevel@tonic-gate 		 * that was returned in the last T_UDERROR_IND.
44257c478bd9Sstevel@tonic-gate 		 * If sending to some other address discard the delayed
44267c478bd9Sstevel@tonic-gate 		 * error indication.
44277c478bd9Sstevel@tonic-gate 		 */
44280f1702c5SYu Xiangning 		if (sti->sti_delayed_error) {
44297c478bd9Sstevel@tonic-gate 			struct T_uderror_ind	*tudi;
44307c478bd9Sstevel@tonic-gate 			void			*addr;
44317c478bd9Sstevel@tonic-gate 			t_uscalar_t		addrlen;
44327c478bd9Sstevel@tonic-gate 			boolean_t		match = B_FALSE;
44337c478bd9Sstevel@tonic-gate 
44340f1702c5SYu Xiangning 			ASSERT(sti->sti_eaddr_mp);
44350f1702c5SYu Xiangning 			error = sti->sti_delayed_error;
44360f1702c5SYu Xiangning 			sti->sti_delayed_error = 0;
44370f1702c5SYu Xiangning 			tudi =
44380f1702c5SYu Xiangning 			    (struct T_uderror_ind *)sti->sti_eaddr_mp->b_rptr;
44397c478bd9Sstevel@tonic-gate 			addrlen = tudi->DEST_length;
44400f1702c5SYu Xiangning 			addr = sogetoff(sti->sti_eaddr_mp,
44410f1702c5SYu Xiangning 			    tudi->DEST_offset, addrlen, 1);
44427c478bd9Sstevel@tonic-gate 			ASSERT(addr);	/* Checked by strsock_proto */
44437c478bd9Sstevel@tonic-gate 			switch (so->so_family) {
44447c478bd9Sstevel@tonic-gate 			case AF_INET: {
44457c478bd9Sstevel@tonic-gate 				/* Compare just IP address and port */
44467c478bd9Sstevel@tonic-gate 				sin_t *sin1 = (sin_t *)name;
44477c478bd9Sstevel@tonic-gate 				sin_t *sin2 = (sin_t *)addr;
44487c478bd9Sstevel@tonic-gate 
44497c478bd9Sstevel@tonic-gate 				if (addrlen == sizeof (sin_t) &&
44507c478bd9Sstevel@tonic-gate 				    namelen == addrlen &&
44517c478bd9Sstevel@tonic-gate 				    sin1->sin_port == sin2->sin_port &&
44527c478bd9Sstevel@tonic-gate 				    sin1->sin_addr.s_addr ==
44537c478bd9Sstevel@tonic-gate 				    sin2->sin_addr.s_addr)
44547c478bd9Sstevel@tonic-gate 					match = B_TRUE;
44557c478bd9Sstevel@tonic-gate 				break;
44567c478bd9Sstevel@tonic-gate 			}
44577c478bd9Sstevel@tonic-gate 			case AF_INET6: {
44587c478bd9Sstevel@tonic-gate 				/* Compare just IP address and port. Not flow */
44597c478bd9Sstevel@tonic-gate 				sin6_t *sin1 = (sin6_t *)name;
44607c478bd9Sstevel@tonic-gate 				sin6_t *sin2 = (sin6_t *)addr;
44617c478bd9Sstevel@tonic-gate 
44627c478bd9Sstevel@tonic-gate 				if (addrlen == sizeof (sin6_t) &&
44637c478bd9Sstevel@tonic-gate 				    namelen == addrlen &&
44647c478bd9Sstevel@tonic-gate 				    sin1->sin6_port == sin2->sin6_port &&
44657c478bd9Sstevel@tonic-gate 				    IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr,
44667c478bd9Sstevel@tonic-gate 				    &sin2->sin6_addr))
44677c478bd9Sstevel@tonic-gate 					match = B_TRUE;
44687c478bd9Sstevel@tonic-gate 				break;
44697c478bd9Sstevel@tonic-gate 			}
44707c478bd9Sstevel@tonic-gate 			case AF_UNIX:
44717c478bd9Sstevel@tonic-gate 			default:
44727c478bd9Sstevel@tonic-gate 				if (namelen == addrlen &&
44737c478bd9Sstevel@tonic-gate 				    bcmp(name, addr, namelen) == 0)
44747c478bd9Sstevel@tonic-gate 					match = B_TRUE;
44757c478bd9Sstevel@tonic-gate 			}
44767c478bd9Sstevel@tonic-gate 			if (match) {
44770f1702c5SYu Xiangning 				freemsg(sti->sti_eaddr_mp);
44780f1702c5SYu Xiangning 				sti->sti_eaddr_mp = NULL;
44797c478bd9Sstevel@tonic-gate 				mutex_exit(&so->so_lock);
44807c478bd9Sstevel@tonic-gate #ifdef DEBUG
44817c478bd9Sstevel@tonic-gate 				dprintso(so, 0,
44827c478bd9Sstevel@tonic-gate 				    ("sockfs delayed error %d for %s\n",
44837c478bd9Sstevel@tonic-gate 				    error,
44847c478bd9Sstevel@tonic-gate 				    pr_addr(so->so_family, name, namelen)));
44857c478bd9Sstevel@tonic-gate #endif /* DEBUG */
44867c478bd9Sstevel@tonic-gate 				return (error);
44877c478bd9Sstevel@tonic-gate 			}
44880f1702c5SYu Xiangning 			freemsg(sti->sti_eaddr_mp);
44890f1702c5SYu Xiangning 			sti->sti_eaddr_mp = NULL;
44907c478bd9Sstevel@tonic-gate 		}
44917c478bd9Sstevel@tonic-gate 	}
44927c478bd9Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
44937c478bd9Sstevel@tonic-gate 
44947c478bd9Sstevel@tonic-gate 	dontroute = 0;
44957c478bd9Sstevel@tonic-gate 	if ((flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE)) {
44967c478bd9Sstevel@tonic-gate 		uint32_t	val;
44977c478bd9Sstevel@tonic-gate 
44987c478bd9Sstevel@tonic-gate 		val = 1;
44997c478bd9Sstevel@tonic-gate 		error = sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE,
45000f1702c5SYu Xiangning 		    &val, (t_uscalar_t)sizeof (val), cr);
45017c478bd9Sstevel@tonic-gate 		if (error)
45027c478bd9Sstevel@tonic-gate 			return (error);
45037c478bd9Sstevel@tonic-gate 		dontroute = 1;
45047c478bd9Sstevel@tonic-gate 	}
45057c478bd9Sstevel@tonic-gate 
45067c478bd9Sstevel@tonic-gate 	if ((flags & MSG_OOB) && !(so_mode & SM_EXDATA)) {
45077c478bd9Sstevel@tonic-gate 		error = EOPNOTSUPP;
45087c478bd9Sstevel@tonic-gate 		goto done;
45097c478bd9Sstevel@tonic-gate 	}
45107c478bd9Sstevel@tonic-gate 	if (msg->msg_controllen != 0) {
45117c478bd9Sstevel@tonic-gate 		if (!(so_mode & SM_CONNREQUIRED)) {
45120f1702c5SYu Xiangning 			so_update_attrs(so, SOMOD);
45137c478bd9Sstevel@tonic-gate 			error = sosend_dgramcmsg(so, name, namelen, uiop,
4514ff550d0eSmasputra 			    msg->msg_control, msg->msg_controllen, flags);
45157c478bd9Sstevel@tonic-gate 		} else {
45167c478bd9Sstevel@tonic-gate 			if (flags & MSG_OOB) {
45177c478bd9Sstevel@tonic-gate 				/* Can't generate T_EXDATA_REQ with options */
45187c478bd9Sstevel@tonic-gate 				error = EOPNOTSUPP;
45197c478bd9Sstevel@tonic-gate 				goto done;
45207c478bd9Sstevel@tonic-gate 			}
45210f1702c5SYu Xiangning 			so_update_attrs(so, SOMOD);
45227c478bd9Sstevel@tonic-gate 			error = sosend_svccmsg(so, uiop,
45237c478bd9Sstevel@tonic-gate 			    !(flags & MSG_EOR),
45247c478bd9Sstevel@tonic-gate 			    msg->msg_control, msg->msg_controllen,
45257c478bd9Sstevel@tonic-gate 			    flags);
45267c478bd9Sstevel@tonic-gate 		}
45277c478bd9Sstevel@tonic-gate 		goto done;
45287c478bd9Sstevel@tonic-gate 	}
45297c478bd9Sstevel@tonic-gate 
45300f1702c5SYu Xiangning 	so_update_attrs(so, SOMOD);
45317c478bd9Sstevel@tonic-gate 	if (!(so_mode & SM_CONNREQUIRED)) {
45327c478bd9Sstevel@tonic-gate 		/*
45337c478bd9Sstevel@tonic-gate 		 * If there is no SO_DONTROUTE to turn off return immediately
4534ff550d0eSmasputra 		 * from send_dgram. This can allow tail-call optimizations.
45357c478bd9Sstevel@tonic-gate 		 */
45367c478bd9Sstevel@tonic-gate 		if (!dontroute) {
45377c478bd9Sstevel@tonic-gate 			return (sosend_dgram(so, name, namelen, uiop, flags));
45387c478bd9Sstevel@tonic-gate 		}
45397c478bd9Sstevel@tonic-gate 		error = sosend_dgram(so, name, namelen, uiop, flags);
45407c478bd9Sstevel@tonic-gate 	} else {
45417c478bd9Sstevel@tonic-gate 		t_scalar_t prim;
45427c478bd9Sstevel@tonic-gate 		int sflag;
45437c478bd9Sstevel@tonic-gate 
45447c478bd9Sstevel@tonic-gate 		/* Ignore msg_name in the connected state */
45457c478bd9Sstevel@tonic-gate 		if (flags & MSG_OOB) {
45467c478bd9Sstevel@tonic-gate 			prim = T_EXDATA_REQ;
45477c478bd9Sstevel@tonic-gate 			/*
45487c478bd9Sstevel@tonic-gate 			 * Send down T_EXDATA_REQ even if there is flow
45497c478bd9Sstevel@tonic-gate 			 * control for data.
45507c478bd9Sstevel@tonic-gate 			 */
45517c478bd9Sstevel@tonic-gate 			sflag = MSG_IGNFLOW;
45527c478bd9Sstevel@tonic-gate 		} else {
45537c478bd9Sstevel@tonic-gate 			if (so_mode & SM_BYTESTREAM) {
45547c478bd9Sstevel@tonic-gate 				/* Byte stream transport - use write */
45557c478bd9Sstevel@tonic-gate 				dprintso(so, 1, ("sotpi_sendmsg: write\n"));
45560f1702c5SYu Xiangning 
45570f1702c5SYu Xiangning 				/* Send M_DATA messages */
45580f1702c5SYu Xiangning 				if ((sti->sti_nl7c_flags & NL7C_ENABLED) &&
45590f1702c5SYu Xiangning 				    (error = nl7c_data(so, uiop)) >= 0) {
45600f1702c5SYu Xiangning 					/* NL7C consumed the data */
45610f1702c5SYu Xiangning 					return (error);
45620f1702c5SYu Xiangning 				}
45637c478bd9Sstevel@tonic-gate 				/*
4564ff550d0eSmasputra 				 * If there is no SO_DONTROUTE to turn off,
45650f1702c5SYu Xiangning 				 * sti_direct is on, and there is no flow
4566ff550d0eSmasputra 				 * control, we can take the fast path.
45677c478bd9Sstevel@tonic-gate 				 */
45680f1702c5SYu Xiangning 				if (!dontroute && sti->sti_direct != 0 &&
4569ff550d0eSmasputra 				    canputnext(SOTOV(so)->v_stream->sd_wrq)) {
4570ff550d0eSmasputra 					return (sostream_direct(so, uiop,
45710f1702c5SYu Xiangning 					    NULL, cr));
4572ff550d0eSmasputra 				}
45730f1702c5SYu Xiangning 				error = strwrite(SOTOV(so), uiop, cr);
45747c478bd9Sstevel@tonic-gate 				goto done;
45757c478bd9Sstevel@tonic-gate 			}
45767c478bd9Sstevel@tonic-gate 			prim = T_DATA_REQ;
45777c478bd9Sstevel@tonic-gate 			sflag = 0;
45787c478bd9Sstevel@tonic-gate 		}
45797c478bd9Sstevel@tonic-gate 		/*
45807c478bd9Sstevel@tonic-gate 		 * If there is no SO_DONTROUTE to turn off return immediately
45817c478bd9Sstevel@tonic-gate 		 * from sosend_svc. This can allow tail-call optimizations.
45827c478bd9Sstevel@tonic-gate 		 */
45837c478bd9Sstevel@tonic-gate 		if (!dontroute)
45847c478bd9Sstevel@tonic-gate 			return (sosend_svc(so, uiop, prim,
45857c478bd9Sstevel@tonic-gate 			    !(flags & MSG_EOR), sflag));
45867c478bd9Sstevel@tonic-gate 		error = sosend_svc(so, uiop, prim,
45877c478bd9Sstevel@tonic-gate 		    !(flags & MSG_EOR), sflag);
45887c478bd9Sstevel@tonic-gate 	}
45897c478bd9Sstevel@tonic-gate 	ASSERT(dontroute);
45907c478bd9Sstevel@tonic-gate done:
45917c478bd9Sstevel@tonic-gate 	if (dontroute) {
45927c478bd9Sstevel@tonic-gate 		uint32_t	val;
45937c478bd9Sstevel@tonic-gate 
45947c478bd9Sstevel@tonic-gate 		val = 0;
45957c478bd9Sstevel@tonic-gate 		(void) sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE,
45960f1702c5SYu Xiangning 		    &val, (t_uscalar_t)sizeof (val), cr);
45977c478bd9Sstevel@tonic-gate 	}
45987c478bd9Sstevel@tonic-gate 	return (error);
45997c478bd9Sstevel@tonic-gate }
46007c478bd9Sstevel@tonic-gate 
46017c478bd9Sstevel@tonic-gate /*
46020f1702c5SYu Xiangning  * kstrwritemp() has very similar semantics as that of strwrite().
46030f1702c5SYu Xiangning  * The main difference is it obtains mblks from the caller and also
46040f1702c5SYu Xiangning  * does not do any copy as done in strwrite() from user buffers to
46050f1702c5SYu Xiangning  * kernel buffers.
46060f1702c5SYu Xiangning  *
46070f1702c5SYu Xiangning  * Currently, this routine is used by sendfile to send data allocated
46080f1702c5SYu Xiangning  * within the kernel without any copying. This interface does not use the
46090f1702c5SYu Xiangning  * synchronous stream interface as synch. stream interface implies
46100f1702c5SYu Xiangning  * copying.
46110f1702c5SYu Xiangning  */
46120f1702c5SYu Xiangning int
46130f1702c5SYu Xiangning kstrwritemp(struct vnode *vp, mblk_t *mp, ushort_t fmode)
46140f1702c5SYu Xiangning {
46150f1702c5SYu Xiangning 	struct stdata *stp;
46160f1702c5SYu Xiangning 	struct queue *wqp;
46170f1702c5SYu Xiangning 	mblk_t *newmp;
46180f1702c5SYu Xiangning 	char waitflag;
46190f1702c5SYu Xiangning 	int tempmode;
46200f1702c5SYu Xiangning 	int error = 0;
46210f1702c5SYu Xiangning 	int done = 0;
46220f1702c5SYu Xiangning 	struct sonode *so;
46230f1702c5SYu Xiangning 	boolean_t direct;
46240f1702c5SYu Xiangning 
46250f1702c5SYu Xiangning 	ASSERT(vp->v_stream);
46260f1702c5SYu Xiangning 	stp = vp->v_stream;
46270f1702c5SYu Xiangning 
46280f1702c5SYu Xiangning 	so = VTOSO(vp);
46290f1702c5SYu Xiangning 	direct = _SOTOTPI(so)->sti_direct;
46300f1702c5SYu Xiangning 
46310f1702c5SYu Xiangning 	/*
46320f1702c5SYu Xiangning 	 * This is the sockfs direct fast path. canputnext() need
46330f1702c5SYu Xiangning 	 * not be accurate so we don't grab the sd_lock here. If
46340f1702c5SYu Xiangning 	 * we get flow-controlled, we grab sd_lock just before the
46350f1702c5SYu Xiangning 	 * do..while loop below to emulate what strwrite() does.
46360f1702c5SYu Xiangning 	 */
46370f1702c5SYu Xiangning 	wqp = stp->sd_wrq;
46380f1702c5SYu Xiangning 	if (canputnext(wqp) && direct &&
46390f1702c5SYu Xiangning 	    !(stp->sd_flag & (STWRERR|STRHUP|STPLEX))) {
46400f1702c5SYu Xiangning 		return (sostream_direct(so, NULL, mp, CRED()));
46410f1702c5SYu Xiangning 	} else if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
46420f1702c5SYu Xiangning 		/* Fast check of flags before acquiring the lock */
46430f1702c5SYu Xiangning 		mutex_enter(&stp->sd_lock);
46440f1702c5SYu Xiangning 		error = strgeterr(stp, STWRERR|STRHUP|STPLEX, 0);
46450f1702c5SYu Xiangning 		mutex_exit(&stp->sd_lock);
46460f1702c5SYu Xiangning 		if (error != 0) {
46470f1702c5SYu Xiangning 			if (!(stp->sd_flag & STPLEX) &&
46480f1702c5SYu Xiangning 			    (stp->sd_wput_opt & SW_SIGPIPE)) {
46490f1702c5SYu Xiangning 				error = EPIPE;
46500f1702c5SYu Xiangning 			}
46510f1702c5SYu Xiangning 			return (error);
46520f1702c5SYu Xiangning 		}
46530f1702c5SYu Xiangning 	}
46540f1702c5SYu Xiangning 
46550f1702c5SYu Xiangning 	waitflag = WRITEWAIT;
46560f1702c5SYu Xiangning 	if (stp->sd_flag & OLDNDELAY)
46570f1702c5SYu Xiangning 		tempmode = fmode & ~FNDELAY;
46580f1702c5SYu Xiangning 	else
46590f1702c5SYu Xiangning 		tempmode = fmode;
46600f1702c5SYu Xiangning 
46610f1702c5SYu Xiangning 	mutex_enter(&stp->sd_lock);
46620f1702c5SYu Xiangning 	do {
46630f1702c5SYu Xiangning 		if (canputnext(wqp)) {
46640f1702c5SYu Xiangning 			mutex_exit(&stp->sd_lock);
46650f1702c5SYu Xiangning 			if (stp->sd_wputdatafunc != NULL) {
46660f1702c5SYu Xiangning 				newmp = (stp->sd_wputdatafunc)(vp, mp, NULL,
46670f1702c5SYu Xiangning 				    NULL, NULL, NULL);
46680f1702c5SYu Xiangning 				if (newmp == NULL) {
46690f1702c5SYu Xiangning 					/* The caller will free mp */
46700f1702c5SYu Xiangning 					return (ECOMM);
46710f1702c5SYu Xiangning 				}
46720f1702c5SYu Xiangning 				mp = newmp;
46730f1702c5SYu Xiangning 			}
46740f1702c5SYu Xiangning 			putnext(wqp, mp);
46750f1702c5SYu Xiangning 			return (0);
46760f1702c5SYu Xiangning 		}
46770f1702c5SYu Xiangning 		error = strwaitq(stp, waitflag, (ssize_t)0, tempmode, -1,
46780f1702c5SYu Xiangning 		    &done);
46790f1702c5SYu Xiangning 	} while (error == 0 && !done);
46800f1702c5SYu Xiangning 
46810f1702c5SYu Xiangning 	mutex_exit(&stp->sd_lock);
46820f1702c5SYu Xiangning 	/*
46830f1702c5SYu Xiangning 	 * EAGAIN tells the application to try again. ENOMEM
46840f1702c5SYu Xiangning 	 * is returned only if the memory allocation size
46850f1702c5SYu Xiangning 	 * exceeds the physical limits of the system. ENOMEM
46860f1702c5SYu Xiangning 	 * can't be true here.
46870f1702c5SYu Xiangning 	 */
46880f1702c5SYu Xiangning 	if (error == ENOMEM)
46890f1702c5SYu Xiangning 		error = EAGAIN;
46900f1702c5SYu Xiangning 	return (error);
46910f1702c5SYu Xiangning }
46920f1702c5SYu Xiangning 
46930f1702c5SYu Xiangning /* ARGSUSED */
46940f1702c5SYu Xiangning static int
46950f1702c5SYu Xiangning sotpi_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag,
46960f1702c5SYu Xiangning     struct cred *cr, mblk_t **mpp)
46970f1702c5SYu Xiangning {
46980f1702c5SYu Xiangning 	int error;
46990f1702c5SYu Xiangning 
4700acb55917SPatrick Mooney 	switch (so->so_family) {
4701acb55917SPatrick Mooney 	case AF_INET:
4702acb55917SPatrick Mooney 	case AF_INET6:
4703acb55917SPatrick Mooney 	case AF_UNIX:
4704acb55917SPatrick Mooney 		break;
4705acb55917SPatrick Mooney 	default:
47060f1702c5SYu Xiangning 		return (EAFNOSUPPORT);
47070f1702c5SYu Xiangning 
4708acb55917SPatrick Mooney 	}
4709acb55917SPatrick Mooney 
47100f1702c5SYu Xiangning 	if (so->so_state & SS_CANTSENDMORE)
47110f1702c5SYu Xiangning 		return (EPIPE);
47120f1702c5SYu Xiangning 
47130f1702c5SYu Xiangning 	if (so->so_type != SOCK_STREAM)
47140f1702c5SYu Xiangning 		return (EOPNOTSUPP);
47150f1702c5SYu Xiangning 
47160f1702c5SYu Xiangning 	if ((so->so_state & SS_ISCONNECTED) == 0)
47170f1702c5SYu Xiangning 		return (ENOTCONN);
47180f1702c5SYu Xiangning 
47190f1702c5SYu Xiangning 	error = kstrwritemp(so->so_vnode, *mpp, fflag);
47200f1702c5SYu Xiangning 	if (error == 0)
47210f1702c5SYu Xiangning 		*mpp = NULL;
47220f1702c5SYu Xiangning 	return (error);
47230f1702c5SYu Xiangning }
47240f1702c5SYu Xiangning 
47250f1702c5SYu Xiangning /*
4726ff550d0eSmasputra  * Sending data on a datagram socket.
4727ff550d0eSmasputra  * Assumes caller has verified that SS_ISBOUND etc. are set.
4728ff550d0eSmasputra  */
4729ff550d0eSmasputra /* ARGSUSED */
4730ff550d0eSmasputra static int
4731ff550d0eSmasputra sodgram_direct(struct sonode *so, struct sockaddr *name,
4732ff550d0eSmasputra     socklen_t namelen, struct uio *uiop, int flags)
4733ff550d0eSmasputra {
4734ff550d0eSmasputra 	struct T_unitdata_req	tudr;
4735fc80c0dfSnordmark 	mblk_t			*mp = NULL;
4736ff550d0eSmasputra 	int			error = 0;
4737ff550d0eSmasputra 	void			*addr;
4738ff550d0eSmasputra 	socklen_t		addrlen;
4739ff550d0eSmasputra 	ssize_t			len;
4740ff550d0eSmasputra 	struct stdata		*stp = SOTOV(so)->v_stream;
4741ff550d0eSmasputra 	int			so_state;
4742ff550d0eSmasputra 	queue_t			*udp_wq;
4743fc80c0dfSnordmark 	boolean_t		connected;
4744fc80c0dfSnordmark 	mblk_t			*mpdata = NULL;
47450f1702c5SYu Xiangning 	sotpi_info_t		*sti = SOTOTPI(so);
4746005d3febSMarek Pospisil 	uint32_t		auditing = AU_AUDITING();
4747ff550d0eSmasputra 
4748ff550d0eSmasputra 	ASSERT(name != NULL && namelen != 0);
4749ff550d0eSmasputra 	ASSERT(!(so->so_mode & SM_CONNREQUIRED));
4750ff550d0eSmasputra 	ASSERT(!(so->so_mode & SM_EXDATA));
4751ff550d0eSmasputra 	ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6);
4752ff550d0eSmasputra 	ASSERT(SOTOV(so)->v_type == VSOCK);
4753ff550d0eSmasputra 
4754ff550d0eSmasputra 	/* Caller checked for proper length */
4755ff550d0eSmasputra 	len = uiop->uio_resid;
47560f1702c5SYu Xiangning 	ASSERT(len <= sti->sti_tidu_size);
4757ff550d0eSmasputra 
4758ff550d0eSmasputra 	/* Length and family checks have been done by caller */
4759ff550d0eSmasputra 	ASSERT(name->sa_family == so->so_family);
4760ff550d0eSmasputra 	ASSERT(so->so_family == AF_INET ||
4761ff550d0eSmasputra 	    (namelen == (socklen_t)sizeof (struct sockaddr_in6)));
4762ff550d0eSmasputra 	ASSERT(so->so_family == AF_INET6 ||
4763ff550d0eSmasputra 	    (namelen == (socklen_t)sizeof (struct sockaddr_in)));
4764ff550d0eSmasputra 
4765ff550d0eSmasputra 	addr = name;
4766ff550d0eSmasputra 	addrlen = namelen;
4767ff550d0eSmasputra 
4768ff550d0eSmasputra 	if (stp->sd_sidp != NULL &&
4769ff550d0eSmasputra 	    (error = straccess(stp, JCWRITE)) != 0)
4770ff550d0eSmasputra 		goto done;
4771ff550d0eSmasputra 
4772ff550d0eSmasputra 	so_state = so->so_state;
4773ff550d0eSmasputra 
4774fc80c0dfSnordmark 	connected = so_state & SS_ISCONNECTED;
4775fc80c0dfSnordmark 	if (!connected) {
4776fc80c0dfSnordmark 		tudr.PRIM_type = T_UNITDATA_REQ;
4777fc80c0dfSnordmark 		tudr.DEST_length = addrlen;
4778fc80c0dfSnordmark 		tudr.DEST_offset = (t_scalar_t)sizeof (tudr);
4779fc80c0dfSnordmark 		tudr.OPT_length = 0;
4780fc80c0dfSnordmark 		tudr.OPT_offset = 0;
4781fc80c0dfSnordmark 
4782fc80c0dfSnordmark 		mp = soallocproto2(&tudr, sizeof (tudr), addr, addrlen, 0,
4783de8c4a14SErik Nordmark 		    _ALLOC_INTR, CRED());
4784fc80c0dfSnordmark 		if (mp == NULL) {
4785fc80c0dfSnordmark 			/*
4786fc80c0dfSnordmark 			 * Caught a signal waiting for memory.
4787fc80c0dfSnordmark 			 * Let send* return EINTR.
4788fc80c0dfSnordmark 			 */
4789fc80c0dfSnordmark 			error = EINTR;
4790fc80c0dfSnordmark 			goto done;
4791fc80c0dfSnordmark 		}
4792fc80c0dfSnordmark 	}
4793fc80c0dfSnordmark 
4794ff550d0eSmasputra 	/*
4795ff550d0eSmasputra 	 * For UDP we don't break up the copyin into smaller pieces
4796ff550d0eSmasputra 	 * as in the TCP case.  That means if ENOMEM is returned by
4797ff550d0eSmasputra 	 * mcopyinuio() then the uio vector has not been modified at
4798ff550d0eSmasputra 	 * all and we fallback to either strwrite() or kstrputmsg()
4799ff550d0eSmasputra 	 * below.  Note also that we never generate priority messages
4800ff550d0eSmasputra 	 * from here.
4801ff550d0eSmasputra 	 */
4802ff550d0eSmasputra 	udp_wq = stp->sd_wrq->q_next;
4803ff550d0eSmasputra 	if (canput(udp_wq) &&
4804fc80c0dfSnordmark 	    (mpdata = mcopyinuio(stp, uiop, -1, -1, &error)) != NULL) {
4805fc80c0dfSnordmark 		ASSERT(DB_TYPE(mpdata) == M_DATA);
4806ff550d0eSmasputra 		ASSERT(uiop->uio_resid == 0);
4807fc80c0dfSnordmark 		if (!connected)
4808fc80c0dfSnordmark 			linkb(mp, mpdata);
4809fc80c0dfSnordmark 		else
4810fc80c0dfSnordmark 			mp = mpdata;
4811005d3febSMarek Pospisil 		if (auditing)
4812ff550d0eSmasputra 			audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0);
4813fc80c0dfSnordmark 
4814*52aec5b9SDan McDonald 		/* Always returns 0... */
4815*52aec5b9SDan McDonald 		return (udp_wput(udp_wq, mp));
4816ff550d0eSmasputra 	}
4817fc80c0dfSnordmark 
4818fc80c0dfSnordmark 	ASSERT(mpdata == NULL);
4819fc80c0dfSnordmark 	if (error != 0 && error != ENOMEM) {
4820fc80c0dfSnordmark 		freemsg(mp);
4821ff550d0eSmasputra 		return (error);
4822fc80c0dfSnordmark 	}
4823ff550d0eSmasputra 
4824ff550d0eSmasputra 	/*
4825ff550d0eSmasputra 	 * For connected, let strwrite() handle the blocking case.
4826ff550d0eSmasputra 	 * Otherwise we fall thru and use kstrputmsg().
4827ff550d0eSmasputra 	 */
4828fc80c0dfSnordmark 	if (connected)
4829ff550d0eSmasputra 		return (strwrite(SOTOV(so), uiop, CRED()));
4830ff550d0eSmasputra 
4831005d3febSMarek Pospisil 	if (auditing)
4832ff550d0eSmasputra 		audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0);
4833ff550d0eSmasputra 
4834ff550d0eSmasputra 	error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0);
4835ff550d0eSmasputra done:
4836ff550d0eSmasputra #ifdef SOCK_DEBUG
4837ff550d0eSmasputra 	if (error != 0) {
4838ff550d0eSmasputra 		eprintsoline(so, error);
4839ff550d0eSmasputra 	}
4840ff550d0eSmasputra #endif /* SOCK_DEBUG */
4841ff550d0eSmasputra 	return (error);
4842ff550d0eSmasputra }
4843ff550d0eSmasputra 
4844ff550d0eSmasputra int
4845ff550d0eSmasputra sostream_direct(struct sonode *so, struct uio *uiop, mblk_t *mp, cred_t *cr)
4846ff550d0eSmasputra {
4847ff550d0eSmasputra 	struct stdata *stp = SOTOV(so)->v_stream;
4848ff550d0eSmasputra 	ssize_t iosize, rmax, maxblk;
4849ff550d0eSmasputra 	queue_t *tcp_wq = stp->sd_wrq->q_next;
4850c28749e9Skais 	mblk_t *newmp;
4851ff550d0eSmasputra 	int error = 0, wflag = 0;
4852ff550d0eSmasputra 
4853ff550d0eSmasputra 	ASSERT(so->so_mode & SM_BYTESTREAM);
4854ff550d0eSmasputra 	ASSERT(SOTOV(so)->v_type == VSOCK);
4855ff550d0eSmasputra 
4856ff550d0eSmasputra 	if (stp->sd_sidp != NULL &&
4857ff550d0eSmasputra 	    (error = straccess(stp, JCWRITE)) != 0)
4858ff550d0eSmasputra 		return (error);
4859ff550d0eSmasputra 
4860ff550d0eSmasputra 	if (uiop == NULL) {
4861ff550d0eSmasputra 		/*
4862ff550d0eSmasputra 		 * kstrwritemp() should have checked sd_flag and
4863ff550d0eSmasputra 		 * flow-control before coming here.  If we end up
4864ff550d0eSmasputra 		 * here it means that we can simply pass down the
4865ff550d0eSmasputra 		 * data to tcp.
4866ff550d0eSmasputra 		 */
4867ff550d0eSmasputra 		ASSERT(mp != NULL);
4868c28749e9Skais 		if (stp->sd_wputdatafunc != NULL) {
4869c28749e9Skais 			newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL,
4870c28749e9Skais 			    NULL, NULL, NULL);
4871c28749e9Skais 			if (newmp == NULL) {
4872c28749e9Skais 				/* The caller will free mp */
4873c28749e9Skais 				return (ECOMM);
4874c28749e9Skais 			}
4875c28749e9Skais 			mp = newmp;
4876c28749e9Skais 		}
4877*52aec5b9SDan McDonald 		/* Always returns 0... */
4878*52aec5b9SDan McDonald 		return (tcp_wput(tcp_wq, mp));
4879ff550d0eSmasputra 	}
4880ff550d0eSmasputra 
4881ff550d0eSmasputra 	/* Fallback to strwrite() to do proper error handling */
4882ff550d0eSmasputra 	if (stp->sd_flag & (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))
4883ff550d0eSmasputra 		return (strwrite(SOTOV(so), uiop, cr));
4884ff550d0eSmasputra 
4885ff550d0eSmasputra 	rmax = stp->sd_qn_maxpsz;
4886ff550d0eSmasputra 	ASSERT(rmax >= 0 || rmax == INFPSZ);
4887ff550d0eSmasputra 	if (rmax == 0 || uiop->uio_resid <= 0)
4888ff550d0eSmasputra 		return (0);
4889ff550d0eSmasputra 
4890ff550d0eSmasputra 	if (rmax == INFPSZ)
4891ff550d0eSmasputra 		rmax = uiop->uio_resid;
4892ff550d0eSmasputra 
4893ff550d0eSmasputra 	maxblk = stp->sd_maxblk;
4894ff550d0eSmasputra 
4895ff550d0eSmasputra 	for (;;) {
4896ff550d0eSmasputra 		iosize = MIN(uiop->uio_resid, rmax);
4897ff550d0eSmasputra 
4898ff550d0eSmasputra 		mp = mcopyinuio(stp, uiop, iosize, maxblk, &error);
4899ff550d0eSmasputra 		if (mp == NULL) {
4900ff550d0eSmasputra 			/*
4901ff550d0eSmasputra 			 * Fallback to strwrite() for ENOMEM; if this
4902ff550d0eSmasputra 			 * is our first time in this routine and the uio
4903ff550d0eSmasputra 			 * vector has not been modified, we will end up
4904ff550d0eSmasputra 			 * calling strwrite() without any flag set.
4905ff550d0eSmasputra 			 */
4906ff550d0eSmasputra 			if (error == ENOMEM)
4907ff550d0eSmasputra 				goto slow_send;
4908ff550d0eSmasputra 			else
4909ff550d0eSmasputra 				return (error);
4910ff550d0eSmasputra 		}
4911ff550d0eSmasputra 		ASSERT(uiop->uio_resid >= 0);
4912ff550d0eSmasputra 		/*
4913ff550d0eSmasputra 		 * If mp is non-NULL and ENOMEM is set, it means that
4914ff550d0eSmasputra 		 * mcopyinuio() was able to break down some of the user
4915ff550d0eSmasputra 		 * data into one or more mblks.  Send the partial data
4916ff550d0eSmasputra 		 * to tcp and let the rest be handled in strwrite().
4917ff550d0eSmasputra 		 */
4918ff550d0eSmasputra 		ASSERT(error == 0 || error == ENOMEM);
4919c28749e9Skais 		if (stp->sd_wputdatafunc != NULL) {
4920c28749e9Skais 			newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL,
4921c28749e9Skais 			    NULL, NULL, NULL);
4922c28749e9Skais 			if (newmp == NULL) {
4923c28749e9Skais 				/* The caller will free mp */
4924c28749e9Skais 				return (ECOMM);
4925c28749e9Skais 			}
4926c28749e9Skais 			mp = newmp;
4927c28749e9Skais 		}
4928*52aec5b9SDan McDonald 		(void) tcp_wput(tcp_wq, mp);	/* Always returns 0 anyway. */
4929ff550d0eSmasputra 
4930ff550d0eSmasputra 		wflag |= NOINTR;
4931ff550d0eSmasputra 
4932ff550d0eSmasputra 		if (uiop->uio_resid == 0) {	/* No more data; we're done */
4933ff550d0eSmasputra 			ASSERT(error == 0);
4934ff550d0eSmasputra 			break;
4935ff550d0eSmasputra 		} else if (error == ENOMEM || !canput(tcp_wq) || (stp->sd_flag &
4936ff550d0eSmasputra 		    (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))) {
4937ff550d0eSmasputra slow_send:
4938ff550d0eSmasputra 			/*
4939ff550d0eSmasputra 			 * We were able to send down partial data using
4940ff550d0eSmasputra 			 * the direct call interface, but are now relying
4941ff550d0eSmasputra 			 * on strwrite() to handle the non-fastpath cases.
4942ff550d0eSmasputra 			 * If the socket is blocking we will sleep in
4943ff550d0eSmasputra 			 * strwaitq() until write is permitted, otherwise,
4944ff550d0eSmasputra 			 * we will need to return the amount of bytes
4945ff550d0eSmasputra 			 * written so far back to the app.  This is the
4946ff550d0eSmasputra 			 * reason why we pass NOINTR flag to strwrite()
4947ff550d0eSmasputra 			 * for non-blocking socket, because we don't want
4948ff550d0eSmasputra 			 * to return EAGAIN when portion of the user data
4949ff550d0eSmasputra 			 * has actually been sent down.
4950ff550d0eSmasputra 			 */
4951ff550d0eSmasputra 			return (strwrite_common(SOTOV(so), uiop, cr, wflag));
4952ff550d0eSmasputra 		}
4953ff550d0eSmasputra 	}
4954ff550d0eSmasputra 	return (0);
4955ff550d0eSmasputra }
4956ff550d0eSmasputra 
4957ff550d0eSmasputra /*
49580f1702c5SYu Xiangning  * Update sti_faddr by asking the transport (unless AF_UNIX).
49597c478bd9Sstevel@tonic-gate  */
49600f1702c5SYu Xiangning /* ARGSUSED */
49617c478bd9Sstevel@tonic-gate int
49620f1702c5SYu Xiangning sotpi_getpeername(struct sonode *so, struct sockaddr *name, socklen_t *namelen,
49630f1702c5SYu Xiangning     boolean_t accept, struct cred *cr)
49647c478bd9Sstevel@tonic-gate {
49657c478bd9Sstevel@tonic-gate 	struct strbuf	strbuf;
49667c478bd9Sstevel@tonic-gate 	int		error = 0, res;
49677c478bd9Sstevel@tonic-gate 	void		*addr;
49687c478bd9Sstevel@tonic-gate 	t_uscalar_t	addrlen;
49697c478bd9Sstevel@tonic-gate 	k_sigset_t	smask;
49700f1702c5SYu Xiangning 	sotpi_info_t	*sti = SOTOTPI(so);
49717c478bd9Sstevel@tonic-gate 
49727c478bd9Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_getpeername(%p) %s\n",
4973903a11ebSrh87107 	    (void *)so, pr_state(so->so_state, so->so_mode)));
49747c478bd9Sstevel@tonic-gate 
49750f1702c5SYu Xiangning 	ASSERT(*namelen > 0);
49767c478bd9Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
49777c478bd9Sstevel@tonic-gate 	so_lock_single(so);	/* Set SOLOCKED */
49780f1702c5SYu Xiangning 
49790f1702c5SYu Xiangning 	if (accept) {
49800f1702c5SYu Xiangning 		bcopy(sti->sti_faddr_sa, name,
49810f1702c5SYu Xiangning 		    MIN(*namelen, sti->sti_faddr_len));
49820f1702c5SYu Xiangning 		*namelen = sti->sti_faddr_noxlate ? 0: sti->sti_faddr_len;
49830f1702c5SYu Xiangning 		goto done;
49840f1702c5SYu Xiangning 	}
49850f1702c5SYu Xiangning 
49867c478bd9Sstevel@tonic-gate 	if (!(so->so_state & SS_ISCONNECTED)) {
49877c478bd9Sstevel@tonic-gate 		error = ENOTCONN;
49887c478bd9Sstevel@tonic-gate 		goto done;
49897c478bd9Sstevel@tonic-gate 	}
49907c478bd9Sstevel@tonic-gate 	/* Added this check for X/Open */
49917c478bd9Sstevel@tonic-gate 	if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
49927c478bd9Sstevel@tonic-gate 		error = EINVAL;
49937c478bd9Sstevel@tonic-gate 		if (xnet_check_print) {
49947c478bd9Sstevel@tonic-gate 			printf("sockfs: X/Open getpeername check => EINVAL\n");
49957c478bd9Sstevel@tonic-gate 		}
49967c478bd9Sstevel@tonic-gate 		goto done;
49977c478bd9Sstevel@tonic-gate 	}
49980f1702c5SYu Xiangning 
49990f1702c5SYu Xiangning 	if (sti->sti_faddr_valid) {
50000f1702c5SYu Xiangning 		bcopy(sti->sti_faddr_sa, name,
50010f1702c5SYu Xiangning 		    MIN(*namelen, sti->sti_faddr_len));
50020f1702c5SYu Xiangning 		*namelen = sti->sti_faddr_noxlate ? 0: sti->sti_faddr_len;
50030f1702c5SYu Xiangning 		goto done;
50040f1702c5SYu Xiangning 	}
50050f1702c5SYu Xiangning 
50067c478bd9Sstevel@tonic-gate #ifdef DEBUG
50077c478bd9Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_getpeername (local): %s\n",
50080f1702c5SYu Xiangning 	    pr_addr(so->so_family, sti->sti_faddr_sa,
50090f1702c5SYu Xiangning 	    (t_uscalar_t)sti->sti_faddr_len)));
50107c478bd9Sstevel@tonic-gate #endif /* DEBUG */
50117c478bd9Sstevel@tonic-gate 
50122caf0dcdSrshoaib 	if (so->so_family == AF_UNIX) {
50137c478bd9Sstevel@tonic-gate 		/* Transport has different name space - return local info */
50140f1702c5SYu Xiangning 		if (sti->sti_faddr_noxlate)
50150f1702c5SYu Xiangning 			*namelen = 0;
50167c478bd9Sstevel@tonic-gate 		error = 0;
50177c478bd9Sstevel@tonic-gate 		goto done;
50187c478bd9Sstevel@tonic-gate 	}
50197c478bd9Sstevel@tonic-gate 
50200f1702c5SYu Xiangning 	ASSERT(so->so_family != AF_UNIX && sti->sti_faddr_noxlate == 0);
50210f1702c5SYu Xiangning 
50220f1702c5SYu Xiangning 	ASSERT(sti->sti_faddr_sa);
50237c478bd9Sstevel@tonic-gate 	/* Allocate local buffer to use with ioctl */
50240f1702c5SYu Xiangning 	addrlen = (t_uscalar_t)sti->sti_faddr_maxlen;
50257c478bd9Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
50267c478bd9Sstevel@tonic-gate 	addr = kmem_alloc(addrlen, KM_SLEEP);
50277c478bd9Sstevel@tonic-gate 
50287c478bd9Sstevel@tonic-gate 	/*
50297c478bd9Sstevel@tonic-gate 	 * Issue TI_GETPEERNAME with signals masked.
50300f1702c5SYu Xiangning 	 * Put the result in sti_faddr_sa so that getpeername works after
50317c478bd9Sstevel@tonic-gate 	 * a shutdown(output).
50327c478bd9Sstevel@tonic-gate 	 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted
50337c478bd9Sstevel@tonic-gate 	 * back to the socket.
50347c478bd9Sstevel@tonic-gate 	 */
50357c478bd9Sstevel@tonic-gate 	strbuf.buf = addr;
50367c478bd9Sstevel@tonic-gate 	strbuf.maxlen = addrlen;
50377c478bd9Sstevel@tonic-gate 	strbuf.len = 0;
50387c478bd9Sstevel@tonic-gate 
50397c478bd9Sstevel@tonic-gate 	sigintr(&smask, 0);
50407c478bd9Sstevel@tonic-gate 	res = 0;
50410f1702c5SYu Xiangning 	ASSERT(cr);
50427c478bd9Sstevel@tonic-gate 	error = strioctl(SOTOV(so), TI_GETPEERNAME, (intptr_t)&strbuf,
50430f1702c5SYu Xiangning 	    0, K_TO_K, cr, &res);
50447c478bd9Sstevel@tonic-gate 	sigunintr(&smask);
50457c478bd9Sstevel@tonic-gate 
50467c478bd9Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
50477c478bd9Sstevel@tonic-gate 	/*
50487c478bd9Sstevel@tonic-gate 	 * If there is an error record the error in so_error put don't fail
50497c478bd9Sstevel@tonic-gate 	 * the getpeername. Instead fallback on the recorded
50500f1702c5SYu Xiangning 	 * sti->sti_faddr_sa.
50517c478bd9Sstevel@tonic-gate 	 */
50527c478bd9Sstevel@tonic-gate 	if (error) {
50537c478bd9Sstevel@tonic-gate 		/*
50547c478bd9Sstevel@tonic-gate 		 * Various stream head errors can be returned to the ioctl.
50557c478bd9Sstevel@tonic-gate 		 * However, it is impossible to determine which ones of
50567c478bd9Sstevel@tonic-gate 		 * these are really socket level errors that were incorrectly
50577c478bd9Sstevel@tonic-gate 		 * consumed by the ioctl. Thus this code silently ignores the
50587c478bd9Sstevel@tonic-gate 		 * error - to code explicitly does not reinstate the error
50597c478bd9Sstevel@tonic-gate 		 * using soseterror().
50607c478bd9Sstevel@tonic-gate 		 * Experiments have shows that at least this set of
50617c478bd9Sstevel@tonic-gate 		 * errors are reported and should not be reinstated on the
50627c478bd9Sstevel@tonic-gate 		 * socket:
50637c478bd9Sstevel@tonic-gate 		 *	EINVAL	E.g. if an I_LINK was in effect when
50647c478bd9Sstevel@tonic-gate 		 *		getpeername was called.
50657c478bd9Sstevel@tonic-gate 		 *	EPIPE	The ioctl error semantics prefer the write
50667c478bd9Sstevel@tonic-gate 		 *		side error over the read side error.
50677c478bd9Sstevel@tonic-gate 		 *	ENOTCONN The transport just got disconnected but
50687c478bd9Sstevel@tonic-gate 		 *		sockfs had not yet seen the T_DISCON_IND
50697c478bd9Sstevel@tonic-gate 		 *		when issuing the ioctl.
50707c478bd9Sstevel@tonic-gate 		 */
50717c478bd9Sstevel@tonic-gate 		error = 0;
50727c478bd9Sstevel@tonic-gate 	} else if (res == 0 && strbuf.len > 0 &&
50737c478bd9Sstevel@tonic-gate 	    (so->so_state & SS_ISCONNECTED)) {
50740f1702c5SYu Xiangning 		ASSERT(strbuf.len <= (int)sti->sti_faddr_maxlen);
50750f1702c5SYu Xiangning 		sti->sti_faddr_len = (socklen_t)strbuf.len;
50760f1702c5SYu Xiangning 		bcopy(addr, sti->sti_faddr_sa, sti->sti_faddr_len);
50770f1702c5SYu Xiangning 		sti->sti_faddr_valid = 1;
50780f1702c5SYu Xiangning 
50790f1702c5SYu Xiangning 		bcopy(addr, name, MIN(*namelen, sti->sti_faddr_len));
50800f1702c5SYu Xiangning 		*namelen = sti->sti_faddr_len;
50817c478bd9Sstevel@tonic-gate 	}
50827c478bd9Sstevel@tonic-gate 	kmem_free(addr, addrlen);
50837c478bd9Sstevel@tonic-gate #ifdef DEBUG
50847c478bd9Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_getpeername (tp): %s\n",
50850f1702c5SYu Xiangning 	    pr_addr(so->so_family, sti->sti_faddr_sa,
50860f1702c5SYu Xiangning 	    (t_uscalar_t)sti->sti_faddr_len)));
50877c478bd9Sstevel@tonic-gate #endif /* DEBUG */
50887c478bd9Sstevel@tonic-gate done:
50897c478bd9Sstevel@tonic-gate 	so_unlock_single(so, SOLOCKED);
50907c478bd9Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
50917c478bd9Sstevel@tonic-gate 	return (error);
50927c478bd9Sstevel@tonic-gate }
50937c478bd9Sstevel@tonic-gate 
50947c478bd9Sstevel@tonic-gate /*
50950f1702c5SYu Xiangning  * Update sti_laddr by asking the transport (unless AF_UNIX).
50967c478bd9Sstevel@tonic-gate  */
50977c478bd9Sstevel@tonic-gate int
50980f1702c5SYu Xiangning sotpi_getsockname(struct sonode *so, struct sockaddr *name, socklen_t *namelen,
50990f1702c5SYu Xiangning     struct cred *cr)
51007c478bd9Sstevel@tonic-gate {
51017c478bd9Sstevel@tonic-gate 	struct strbuf	strbuf;
51027c478bd9Sstevel@tonic-gate 	int		error = 0, res;
51037c478bd9Sstevel@tonic-gate 	void		*addr;
51047c478bd9Sstevel@tonic-gate 	t_uscalar_t	addrlen;
51057c478bd9Sstevel@tonic-gate 	k_sigset_t	smask;
51060f1702c5SYu Xiangning 	sotpi_info_t	*sti = SOTOTPI(so);
51077c478bd9Sstevel@tonic-gate 
51087c478bd9Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_getsockname(%p) %s\n",
5109903a11ebSrh87107 	    (void *)so, pr_state(so->so_state, so->so_mode)));
51107c478bd9Sstevel@tonic-gate 
51110f1702c5SYu Xiangning 	ASSERT(*namelen > 0);
51127c478bd9Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
51137c478bd9Sstevel@tonic-gate 	so_lock_single(so);	/* Set SOLOCKED */
51140f1702c5SYu Xiangning 
51157c478bd9Sstevel@tonic-gate #ifdef DEBUG
51160f1702c5SYu Xiangning 
51177c478bd9Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_getsockname (local): %s\n",
51180f1702c5SYu Xiangning 	    pr_addr(so->so_family, sti->sti_laddr_sa,
51190f1702c5SYu Xiangning 	    (t_uscalar_t)sti->sti_laddr_len)));
51207c478bd9Sstevel@tonic-gate #endif /* DEBUG */
51210f1702c5SYu Xiangning 	if (sti->sti_laddr_valid) {
51220f1702c5SYu Xiangning 		bcopy(sti->sti_laddr_sa, name,
51230f1702c5SYu Xiangning 		    MIN(*namelen, sti->sti_laddr_len));
51240f1702c5SYu Xiangning 		*namelen = sti->sti_laddr_len;
51250f1702c5SYu Xiangning 		goto done;
51260f1702c5SYu Xiangning 	}
51270f1702c5SYu Xiangning 
51287c478bd9Sstevel@tonic-gate 	if (so->so_family == AF_UNIX) {
5129dfc0fed8SRobert Mustacchi 		/*
5130dfc0fed8SRobert Mustacchi 		 * Transport has different name space - return local info. If we
5131dfc0fed8SRobert Mustacchi 		 * have enough space, let consumers know the family.
5132dfc0fed8SRobert Mustacchi 		 */
5133dfc0fed8SRobert Mustacchi 		if (*namelen >= sizeof (sa_family_t)) {
5134dfc0fed8SRobert Mustacchi 			name->sa_family = AF_UNIX;
5135dfc0fed8SRobert Mustacchi 			*namelen = sizeof (sa_family_t);
5136dfc0fed8SRobert Mustacchi 		} else {
5137a5adac4dSYu Xiangning 			*namelen = 0;
5138dfc0fed8SRobert Mustacchi 		}
5139dfc0fed8SRobert Mustacchi 		error = 0;
51407c478bd9Sstevel@tonic-gate 		goto done;
51417c478bd9Sstevel@tonic-gate 	}
514218cbc865Sblu 	if (!(so->so_state & SS_ISBOUND)) {
514318cbc865Sblu 		/* If not bound, then nothing to return. */
514418cbc865Sblu 		error = 0;
514518cbc865Sblu 		goto done;
514618cbc865Sblu 	}
51470f1702c5SYu Xiangning 
51487c478bd9Sstevel@tonic-gate 	/* Allocate local buffer to use with ioctl */
51490f1702c5SYu Xiangning 	addrlen = (t_uscalar_t)sti->sti_laddr_maxlen;
51507c478bd9Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
51517c478bd9Sstevel@tonic-gate 	addr = kmem_alloc(addrlen, KM_SLEEP);
51527c478bd9Sstevel@tonic-gate 
51537c478bd9Sstevel@tonic-gate 	/*
51547c478bd9Sstevel@tonic-gate 	 * Issue TI_GETMYNAME with signals masked.
51550f1702c5SYu Xiangning 	 * Put the result in sti_laddr_sa so that getsockname works after
51567c478bd9Sstevel@tonic-gate 	 * a shutdown(output).
51577c478bd9Sstevel@tonic-gate 	 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted
51587c478bd9Sstevel@tonic-gate 	 * back to the socket.
51597c478bd9Sstevel@tonic-gate 	 */
51607c478bd9Sstevel@tonic-gate 	strbuf.buf = addr;
51617c478bd9Sstevel@tonic-gate 	strbuf.maxlen = addrlen;
51627c478bd9Sstevel@tonic-gate 	strbuf.len = 0;
51637c478bd9Sstevel@tonic-gate 
51647c478bd9Sstevel@tonic-gate 	sigintr(&smask, 0);
51657c478bd9Sstevel@tonic-gate 	res = 0;
51660f1702c5SYu Xiangning 	ASSERT(cr);
51677c478bd9Sstevel@tonic-gate 	error = strioctl(SOTOV(so), TI_GETMYNAME, (intptr_t)&strbuf,
51680f1702c5SYu Xiangning 	    0, K_TO_K, cr, &res);
51697c478bd9Sstevel@tonic-gate 	sigunintr(&smask);
51707c478bd9Sstevel@tonic-gate 
51717c478bd9Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
51727c478bd9Sstevel@tonic-gate 	/*
51737c478bd9Sstevel@tonic-gate 	 * If there is an error record the error in so_error put don't fail
51747c478bd9Sstevel@tonic-gate 	 * the getsockname. Instead fallback on the recorded
51750f1702c5SYu Xiangning 	 * sti->sti_laddr_sa.
51767c478bd9Sstevel@tonic-gate 	 */
51777c478bd9Sstevel@tonic-gate 	if (error) {
51787c478bd9Sstevel@tonic-gate 		/*
51797c478bd9Sstevel@tonic-gate 		 * Various stream head errors can be returned to the ioctl.
51807c478bd9Sstevel@tonic-gate 		 * However, it is impossible to determine which ones of
51817c478bd9Sstevel@tonic-gate 		 * these are really socket level errors that were incorrectly
51827c478bd9Sstevel@tonic-gate 		 * consumed by the ioctl. Thus this code silently ignores the
51837c478bd9Sstevel@tonic-gate 		 * error - to code explicitly does not reinstate the error
51847c478bd9Sstevel@tonic-gate 		 * using soseterror().
51857c478bd9Sstevel@tonic-gate 		 * Experiments have shows that at least this set of
51867c478bd9Sstevel@tonic-gate 		 * errors are reported and should not be reinstated on the
51877c478bd9Sstevel@tonic-gate 		 * socket:
51887c478bd9Sstevel@tonic-gate 		 *	EINVAL	E.g. if an I_LINK was in effect when
51897c478bd9Sstevel@tonic-gate 		 *		getsockname was called.
51907c478bd9Sstevel@tonic-gate 		 *	EPIPE	The ioctl error semantics prefer the write
51917c478bd9Sstevel@tonic-gate 		 *		side error over the read side error.
51927c478bd9Sstevel@tonic-gate 		 */
51937c478bd9Sstevel@tonic-gate 		error = 0;
51947c478bd9Sstevel@tonic-gate 	} else if (res == 0 && strbuf.len > 0 &&
51957c478bd9Sstevel@tonic-gate 	    (so->so_state & SS_ISBOUND)) {
51960f1702c5SYu Xiangning 		ASSERT(strbuf.len <= (int)sti->sti_laddr_maxlen);
51970f1702c5SYu Xiangning 		sti->sti_laddr_len = (socklen_t)strbuf.len;
51980f1702c5SYu Xiangning 		bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len);
51990f1702c5SYu Xiangning 		sti->sti_laddr_valid = 1;
52000f1702c5SYu Xiangning 
52010f1702c5SYu Xiangning 		bcopy(addr, name, MIN(sti->sti_laddr_len, *namelen));
52020f1702c5SYu Xiangning 		*namelen = sti->sti_laddr_len;
52037c478bd9Sstevel@tonic-gate 	}
52047c478bd9Sstevel@tonic-gate 	kmem_free(addr, addrlen);
52057c478bd9Sstevel@tonic-gate #ifdef DEBUG
52067c478bd9Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_getsockname (tp): %s\n",
52070f1702c5SYu Xiangning 	    pr_addr(so->so_family, sti->sti_laddr_sa,
52080f1702c5SYu Xiangning 	    (t_uscalar_t)sti->sti_laddr_len)));
52097c478bd9Sstevel@tonic-gate #endif /* DEBUG */
52107c478bd9Sstevel@tonic-gate done:
52117c478bd9Sstevel@tonic-gate 	so_unlock_single(so, SOLOCKED);
52127c478bd9Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
52137c478bd9Sstevel@tonic-gate 	return (error);
52147c478bd9Sstevel@tonic-gate }
52157c478bd9Sstevel@tonic-gate 
52167c478bd9Sstevel@tonic-gate /*
52177c478bd9Sstevel@tonic-gate  * Get socket options. For SOL_SOCKET options some options are handled
52187c478bd9Sstevel@tonic-gate  * by the sockfs while others use the value recorded in the sonode as a
52197c478bd9Sstevel@tonic-gate  * fallback should the T_SVR4_OPTMGMT_REQ fail.
52207c478bd9Sstevel@tonic-gate  *
52217c478bd9Sstevel@tonic-gate  * On the return most *optlenp bytes are copied to optval.
52227c478bd9Sstevel@tonic-gate  */
52230f1702c5SYu Xiangning /* ARGSUSED */
52247c478bd9Sstevel@tonic-gate int
52257c478bd9Sstevel@tonic-gate sotpi_getsockopt(struct sonode *so, int level, int option_name,
52260f1702c5SYu Xiangning     void *optval, socklen_t *optlenp, int flags, struct cred *cr)
52277c478bd9Sstevel@tonic-gate {
52287c478bd9Sstevel@tonic-gate 	struct T_optmgmt_req	optmgmt_req;
52297c478bd9Sstevel@tonic-gate 	struct T_optmgmt_ack	*optmgmt_ack;
52307c478bd9Sstevel@tonic-gate 	struct opthdr		oh;
52317c478bd9Sstevel@tonic-gate 	struct opthdr		*opt_res;
52327c478bd9Sstevel@tonic-gate 	mblk_t			*mp = NULL;
52337c478bd9Sstevel@tonic-gate 	int			error = 0;
52347c478bd9Sstevel@tonic-gate 	void			*option = NULL;	/* Set if fallback value */
52357c478bd9Sstevel@tonic-gate 	t_uscalar_t		maxlen = *optlenp;
52367c478bd9Sstevel@tonic-gate 	t_uscalar_t		len;
52377c478bd9Sstevel@tonic-gate 	uint32_t		value;
52380f1702c5SYu Xiangning 	struct timeval		tmo_val; /* used for SO_RCVTIMEO, SO_SNDTIMEO */
523922238f73Sshenjian 	struct timeval32	tmo_val32;
52400f1702c5SYu Xiangning 	struct so_snd_bufinfo	snd_bufinfo;	/* used for zero copy */
52417c478bd9Sstevel@tonic-gate 
52427c478bd9Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n",
5243903a11ebSrh87107 	    (void *)so, level, option_name, optval, (void *)optlenp,
52447c478bd9Sstevel@tonic-gate 	    pr_state(so->so_state, so->so_mode)));
52457c478bd9Sstevel@tonic-gate 
52467c478bd9Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
52477c478bd9Sstevel@tonic-gate 	so_lock_single(so);	/* Set SOLOCKED */
52487c478bd9Sstevel@tonic-gate 
52497c478bd9Sstevel@tonic-gate 	/*
52507c478bd9Sstevel@tonic-gate 	 * Check for SOL_SOCKET options.
52517c478bd9Sstevel@tonic-gate 	 * Certain SOL_SOCKET options are returned directly whereas
52527c478bd9Sstevel@tonic-gate 	 * others only provide a default (fallback) value should
52537c478bd9Sstevel@tonic-gate 	 * the T_SVR4_OPTMGMT_REQ fail.
52547c478bd9Sstevel@tonic-gate 	 */
52557c478bd9Sstevel@tonic-gate 	if (level == SOL_SOCKET) {
52567c478bd9Sstevel@tonic-gate 		/* Check parameters */
52577c478bd9Sstevel@tonic-gate 		switch (option_name) {
52587c478bd9Sstevel@tonic-gate 		case SO_TYPE:
52597c478bd9Sstevel@tonic-gate 		case SO_ERROR:
52607c478bd9Sstevel@tonic-gate 		case SO_DEBUG:
52617c478bd9Sstevel@tonic-gate 		case SO_ACCEPTCONN:
52627c478bd9Sstevel@tonic-gate 		case SO_REUSEADDR:
52637c478bd9Sstevel@tonic-gate 		case SO_KEEPALIVE:
52647c478bd9Sstevel@tonic-gate 		case SO_DONTROUTE:
52657c478bd9Sstevel@tonic-gate 		case SO_BROADCAST:
52667c478bd9Sstevel@tonic-gate 		case SO_USELOOPBACK:
52677c478bd9Sstevel@tonic-gate 		case SO_OOBINLINE:
52687c478bd9Sstevel@tonic-gate 		case SO_SNDBUF:
52697c478bd9Sstevel@tonic-gate 		case SO_RCVBUF:
52707c478bd9Sstevel@tonic-gate #ifdef notyet
52717c478bd9Sstevel@tonic-gate 		case SO_SNDLOWAT:
52727c478bd9Sstevel@tonic-gate 		case SO_RCVLOWAT:
52737c478bd9Sstevel@tonic-gate #endif /* notyet */
527488cda078Skcpoon 		case SO_DOMAIN:
52757c478bd9Sstevel@tonic-gate 		case SO_DGRAM_ERRIND:
52767c478bd9Sstevel@tonic-gate 			if (maxlen < (t_uscalar_t)sizeof (int32_t)) {
52777c478bd9Sstevel@tonic-gate 				error = EINVAL;
52787c478bd9Sstevel@tonic-gate 				eprintsoline(so, error);
52797c478bd9Sstevel@tonic-gate 				goto done2;
52807c478bd9Sstevel@tonic-gate 			}
52817c478bd9Sstevel@tonic-gate 			break;
52820f1702c5SYu Xiangning 		case SO_RCVTIMEO:
52830f1702c5SYu Xiangning 		case SO_SNDTIMEO:
5284e5083e81Sshenjian 			if (get_udatamodel() == DATAMODEL_NONE ||
5285e5083e81Sshenjian 			    get_udatamodel() == DATAMODEL_NATIVE) {
528622238f73Sshenjian 				if (maxlen < sizeof (struct timeval)) {
52870f1702c5SYu Xiangning 					error = EINVAL;
52880f1702c5SYu Xiangning 					eprintsoline(so, error);
52890f1702c5SYu Xiangning 					goto done2;
52900f1702c5SYu Xiangning 				}
529122238f73Sshenjian 			} else {
529222238f73Sshenjian 				if (maxlen < sizeof (struct timeval32)) {
529322238f73Sshenjian 					error = EINVAL;
529422238f73Sshenjian 					eprintsoline(so, error);
529522238f73Sshenjian 					goto done2;
529622238f73Sshenjian 				}
529722238f73Sshenjian 
529822238f73Sshenjian 			}
52990f1702c5SYu Xiangning 			break;
53007c478bd9Sstevel@tonic-gate 		case SO_LINGER:
53017c478bd9Sstevel@tonic-gate 			if (maxlen < (t_uscalar_t)sizeof (struct linger)) {
53027c478bd9Sstevel@tonic-gate 				error = EINVAL;
53037c478bd9Sstevel@tonic-gate 				eprintsoline(so, error);
53047c478bd9Sstevel@tonic-gate 				goto done2;
53057c478bd9Sstevel@tonic-gate 			}
53067c478bd9Sstevel@tonic-gate 			break;
53070f1702c5SYu Xiangning 		case SO_SND_BUFINFO:
53080f1702c5SYu Xiangning 			if (maxlen < (t_uscalar_t)
53090f1702c5SYu Xiangning 			    sizeof (struct so_snd_bufinfo)) {
53100f1702c5SYu Xiangning 				error = EINVAL;
53110f1702c5SYu Xiangning 				eprintsoline(so, error);
53120f1702c5SYu Xiangning 				goto done2;
53130f1702c5SYu Xiangning 			}
53140f1702c5SYu Xiangning 			break;
53157c478bd9Sstevel@tonic-gate 		}
53167c478bd9Sstevel@tonic-gate 
53177c478bd9Sstevel@tonic-gate 		len = (t_uscalar_t)sizeof (uint32_t);	/* Default */
53187c478bd9Sstevel@tonic-gate 
53197c478bd9Sstevel@tonic-gate 		switch (option_name) {
53207c478bd9Sstevel@tonic-gate 		case SO_TYPE:
53217c478bd9Sstevel@tonic-gate 			value = so->so_type;
53227c478bd9Sstevel@tonic-gate 			option = &value;
53237c478bd9Sstevel@tonic-gate 			goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */
53247c478bd9Sstevel@tonic-gate 
53257c478bd9Sstevel@tonic-gate 		case SO_ERROR:
53260f1702c5SYu Xiangning 			value = sogeterr(so, B_TRUE);
53277c478bd9Sstevel@tonic-gate 			option = &value;
53287c478bd9Sstevel@tonic-gate 			goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */
53297c478bd9Sstevel@tonic-gate 
53307c478bd9Sstevel@tonic-gate 		case SO_ACCEPTCONN:
53317c478bd9Sstevel@tonic-gate 			if (so->so_state & SS_ACCEPTCONN)
53327c478bd9Sstevel@tonic-gate 				value = SO_ACCEPTCONN;
53337c478bd9Sstevel@tonic-gate 			else
53347c478bd9Sstevel@tonic-gate 				value = 0;
53357c478bd9Sstevel@tonic-gate #ifdef DEBUG
53367c478bd9Sstevel@tonic-gate 			if (value) {
53377c478bd9Sstevel@tonic-gate 				dprintso(so, 1,
53387c478bd9Sstevel@tonic-gate 				    ("sotpi_getsockopt: 0x%x is set\n",
53397c478bd9Sstevel@tonic-gate 				    option_name));
53407c478bd9Sstevel@tonic-gate 			} else {
53417c478bd9Sstevel@tonic-gate 				dprintso(so, 1,
53427c478bd9Sstevel@tonic-gate 				    ("sotpi_getsockopt: 0x%x not set\n",
53437c478bd9Sstevel@tonic-gate 				    option_name));
53447c478bd9Sstevel@tonic-gate 			}
53457c478bd9Sstevel@tonic-gate #endif /* DEBUG */
53467c478bd9Sstevel@tonic-gate 			option = &value;
53477c478bd9Sstevel@tonic-gate 			goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */
53487c478bd9Sstevel@tonic-gate 
53497c478bd9Sstevel@tonic-gate 		case SO_DEBUG:
53507c478bd9Sstevel@tonic-gate 		case SO_REUSEADDR:
53517c478bd9Sstevel@tonic-gate 		case SO_KEEPALIVE:
53527c478bd9Sstevel@tonic-gate 		case SO_DONTROUTE:
53537c478bd9Sstevel@tonic-gate 		case SO_BROADCAST:
53547c478bd9Sstevel@tonic-gate 		case SO_USELOOPBACK:
53557c478bd9Sstevel@tonic-gate 		case SO_OOBINLINE:
53567c478bd9Sstevel@tonic-gate 		case SO_DGRAM_ERRIND:
53577c478bd9Sstevel@tonic-gate 			value = (so->so_options & option_name);
53587c478bd9Sstevel@tonic-gate #ifdef DEBUG
53597c478bd9Sstevel@tonic-gate 			if (value) {
53607c478bd9Sstevel@tonic-gate 				dprintso(so, 1,
53617c478bd9Sstevel@tonic-gate 				    ("sotpi_getsockopt: 0x%x is set\n",
53627c478bd9Sstevel@tonic-gate 				    option_name));
53637c478bd9Sstevel@tonic-gate 			} else {
53647c478bd9Sstevel@tonic-gate 				dprintso(so, 1,
53657c478bd9Sstevel@tonic-gate 				    ("sotpi_getsockopt: 0x%x not set\n",
53667c478bd9Sstevel@tonic-gate 				    option_name));
53677c478bd9Sstevel@tonic-gate 			}
53687c478bd9Sstevel@tonic-gate #endif /* DEBUG */
53697c478bd9Sstevel@tonic-gate 			option = &value;
53707c478bd9Sstevel@tonic-gate 			goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */
53717c478bd9Sstevel@tonic-gate 
53727c478bd9Sstevel@tonic-gate 		/*
53737c478bd9Sstevel@tonic-gate 		 * The following options are only returned by sockfs when the
53747c478bd9Sstevel@tonic-gate 		 * T_SVR4_OPTMGMT_REQ fails.
53757c478bd9Sstevel@tonic-gate 		 */
53767c478bd9Sstevel@tonic-gate 		case SO_LINGER:
53777c478bd9Sstevel@tonic-gate 			option = &so->so_linger;
53787c478bd9Sstevel@tonic-gate 			len = (t_uscalar_t)sizeof (struct linger);
53797c478bd9Sstevel@tonic-gate 			break;
53807c478bd9Sstevel@tonic-gate 		case SO_SNDBUF: {
53817c478bd9Sstevel@tonic-gate 			ssize_t lvalue;
53827c478bd9Sstevel@tonic-gate 
53837c478bd9Sstevel@tonic-gate 			/*
53847c478bd9Sstevel@tonic-gate 			 * If the option has not been set then get a default
53857c478bd9Sstevel@tonic-gate 			 * value from the read queue. This value is
53867c478bd9Sstevel@tonic-gate 			 * returned if the transport fails
53877c478bd9Sstevel@tonic-gate 			 * the T_SVR4_OPTMGMT_REQ.
53887c478bd9Sstevel@tonic-gate 			 */
53897c478bd9Sstevel@tonic-gate 			lvalue = so->so_sndbuf;
53907c478bd9Sstevel@tonic-gate 			if (lvalue == 0) {
53917c478bd9Sstevel@tonic-gate 				mutex_exit(&so->so_lock);
53927c478bd9Sstevel@tonic-gate 				(void) strqget(strvp2wq(SOTOV(so))->q_next,
53937c478bd9Sstevel@tonic-gate 				    QHIWAT, 0, &lvalue);
53947c478bd9Sstevel@tonic-gate 				mutex_enter(&so->so_lock);
53957c478bd9Sstevel@tonic-gate 				dprintso(so, 1,
53967c478bd9Sstevel@tonic-gate 				    ("got SO_SNDBUF %ld from q\n", lvalue));
53977c478bd9Sstevel@tonic-gate 			}
53987c478bd9Sstevel@tonic-gate 			value = (int)lvalue;
53997c478bd9Sstevel@tonic-gate 			option = &value;
54007c478bd9Sstevel@tonic-gate 			len = (t_uscalar_t)sizeof (so->so_sndbuf);
54017c478bd9Sstevel@tonic-gate 			break;
54027c478bd9Sstevel@tonic-gate 		}
54037c478bd9Sstevel@tonic-gate 		case SO_RCVBUF: {
54047c478bd9Sstevel@tonic-gate 			ssize_t lvalue;
54057c478bd9Sstevel@tonic-gate 
54067c478bd9Sstevel@tonic-gate 			/*
54077c478bd9Sstevel@tonic-gate 			 * If the option has not been set then get a default
54087c478bd9Sstevel@tonic-gate 			 * value from the read queue. This value is
54097c478bd9Sstevel@tonic-gate 			 * returned if the transport fails
54107c478bd9Sstevel@tonic-gate 			 * the T_SVR4_OPTMGMT_REQ.
54117c478bd9Sstevel@tonic-gate 			 *
54127c478bd9Sstevel@tonic-gate 			 * XXX If SO_RCVBUF has been set and this is an
54137c478bd9Sstevel@tonic-gate 			 * XPG 4.2 application then do not ask the transport
54147c478bd9Sstevel@tonic-gate 			 * since the transport might adjust the value and not
54157c478bd9Sstevel@tonic-gate 			 * return exactly what was set by the application.
54167c478bd9Sstevel@tonic-gate 			 * For non-XPG 4.2 application we return the value
54177c478bd9Sstevel@tonic-gate 			 * that the transport is actually using.
54187c478bd9Sstevel@tonic-gate 			 */
54197c478bd9Sstevel@tonic-gate 			lvalue = so->so_rcvbuf;
54207c478bd9Sstevel@tonic-gate 			if (lvalue == 0) {
54217c478bd9Sstevel@tonic-gate 				mutex_exit(&so->so_lock);
54227c478bd9Sstevel@tonic-gate 				(void) strqget(RD(strvp2wq(SOTOV(so))),
54237c478bd9Sstevel@tonic-gate 				    QHIWAT, 0, &lvalue);
54247c478bd9Sstevel@tonic-gate 				mutex_enter(&so->so_lock);
54257c478bd9Sstevel@tonic-gate 				dprintso(so, 1,
54267c478bd9Sstevel@tonic-gate 				    ("got SO_RCVBUF %ld from q\n", lvalue));
54277c478bd9Sstevel@tonic-gate 			} else if (flags & _SOGETSOCKOPT_XPG4_2) {
54287c478bd9Sstevel@tonic-gate 				value = (int)lvalue;
54297c478bd9Sstevel@tonic-gate 				option = &value;
54307c478bd9Sstevel@tonic-gate 				goto copyout;	/* skip asking transport */
54317c478bd9Sstevel@tonic-gate 			}
54327c478bd9Sstevel@tonic-gate 			value = (int)lvalue;
54337c478bd9Sstevel@tonic-gate 			option = &value;
54347c478bd9Sstevel@tonic-gate 			len = (t_uscalar_t)sizeof (so->so_rcvbuf);
54357c478bd9Sstevel@tonic-gate 			break;
54367c478bd9Sstevel@tonic-gate 		}
543788cda078Skcpoon 		case SO_DOMAIN:
543888cda078Skcpoon 			value = so->so_family;
543988cda078Skcpoon 			option = &value;
544088cda078Skcpoon 			goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */
544188cda078Skcpoon 
54427c478bd9Sstevel@tonic-gate #ifdef notyet
54437c478bd9Sstevel@tonic-gate 		/*
54447c478bd9Sstevel@tonic-gate 		 * We do not implement the semantics of these options
54457c478bd9Sstevel@tonic-gate 		 * thus we shouldn't implement the options either.
54467c478bd9Sstevel@tonic-gate 		 */
54477c478bd9Sstevel@tonic-gate 		case SO_SNDLOWAT:
54487c478bd9Sstevel@tonic-gate 			value = so->so_sndlowat;
54497c478bd9Sstevel@tonic-gate 			option = &value;
54507c478bd9Sstevel@tonic-gate 			break;
54517c478bd9Sstevel@tonic-gate 		case SO_RCVLOWAT:
54527c478bd9Sstevel@tonic-gate 			value = so->so_rcvlowat;
54537c478bd9Sstevel@tonic-gate 			option = &value;
54547c478bd9Sstevel@tonic-gate 			break;
54557c478bd9Sstevel@tonic-gate #endif /* notyet */
54560f1702c5SYu Xiangning 		case SO_SNDTIMEO:
54570f1702c5SYu Xiangning 		case SO_RCVTIMEO: {
54580f1702c5SYu Xiangning 			clock_t val;
545922238f73Sshenjian 
54600f1702c5SYu Xiangning 			if (option_name == SO_RCVTIMEO)
54610f1702c5SYu Xiangning 				val = drv_hztousec(so->so_rcvtimeo);
54620f1702c5SYu Xiangning 			else
54630f1702c5SYu Xiangning 				val = drv_hztousec(so->so_sndtimeo);
54640f1702c5SYu Xiangning 			tmo_val.tv_sec = val / (1000 * 1000);
54650f1702c5SYu Xiangning 			tmo_val.tv_usec = val % (1000 * 1000);
5466e5083e81Sshenjian 			if (get_udatamodel() == DATAMODEL_NONE ||
5467e5083e81Sshenjian 			    get_udatamodel() == DATAMODEL_NATIVE) {
54680f1702c5SYu Xiangning 				option = &tmo_val;
546922238f73Sshenjian 				len = sizeof (struct timeval);
547022238f73Sshenjian 			} else {
547122238f73Sshenjian 				TIMEVAL_TO_TIMEVAL32(&tmo_val32, &tmo_val);
547222238f73Sshenjian 				option = &tmo_val32;
547322238f73Sshenjian 				len = sizeof (struct timeval32);
547422238f73Sshenjian 			}
54750f1702c5SYu Xiangning 			break;
54760f1702c5SYu Xiangning 		}
54770f1702c5SYu Xiangning 		case SO_SND_BUFINFO: {
54780f1702c5SYu Xiangning 			snd_bufinfo.sbi_wroff =
54790f1702c5SYu Xiangning 			    (so->so_proto_props).sopp_wroff;
54800f1702c5SYu Xiangning 			snd_bufinfo.sbi_maxblk =
54810f1702c5SYu Xiangning 			    (so->so_proto_props).sopp_maxblk;
54820f1702c5SYu Xiangning 			snd_bufinfo.sbi_maxpsz =
54830f1702c5SYu Xiangning 			    (so->so_proto_props).sopp_maxpsz;
54840f1702c5SYu Xiangning 			snd_bufinfo.sbi_tail =
54850f1702c5SYu Xiangning 			    (so->so_proto_props).sopp_tail;
54860f1702c5SYu Xiangning 			option = &snd_bufinfo;
54870f1702c5SYu Xiangning 			len = (t_uscalar_t)sizeof (struct so_snd_bufinfo);
54880f1702c5SYu Xiangning 			break;
54890f1702c5SYu Xiangning 		}
54907c478bd9Sstevel@tonic-gate 		}
54917c478bd9Sstevel@tonic-gate 	}
54927c478bd9Sstevel@tonic-gate 
54937c478bd9Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
54947c478bd9Sstevel@tonic-gate 
54957c478bd9Sstevel@tonic-gate 	/* Send request */
54967c478bd9Sstevel@tonic-gate 	optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ;
54977c478bd9Sstevel@tonic-gate 	optmgmt_req.MGMT_flags = T_CHECK;
54987c478bd9Sstevel@tonic-gate 	optmgmt_req.OPT_length = (t_scalar_t)(sizeof (oh) + maxlen);
54997c478bd9Sstevel@tonic-gate 	optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req);
55007c478bd9Sstevel@tonic-gate 
55017c478bd9Sstevel@tonic-gate 	oh.level = level;
55027c478bd9Sstevel@tonic-gate 	oh.name = option_name;
55037c478bd9Sstevel@tonic-gate 	oh.len = maxlen;
55047c478bd9Sstevel@tonic-gate 
55057c478bd9Sstevel@tonic-gate 	mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req),
5506de8c4a14SErik Nordmark 	    &oh, sizeof (oh), NULL, maxlen, 0, _ALLOC_SLEEP, cr);
55077c478bd9Sstevel@tonic-gate 	/* Let option management work in the presence of data flow control */
55087c478bd9Sstevel@tonic-gate 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
55097c478bd9Sstevel@tonic-gate 	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
55107c478bd9Sstevel@tonic-gate 	mp = NULL;
55117c478bd9Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
55127c478bd9Sstevel@tonic-gate 	if (error) {
55137c478bd9Sstevel@tonic-gate 		eprintsoline(so, error);
55147c478bd9Sstevel@tonic-gate 		goto done2;
55157c478bd9Sstevel@tonic-gate 	}
55167c478bd9Sstevel@tonic-gate 	error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK,
55177c478bd9Sstevel@tonic-gate 	    (t_uscalar_t)(sizeof (*optmgmt_ack) + sizeof (*opt_res)), &mp, 0);
55187c478bd9Sstevel@tonic-gate 	if (error) {
55197c478bd9Sstevel@tonic-gate 		if (option != NULL) {
55207c478bd9Sstevel@tonic-gate 			/* We have a fallback value */
55217c478bd9Sstevel@tonic-gate 			error = 0;
55227c478bd9Sstevel@tonic-gate 			goto copyout;
55237c478bd9Sstevel@tonic-gate 		}
55247c478bd9Sstevel@tonic-gate 		eprintsoline(so, error);
55257c478bd9Sstevel@tonic-gate 		goto done2;
55267c478bd9Sstevel@tonic-gate 	}
55277c478bd9Sstevel@tonic-gate 	ASSERT(mp);
55287c478bd9Sstevel@tonic-gate 	optmgmt_ack = (struct T_optmgmt_ack *)mp->b_rptr;
55297c478bd9Sstevel@tonic-gate 	opt_res = (struct opthdr *)sogetoff(mp, optmgmt_ack->OPT_offset,
55307c478bd9Sstevel@tonic-gate 	    optmgmt_ack->OPT_length, __TPI_ALIGN_SIZE);
55317c478bd9Sstevel@tonic-gate 	if (opt_res == NULL) {
55327c478bd9Sstevel@tonic-gate 		if (option != NULL) {
55337c478bd9Sstevel@tonic-gate 			/* We have a fallback value */
55347c478bd9Sstevel@tonic-gate 			error = 0;
55357c478bd9Sstevel@tonic-gate 			goto copyout;
55367c478bd9Sstevel@tonic-gate 		}
55377c478bd9Sstevel@tonic-gate 		error = EPROTO;
55387c478bd9Sstevel@tonic-gate 		eprintsoline(so, error);
55397c478bd9Sstevel@tonic-gate 		goto done;
55407c478bd9Sstevel@tonic-gate 	}
55417c478bd9Sstevel@tonic-gate 	option = &opt_res[1];
55427c478bd9Sstevel@tonic-gate 
55437c478bd9Sstevel@tonic-gate 	/* check to ensure that the option is within bounds */
55447c478bd9Sstevel@tonic-gate 	if (((uintptr_t)option + opt_res->len < (uintptr_t)option) ||
55457c478bd9Sstevel@tonic-gate 	    (uintptr_t)option + opt_res->len > (uintptr_t)mp->b_wptr) {
55467c478bd9Sstevel@tonic-gate 		if (option != NULL) {
55477c478bd9Sstevel@tonic-gate 			/* We have a fallback value */
55487c478bd9Sstevel@tonic-gate 			error = 0;
55497c478bd9Sstevel@tonic-gate 			goto copyout;
55507c478bd9Sstevel@tonic-gate 		}
55517c478bd9Sstevel@tonic-gate 		error = EPROTO;
55527c478bd9Sstevel@tonic-gate 		eprintsoline(so, error);
55537c478bd9Sstevel@tonic-gate 		goto done;
55547c478bd9Sstevel@tonic-gate 	}
55557c478bd9Sstevel@tonic-gate 
55567c478bd9Sstevel@tonic-gate 	len = opt_res->len;
55577c478bd9Sstevel@tonic-gate 
55587c478bd9Sstevel@tonic-gate copyout: {
55597c478bd9Sstevel@tonic-gate 		t_uscalar_t size = MIN(len, maxlen);
55607c478bd9Sstevel@tonic-gate 		bcopy(option, optval, size);
55617c478bd9Sstevel@tonic-gate 		bcopy(&size, optlenp, sizeof (size));
55627c478bd9Sstevel@tonic-gate 	}
55637c478bd9Sstevel@tonic-gate done:
55647c478bd9Sstevel@tonic-gate 	freemsg(mp);
55657c478bd9Sstevel@tonic-gate done2:
55667c478bd9Sstevel@tonic-gate 	so_unlock_single(so, SOLOCKED);
55677c478bd9Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
55680f1702c5SYu Xiangning 
55697c478bd9Sstevel@tonic-gate 	return (error);
55707c478bd9Sstevel@tonic-gate }
55717c478bd9Sstevel@tonic-gate 
55727c478bd9Sstevel@tonic-gate /*
55737c478bd9Sstevel@tonic-gate  * Set socket options. All options are passed down in a T_SVR4_OPTMGMT_REQ.
55747c478bd9Sstevel@tonic-gate  * SOL_SOCKET options are also recorded in the sonode. A setsockopt for
55757c478bd9Sstevel@tonic-gate  * SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails -
55767c478bd9Sstevel@tonic-gate  * setsockopt has to work even if the transport does not support the option.
55777c478bd9Sstevel@tonic-gate  */
55780f1702c5SYu Xiangning /* ARGSUSED */
55797c478bd9Sstevel@tonic-gate int
55807c478bd9Sstevel@tonic-gate sotpi_setsockopt(struct sonode *so, int level, int option_name,
55810f1702c5SYu Xiangning     const void *optval, t_uscalar_t optlen, struct cred *cr)
55827c478bd9Sstevel@tonic-gate {
55837c478bd9Sstevel@tonic-gate 	struct T_optmgmt_req	optmgmt_req;
55847c478bd9Sstevel@tonic-gate 	struct opthdr		oh;
55857c478bd9Sstevel@tonic-gate 	mblk_t			*mp;
55867c478bd9Sstevel@tonic-gate 	int			error = 0;
55877c478bd9Sstevel@tonic-gate 	boolean_t		handled = B_FALSE;
55887c478bd9Sstevel@tonic-gate 
55897c478bd9Sstevel@tonic-gate 	dprintso(so, 1, ("sotpi_setsockopt(%p, 0x%x, 0x%x, %p, %d) %s\n",
5590903a11ebSrh87107 	    (void *)so, level, option_name, optval, optlen,
55917c478bd9Sstevel@tonic-gate 	    pr_state(so->so_state, so->so_mode)));
55927c478bd9Sstevel@tonic-gate 
55937c478bd9Sstevel@tonic-gate 	/* X/Open requires this check */
55947c478bd9Sstevel@tonic-gate 	if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
55957c478bd9Sstevel@tonic-gate 		if (xnet_check_print)
55967c478bd9Sstevel@tonic-gate 			printf("sockfs: X/Open setsockopt check => EINVAL\n");
55977c478bd9Sstevel@tonic-gate 		return (EINVAL);
55987c478bd9Sstevel@tonic-gate 	}
55997c478bd9Sstevel@tonic-gate 
56007c478bd9Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
56017c478bd9Sstevel@tonic-gate 	so_lock_single(so);	/* Set SOLOCKED */
56027c478bd9Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
56037c478bd9Sstevel@tonic-gate 
56047c478bd9Sstevel@tonic-gate 	optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ;
56057c478bd9Sstevel@tonic-gate 	optmgmt_req.MGMT_flags = T_NEGOTIATE;
56067c478bd9Sstevel@tonic-gate 	optmgmt_req.OPT_length = (t_scalar_t)sizeof (oh) + optlen;
56077c478bd9Sstevel@tonic-gate 	optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req);
56087c478bd9Sstevel@tonic-gate 
56097c478bd9Sstevel@tonic-gate 	oh.level = level;
56107c478bd9Sstevel@tonic-gate 	oh.name = option_name;
56117c478bd9Sstevel@tonic-gate 	oh.len = optlen;
56127c478bd9Sstevel@tonic-gate 
56137c478bd9Sstevel@tonic-gate 	mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req),
5614de8c4a14SErik Nordmark 	    &oh, sizeof (oh), optval, optlen, 0, _ALLOC_SLEEP, cr);
56157c478bd9Sstevel@tonic-gate 	/* Let option management work in the presence of data flow control */
56167c478bd9Sstevel@tonic-gate 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
56177c478bd9Sstevel@tonic-gate 	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
56187c478bd9Sstevel@tonic-gate 	mp = NULL;
56197c478bd9Sstevel@tonic-gate 	mutex_enter(&so->so_lock);
56207c478bd9Sstevel@tonic-gate 	if (error) {
56217c478bd9Sstevel@tonic-gate 		eprintsoline(so, error);
56220f1702c5SYu Xiangning 		goto done2;
56237c478bd9Sstevel@tonic-gate 	}
56247c478bd9Sstevel@tonic-gate 	error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK,
56257c478bd9Sstevel@tonic-gate 	    (t_uscalar_t)sizeof (struct T_optmgmt_ack), &mp, 0);
56267c478bd9Sstevel@tonic-gate 	if (error) {
56277c478bd9Sstevel@tonic-gate 		eprintsoline(so, error);
56287c478bd9Sstevel@tonic-gate 		goto done;
56297c478bd9Sstevel@tonic-gate 	}
56307c478bd9Sstevel@tonic-gate 	ASSERT(mp);
56317c478bd9Sstevel@tonic-gate 	/* No need to verify T_optmgmt_ack */
56327c478bd9Sstevel@tonic-gate 	freemsg(mp);
56337c478bd9Sstevel@tonic-gate done:
56347c478bd9Sstevel@tonic-gate 	/*
56357c478bd9Sstevel@tonic-gate 	 * Check for SOL_SOCKET options and record their values.
56367c478bd9Sstevel@tonic-gate 	 * If we know about a SOL_SOCKET parameter and the transport
56377c478bd9Sstevel@tonic-gate 	 * failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or
56387c478bd9Sstevel@tonic-gate 	 * EPROTO) we let the setsockopt succeed.
56397c478bd9Sstevel@tonic-gate 	 */
56407c478bd9Sstevel@tonic-gate 	if (level == SOL_SOCKET) {
56417c478bd9Sstevel@tonic-gate 		/* Check parameters */
56427c478bd9Sstevel@tonic-gate 		switch (option_name) {
56437c478bd9Sstevel@tonic-gate 		case SO_DEBUG:
56447c478bd9Sstevel@tonic-gate 		case SO_REUSEADDR:
56457c478bd9Sstevel@tonic-gate 		case SO_KEEPALIVE:
56467c478bd9Sstevel@tonic-gate 		case SO_DONTROUTE:
56477c478bd9Sstevel@tonic-gate 		case SO_BROADCAST:
56487c478bd9Sstevel@tonic-gate 		case SO_USELOOPBACK:
56497c478bd9Sstevel@tonic-gate 		case SO_OOBINLINE:
56507c478bd9Sstevel@tonic-gate 		case SO_SNDBUF:
56517c478bd9Sstevel@tonic-gate 		case SO_RCVBUF:
56527c478bd9Sstevel@tonic-gate #ifdef notyet
56537c478bd9Sstevel@tonic-gate 		case SO_SNDLOWAT:
56547c478bd9Sstevel@tonic-gate 		case SO_RCVLOWAT:
56557c478bd9Sstevel@tonic-gate #endif /* notyet */
56567c478bd9Sstevel@tonic-gate 		case SO_DGRAM_ERRIND:
56577c478bd9Sstevel@tonic-gate 			if (optlen != (t_uscalar_t)sizeof (int32_t)) {
56587c478bd9Sstevel@tonic-gate 				error = EINVAL;
56597c478bd9Sstevel@tonic-gate 				eprintsoline(so, error);
56607c478bd9Sstevel@tonic-gate 				goto done2;
56617c478bd9Sstevel@tonic-gate 			}
56627c478bd9Sstevel@tonic-gate 			ASSERT(optval);
56637c478bd9Sstevel@tonic-gate 			handled = B_TRUE;
56647c478bd9Sstevel@tonic-gate 			break;
56650f1702c5SYu Xiangning 		case SO_SNDTIMEO:
56660f1702c5SYu Xiangning 		case SO_RCVTIMEO:
5667e5083e81Sshenjian 			if (get_udatamodel() == DATAMODEL_NONE ||
5668e5083e81Sshenjian 			    get_udatamodel() == DATAMODEL_NATIVE) {
566922238f73Sshenjian 				if (optlen != sizeof (struct timeval)) {
56700f1702c5SYu Xiangning 					error = EINVAL;
56710f1702c5SYu Xiangning 					eprintsoline(so, error);
56720f1702c5SYu Xiangning 					goto done2;
56730f1702c5SYu Xiangning 				}
567422238f73Sshenjian 			} else {
567522238f73Sshenjian 				if (optlen != sizeof (struct timeval32)) {
567622238f73Sshenjian 					error = EINVAL;
567722238f73Sshenjian 					eprintsoline(so, error);
567822238f73Sshenjian 					goto done2;
567922238f73Sshenjian 				}
568022238f73Sshenjian 			}
56810f1702c5SYu Xiangning 			ASSERT(optval);
56820f1702c5SYu Xiangning 			handled = B_TRUE;
56830f1702c5SYu Xiangning 			break;
56847c478bd9Sstevel@tonic-gate 		case SO_LINGER:
56857c478bd9Sstevel@tonic-gate 			if (optlen != (t_uscalar_t)sizeof (struct linger)) {
56867c478bd9Sstevel@tonic-gate 				error = EINVAL;
56877c478bd9Sstevel@tonic-gate 				eprintsoline(so, error);
56887c478bd9Sstevel@tonic-gate 				goto done2;
56897c478bd9Sstevel@tonic-gate 			}
56907c478bd9Sstevel@tonic-gate 			ASSERT(optval);
56917c478bd9Sstevel@tonic-gate 			handled = B_TRUE;
56927c478bd9Sstevel@tonic-gate 			break;
56937c478bd9Sstevel@tonic-gate 		}
56947c478bd9Sstevel@tonic-gate 
56957c478bd9Sstevel@tonic-gate #define	intvalue	(*(int32_t *)optval)
56967c478bd9Sstevel@tonic-gate 
56977c478bd9Sstevel@tonic-gate 		switch (option_name) {
56987c478bd9Sstevel@tonic-gate 		case SO_TYPE:
56997c478bd9Sstevel@tonic-gate 		case SO_ERROR:
57007c478bd9Sstevel@tonic-gate 		case SO_ACCEPTCONN:
57017c478bd9Sstevel@tonic-gate 			/* Can't be set */
57027c478bd9Sstevel@tonic-gate 			error = ENOPROTOOPT;
57037c478bd9Sstevel@tonic-gate 			goto done2;
57047c478bd9Sstevel@tonic-gate 		case SO_LINGER: {
57057c478bd9Sstevel@tonic-gate 			struct linger *l = (struct linger *)optval;
57067c478bd9Sstevel@tonic-gate 
57077c478bd9Sstevel@tonic-gate 			so->so_linger.l_linger = l->l_linger;
57087c478bd9Sstevel@tonic-gate 			if (l->l_onoff) {
57097c478bd9Sstevel@tonic-gate 				so->so_linger.l_onoff = SO_LINGER;
57107c478bd9Sstevel@tonic-gate 				so->so_options |= SO_LINGER;
57117c478bd9Sstevel@tonic-gate 			} else {
57127c478bd9Sstevel@tonic-gate 				so->so_linger.l_onoff = 0;
57137c478bd9Sstevel@tonic-gate 				so->so_options &= ~SO_LINGER;
57147c478bd9Sstevel@tonic-gate 			}
57157c478bd9Sstevel@tonic-gate 			break;
57167c478bd9Sstevel@tonic-gate 		}
57177c478bd9Sstevel@tonic-gate 
57187c478bd9Sstevel@tonic-gate 		case SO_DEBUG:
57197c478bd9Sstevel@tonic-gate #ifdef SOCK_TEST
57207c478bd9Sstevel@tonic-gate 			if (intvalue & 2)
57217c478bd9Sstevel@tonic-gate 				sock_test_timelimit = 10 * hz;
57227c478bd9Sstevel@tonic-gate 			else
57237c478bd9Sstevel@tonic-gate 				sock_test_timelimit = 0;
57247c478bd9Sstevel@tonic-gate 
57257c478bd9Sstevel@tonic-gate 			if (intvalue & 4)
57267c478bd9Sstevel@tonic-gate 				do_useracc = 0;
57277c478bd9Sstevel@tonic-gate 			else
57287c478bd9Sstevel@tonic-gate 				do_useracc = 1;
57297c478bd9Sstevel@tonic-gate #endif /* SOCK_TEST */
57307c478bd9Sstevel@tonic-gate 			/* FALLTHRU */
57317c478bd9Sstevel@tonic-gate 		case SO_REUSEADDR:
57327c478bd9Sstevel@tonic-gate 		case SO_KEEPALIVE:
57337c478bd9Sstevel@tonic-gate 		case SO_DONTROUTE:
57347c478bd9Sstevel@tonic-gate 		case SO_BROADCAST:
57357c478bd9Sstevel@tonic-gate 		case SO_USELOOPBACK:
57367c478bd9Sstevel@tonic-gate 		case SO_OOBINLINE:
57377c478bd9Sstevel@tonic-gate 		case SO_DGRAM_ERRIND:
57387c478bd9Sstevel@tonic-gate 			if (intvalue != 0) {
57397c478bd9Sstevel@tonic-gate 				dprintso(so, 1,
57400f1702c5SYu Xiangning 				    ("socket_setsockopt: setting 0x%x\n",
57417c478bd9Sstevel@tonic-gate 				    option_name));
57427c478bd9Sstevel@tonic-gate 				so->so_options |= option_name;
57437c478bd9Sstevel@tonic-gate 			} else {
57447c478bd9Sstevel@tonic-gate 				dprintso(so, 1,
57450f1702c5SYu Xiangning 				    ("socket_setsockopt: clearing 0x%x\n",
57467c478bd9Sstevel@tonic-gate 				    option_name));
57477c478bd9Sstevel@tonic-gate 				so->so_options &= ~option_name;
57487c478bd9Sstevel@tonic-gate 			}
57497c478bd9Sstevel@tonic-gate 			break;
57507c478bd9Sstevel@tonic-gate 		/*
57517c478bd9Sstevel@tonic-gate 		 * The following options are only returned by us when the
57520f1702c5SYu Xiangning 		 * transport layer fails.
57537c478bd9Sstevel@tonic-gate 		 * XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs
57547c478bd9Sstevel@tonic-gate 		 * since the transport might adjust the value and not
57557c478bd9Sstevel@tonic-gate 		 * return exactly what was set by the application.
57567c478bd9Sstevel@tonic-gate 		 */
57577c478bd9Sstevel@tonic-gate 		case SO_SNDBUF:
57587c478bd9Sstevel@tonic-gate 			so->so_sndbuf = intvalue;
57597c478bd9Sstevel@tonic-gate 			break;
57607c478bd9Sstevel@tonic-gate 		case SO_RCVBUF:
57617c478bd9Sstevel@tonic-gate 			so->so_rcvbuf = intvalue;
57627c478bd9Sstevel@tonic-gate 			break;
57630f1702c5SYu Xiangning 		case SO_RCVPSH:
57640f1702c5SYu Xiangning 			so->so_rcv_timer_interval = intvalue;
57650f1702c5SYu Xiangning 			break;
57667c478bd9Sstevel@tonic-gate #ifdef notyet
57677c478bd9Sstevel@tonic-gate 		/*
57687c478bd9Sstevel@tonic-gate 		 * We do not implement the semantics of these options
57697c478bd9Sstevel@tonic-gate 		 * thus we shouldn't implement the options either.
57707c478bd9Sstevel@tonic-gate 		 */
57717c478bd9Sstevel@tonic-gate 		case SO_SNDLOWAT:
57727c478bd9Sstevel@tonic-gate 			so->so_sndlowat = intvalue;
57737c478bd9Sstevel@tonic-gate 			break;
57747c478bd9Sstevel@tonic-gate 		case SO_RCVLOWAT:
57757c478bd9Sstevel@tonic-gate 			so->so_rcvlowat = intvalue;
57767c478bd9Sstevel@tonic-gate 			break;
57777c478bd9Sstevel@tonic-gate #endif /* notyet */
57780f1702c5SYu Xiangning 		case SO_SNDTIMEO:
57790f1702c5SYu Xiangning 		case SO_RCVTIMEO: {
578022238f73Sshenjian 			struct timeval tl;
578122238f73Sshenjian 			clock_t val;
578222238f73Sshenjian 
5783e5083e81Sshenjian 			if (get_udatamodel() == DATAMODEL_NONE ||
5784e5083e81Sshenjian 			    get_udatamodel() == DATAMODEL_NATIVE)
578522238f73Sshenjian 				bcopy(&tl, (struct timeval *)optval,
578622238f73Sshenjian 				    sizeof (struct timeval));
578722238f73Sshenjian 			else
578822238f73Sshenjian 				TIMEVAL32_TO_TIMEVAL(&tl,
578922238f73Sshenjian 				    (struct timeval32 *)optval);
579022238f73Sshenjian 			val = tl.tv_sec * 1000 * 1000 + tl.tv_usec;
57910f1702c5SYu Xiangning 			if (option_name == SO_RCVTIMEO)
57920f1702c5SYu Xiangning 				so->so_rcvtimeo = drv_usectohz(val);
57930f1702c5SYu Xiangning 			else
57940f1702c5SYu Xiangning 				so->so_sndtimeo = drv_usectohz(val);
57950f1702c5SYu Xiangning 			break;
57960f1702c5SYu Xiangning 		}
57977c478bd9Sstevel@tonic-gate 		}
57987c478bd9Sstevel@tonic-gate #undef	intvalue
57997c478bd9Sstevel@tonic-gate 
58007c478bd9Sstevel@tonic-gate 		if (error) {
58017c478bd9Sstevel@tonic-gate 			if ((error == ENOPROTOOPT || error == EPROTO ||
58027c478bd9Sstevel@tonic-gate 			    error == EINVAL) && handled) {
58037c478bd9Sstevel@tonic-gate 				dprintso(so, 1,
58047c478bd9Sstevel@tonic-gate 				    ("setsockopt: ignoring error %d for 0x%x\n",
58057c478bd9Sstevel@tonic-gate 				    error, option_name));
58067c478bd9Sstevel@tonic-gate 				error = 0;
58077c478bd9Sstevel@tonic-gate 			}
58087c478bd9Sstevel@tonic-gate 		}
58097c478bd9Sstevel@tonic-gate 	}
58107c478bd9Sstevel@tonic-gate done2:
58117c478bd9Sstevel@tonic-gate 	so_unlock_single(so, SOLOCKED);
58127c478bd9Sstevel@tonic-gate 	mutex_exit(&so->so_lock);
58137c478bd9Sstevel@tonic-gate 	return (error);
58147c478bd9Sstevel@tonic-gate }
58150f1702c5SYu Xiangning 
5816f0267584Sanders /*
5817f0267584Sanders  * sotpi_close() is called when the last open reference goes away.
5818f0267584Sanders  */
58190f1702c5SYu Xiangning /* ARGSUSED */
58200f1702c5SYu Xiangning int
58210f1702c5SYu Xiangning sotpi_close(struct sonode *so, int flag, struct cred *cr)
58220f1702c5SYu Xiangning {
58230f1702c5SYu Xiangning 	struct vnode *vp = SOTOV(so);
58240f1702c5SYu Xiangning 	dev_t dev;
58250f1702c5SYu Xiangning 	int error = 0;
58260f1702c5SYu Xiangning 	sotpi_info_t *sti = SOTOTPI(so);
58270f1702c5SYu Xiangning 
58280f1702c5SYu Xiangning 	dprintso(so, 1, ("sotpi_close(%p, %x) %s\n",
58290f1702c5SYu Xiangning 	    (void *)vp, flag, pr_state(so->so_state, so->so_mode)));
58300f1702c5SYu Xiangning 
58310f1702c5SYu Xiangning 	dev = sti->sti_dev;
58320f1702c5SYu Xiangning 
58330f1702c5SYu Xiangning 	ASSERT(STREAMSTAB(getmajor(dev)));
58340f1702c5SYu Xiangning 
58350f1702c5SYu Xiangning 	mutex_enter(&so->so_lock);
58360f1702c5SYu Xiangning 	so_lock_single(so);	/* Set SOLOCKED */
58370f1702c5SYu Xiangning 
5838d36be52eSRao Shoaib 	ASSERT(so_verify_oobstate(so));
5839d36be52eSRao Shoaib 
58400f1702c5SYu Xiangning 	if (sti->sti_nl7c_flags & NL7C_ENABLED) {
58410f1702c5SYu Xiangning 		sti->sti_nl7c_flags = 0;
58420f1702c5SYu Xiangning 		nl7c_close(so);
58430f1702c5SYu Xiangning 	}
58440f1702c5SYu Xiangning 
58450f1702c5SYu Xiangning 	if (vp->v_stream != NULL) {
58460f1702c5SYu Xiangning 		vnode_t *ux_vp;
58470f1702c5SYu Xiangning 
58480f1702c5SYu Xiangning 		if (so->so_family == AF_UNIX) {
58490f1702c5SYu Xiangning 			/* Could avoid this when CANTSENDMORE for !dgram */
58500f1702c5SYu Xiangning 			so_unix_close(so);
58510f1702c5SYu Xiangning 		}
58520f1702c5SYu Xiangning 
58530f1702c5SYu Xiangning 		mutex_exit(&so->so_lock);
58540f1702c5SYu Xiangning 		/*
58550f1702c5SYu Xiangning 		 * Disassemble the linkage from the AF_UNIX underlying file
58560f1702c5SYu Xiangning 		 * system vnode to this socket (by atomically clearing
58570f1702c5SYu Xiangning 		 * v_stream in vn_rele_stream) before strclose clears sd_vnode
58580f1702c5SYu Xiangning 		 * and frees the stream head.
58590f1702c5SYu Xiangning 		 */
58600f1702c5SYu Xiangning 		if ((ux_vp = sti->sti_ux_bound_vp) != NULL) {
58610f1702c5SYu Xiangning 			ASSERT(ux_vp->v_stream);
58620f1702c5SYu Xiangning 			sti->sti_ux_bound_vp = NULL;
58630f1702c5SYu Xiangning 			vn_rele_stream(ux_vp);
58640f1702c5SYu Xiangning 		}
58650f1702c5SYu Xiangning 		error = strclose(vp, flag, cr);
58660f1702c5SYu Xiangning 		vp->v_stream = NULL;
58670f1702c5SYu Xiangning 		mutex_enter(&so->so_lock);
58680f1702c5SYu Xiangning 	}
58690f1702c5SYu Xiangning 
58700f1702c5SYu Xiangning 	/*
58710f1702c5SYu Xiangning 	 * Flush the T_DISCON_IND on sti_discon_ind_mp.
58720f1702c5SYu Xiangning 	 */
58730f1702c5SYu Xiangning 	so_flush_discon_ind(so);
58740f1702c5SYu Xiangning 
58750f1702c5SYu Xiangning 	so_unlock_single(so, SOLOCKED);
58760f1702c5SYu Xiangning 	mutex_exit(&so->so_lock);
58770f1702c5SYu Xiangning 
58780f1702c5SYu Xiangning 	/*
58790f1702c5SYu Xiangning 	 * Needed for STREAMs.
58800f1702c5SYu Xiangning 	 * Decrement the device driver's reference count for streams
58810f1702c5SYu Xiangning 	 * opened via the clone dip. The driver was held in clone_open().
58820f1702c5SYu Xiangning 	 * The absence of clone_close() forces this asymmetry.
58830f1702c5SYu Xiangning 	 */
58840f1702c5SYu Xiangning 	if (so->so_flag & SOCLONE)
58850f1702c5SYu Xiangning 		ddi_rele_driver(getmajor(dev));
58860f1702c5SYu Xiangning 
58870f1702c5SYu Xiangning 	return (error);
58880f1702c5SYu Xiangning }
58890f1702c5SYu Xiangning 
58900f1702c5SYu Xiangning static int
58910f1702c5SYu Xiangning sotpi_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
58920f1702c5SYu Xiangning     struct cred *cr, int32_t *rvalp)
58930f1702c5SYu Xiangning {
58940f1702c5SYu Xiangning 	struct vnode *vp = SOTOV(so);
58950f1702c5SYu Xiangning 	sotpi_info_t *sti = SOTOTPI(so);
58960f1702c5SYu Xiangning 	int error = 0;
58970f1702c5SYu Xiangning 
58980f1702c5SYu Xiangning 	dprintso(so, 0, ("sotpi_ioctl: cmd 0x%x, arg 0x%lx, state %s\n",
58990f1702c5SYu Xiangning 	    cmd, arg, pr_state(so->so_state, so->so_mode)));
59000f1702c5SYu Xiangning 
59010f1702c5SYu Xiangning 	switch (cmd) {
5902bfcb55b8SRao Shoaib 	case SIOCSQPTR:
5903bfcb55b8SRao Shoaib 		/*
5904bfcb55b8SRao Shoaib 		 * SIOCSQPTR is valid only when helper stream is created
5905bfcb55b8SRao Shoaib 		 * by the protocol.
5906bfcb55b8SRao Shoaib 		 */
59070f1702c5SYu Xiangning 	case _I_INSERT:
59080f1702c5SYu Xiangning 	case _I_REMOVE:
59090f1702c5SYu Xiangning 		/*
59100f1702c5SYu Xiangning 		 * Since there's no compelling reason to support these ioctls
59110f1702c5SYu Xiangning 		 * on sockets, and doing so would increase the complexity
59120f1702c5SYu Xiangning 		 * markedly, prevent it.
59130f1702c5SYu Xiangning 		 */
59140f1702c5SYu Xiangning 		return (EOPNOTSUPP);
59150f1702c5SYu Xiangning 
59160f1702c5SYu Xiangning 	case I_FIND:
59170f1702c5SYu Xiangning 	case I_LIST:
59180f1702c5SYu Xiangning 	case I_LOOK:
59190f1702c5SYu Xiangning 	case I_POP:
59200f1702c5SYu Xiangning 	case I_PUSH:
59210f1702c5SYu Xiangning 		/*
59220f1702c5SYu Xiangning 		 * To prevent races and inconsistencies between the actual
59230f1702c5SYu Xiangning 		 * state of the stream and the state according to the sonode,
59240f1702c5SYu Xiangning 		 * we serialize all operations which modify or operate on the
59250f1702c5SYu Xiangning 		 * list of modules on the socket's stream.
59260f1702c5SYu Xiangning 		 */
59270f1702c5SYu Xiangning 		mutex_enter(&sti->sti_plumb_lock);
59280f1702c5SYu Xiangning 		error = socktpi_plumbioctl(vp, cmd, arg, mode, cr, rvalp);
59290f1702c5SYu Xiangning 		mutex_exit(&sti->sti_plumb_lock);
59300f1702c5SYu Xiangning 		return (error);
59310f1702c5SYu Xiangning 
59320f1702c5SYu Xiangning 	default:
59330f1702c5SYu Xiangning 		if (so->so_version != SOV_STREAM)
59340f1702c5SYu Xiangning 			break;
59350f1702c5SYu Xiangning 
59360f1702c5SYu Xiangning 		/*
59370f1702c5SYu Xiangning 		 * The imaginary "sockmod" has been popped; act as a stream.
59380f1702c5SYu Xiangning 		 */
59390f1702c5SYu Xiangning 		return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
59400f1702c5SYu Xiangning 	}
59410f1702c5SYu Xiangning 
59420f1702c5SYu Xiangning 	ASSERT(so->so_version != SOV_STREAM);
59430f1702c5SYu Xiangning 
59440f1702c5SYu Xiangning 	/*
59450f1702c5SYu Xiangning 	 * Process socket-specific ioctls.
59460f1702c5SYu Xiangning 	 */
59470f1702c5SYu Xiangning 	switch (cmd) {
59480f1702c5SYu Xiangning 	case FIONBIO: {
59490f1702c5SYu Xiangning 		int32_t value;
59500f1702c5SYu Xiangning 
59510f1702c5SYu Xiangning 		if (so_copyin((void *)arg, &value, sizeof (int32_t),
59520f1702c5SYu Xiangning 		    (mode & (int)FKIOCTL)))
59530f1702c5SYu Xiangning 			return (EFAULT);
59540f1702c5SYu Xiangning 
59550f1702c5SYu Xiangning 		mutex_enter(&so->so_lock);
59560f1702c5SYu Xiangning 		if (value) {
59570f1702c5SYu Xiangning 			so->so_state |= SS_NDELAY;
59580f1702c5SYu Xiangning 		} else {
59590f1702c5SYu Xiangning 			so->so_state &= ~SS_NDELAY;
59600f1702c5SYu Xiangning 		}
59610f1702c5SYu Xiangning 		mutex_exit(&so->so_lock);
59620f1702c5SYu Xiangning 		return (0);
59630f1702c5SYu Xiangning 	}
59640f1702c5SYu Xiangning 
59650f1702c5SYu Xiangning 	case FIOASYNC: {
59660f1702c5SYu Xiangning 		int32_t value;
59670f1702c5SYu Xiangning 
59680f1702c5SYu Xiangning 		if (so_copyin((void *)arg, &value, sizeof (int32_t),
59690f1702c5SYu Xiangning 		    (mode & (int)FKIOCTL)))
59700f1702c5SYu Xiangning 			return (EFAULT);
59710f1702c5SYu Xiangning 
59720f1702c5SYu Xiangning 		mutex_enter(&so->so_lock);
59730f1702c5SYu Xiangning 		/*
59740f1702c5SYu Xiangning 		 * SS_ASYNC flag not already set correctly?
59750f1702c5SYu Xiangning 		 * (!value != !(so->so_state & SS_ASYNC))
59760f1702c5SYu Xiangning 		 * but some engineers find that too hard to read.
59770f1702c5SYu Xiangning 		 */
59780f1702c5SYu Xiangning 		if (value == 0 && (so->so_state & SS_ASYNC) != 0 ||
59790f1702c5SYu Xiangning 		    value != 0 && (so->so_state & SS_ASYNC) == 0)
59800f1702c5SYu Xiangning 			error = so_flip_async(so, vp, mode, cr);
59810f1702c5SYu Xiangning 		mutex_exit(&so->so_lock);
59820f1702c5SYu Xiangning 		return (error);
59830f1702c5SYu Xiangning 	}
59840f1702c5SYu Xiangning 
59850f1702c5SYu Xiangning 	case SIOCSPGRP:
59860f1702c5SYu Xiangning 	case FIOSETOWN: {
59870f1702c5SYu Xiangning 		pid_t pgrp;
59880f1702c5SYu Xiangning 
59890f1702c5SYu Xiangning 		if (so_copyin((void *)arg, &pgrp, sizeof (pid_t),
59900f1702c5SYu Xiangning 		    (mode & (int)FKIOCTL)))
59910f1702c5SYu Xiangning 			return (EFAULT);
59920f1702c5SYu Xiangning 
59930f1702c5SYu Xiangning 		mutex_enter(&so->so_lock);
59940f1702c5SYu Xiangning 		dprintso(so, 1, ("setown: new %d old %d\n", pgrp, so->so_pgrp));
59950f1702c5SYu Xiangning 		/* Any change? */
59960f1702c5SYu Xiangning 		if (pgrp != so->so_pgrp)
59970f1702c5SYu Xiangning 			error = so_set_siggrp(so, vp, pgrp, mode, cr);
59980f1702c5SYu Xiangning 		mutex_exit(&so->so_lock);
59990f1702c5SYu Xiangning 		return (error);
60000f1702c5SYu Xiangning 	}
60010f1702c5SYu Xiangning 	case SIOCGPGRP:
60020f1702c5SYu Xiangning 	case FIOGETOWN:
60030f1702c5SYu Xiangning 		if (so_copyout(&so->so_pgrp, (void *)arg,
60040f1702c5SYu Xiangning 		    sizeof (pid_t), (mode & (int)FKIOCTL)))
60050f1702c5SYu Xiangning 			return (EFAULT);
60060f1702c5SYu Xiangning 		return (0);
60070f1702c5SYu Xiangning 
60080f1702c5SYu Xiangning 	case SIOCATMARK: {
60090f1702c5SYu Xiangning 		int retval;
60100f1702c5SYu Xiangning 		uint_t so_state;
60110f1702c5SYu Xiangning 
60120f1702c5SYu Xiangning 		/*
60130f1702c5SYu Xiangning 		 * strwaitmark has a finite timeout after which it
60140f1702c5SYu Xiangning 		 * returns -1 if the mark state is undetermined.
60150f1702c5SYu Xiangning 		 * In order to avoid any race between the mark state
60160f1702c5SYu Xiangning 		 * in sockfs and the mark state in the stream head this
60170f1702c5SYu Xiangning 		 * routine loops until the mark state can be determined
60180f1702c5SYu Xiangning 		 * (or the urgent data indication has been removed by some
60190f1702c5SYu Xiangning 		 * other thread).
60200f1702c5SYu Xiangning 		 */
60210f1702c5SYu Xiangning 		do {
60220f1702c5SYu Xiangning 			mutex_enter(&so->so_lock);
60230f1702c5SYu Xiangning 			so_state = so->so_state;
60240f1702c5SYu Xiangning 			mutex_exit(&so->so_lock);
60250f1702c5SYu Xiangning 			if (so_state & SS_RCVATMARK) {
60260f1702c5SYu Xiangning 				retval = 1;
60270f1702c5SYu Xiangning 			} else if (!(so_state & SS_OOBPEND)) {
60280f1702c5SYu Xiangning 				/*
60290f1702c5SYu Xiangning 				 * No SIGURG has been generated -- there is no
60300f1702c5SYu Xiangning 				 * pending or present urgent data. Thus can't
60310f1702c5SYu Xiangning 				 * possibly be at the mark.
60320f1702c5SYu Xiangning 				 */
60330f1702c5SYu Xiangning 				retval = 0;
60340f1702c5SYu Xiangning 			} else {
60350f1702c5SYu Xiangning 				/*
60360f1702c5SYu Xiangning 				 * Have the stream head wait until there is
60370f1702c5SYu Xiangning 				 * either some messages on the read queue, or
60380f1702c5SYu Xiangning 				 * STRATMARK or STRNOTATMARK gets set. The
60390f1702c5SYu Xiangning 				 * STRNOTATMARK flag is used so that the
60400f1702c5SYu Xiangning 				 * transport can send up a MSGNOTMARKNEXT
60410f1702c5SYu Xiangning 				 * M_DATA to indicate that it is not
60420f1702c5SYu Xiangning 				 * at the mark and additional data is not about
60430f1702c5SYu Xiangning 				 * to be send upstream.
60440f1702c5SYu Xiangning 				 *
60450f1702c5SYu Xiangning 				 * If the mark state is undetermined this will
60460f1702c5SYu Xiangning 				 * return -1 and we will loop rechecking the
60470f1702c5SYu Xiangning 				 * socket state.
60480f1702c5SYu Xiangning 				 */
60490f1702c5SYu Xiangning 				retval = strwaitmark(vp);
60500f1702c5SYu Xiangning 			}
60510f1702c5SYu Xiangning 		} while (retval == -1);
60520f1702c5SYu Xiangning 
60530f1702c5SYu Xiangning 		if (so_copyout(&retval, (void *)arg, sizeof (int),
60540f1702c5SYu Xiangning 		    (mode & (int)FKIOCTL)))
60550f1702c5SYu Xiangning 			return (EFAULT);
60560f1702c5SYu Xiangning 		return (0);
60570f1702c5SYu Xiangning 	}
60580f1702c5SYu Xiangning 
60590f1702c5SYu Xiangning 	case I_FDINSERT:
60600f1702c5SYu Xiangning 	case I_SENDFD:
60610f1702c5SYu Xiangning 	case I_RECVFD:
60620f1702c5SYu Xiangning 	case I_ATMARK:
60630f1702c5SYu Xiangning 	case _SIOCSOCKFALLBACK:
60640f1702c5SYu Xiangning 		/*
60650f1702c5SYu Xiangning 		 * These ioctls do not apply to sockets. I_FDINSERT can be
60660f1702c5SYu Xiangning 		 * used to send M_PROTO messages without modifying the socket
60670f1702c5SYu Xiangning 		 * state. I_SENDFD/RECVFD should not be used for socket file
60680f1702c5SYu Xiangning 		 * descriptor passing since they assume a twisted stream.
60690f1702c5SYu Xiangning 		 * SIOCATMARK must be used instead of I_ATMARK.
60700f1702c5SYu Xiangning 		 *
60710f1702c5SYu Xiangning 		 * _SIOCSOCKFALLBACK from an application should never be
60720f1702c5SYu Xiangning 		 * processed.  It is only generated by socktpi_open() or
60730f1702c5SYu Xiangning 		 * in response to I_POP or I_PUSH.
60740f1702c5SYu Xiangning 		 */
60750f1702c5SYu Xiangning #ifdef DEBUG
60760f1702c5SYu Xiangning 		zcmn_err(getzoneid(), CE_WARN,
60770f1702c5SYu Xiangning 		    "Unsupported STREAMS ioctl 0x%x on socket. "
60780f1702c5SYu Xiangning 		    "Pid = %d\n", cmd, curproc->p_pid);
60790f1702c5SYu Xiangning #endif /* DEBUG */
60800f1702c5SYu Xiangning 		return (EOPNOTSUPP);
60810f1702c5SYu Xiangning 
60820f1702c5SYu Xiangning 	case _I_GETPEERCRED:
60830f1702c5SYu Xiangning 		if ((mode & FKIOCTL) == 0)
60840f1702c5SYu Xiangning 			return (EINVAL);
60850f1702c5SYu Xiangning 
60860f1702c5SYu Xiangning 		mutex_enter(&so->so_lock);
60870f1702c5SYu Xiangning 		if ((so->so_mode & SM_CONNREQUIRED) == 0) {
60880f1702c5SYu Xiangning 			error = ENOTSUP;
60890f1702c5SYu Xiangning 		} else if ((so->so_state & SS_ISCONNECTED) == 0) {
60900f1702c5SYu Xiangning 			error = ENOTCONN;
60910f1702c5SYu Xiangning 		} else if (so->so_peercred != NULL) {
60920f1702c5SYu Xiangning 			k_peercred_t *kp = (k_peercred_t *)arg;
60930f1702c5SYu Xiangning 			kp->pc_cr = so->so_peercred;
60940f1702c5SYu Xiangning 			kp->pc_cpid = so->so_cpid;
60950f1702c5SYu Xiangning 			crhold(so->so_peercred);
60960f1702c5SYu Xiangning 		} else {
60970f1702c5SYu Xiangning 			error = EINVAL;
60980f1702c5SYu Xiangning 		}
60990f1702c5SYu Xiangning 		mutex_exit(&so->so_lock);
61000f1702c5SYu Xiangning 		return (error);
61010f1702c5SYu Xiangning 
61020f1702c5SYu Xiangning 	default:
61030f1702c5SYu Xiangning 		/*
61040f1702c5SYu Xiangning 		 * Do the higher-order bits of the ioctl cmd indicate
61050f1702c5SYu Xiangning 		 * that it is an I_* streams ioctl?
61060f1702c5SYu Xiangning 		 */
61070f1702c5SYu Xiangning 		if ((cmd & 0xffffff00U) == STR &&
61080f1702c5SYu Xiangning 		    so->so_version == SOV_SOCKBSD) {
61090f1702c5SYu Xiangning #ifdef DEBUG
61100f1702c5SYu Xiangning 			zcmn_err(getzoneid(), CE_WARN,
61110f1702c5SYu Xiangning 			    "Unsupported STREAMS ioctl 0x%x on socket. "
61120f1702c5SYu Xiangning 			    "Pid = %d\n", cmd, curproc->p_pid);
61130f1702c5SYu Xiangning #endif /* DEBUG */
61140f1702c5SYu Xiangning 			return (EOPNOTSUPP);
61150f1702c5SYu Xiangning 		}
61160f1702c5SYu Xiangning 		return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
61170f1702c5SYu Xiangning 	}
61180f1702c5SYu Xiangning }
61190f1702c5SYu Xiangning 
61200f1702c5SYu Xiangning /*
61210f1702c5SYu Xiangning  * Handle plumbing-related ioctls.
61220f1702c5SYu Xiangning  */
61230f1702c5SYu Xiangning static int
61240f1702c5SYu Xiangning socktpi_plumbioctl(struct vnode *vp, int cmd, intptr_t arg, int mode,
61250f1702c5SYu Xiangning     struct cred *cr, int32_t *rvalp)
61260f1702c5SYu Xiangning {
61270f1702c5SYu Xiangning 	static const char sockmod_name[] = "sockmod";
61280f1702c5SYu Xiangning 	struct sonode	*so = VTOSO(vp);
61290f1702c5SYu Xiangning 	char		mname[FMNAMESZ + 1];
61300f1702c5SYu Xiangning 	int		error;
61310f1702c5SYu Xiangning 	sotpi_info_t	*sti = SOTOTPI(so);
61320f1702c5SYu Xiangning 
61330f1702c5SYu Xiangning 	ASSERT(MUTEX_HELD(&sti->sti_plumb_lock));
61340f1702c5SYu Xiangning 
61350f1702c5SYu Xiangning 	if (so->so_version == SOV_SOCKBSD)
61360f1702c5SYu Xiangning 		return (EOPNOTSUPP);
61370f1702c5SYu Xiangning 
61380f1702c5SYu Xiangning 	if (so->so_version == SOV_STREAM) {
61390f1702c5SYu Xiangning 		/*
61400f1702c5SYu Xiangning 		 * The imaginary "sockmod" has been popped - act as a stream.
61410f1702c5SYu Xiangning 		 * If this is a push of sockmod then change back to a socket.
61420f1702c5SYu Xiangning 		 */
61430f1702c5SYu Xiangning 		if (cmd == I_PUSH) {
61440f1702c5SYu Xiangning 			error = ((mode & FKIOCTL) ? copystr : copyinstr)(
61450f1702c5SYu Xiangning 			    (void *)arg, mname, sizeof (mname), NULL);
61460f1702c5SYu Xiangning 
61470f1702c5SYu Xiangning 			if (error == 0 && strcmp(mname, sockmod_name) == 0) {
61480f1702c5SYu Xiangning 				dprintso(so, 0, ("socktpi_ioctl: going to "
61490f1702c5SYu Xiangning 				    "socket version\n"));
61500f1702c5SYu Xiangning 				so_stream2sock(so);
61510f1702c5SYu Xiangning 				return (0);
61520f1702c5SYu Xiangning 			}
61530f1702c5SYu Xiangning 		}
61540f1702c5SYu Xiangning 		return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
61550f1702c5SYu Xiangning 	}
61560f1702c5SYu Xiangning 
61570f1702c5SYu Xiangning 	switch (cmd) {
61580f1702c5SYu Xiangning 	case I_PUSH:
61590f1702c5SYu Xiangning 		if (sti->sti_direct) {
61600f1702c5SYu Xiangning 			mutex_enter(&so->so_lock);
61610f1702c5SYu Xiangning 			so_lock_single(so);
61620f1702c5SYu Xiangning 			mutex_exit(&so->so_lock);
61630f1702c5SYu Xiangning 
61640f1702c5SYu Xiangning 			error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K,
6165de8c4a14SErik Nordmark 			    cr, rvalp);
61660f1702c5SYu Xiangning 
61670f1702c5SYu Xiangning 			mutex_enter(&so->so_lock);
61680f1702c5SYu Xiangning 			if (error == 0)
61690f1702c5SYu Xiangning 				sti->sti_direct = 0;
61700f1702c5SYu Xiangning 			so_unlock_single(so, SOLOCKED);
61710f1702c5SYu Xiangning 			mutex_exit(&so->so_lock);
61720f1702c5SYu Xiangning 
61730f1702c5SYu Xiangning 			if (error != 0)
61740f1702c5SYu Xiangning 				return (error);
61750f1702c5SYu Xiangning 		}
61760f1702c5SYu Xiangning 
61770f1702c5SYu Xiangning 		error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
61780f1702c5SYu Xiangning 		if (error == 0)
61790f1702c5SYu Xiangning 			sti->sti_pushcnt++;
61800f1702c5SYu Xiangning 		return (error);
61810f1702c5SYu Xiangning 
61820f1702c5SYu Xiangning 	case I_POP:
61830f1702c5SYu Xiangning 		if (sti->sti_pushcnt == 0) {
61840f1702c5SYu Xiangning 			/* Emulate sockmod being popped */
61850f1702c5SYu Xiangning 			dprintso(so, 0,
61860f1702c5SYu Xiangning 			    ("socktpi_ioctl: going to STREAMS version\n"));
61870f1702c5SYu Xiangning 			return (so_sock2stream(so));
61880f1702c5SYu Xiangning 		}
61890f1702c5SYu Xiangning 
61900f1702c5SYu Xiangning 		error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
61910f1702c5SYu Xiangning 		if (error == 0)
61920f1702c5SYu Xiangning 			sti->sti_pushcnt--;
61930f1702c5SYu Xiangning 		return (error);
61940f1702c5SYu Xiangning 
61950f1702c5SYu Xiangning 	case I_LIST: {
61960f1702c5SYu Xiangning 		struct str_mlist *kmlistp, *umlistp;
61970f1702c5SYu Xiangning 		struct str_list	kstrlist;
61980f1702c5SYu Xiangning 		ssize_t		kstrlistsize;
61990f1702c5SYu Xiangning 		int		i, nmods;
62000f1702c5SYu Xiangning 
62010f1702c5SYu Xiangning 		STRUCT_DECL(str_list, ustrlist);
62020f1702c5SYu Xiangning 		STRUCT_INIT(ustrlist, mode);
62030f1702c5SYu Xiangning 
6204e9f74ea5SToomas Soome 		if (arg == 0) {
62050f1702c5SYu Xiangning 			error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
62060f1702c5SYu Xiangning 			if (error == 0)
62070f1702c5SYu Xiangning 				(*rvalp)++;	/* Add one for sockmod */
62080f1702c5SYu Xiangning 			return (error);
62090f1702c5SYu Xiangning 		}
62100f1702c5SYu Xiangning 
62110f1702c5SYu Xiangning 		error = so_copyin((void *)arg, STRUCT_BUF(ustrlist),
62120f1702c5SYu Xiangning 		    STRUCT_SIZE(ustrlist), mode & FKIOCTL);
62130f1702c5SYu Xiangning 		if (error != 0)
62140f1702c5SYu Xiangning 			return (error);
62150f1702c5SYu Xiangning 
62160f1702c5SYu Xiangning 		nmods = STRUCT_FGET(ustrlist, sl_nmods);
62170f1702c5SYu Xiangning 		if (nmods <= 0)
62180f1702c5SYu Xiangning 			return (EINVAL);
62190f1702c5SYu Xiangning 		/*
62200f1702c5SYu Xiangning 		 * Ceiling nmods at nstrpush to prevent someone from
62210f1702c5SYu Xiangning 		 * maliciously consuming lots of kernel memory.
62220f1702c5SYu Xiangning 		 */
62230f1702c5SYu Xiangning 		nmods = MIN(nmods, nstrpush);
62240f1702c5SYu Xiangning 
62250f1702c5SYu Xiangning 		kstrlistsize = (nmods + 1) * sizeof (struct str_mlist);
62260f1702c5SYu Xiangning 		kstrlist.sl_nmods = nmods;
62270f1702c5SYu Xiangning 		kstrlist.sl_modlist = kmem_zalloc(kstrlistsize, KM_SLEEP);
62280f1702c5SYu Xiangning 
62290f1702c5SYu Xiangning 		error = strioctl(vp, cmd, (intptr_t)&kstrlist, mode, K_TO_K,
62300f1702c5SYu Xiangning 		    cr, rvalp);
62310f1702c5SYu Xiangning 		if (error != 0)
62320f1702c5SYu Xiangning 			goto done;
62330f1702c5SYu Xiangning 
62340f1702c5SYu Xiangning 		/*
62350f1702c5SYu Xiangning 		 * Considering the module list as a 0-based array of sl_nmods
62360f1702c5SYu Xiangning 		 * modules, sockmod should conceptually exist at slot
62370f1702c5SYu Xiangning 		 * sti_pushcnt.  Insert sockmod at this location by sliding all
62380f1702c5SYu Xiangning 		 * of the module names after so_pushcnt over by one.  We know
62390f1702c5SYu Xiangning 		 * that there will be room to do this since we allocated
62400f1702c5SYu Xiangning 		 * sl_modlist with an additional slot.
62410f1702c5SYu Xiangning 		 */
62420f1702c5SYu Xiangning 		for (i = kstrlist.sl_nmods; i > sti->sti_pushcnt; i--)
62430f1702c5SYu Xiangning 			kstrlist.sl_modlist[i] = kstrlist.sl_modlist[i - 1];
62440f1702c5SYu Xiangning 
62450f1702c5SYu Xiangning 		(void) strcpy(kstrlist.sl_modlist[i].l_name, sockmod_name);
62460f1702c5SYu Xiangning 		kstrlist.sl_nmods++;
62470f1702c5SYu Xiangning 
62480f1702c5SYu Xiangning 		/*
62490f1702c5SYu Xiangning 		 * Copy all of the entries out to ustrlist.
62500f1702c5SYu Xiangning 		 */
62510f1702c5SYu Xiangning 		kmlistp = kstrlist.sl_modlist;
62520f1702c5SYu Xiangning 		umlistp = STRUCT_FGETP(ustrlist, sl_modlist);
62530f1702c5SYu Xiangning 		for (i = 0; i < nmods && i < kstrlist.sl_nmods; i++) {
62540f1702c5SYu Xiangning 			error = so_copyout(kmlistp++, umlistp++,
62550f1702c5SYu Xiangning 			    sizeof (struct str_mlist), mode & FKIOCTL);
62560f1702c5SYu Xiangning 			if (error != 0)
62570f1702c5SYu Xiangning 				goto done;
62580f1702c5SYu Xiangning 		}
62590f1702c5SYu Xiangning 
62600f1702c5SYu Xiangning 		error = so_copyout(&i, (void *)arg, sizeof (int32_t),
62610f1702c5SYu Xiangning 		    mode & FKIOCTL);
62620f1702c5SYu Xiangning 		if (error == 0)
62630f1702c5SYu Xiangning 			*rvalp = 0;
62640f1702c5SYu Xiangning 	done:
62650f1702c5SYu Xiangning 		kmem_free(kstrlist.sl_modlist, kstrlistsize);
62660f1702c5SYu Xiangning 		return (error);
62670f1702c5SYu Xiangning 	}
62680f1702c5SYu Xiangning 	case I_LOOK:
62690f1702c5SYu Xiangning 		if (sti->sti_pushcnt == 0) {
62700f1702c5SYu Xiangning 			return (so_copyout(sockmod_name, (void *)arg,
62710f1702c5SYu Xiangning 			    sizeof (sockmod_name), mode & FKIOCTL));
62720f1702c5SYu Xiangning 		}
62730f1702c5SYu Xiangning 		return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
62740f1702c5SYu Xiangning 
62750f1702c5SYu Xiangning 	case I_FIND:
62760f1702c5SYu Xiangning 		error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
62770f1702c5SYu Xiangning 		if (error && error != EINVAL)
62780f1702c5SYu Xiangning 			return (error);
62790f1702c5SYu Xiangning 
62800f1702c5SYu Xiangning 		/* if not found and string was sockmod return 1 */
62810f1702c5SYu Xiangning 		if (*rvalp == 0 || error == EINVAL) {
62820f1702c5SYu Xiangning 			error = ((mode & FKIOCTL) ? copystr : copyinstr)(
62830f1702c5SYu Xiangning 			    (void *)arg, mname, sizeof (mname), NULL);
62840f1702c5SYu Xiangning 			if (error == ENAMETOOLONG)
62850f1702c5SYu Xiangning 				error = EINVAL;
62860f1702c5SYu Xiangning 
62870f1702c5SYu Xiangning 			if (error == 0 && strcmp(mname, sockmod_name) == 0)
62880f1702c5SYu Xiangning 				*rvalp = 1;
62890f1702c5SYu Xiangning 		}
62900f1702c5SYu Xiangning 		return (error);
62910f1702c5SYu Xiangning 
62920f1702c5SYu Xiangning 	default:
62930f1702c5SYu Xiangning 		panic("socktpi_plumbioctl: unknown ioctl %d", cmd);
62940f1702c5SYu Xiangning 		break;
62950f1702c5SYu Xiangning 	}
62960f1702c5SYu Xiangning 
62970f1702c5SYu Xiangning 	return (0);
62980f1702c5SYu Xiangning }
62990f1702c5SYu Xiangning 
63000f1702c5SYu Xiangning /*
63010f1702c5SYu Xiangning  * Wrapper around the streams poll routine that implements socket poll
63020f1702c5SYu Xiangning  * semantics.
63030f1702c5SYu Xiangning  * The sockfs never calls pollwakeup itself - the stream head take care
63040f1702c5SYu Xiangning  * of all pollwakeups. Since sockfs never holds so_lock when calling the
63050f1702c5SYu Xiangning  * stream head there can never be a deadlock due to holding so_lock across
63060f1702c5SYu Xiangning  * pollwakeup and acquiring so_lock in this routine.
63070f1702c5SYu Xiangning  *
63080f1702c5SYu Xiangning  * However, since the performance of VOP_POLL is critical we avoid
63090f1702c5SYu Xiangning  * acquiring so_lock here. This is based on two assumptions:
63100f1702c5SYu Xiangning  *  - The poll implementation holds locks to serialize the VOP_POLL call
63110f1702c5SYu Xiangning  *    and a pollwakeup for the same pollhead. This ensures that should
63120f1702c5SYu Xiangning  *    e.g. so_state change during a socktpi_poll call the pollwakeup
63130f1702c5SYu Xiangning  *    (which strsock_* and strrput conspire to issue) is issued after
63140f1702c5SYu Xiangning  *    the state change. Thus the pollwakeup will block until VOP_POLL has
63150f1702c5SYu Xiangning  *    returned and then wake up poll and have it call VOP_POLL again.
63160f1702c5SYu Xiangning  *  - The reading of so_state without holding so_lock does not result in
63170f1702c5SYu Xiangning  *    stale data that is older than the latest state change that has dropped
63180f1702c5SYu Xiangning  *    so_lock. This is ensured by the mutex_exit issuing the appropriate
63190f1702c5SYu Xiangning  *    memory barrier to force the data into the coherency domain.
63200f1702c5SYu Xiangning  */
63210f1702c5SYu Xiangning static int
63220f1702c5SYu Xiangning sotpi_poll(
63230f1702c5SYu Xiangning 	struct sonode	*so,
63240f1702c5SYu Xiangning 	short		events,
63250f1702c5SYu Xiangning 	int		anyyet,
63260f1702c5SYu Xiangning 	short		*reventsp,
63270f1702c5SYu Xiangning 	struct pollhead **phpp)
63280f1702c5SYu Xiangning {
63290f1702c5SYu Xiangning 	short origevents = events;
63300f1702c5SYu Xiangning 	struct vnode *vp = SOTOV(so);
63310f1702c5SYu Xiangning 	int error;
63320f1702c5SYu Xiangning 	int so_state = so->so_state;	/* snapshot */
63330f1702c5SYu Xiangning 	sotpi_info_t *sti = SOTOTPI(so);
63340f1702c5SYu Xiangning 
63350f1702c5SYu Xiangning 	dprintso(so, 0, ("socktpi_poll(%p): state %s err %d\n",
63360f1702c5SYu Xiangning 	    (void *)vp, pr_state(so_state, so->so_mode), so->so_error));
63370f1702c5SYu Xiangning 
63380f1702c5SYu Xiangning 	ASSERT(vp->v_type == VSOCK);
63390f1702c5SYu Xiangning 	ASSERT(vp->v_stream != NULL);
63400f1702c5SYu Xiangning 
63410f1702c5SYu Xiangning 	if (so->so_version == SOV_STREAM) {
63420f1702c5SYu Xiangning 		/* The imaginary "sockmod" has been popped - act as a stream */
63430f1702c5SYu Xiangning 		return (strpoll(vp->v_stream, events, anyyet,
63440f1702c5SYu Xiangning 		    reventsp, phpp));
63450f1702c5SYu Xiangning 	}
63460f1702c5SYu Xiangning 
63470f1702c5SYu Xiangning 	if (!(so_state & SS_ISCONNECTED) &&
63480f1702c5SYu Xiangning 	    (so->so_mode & SM_CONNREQUIRED)) {
63490f1702c5SYu Xiangning 		/* Not connected yet - turn off write side events */
63500f1702c5SYu Xiangning 		events &= ~(POLLOUT|POLLWRBAND);
63510f1702c5SYu Xiangning 	}
63520f1702c5SYu Xiangning 	/*
63530f1702c5SYu Xiangning 	 * Check for errors without calling strpoll if the caller wants them.
63540f1702c5SYu Xiangning 	 * In sockets the errors are represented as input/output events
63550f1702c5SYu Xiangning 	 * and there is no need to ask the stream head for this information.
63560f1702c5SYu Xiangning 	 */
63570f1702c5SYu Xiangning 	if (so->so_error != 0 &&
63580f1702c5SYu Xiangning 	    ((POLLIN|POLLRDNORM|POLLOUT) & origevents)  != 0) {
63590f1702c5SYu Xiangning 		*reventsp = (POLLIN|POLLRDNORM|POLLOUT) & origevents;
63600f1702c5SYu Xiangning 		return (0);
63610f1702c5SYu Xiangning 	}
63620f1702c5SYu Xiangning 	/*
63630f1702c5SYu Xiangning 	 * Ignore M_PROTO only messages such as the T_EXDATA_IND messages.
63640f1702c5SYu Xiangning 	 * These message with only an M_PROTO/M_PCPROTO part and no M_DATA
63650f1702c5SYu Xiangning 	 * will not trigger a POLLIN event with POLLRDDATA set.
63660f1702c5SYu Xiangning 	 * The handling of urgent data (causing POLLRDBAND) is done by
63670f1702c5SYu Xiangning 	 * inspecting SS_OOBPEND below.
63680f1702c5SYu Xiangning 	 */
63690f1702c5SYu Xiangning 	events |= POLLRDDATA;
63700f1702c5SYu Xiangning 
63710f1702c5SYu Xiangning 	/*
63720f1702c5SYu Xiangning 	 * After shutdown(output) a stream head write error is set.
63730f1702c5SYu Xiangning 	 * However, we should not return output events.
63740f1702c5SYu Xiangning 	 */
63750f1702c5SYu Xiangning 	events |= POLLNOERR;
63760f1702c5SYu Xiangning 	error = strpoll(vp->v_stream, events, anyyet,
63770f1702c5SYu Xiangning 	    reventsp, phpp);
63780f1702c5SYu Xiangning 	if (error)
63790f1702c5SYu Xiangning 		return (error);
63800f1702c5SYu Xiangning 
63810f1702c5SYu Xiangning 	ASSERT(!(*reventsp & POLLERR));
63820f1702c5SYu Xiangning 
63830f1702c5SYu Xiangning 	/*
63840f1702c5SYu Xiangning 	 * Notes on T_CONN_IND handling for sockets.
63850f1702c5SYu Xiangning 	 *
63860f1702c5SYu Xiangning 	 * If strpoll() returned without events, SR_POLLIN is guaranteed
63870f1702c5SYu Xiangning 	 * to be set, ensuring any subsequent strrput() runs pollwakeup().
63880f1702c5SYu Xiangning 	 *
63890f1702c5SYu Xiangning 	 * Since the so_lock is not held, soqueueconnind() may have run
63900f1702c5SYu Xiangning 	 * and a T_CONN_IND may be waiting. We now check for any queued
63910f1702c5SYu Xiangning 	 * T_CONN_IND msgs on sti_conn_ind_head and set appropriate events
63920f1702c5SYu Xiangning 	 * to ensure poll returns.
63930f1702c5SYu Xiangning 	 *
63940f1702c5SYu Xiangning 	 * However:
63950f1702c5SYu Xiangning 	 * If the T_CONN_IND hasn't arrived by the time strpoll() returns,
63960f1702c5SYu Xiangning 	 * when strrput() does run for an arriving M_PROTO with T_CONN_IND
63970f1702c5SYu Xiangning 	 * the following actions will occur; taken together they ensure the
63980f1702c5SYu Xiangning 	 * syscall will return.
63990f1702c5SYu Xiangning 	 *
64000f1702c5SYu Xiangning 	 * 1. If a socket, soqueueconnind() will queue the T_CONN_IND but if
64010f1702c5SYu Xiangning 	 *    the accept() was run on a non-blocking socket sowaitconnind()
64020f1702c5SYu Xiangning 	 *    may have already returned EWOULDBLOCK, so not be waiting to
64030f1702c5SYu Xiangning 	 *    process the message. Additionally socktpi_poll() has probably
64040f1702c5SYu Xiangning 	 *    proceeded past the sti_conn_ind_head check below.
64050f1702c5SYu Xiangning 	 * 2. strrput() runs pollwakeup()->pollnotify()->cv_signal() to wake
64060f1702c5SYu Xiangning 	 *    this thread,  however that could occur before poll_common()
64070f1702c5SYu Xiangning 	 *    has entered cv_wait.
64080f1702c5SYu Xiangning 	 * 3. pollnotify() sets T_POLLWAKE, while holding the pc_lock.
64090f1702c5SYu Xiangning 	 *
64100f1702c5SYu Xiangning 	 * Before proceeding to cv_wait() in poll_common() for an event,
64110f1702c5SYu Xiangning 	 * poll_common() atomically checks for T_POLLWAKE under the pc_lock,
64120f1702c5SYu Xiangning 	 * and if set, re-calls strpoll() to ensure the late arriving
64130f1702c5SYu Xiangning 	 * T_CONN_IND is recognized, and pollsys() returns.
64140f1702c5SYu Xiangning 	 */
64150f1702c5SYu Xiangning 
64160f1702c5SYu Xiangning 	if (sti->sti_conn_ind_head != NULL)
64170f1702c5SYu Xiangning 		*reventsp |= (POLLIN|POLLRDNORM) & events;
64180f1702c5SYu Xiangning 
6419075fab9aSBryan Cantrill 	if (so->so_state & SS_CANTRCVMORE) {
6420075fab9aSBryan Cantrill 		*reventsp |= POLLRDHUP & events;
6421075fab9aSBryan Cantrill 
6422075fab9aSBryan Cantrill 		if (so->so_state & SS_CANTSENDMORE)
6423075fab9aSBryan Cantrill 			*reventsp |= POLLHUP;
6424075fab9aSBryan Cantrill 	}
6425075fab9aSBryan Cantrill 
64260f1702c5SYu Xiangning 	if (so->so_state & SS_OOBPEND)
64270f1702c5SYu Xiangning 		*reventsp |= POLLRDBAND & events;
64280f1702c5SYu Xiangning 
64290f1702c5SYu Xiangning 	if (sti->sti_nl7c_rcv_mp != NULL) {
64300f1702c5SYu Xiangning 		*reventsp |= (POLLIN|POLLRDNORM) & events;
64310f1702c5SYu Xiangning 	}
64320f1702c5SYu Xiangning 	if ((sti->sti_nl7c_flags & NL7C_ENABLED) &&
64330f1702c5SYu Xiangning 	    ((POLLIN|POLLRDNORM) & *reventsp)) {
64340f1702c5SYu Xiangning 		sti->sti_nl7c_flags |= NL7C_POLLIN;
64350f1702c5SYu Xiangning 	}
64360f1702c5SYu Xiangning 
64370f1702c5SYu Xiangning 	return (0);
64380f1702c5SYu Xiangning }
64390f1702c5SYu Xiangning 
64400f1702c5SYu Xiangning /*ARGSUSED*/
64410f1702c5SYu Xiangning static int
64420f1702c5SYu Xiangning socktpi_constructor(void *buf, void *cdrarg, int kmflags)
64430f1702c5SYu Xiangning {
64440f1702c5SYu Xiangning 	sotpi_sonode_t *st = (sotpi_sonode_t *)buf;
64450f1702c5SYu Xiangning 	int error = 0;
64460f1702c5SYu Xiangning 
64470f1702c5SYu Xiangning 	error = sonode_constructor(buf, cdrarg, kmflags);
64480f1702c5SYu Xiangning 	if (error != 0)
64490f1702c5SYu Xiangning 		return (error);
64500f1702c5SYu Xiangning 
64510f1702c5SYu Xiangning 	error = i_sotpi_info_constructor(&st->st_info);
64520f1702c5SYu Xiangning 	if (error != 0)
64530f1702c5SYu Xiangning 		sonode_destructor(buf, cdrarg);
64540f1702c5SYu Xiangning 
64550f1702c5SYu Xiangning 	st->st_sonode.so_priv = &st->st_info;
64560f1702c5SYu Xiangning 
64570f1702c5SYu Xiangning 	return (error);
64580f1702c5SYu Xiangning }
64590f1702c5SYu Xiangning 
64600f1702c5SYu Xiangning /*ARGSUSED1*/
64610f1702c5SYu Xiangning static void
64620f1702c5SYu Xiangning socktpi_destructor(void *buf, void *cdrarg)
64630f1702c5SYu Xiangning {
64640f1702c5SYu Xiangning 	sotpi_sonode_t *st = (sotpi_sonode_t *)buf;
64650f1702c5SYu Xiangning 
64660f1702c5SYu Xiangning 	ASSERT(st->st_sonode.so_priv == &st->st_info);
64670f1702c5SYu Xiangning 	st->st_sonode.so_priv = NULL;
64680f1702c5SYu Xiangning 
64690f1702c5SYu Xiangning 	i_sotpi_info_destructor(&st->st_info);
64700f1702c5SYu Xiangning 	sonode_destructor(buf, cdrarg);
64710f1702c5SYu Xiangning }
64720f1702c5SYu Xiangning 
64730f1702c5SYu Xiangning static int
64740f1702c5SYu Xiangning socktpi_unix_constructor(void *buf, void *cdrarg, int kmflags)
64750f1702c5SYu Xiangning {
64760f1702c5SYu Xiangning 	int retval;
64770f1702c5SYu Xiangning 
64780f1702c5SYu Xiangning 	if ((retval = socktpi_constructor(buf, cdrarg, kmflags)) == 0) {
64790f1702c5SYu Xiangning 		struct sonode *so = (struct sonode *)buf;
64800f1702c5SYu Xiangning 		sotpi_info_t *sti = SOTOTPI(so);
64810f1702c5SYu Xiangning 
64820f1702c5SYu Xiangning 		mutex_enter(&socklist.sl_lock);
64830f1702c5SYu Xiangning 
64840f1702c5SYu Xiangning 		sti->sti_next_so = socklist.sl_list;
64850f1702c5SYu Xiangning 		sti->sti_prev_so = NULL;
64860f1702c5SYu Xiangning 		if (sti->sti_next_so != NULL)
64870f1702c5SYu Xiangning 			SOTOTPI(sti->sti_next_so)->sti_prev_so = so;
64880f1702c5SYu Xiangning 		socklist.sl_list = so;
64890f1702c5SYu Xiangning 
64900f1702c5SYu Xiangning 		mutex_exit(&socklist.sl_lock);
64910f1702c5SYu Xiangning 
64920f1702c5SYu Xiangning 	}
64930f1702c5SYu Xiangning 	return (retval);
64940f1702c5SYu Xiangning }
64950f1702c5SYu Xiangning 
64960f1702c5SYu Xiangning static void
64970f1702c5SYu Xiangning socktpi_unix_destructor(void *buf, void *cdrarg)
64980f1702c5SYu Xiangning {
64990f1702c5SYu Xiangning 	struct sonode	*so = (struct sonode *)buf;
65000f1702c5SYu Xiangning 	sotpi_info_t	*sti = SOTOTPI(so);
65010f1702c5SYu Xiangning 
65020f1702c5SYu Xiangning 	mutex_enter(&socklist.sl_lock);
65030f1702c5SYu Xiangning 
65040f1702c5SYu Xiangning 	if (sti->sti_next_so != NULL)
65050f1702c5SYu Xiangning 		SOTOTPI(sti->sti_next_so)->sti_prev_so = sti->sti_prev_so;
65060f1702c5SYu Xiangning 	if (sti->sti_prev_so != NULL)
65070f1702c5SYu Xiangning 		SOTOTPI(sti->sti_prev_so)->sti_next_so = sti->sti_next_so;
65080f1702c5SYu Xiangning 	else
65090f1702c5SYu Xiangning 		socklist.sl_list = sti->sti_next_so;
65100f1702c5SYu Xiangning 
65110f1702c5SYu Xiangning 	mutex_exit(&socklist.sl_lock);
65120f1702c5SYu Xiangning 
65130f1702c5SYu Xiangning 	socktpi_destructor(buf, cdrarg);
65140f1702c5SYu Xiangning }
65150f1702c5SYu Xiangning 
65160f1702c5SYu Xiangning int
65170f1702c5SYu Xiangning socktpi_init(void)
65180f1702c5SYu Xiangning {
65190f1702c5SYu Xiangning 	/*
65200f1702c5SYu Xiangning 	 * Create sonode caches.  We create a special one for AF_UNIX so
65210f1702c5SYu Xiangning 	 * that we can track them for netstat(1m).
65220f1702c5SYu Xiangning 	 */
65230f1702c5SYu Xiangning 	socktpi_cache = kmem_cache_create("socktpi_cache",
65240f1702c5SYu Xiangning 	    sizeof (struct sotpi_sonode), 0, socktpi_constructor,
65250f1702c5SYu Xiangning 	    socktpi_destructor, NULL, NULL, NULL, 0);
65260f1702c5SYu Xiangning 
65270f1702c5SYu Xiangning 	socktpi_unix_cache = kmem_cache_create("socktpi_unix_cache",
65280f1702c5SYu Xiangning 	    sizeof (struct sotpi_sonode), 0, socktpi_unix_constructor,
65290f1702c5SYu Xiangning 	    socktpi_unix_destructor, NULL, NULL, NULL, 0);
65300f1702c5SYu Xiangning 
65310f1702c5SYu Xiangning 	return (0);
65320f1702c5SYu Xiangning }
65330f1702c5SYu Xiangning 
65340f1702c5SYu Xiangning /*
65350f1702c5SYu Xiangning  * Given a non-TPI sonode, allocate and prep it to be ready for TPI.
65360f1702c5SYu Xiangning  *
65370f1702c5SYu Xiangning  * Caller must still update state and mode using sotpi_update_state().
65380f1702c5SYu Xiangning  */
653941174437SAnders Persson int
65400f1702c5SYu Xiangning sotpi_convert_sonode(struct sonode *so, struct sockparams *newsp,
654141174437SAnders Persson     boolean_t *direct, queue_t **qp, struct cred *cr)
65420f1702c5SYu Xiangning {
65430f1702c5SYu Xiangning 	sotpi_info_t *sti;
65440f1702c5SYu Xiangning 	struct sockparams *origsp = so->so_sockparams;
65450f1702c5SYu Xiangning 	sock_lower_handle_t handle = so->so_proto_handle;
65460f1702c5SYu Xiangning 	struct stdata *stp;
65470f1702c5SYu Xiangning 	struct vnode *vp;
65480f1702c5SYu Xiangning 	queue_t *q;
654941174437SAnders Persson 	int error = 0;
65500f1702c5SYu Xiangning 
655141174437SAnders Persson 	ASSERT((so->so_state & (SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) ==
655241174437SAnders Persson 	    SS_FALLBACK_PENDING);
655341174437SAnders Persson 	ASSERT(SOCK_IS_NONSTR(so));
655441174437SAnders Persson 
655541174437SAnders Persson 	*qp = NULL;
65560f1702c5SYu Xiangning 	*direct = B_FALSE;
65570f1702c5SYu Xiangning 	so->so_sockparams = newsp;
65580f1702c5SYu Xiangning 	/*
65590f1702c5SYu Xiangning 	 * Allocate and initalize fields required by TPI.
65600f1702c5SYu Xiangning 	 */
65610f1702c5SYu Xiangning 	(void) sotpi_info_create(so, KM_SLEEP);
65620f1702c5SYu Xiangning 	sotpi_info_init(so);
65630f1702c5SYu Xiangning 
656441174437SAnders Persson 	if ((error = sotpi_init(so, NULL, cr, SO_FALLBACK)) != 0) {
65650f1702c5SYu Xiangning 		sotpi_info_fini(so);
65660f1702c5SYu Xiangning 		sotpi_info_destroy(so);
656741174437SAnders Persson 		return (error);
65680f1702c5SYu Xiangning 	}
65690f1702c5SYu Xiangning 	ASSERT(handle == so->so_proto_handle);
65700f1702c5SYu Xiangning 	sti = SOTOTPI(so);
65710f1702c5SYu Xiangning 	if (sti->sti_direct != 0)
65720f1702c5SYu Xiangning 		*direct = B_TRUE;
65730f1702c5SYu Xiangning 
65740f1702c5SYu Xiangning 	/*
65750f1702c5SYu Xiangning 	 * Keep the original sp around so we can properly dispose of the
65760f1702c5SYu Xiangning 	 * sonode when the socket is being closed.
65770f1702c5SYu Xiangning 	 */
65780f1702c5SYu Xiangning 	sti->sti_orig_sp = origsp;
65790f1702c5SYu Xiangning 
65800f1702c5SYu Xiangning 	so_basic_strinit(so);	/* skips the T_CAPABILITY_REQ */
65810f1702c5SYu Xiangning 	so_alloc_addr(so, so->so_max_addr_len);
65820f1702c5SYu Xiangning 
65830f1702c5SYu Xiangning 	/*
65840f1702c5SYu Xiangning 	 * If the application has done a SIOCSPGRP, make sure the
65850f1702c5SYu Xiangning 	 * STREAM head is aware. This needs to take place before
65860f1702c5SYu Xiangning 	 * the protocol start sending up messages. Otherwise we
65870f1702c5SYu Xiangning 	 * might miss to generate SIGPOLL.
65880f1702c5SYu Xiangning 	 *
65890f1702c5SYu Xiangning 	 * It is possible that the application will receive duplicate
65900f1702c5SYu Xiangning 	 * signals if some were already generated for either data or
65910f1702c5SYu Xiangning 	 * connection indications.
65920f1702c5SYu Xiangning 	 */
65930f1702c5SYu Xiangning 	if (so->so_pgrp != 0) {
65940f1702c5SYu Xiangning 		if (so_set_events(so, so->so_vnode, cr) != 0)
65950f1702c5SYu Xiangning 			so->so_pgrp = 0;
65960f1702c5SYu Xiangning 	}
65970f1702c5SYu Xiangning 
65980f1702c5SYu Xiangning 	/*
65990f1702c5SYu Xiangning 	 * Determine which queue to use.
66000f1702c5SYu Xiangning 	 */
66010f1702c5SYu Xiangning 	vp = SOTOV(so);
66020f1702c5SYu Xiangning 	stp = vp->v_stream;
66030f1702c5SYu Xiangning 	ASSERT(stp != NULL);
66040f1702c5SYu Xiangning 	q = stp->sd_wrq->q_next;
66050f1702c5SYu Xiangning 
66060f1702c5SYu Xiangning 	/*
66070f1702c5SYu Xiangning 	 * Skip any modules that may have been auto pushed when the device
66080f1702c5SYu Xiangning 	 * was opened
66090f1702c5SYu Xiangning 	 */
66100f1702c5SYu Xiangning 	while (q->q_next != NULL)
66110f1702c5SYu Xiangning 		q = q->q_next;
661241174437SAnders Persson 	*qp = _RD(q);
66130f1702c5SYu Xiangning 
661441174437SAnders Persson 	/* This is now a STREAMS sockets */
661541174437SAnders Persson 	so->so_not_str = B_FALSE;
661641174437SAnders Persson 
661741174437SAnders Persson 	return (error);
661841174437SAnders Persson }
661941174437SAnders Persson 
662041174437SAnders Persson /*
662141174437SAnders Persson  * Revert a TPI sonode. It is only allowed to revert the sonode during
662241174437SAnders Persson  * the fallback process.
662341174437SAnders Persson  */
662441174437SAnders Persson void
662541174437SAnders Persson sotpi_revert_sonode(struct sonode *so, struct cred *cr)
662641174437SAnders Persson {
662741174437SAnders Persson 	vnode_t *vp = SOTOV(so);
662841174437SAnders Persson 
662941174437SAnders Persson 	ASSERT((so->so_state & (SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) ==
663041174437SAnders Persson 	    SS_FALLBACK_PENDING);
663141174437SAnders Persson 	ASSERT(!SOCK_IS_NONSTR(so));
663241174437SAnders Persson 	ASSERT(vp->v_stream != NULL);
663341174437SAnders Persson 
663441174437SAnders Persson 	strclean(vp);
663541174437SAnders Persson 	(void) strclose(vp, FREAD|FWRITE|SO_FALLBACK, cr);
663641174437SAnders Persson 
663741174437SAnders Persson 	/*
663841174437SAnders Persson 	 * Restore the original sockparams. The caller is responsible for
663941174437SAnders Persson 	 * dropping the ref to the new sp.
664041174437SAnders Persson 	 */
664141174437SAnders Persson 	so->so_sockparams = SOTOTPI(so)->sti_orig_sp;
664241174437SAnders Persson 
664341174437SAnders Persson 	sotpi_info_fini(so);
664441174437SAnders Persson 	sotpi_info_destroy(so);
664541174437SAnders Persson 
664641174437SAnders Persson 	/* This is no longer a STREAMS sockets */
664741174437SAnders Persson 	so->so_not_str = B_TRUE;
66480f1702c5SYu Xiangning }
66490f1702c5SYu Xiangning 
66500f1702c5SYu Xiangning void
66510f1702c5SYu Xiangning sotpi_update_state(struct sonode *so, struct T_capability_ack *tcap,
66520f1702c5SYu Xiangning     struct sockaddr *laddr, socklen_t laddrlen, struct sockaddr *faddr,
66530f1702c5SYu Xiangning     socklen_t faddrlen, short opts)
66540f1702c5SYu Xiangning {
66550f1702c5SYu Xiangning 	sotpi_info_t *sti = SOTOTPI(so);
66560f1702c5SYu Xiangning 
66570f1702c5SYu Xiangning 	so_proc_tcapability_ack(so, tcap);
66580f1702c5SYu Xiangning 
66590f1702c5SYu Xiangning 	so->so_options |= opts;
66600f1702c5SYu Xiangning 
66610f1702c5SYu Xiangning 	/*
66620f1702c5SYu Xiangning 	 * Determine whether the foreign and local address are valid
66630f1702c5SYu Xiangning 	 */
66640f1702c5SYu Xiangning 	if (laddrlen != 0) {
66650f1702c5SYu Xiangning 		ASSERT(laddrlen <= sti->sti_laddr_maxlen);
66660f1702c5SYu Xiangning 		sti->sti_laddr_len = laddrlen;
66670f1702c5SYu Xiangning 		bcopy(laddr, sti->sti_laddr_sa, laddrlen);
66680f1702c5SYu Xiangning 		sti->sti_laddr_valid = (so->so_state & SS_ISBOUND);
66690f1702c5SYu Xiangning 	}
66700f1702c5SYu Xiangning 
66710f1702c5SYu Xiangning 	if (faddrlen != 0) {
66720f1702c5SYu Xiangning 		ASSERT(faddrlen <= sti->sti_faddr_maxlen);
66730f1702c5SYu Xiangning 		sti->sti_faddr_len = faddrlen;
66740f1702c5SYu Xiangning 		bcopy(faddr, sti->sti_faddr_sa, faddrlen);
66750f1702c5SYu Xiangning 		sti->sti_faddr_valid = (so->so_state & SS_ISCONNECTED);
66760f1702c5SYu Xiangning 	}
66770f1702c5SYu Xiangning 
66780f1702c5SYu Xiangning }
66790f1702c5SYu Xiangning 
66800f1702c5SYu Xiangning /*
66810f1702c5SYu Xiangning  * Allocate enough space to cache the local and foreign addresses.
66820f1702c5SYu Xiangning  */
66830f1702c5SYu Xiangning void
66840f1702c5SYu Xiangning so_alloc_addr(struct sonode *so, t_uscalar_t maxlen)
66850f1702c5SYu Xiangning {
66860f1702c5SYu Xiangning 	sotpi_info_t *sti = SOTOTPI(so);
66870f1702c5SYu Xiangning 
66880f1702c5SYu Xiangning 	ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL);
66890f1702c5SYu Xiangning 	ASSERT(sti->sti_laddr_len == 0 && sti->sti_faddr_len == 0);
66900f1702c5SYu Xiangning 	sti->sti_laddr_maxlen = sti->sti_faddr_maxlen =
66910f1702c5SYu Xiangning 	    P2ROUNDUP(maxlen, KMEM_ALIGN);
66920f1702c5SYu Xiangning 	so->so_max_addr_len = sti->sti_laddr_maxlen;
66930f1702c5SYu Xiangning 	sti->sti_laddr_sa = kmem_alloc(sti->sti_laddr_maxlen * 2, KM_SLEEP);
66940f1702c5SYu Xiangning 	sti->sti_faddr_sa = (struct sockaddr *)((caddr_t)sti->sti_laddr_sa
66950f1702c5SYu Xiangning 	    + sti->sti_laddr_maxlen);
66960f1702c5SYu Xiangning 
66970f1702c5SYu Xiangning 	if (so->so_family == AF_UNIX) {
66980f1702c5SYu Xiangning 		/*
66990f1702c5SYu Xiangning 		 * Initialize AF_UNIX related fields.
67000f1702c5SYu Xiangning 		 */
67010f1702c5SYu Xiangning 		bzero(&sti->sti_ux_laddr, sizeof (sti->sti_ux_laddr));
67020f1702c5SYu Xiangning 		bzero(&sti->sti_ux_faddr, sizeof (sti->sti_ux_faddr));
67030f1702c5SYu Xiangning 	}
67040f1702c5SYu Xiangning }
67050f1702c5SYu Xiangning 
67060f1702c5SYu Xiangning 
67070f1702c5SYu Xiangning sotpi_info_t *
67080f1702c5SYu Xiangning sotpi_sototpi(struct sonode *so)
67090f1702c5SYu Xiangning {
67100f1702c5SYu Xiangning 	sotpi_info_t *sti;
67110f1702c5SYu Xiangning 
671241174437SAnders Persson 	ASSERT(so != NULL);
67130f1702c5SYu Xiangning 
67140f1702c5SYu Xiangning 	sti = (sotpi_info_t *)so->so_priv;
67150f1702c5SYu Xiangning 
67160f1702c5SYu Xiangning 	ASSERT(sti != NULL);
67170f1702c5SYu Xiangning 	ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC);
67180f1702c5SYu Xiangning 
67190f1702c5SYu Xiangning 	return (sti);
67200f1702c5SYu Xiangning }
67210f1702c5SYu Xiangning 
67220f1702c5SYu Xiangning static int
67230f1702c5SYu Xiangning i_sotpi_info_constructor(sotpi_info_t *sti)
67240f1702c5SYu Xiangning {
67250f1702c5SYu Xiangning 	sti->sti_magic		= SOTPI_INFO_MAGIC;
67260f1702c5SYu Xiangning 	sti->sti_ack_mp		= NULL;
67270f1702c5SYu Xiangning 	sti->sti_discon_ind_mp	= NULL;
67280f1702c5SYu Xiangning 	sti->sti_ux_bound_vp	= NULL;
67290f1702c5SYu Xiangning 	sti->sti_unbind_mp	= NULL;
67300f1702c5SYu Xiangning 
67310f1702c5SYu Xiangning 	sti->sti_conn_ind_head	= NULL;
67320f1702c5SYu Xiangning 	sti->sti_conn_ind_tail	= NULL;
67330f1702c5SYu Xiangning 
67340f1702c5SYu Xiangning 	sti->sti_laddr_sa	= NULL;
67350f1702c5SYu Xiangning 	sti->sti_faddr_sa	= NULL;
67360f1702c5SYu Xiangning 
67370f1702c5SYu Xiangning 	sti->sti_nl7c_flags	= 0;
67380f1702c5SYu Xiangning 	sti->sti_nl7c_uri	= NULL;
67390f1702c5SYu Xiangning 	sti->sti_nl7c_rcv_mp	= NULL;
67400f1702c5SYu Xiangning 
67410f1702c5SYu Xiangning 	mutex_init(&sti->sti_plumb_lock, NULL, MUTEX_DEFAULT, NULL);
67420f1702c5SYu Xiangning 	cv_init(&sti->sti_ack_cv, NULL, CV_DEFAULT, NULL);
67430f1702c5SYu Xiangning 
67440f1702c5SYu Xiangning 	return (0);
67450f1702c5SYu Xiangning }
67460f1702c5SYu Xiangning 
67470f1702c5SYu Xiangning static void
67480f1702c5SYu Xiangning i_sotpi_info_destructor(sotpi_info_t *sti)
67490f1702c5SYu Xiangning {
67500f1702c5SYu Xiangning 	ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC);
67510f1702c5SYu Xiangning 	ASSERT(sti->sti_ack_mp == NULL);
67520f1702c5SYu Xiangning 	ASSERT(sti->sti_discon_ind_mp == NULL);
67530f1702c5SYu Xiangning 	ASSERT(sti->sti_ux_bound_vp == NULL);
67540f1702c5SYu Xiangning 	ASSERT(sti->sti_unbind_mp == NULL);
67550f1702c5SYu Xiangning 
67560f1702c5SYu Xiangning 	ASSERT(sti->sti_conn_ind_head == NULL);
67570f1702c5SYu Xiangning 	ASSERT(sti->sti_conn_ind_tail == NULL);
67580f1702c5SYu Xiangning 
67590f1702c5SYu Xiangning 	ASSERT(sti->sti_laddr_sa == NULL);
67600f1702c5SYu Xiangning 	ASSERT(sti->sti_faddr_sa == NULL);
67610f1702c5SYu Xiangning 
67620f1702c5SYu Xiangning 	ASSERT(sti->sti_nl7c_flags == 0);
67630f1702c5SYu Xiangning 	ASSERT(sti->sti_nl7c_uri == NULL);
67640f1702c5SYu Xiangning 	ASSERT(sti->sti_nl7c_rcv_mp == NULL);
67650f1702c5SYu Xiangning 
67660f1702c5SYu Xiangning 	mutex_destroy(&sti->sti_plumb_lock);
67670f1702c5SYu Xiangning 	cv_destroy(&sti->sti_ack_cv);
67680f1702c5SYu Xiangning }
67690f1702c5SYu Xiangning 
67700f1702c5SYu Xiangning /*
67710f1702c5SYu Xiangning  * Creates and attaches TPI information to the given sonode
67720f1702c5SYu Xiangning  */
67730f1702c5SYu Xiangning static boolean_t
67740f1702c5SYu Xiangning sotpi_info_create(struct sonode *so, int kmflags)
67750f1702c5SYu Xiangning {
67760f1702c5SYu Xiangning 	sotpi_info_t *sti;
67770f1702c5SYu Xiangning 
67780f1702c5SYu Xiangning 	ASSERT(so->so_priv == NULL);
67790f1702c5SYu Xiangning 
67800f1702c5SYu Xiangning 	if ((sti = kmem_zalloc(sizeof (*sti), kmflags)) == NULL)
67810f1702c5SYu Xiangning 		return (B_FALSE);
67820f1702c5SYu Xiangning 
67830f1702c5SYu Xiangning 	if (i_sotpi_info_constructor(sti) != 0) {
67840f1702c5SYu Xiangning 		kmem_free(sti, sizeof (*sti));
67850f1702c5SYu Xiangning 		return (B_FALSE);
67860f1702c5SYu Xiangning 	}
67870f1702c5SYu Xiangning 
67880f1702c5SYu Xiangning 	so->so_priv = (void *)sti;
67890f1702c5SYu Xiangning 	return (B_TRUE);
67900f1702c5SYu Xiangning }
67910f1702c5SYu Xiangning 
67920f1702c5SYu Xiangning /*
67930f1702c5SYu Xiangning  * Initializes the TPI information.
67940f1702c5SYu Xiangning  */
67950f1702c5SYu Xiangning static void
67960f1702c5SYu Xiangning sotpi_info_init(struct sonode *so)
67970f1702c5SYu Xiangning {
67980f1702c5SYu Xiangning 	struct vnode *vp = SOTOV(so);
67990f1702c5SYu Xiangning 	sotpi_info_t *sti = SOTOTPI(so);
68000f1702c5SYu Xiangning 	time_t now;
68010f1702c5SYu Xiangning 
68020f1702c5SYu Xiangning 	sti->sti_dev	= so->so_sockparams->sp_sdev_info.sd_vnode->v_rdev;
68030f1702c5SYu Xiangning 	vp->v_rdev	= sti->sti_dev;
68040f1702c5SYu Xiangning 
68050f1702c5SYu Xiangning 	sti->sti_orig_sp = NULL;
68060f1702c5SYu Xiangning 
68070f1702c5SYu Xiangning 	sti->sti_pushcnt = 0;
68080f1702c5SYu Xiangning 
68090f1702c5SYu Xiangning 	now = gethrestime_sec();
68100f1702c5SYu Xiangning 	sti->sti_atime	= now;
68110f1702c5SYu Xiangning 	sti->sti_mtime	= now;
68120f1702c5SYu Xiangning 	sti->sti_ctime	= now;
68130f1702c5SYu Xiangning 
68140f1702c5SYu Xiangning 	sti->sti_eaddr_mp = NULL;
68150f1702c5SYu Xiangning 	sti->sti_delayed_error = 0;
68160f1702c5SYu Xiangning 
68170f1702c5SYu Xiangning 	sti->sti_provinfo = NULL;
68180f1702c5SYu Xiangning 
68190f1702c5SYu Xiangning 	sti->sti_oobcnt = 0;
68200f1702c5SYu Xiangning 	sti->sti_oobsigcnt = 0;
68210f1702c5SYu Xiangning 
68220f1702c5SYu Xiangning 	ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL);
68230f1702c5SYu Xiangning 
68240f1702c5SYu Xiangning 	sti->sti_laddr_sa	= 0;
68250f1702c5SYu Xiangning 	sti->sti_faddr_sa	= 0;
68260f1702c5SYu Xiangning 	sti->sti_laddr_maxlen = sti->sti_faddr_maxlen = 0;
68270f1702c5SYu Xiangning 	sti->sti_laddr_len = sti->sti_faddr_len = 0;
68280f1702c5SYu Xiangning 
68290f1702c5SYu Xiangning 	sti->sti_laddr_valid = 0;
68300f1702c5SYu Xiangning 	sti->sti_faddr_valid = 0;
68310f1702c5SYu Xiangning 	sti->sti_faddr_noxlate = 0;
68320f1702c5SYu Xiangning 
68330f1702c5SYu Xiangning 	sti->sti_direct = 0;
68340f1702c5SYu Xiangning 
68350f1702c5SYu Xiangning 	ASSERT(sti->sti_ack_mp == NULL);
68360f1702c5SYu Xiangning 	ASSERT(sti->sti_ux_bound_vp == NULL);
68370f1702c5SYu Xiangning 	ASSERT(sti->sti_unbind_mp == NULL);
68380f1702c5SYu Xiangning 
68390f1702c5SYu Xiangning 	ASSERT(sti->sti_conn_ind_head == NULL);
68400f1702c5SYu Xiangning 	ASSERT(sti->sti_conn_ind_tail == NULL);
68410f1702c5SYu Xiangning }
68420f1702c5SYu Xiangning 
68430f1702c5SYu Xiangning /*
68440f1702c5SYu Xiangning  * Given a sonode, grab the TPI info and free any data.
68450f1702c5SYu Xiangning  */
68460f1702c5SYu Xiangning static void
68470f1702c5SYu Xiangning sotpi_info_fini(struct sonode *so)
68480f1702c5SYu Xiangning {
68490f1702c5SYu Xiangning 	sotpi_info_t *sti = SOTOTPI(so);
68500f1702c5SYu Xiangning 	mblk_t *mp;
68510f1702c5SYu Xiangning 
68520f1702c5SYu Xiangning 	ASSERT(sti->sti_discon_ind_mp == NULL);
68530f1702c5SYu Xiangning 
68540f1702c5SYu Xiangning 	if ((mp = sti->sti_conn_ind_head) != NULL) {
68550f1702c5SYu Xiangning 		mblk_t *mp1;
68560f1702c5SYu Xiangning 
68570f1702c5SYu Xiangning 		while (mp) {
68580f1702c5SYu Xiangning 			mp1 = mp->b_next;
68590f1702c5SYu Xiangning 			mp->b_next = NULL;
68600f1702c5SYu Xiangning 			freemsg(mp);
68610f1702c5SYu Xiangning 			mp = mp1;
68620f1702c5SYu Xiangning 		}
68630f1702c5SYu Xiangning 		sti->sti_conn_ind_head = sti->sti_conn_ind_tail = NULL;
68640f1702c5SYu Xiangning 	}
68650f1702c5SYu Xiangning 
68660f1702c5SYu Xiangning 	/*
68670f1702c5SYu Xiangning 	 * Protect so->so_[lf]addr_sa so that sockfs_snapshot() can safely
68680f1702c5SYu Xiangning 	 * indirect them.  It also uses so_count as a validity test.
68690f1702c5SYu Xiangning 	 */
68700f1702c5SYu Xiangning 	mutex_enter(&so->so_lock);
68710f1702c5SYu Xiangning 
68720f1702c5SYu Xiangning 	if (sti->sti_laddr_sa) {
68730f1702c5SYu Xiangning 		ASSERT((caddr_t)sti->sti_faddr_sa ==
68740f1702c5SYu Xiangning 		    (caddr_t)sti->sti_laddr_sa + sti->sti_laddr_maxlen);
68750f1702c5SYu Xiangning 		ASSERT(sti->sti_faddr_maxlen == sti->sti_laddr_maxlen);
68760f1702c5SYu Xiangning 		sti->sti_laddr_valid = 0;
68770f1702c5SYu Xiangning 		sti->sti_faddr_valid = 0;
68780f1702c5SYu Xiangning 		kmem_free(sti->sti_laddr_sa, sti->sti_laddr_maxlen * 2);
68790f1702c5SYu Xiangning 		sti->sti_laddr_sa = NULL;
68800f1702c5SYu Xiangning 		sti->sti_laddr_len = sti->sti_laddr_maxlen = 0;
68810f1702c5SYu Xiangning 		sti->sti_faddr_sa = NULL;
68820f1702c5SYu Xiangning 		sti->sti_faddr_len = sti->sti_faddr_maxlen = 0;
68830f1702c5SYu Xiangning 	}
68840f1702c5SYu Xiangning 
68850f1702c5SYu Xiangning 	mutex_exit(&so->so_lock);
68860f1702c5SYu Xiangning 
68870f1702c5SYu Xiangning 	if ((mp = sti->sti_eaddr_mp) != NULL) {
68880f1702c5SYu Xiangning 		freemsg(mp);
68890f1702c5SYu Xiangning 		sti->sti_eaddr_mp = NULL;
68900f1702c5SYu Xiangning 		sti->sti_delayed_error = 0;
68910f1702c5SYu Xiangning 	}
68920f1702c5SYu Xiangning 
68930f1702c5SYu Xiangning 	if ((mp = sti->sti_ack_mp) != NULL) {
68940f1702c5SYu Xiangning 		freemsg(mp);
68950f1702c5SYu Xiangning 		sti->sti_ack_mp = NULL;
68960f1702c5SYu Xiangning 	}
68970f1702c5SYu Xiangning 
68980f1702c5SYu Xiangning 	if ((mp = sti->sti_nl7c_rcv_mp) != NULL) {
68990f1702c5SYu Xiangning 		sti->sti_nl7c_rcv_mp = NULL;
69000f1702c5SYu Xiangning 		freemsg(mp);
69010f1702c5SYu Xiangning 	}
69020f1702c5SYu Xiangning 	sti->sti_nl7c_rcv_rval = 0;
69030f1702c5SYu Xiangning 	if (sti->sti_nl7c_uri != NULL) {
69040f1702c5SYu Xiangning 		nl7c_urifree(so);
69050f1702c5SYu Xiangning 		/* urifree() cleared nl7c_uri */
69060f1702c5SYu Xiangning 	}
69070f1702c5SYu Xiangning 	if (sti->sti_nl7c_flags) {
69080f1702c5SYu Xiangning 		sti->sti_nl7c_flags = 0;
69090f1702c5SYu Xiangning 	}
69100f1702c5SYu Xiangning 
69110f1702c5SYu Xiangning 	ASSERT(sti->sti_ux_bound_vp == NULL);
69120f1702c5SYu Xiangning 	if ((mp = sti->sti_unbind_mp) != NULL) {
69130f1702c5SYu Xiangning 		freemsg(mp);
69140f1702c5SYu Xiangning 		sti->sti_unbind_mp = NULL;
69150f1702c5SYu Xiangning 	}
69160f1702c5SYu Xiangning }
69170f1702c5SYu Xiangning 
69180f1702c5SYu Xiangning /*
69190f1702c5SYu Xiangning  * Destroys the TPI information attached to a sonode.
69200f1702c5SYu Xiangning  */
69210f1702c5SYu Xiangning static void
69220f1702c5SYu Xiangning sotpi_info_destroy(struct sonode *so)
69230f1702c5SYu Xiangning {
69240f1702c5SYu Xiangning 	sotpi_info_t *sti = SOTOTPI(so);
69250f1702c5SYu Xiangning 
69260f1702c5SYu Xiangning 	i_sotpi_info_destructor(sti);
69270f1702c5SYu Xiangning 	kmem_free(sti, sizeof (*sti));
69280f1702c5SYu Xiangning 
69290f1702c5SYu Xiangning 	so->so_priv = NULL;
69300f1702c5SYu Xiangning }
69310f1702c5SYu Xiangning 
69320f1702c5SYu Xiangning /*
69332691240cSYu Xiangning  * Create the global sotpi socket module entry. It will never be freed.
69340f1702c5SYu Xiangning  */
69350f1702c5SYu Xiangning smod_info_t *
69360f1702c5SYu Xiangning sotpi_smod_create(void)
69370f1702c5SYu Xiangning {
69380f1702c5SYu Xiangning 	smod_info_t *smodp;
69390f1702c5SYu Xiangning 
69400f1702c5SYu Xiangning 	smodp = kmem_zalloc(sizeof (*smodp), KM_SLEEP);
69412691240cSYu Xiangning 	smodp->smod_name = kmem_alloc(sizeof (SOTPI_SMOD_NAME), KM_SLEEP);
69422691240cSYu Xiangning 	(void) strcpy(smodp->smod_name, SOTPI_SMOD_NAME);
69430f1702c5SYu Xiangning 	/*
69442691240cSYu Xiangning 	 * Initialize the smod_refcnt to 1 so it will never be freed.
69450f1702c5SYu Xiangning 	 */
69460f1702c5SYu Xiangning 	smodp->smod_refcnt = 1;
69470f1702c5SYu Xiangning 	smodp->smod_uc_version = SOCK_UC_VERSION;
69480f1702c5SYu Xiangning 	smodp->smod_dc_version = SOCK_DC_VERSION;
69490f1702c5SYu Xiangning 	smodp->smod_sock_create_func = &sotpi_create;
69500f1702c5SYu Xiangning 	smodp->smod_sock_destroy_func = &sotpi_destroy;
69510f1702c5SYu Xiangning 	return (smodp);
69520f1702c5SYu Xiangning }
6953