xref: /titanic_52/usr/src/uts/common/sys/socketvar.h (revision ed31198c686205a26320612d2a5dd7b26ae63a15)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 /*
31  * University Copyright- Copyright (c) 1982, 1986, 1988
32  * The Regents of the University of California
33  * All Rights Reserved
34  *
35  * University Acknowledgment- Portions of this document are derived from
36  * software developed by the University of California, Berkeley, and its
37  * contributors.
38  */
39 
40 #ifndef _SYS_SOCKETVAR_H
41 #define	_SYS_SOCKETVAR_H
42 
43 #pragma ident	"%Z%%M%	%I%	%E% SMI"
44 
45 #include <sys/types.h>
46 #include <sys/stream.h>
47 #include <sys/t_lock.h>
48 #include <sys/cred.h>
49 #include <sys/vnode.h>
50 #include <sys/file.h>
51 #include <sys/param.h>
52 #include <sys/zone.h>
53 #include <sys/sodirect.h>
54 #include <inet/kssl/ksslapi.h>
55 
56 #ifdef	__cplusplus
57 extern "C" {
58 #endif
59 
/*
 * Internal representation used for addresses: a pointer to the sockaddr
 * buffer plus the two lengths that describe it.
 */
struct soaddr {
	struct sockaddr	*soa_sa;	/* Actual address */
	t_uscalar_t	soa_len;	/* Length in bytes for kmem_free */
	t_uscalar_t	soa_maxlen;	/* Allocated length */
	/*
	 * NOTE(review): soa_len is documented as the kmem_free length while
	 * soa_maxlen is the "allocated length"; confirm against the
	 * allocation sites which of the two is actually passed to kmem_free.
	 */
};
/* Maximum size address for transports that have ADDR_size == 1 */
#define	SOA_DEFSIZE	128
70 
71 /*
72  * Internal representation of the address used to represent addresses
73  * in the loopback transport for AF_UNIX. While the sockaddr_un is used
74  * as the sockfs layer address for AF_UNIX the pathnames contained in
75  * these addresses are not unique (due to relative pathnames) thus can not
76  * be used in the transport.
77  *
78  * The transport level address consists of a magic number (used to separate the
79  * name space for specific and implicit binds). For a specific bind
80  * this is followed by a "vnode *" which ensures that all specific binds
81  * have a unique transport level address. For implicit binds the latter
82  * part of the address is a byte string (of the same length as a pointer)
83  * that is assigned by the loopback transport.
84  *
85  * The uniqueness assumes that the loopback transport has a separate namespace
86  * for sockets in order to avoid name conflicts with e.g. TLI use of the
87  * same transport.
88  */
/*
 * NOTE(review): the description of the transport level address speaks of a
 * magic number "followed by" the vnode pointer, but the members are declared
 * pointer first, magic second.  Confirm which order the loopback transport
 * relies on before touching this layout.
 */
struct so_ux_addr {
	void	*soua_vp;	/* vnode pointer or assigned by tl */
	uint_t	soua_magic;	/* See below */
};

/* Values for soua_magic; the hex digits spell the quoted ASCII tags. */
#define	SOU_MAGIC_EXPLICIT	0x75787670	/* "uxvp" */
#define	SOU_MAGIC_IMPLICIT	0x616e6f6e	/* "anon" */
96 
/*
 * sockaddr-shaped wrapper around so_ux_addr: an address family followed by
 * the transport level AF_UNIX address.
 */
struct sockaddr_ux {
	sa_family_t		sou_family;	/* AF_UNIX */
	struct so_ux_addr	sou_addr;
};

/* Forward typedefs; both structures are defined later in this header. */
typedef struct sonodeops sonodeops_t;
typedef struct sonode sonode_t;
104 
105 /*
 * The sonode represents a socket. A sonode never exists in the file system
107  * name space and can not be opened using open() - only the socket, socketpair
108  * and accept calls create sonodes.
109  *
110  * When an AF_UNIX socket is bound to a pathname the sockfs
111  * creates a VSOCK vnode in the underlying file system. However, the vnodeops
112  * etc in this VNODE remain those of the underlying file system.
113  * Sockfs uses the v_stream pointer in the underlying file system VSOCK node
114  * to find the sonode bound to the pathname. The bound pathname vnode
 * is accessed through so_ux_bound_vp.
116  *
117  * A socket always corresponds to a VCHR stream representing the transport
118  * provider (e.g. /dev/tcp). This information is retrieved from the kernel
119  * socket configuration table and entered into so_accessvp. sockfs uses
120  * this to perform VOP_ACCESS checks before allowing an open of the transport
121  * provider.
122  *
123  * The locking of sockfs uses the so_lock mutex plus the SOLOCKED
124  * and SOREADLOCKED flags in so_flag. The mutex protects all the state
125  * in the sonode. The SOLOCKED flag is used to single-thread operations from
126  * sockfs users to prevent e.g. multiple bind() calls to operate on the
127  * same sonode concurrently. The SOREADLOCKED flag is used to ensure that
128  * only one thread sleeps in kstrgetmsg for a given sonode. This is needed
129  * to ensure atomic operation for things like MSG_WAITALL.
130  *
131  * Note that so_lock is sometimes held across calls that might go to sleep
132  * (kmem_alloc and soallocproto*). This implies that no other lock in
133  * the system should be held when calling into sockfs; from the system call
134  * side or from strrput. If locks are held while calling into sockfs
135  * the system might hang when running low on memory.
136  */
struct sonode {
	struct	vnode	*so_vnode;	/* vnode associated with this sonode */

	sonodeops_t	*so_ops;	/* operations vector for this sonode */

	/*
	 * These fields are initialized once.
	 */
	dev_t		so_dev;		/* device the sonode represents */
	struct	vnode	*so_accessvp;	/* vnode for the /dev entry */

	/* The locks themselves */
	kmutex_t	so_lock;	/* protects sonode fields */
	kmutex_t	so_plumb_lock;	/* serializes plumbs, and the related */
					/* fields so_version and so_pushcnt */
	kcondvar_t	so_state_cv;	/* synchronize state changes */
	kcondvar_t	so_ack_cv;	/* wait for TPI acks */
	kcondvar_t	so_connind_cv;	/* wait for T_CONN_IND */
	kcondvar_t	so_want_cv;	/* wait due to SOLOCKED */

	/* These fields are protected by so_lock */
	uint_t	so_state;		/* internal state flags SS_*, below */
	uint_t	so_mode;		/* characteristics on socket. SM_* */

	mblk_t	*so_ack_mp;		/* TPI ack received from below */
	mblk_t	*so_conn_ind_head;	/* b_next list of T_CONN_IND */
	mblk_t	*so_conn_ind_tail;	/* last entry of the list above */
	mblk_t	*so_unbind_mp;		/* Preallocated T_UNBIND_REQ message */

	ushort_t so_flag;		/* flags (SOMOD, SOLOCKED, ...), below */
	dev_t	so_fsid;		/* file system identifier */
	time_t  so_atime;		/* time of last access */
	time_t  so_mtime;		/* time of last modification */
	time_t  so_ctime;		/* time of last attributes change */
	int	so_count;		/* count of opened references */

	/* Needed to recreate the same socket for accept */
	short	so_family;
	short	so_type;
	short	so_protocol;
	short	so_version;		/* From so_socket call */
	short	so_pushcnt;		/* Number of modules above "sockmod" */

	/* Options */
	short	so_options;		/* From socket call, see socket.h */
	struct linger	so_linger;	/* SO_LINGER value */
	int	so_sndbuf;		/* SO_SNDBUF value */
	int	so_rcvbuf;		/* SO_RCVBUF value */
	int	so_sndlowat;		/* send low water mark */
	int	so_rcvlowat;		/* receive low water mark */
#ifdef notyet
	int	so_sndtimeo;		/* Not yet implemented */
	int	so_rcvtimeo;		/* Not yet implemented */
#endif /* notyet */
	ushort_t so_error;		/* error affecting connection */
	ushort_t so_delayed_error;	/* From T_uderror_ind */
	int	so_backlog;		/* Listen backlog */

	/*
	 * The counts (so_oobcnt and so_oobsigcnt) track the number of
	 * urgent indications that are (logically) queued on the stream head
	 * read queue. The urgent data is queued on the stream head
	 * as follows.
	 *
	 * In the normal case the SIGURG is not generated until
	 * the T_EXDATA_IND arrives at the stream head. However, transports
	 * that have an early indication that urgent data is pending
	 * (e.g. TCP receiving a "new" urgent pointer value) can send up
	 * an M_PCPROTO/SIGURG message to generate the signal early.
	 *
	 * The mark is indicated by either:
	 *  - a T_EXDATA_IND (with no M_DATA b_cont) with MSGMARK set.
	 *    When this message is consumed by sorecvmsg the socket layer
	 *    sets SS_RCVATMARK until data has been consumed past the mark.
	 *  - a message with MSGMARKNEXT set (indicating that the
	 *    first byte of the next message constitutes the mark). When
	 *    the last byte of the MSGMARKNEXT message is consumed in
	 *    the stream head the stream head sets STRATMARK. This flag
	 *    is cleared when at least one byte is read. (Note that
	 *    the MSGMARKNEXT messages can be of zero length when there
	 *    is no previous data to which the marknext can be attached.)
	 *
	 * While the T_EXDATA_IND method is the common case which is used
	 * with all TPI transports, the MSGMARKNEXT method is needed to
	 * indicate the mark when e.g. the TCP urgent byte has not been
	 * received yet but the TCP urgent pointer has made TCP generate
	 * the M_PCSIG/SIGURG.
	 *
	 * The signal (the M_PCSIG carrying the SIGURG) and the mark
	 * indication can not be delivered as a single message, since
	 * the signal should be delivered as high priority and any mark
	 * indication must flow with the data. This implies that immediately
	 * when the SIGURG has been delivered if the stream head queue is
	 * empty it is impossible to determine if this will be the position
	 * of the mark. This race condition is resolved by using MSGNOTMARKNEXT
	 * messages and the STRNOTATMARK flag in the stream head. The
	 * SIOCATMARK code calls the stream head to wait for either a
	 * non-empty queue or one of the STR*ATMARK flags being set.
	 * This implies that any transport that is sending M_PCSIG(SIGURG)
	 * should send the appropriate MSGNOTMARKNEXT message (which can be
	 * zero length) after sending an M_PCSIG to prevent SIOCATMARK
	 * from sleeping unnecessarily.
	 *
	 * NOTE(review): the second paragraph says "M_PCPROTO/SIGURG" while
	 * the rest of this comment says M_PCSIG; confirm which message type
	 * is meant.
	 */
	mblk_t	*so_oobmsg;		/* outofline oob data */
	uint_t	so_oobsigcnt;		/* Number of SIGURG generated */
	uint_t	so_oobcnt;		/* Number of T_EXDATA_IND queued */
	pid_t	so_pgrp;		/* pgrp for signals */

	/* From T_info_ack */
	t_uscalar_t	so_tsdu_size;
	t_uscalar_t	so_etsdu_size;
	t_scalar_t	so_addr_size;
	t_uscalar_t	so_opt_size;
	t_uscalar_t	so_tidu_size;
	t_scalar_t	so_serv_type;

	/* From T_capability_ack */
	t_uscalar_t	so_acceptor_id;

	/* Internal provider information */
	struct tpi_provinfo	*so_provinfo;

	/*
	 * The local and remote addresses have multiple purposes
	 * but one of the key reasons for their existence and careful
	 * tracking in sockfs is to support getsockname and getpeername
	 * when the transport does not handle the TI_GET*NAME ioctls
	 * and caching when it does (signaled by valid bits in so_state).
	 * When all transports support the new TPI (with T_ADDR_REQ)
	 * we can revisit this code.
	 * The other usage of so_faddr is to keep the "connected to"
	 * address for datagram sockets.
	 * Finally, for AF_UNIX both local and remote addresses are used
	 * to record the sockaddr_un since we use a separate namespace
	 * in the loopback transport.
	 */
	struct soaddr so_laddr;		/* Local address */
	struct soaddr so_faddr;		/* Peer address */
	/* Shorthand accessors for the members of so_laddr and so_faddr. */
#define	so_laddr_sa	so_laddr.soa_sa
#define	so_faddr_sa	so_faddr.soa_sa
#define	so_laddr_len	so_laddr.soa_len
#define	so_faddr_len	so_faddr.soa_len
#define	so_laddr_maxlen	so_laddr.soa_maxlen
#define	so_faddr_maxlen	so_faddr.soa_maxlen
	mblk_t		*so_eaddr_mp;	/* for so_delayed_error */

	/*
	 * For AF_UNIX sockets:
	 * so_ux_laddr/faddr records the internal addresses used with the
	 * transport.
	 * so_ux_bound_vp and v_stream->sd_vnode form the cross-
	 * linkage between the underlying fs vnode corresponding to
	 * the bound sockaddr_un and the socket node.
	 */
	struct so_ux_addr so_ux_laddr;	/* laddr bound with the transport */
	struct so_ux_addr so_ux_faddr;	/* temporary peer address */
	struct vnode	*so_ux_bound_vp; /* bound AF_UNIX file system vnode */
	struct sonode	*so_next;	/* next sonode on socklist	*/
	struct sonode	*so_prev;	/* previous sonode on socklist	*/
	mblk_t	*so_discon_ind_mp;	/* T_DISCON_IND received from below */
					/* put here for delayed processing  */

	void		*so_priv;	/* sonode private data */
	cred_t		*so_peercred;	/* connected socket peer cred */
	pid_t		so_cpid;	/* connected socket peer cached pid */
	zoneid_t	so_zoneid;	/* opener's zoneid */

	kmem_cache_t	*so_cache;	/* object cache of this "sonode". */
	void		*so_obj;	/* object to free */

	/*
	 * For NL7C sockets:
	 *
	 * so_nl7c_flags	the NL7C state of URL processing.
	 *
	 * so_nl7c_rcv_mp	mblk_t chain of already received data to be
	 *			passed up to the app after NL7C gives up on
	 *			a socket.
	 *
	 * so_nl7c_rcv_rval	returned rval for last mblk_t from above.
	 *
	 * so_nl7c_uri		the URI currently being processed.
	 *
	 * so_nl7c_rtime	URI request gethrestime_sec().
	 *
	 * so_nl7c_addr		pointer returned by nl7c_addr_lookup().
	 */
	uint64_t	so_nl7c_flags;
	mblk_t		*so_nl7c_rcv_mp;
	int64_t		so_nl7c_rcv_rval;
	void		*so_nl7c_uri;
	time_t		so_nl7c_rtime;
	void		*so_nl7c_addr;

	/* For sockets acting as an in-kernel SSL proxy */
	kssl_endpt_type_t	so_kssl_type;	/* is proxy/is proxied/none */
	kssl_ent_t		so_kssl_ent;	/* SSL config entry */
	kssl_ctx_t		so_kssl_ctx;	/* SSL session context */

	/* != NULL for sodirect_t enabled socket */
	sodirect_t	*so_direct;
};
339 
/* flags: bit values stored in the so_flag member of struct sonode */
#define	SOMOD		0x0001		/* update socket modification time */
#define	SOACC		0x0002		/* update socket access time */

#define	SOLOCKED	0x0010		/* use to serialize open/closes */
#define	SOREADLOCKED	0x0020		/* serialize kstrgetmsg calls */
#define	SOWANT		0x0040		/* some process waiting on lock */
#define	SOCLONE		0x0080		/* child of clone driver */
#define	SOASYNC_UNBIND	0x0100		/* wait for ACK of async unbind */
349 
/*
 * Socket state bits, kept in the so_state member of struct sonode.
 * Bits 0x00010000 and 0x00800000 are not assigned in this list.
 */
#define	SS_ISCONNECTED		0x00000001 /* socket connected to a peer */
#define	SS_ISCONNECTING		0x00000002 /* in process, connecting to peer */
#define	SS_ISDISCONNECTING	0x00000004 /* in process of disconnecting */
#define	SS_CANTSENDMORE		0x00000008 /* can't send more data to peer */

#define	SS_CANTRCVMORE		0x00000010 /* can't receive more data */
#define	SS_ISBOUND		0x00000020 /* socket is bound */
#define	SS_NDELAY		0x00000040 /* FNDELAY non-blocking */
#define	SS_NONBLOCK		0x00000080 /* O_NONBLOCK non-blocking */

#define	SS_ASYNC		0x00000100 /* async i/o notify */
#define	SS_ACCEPTCONN		0x00000200 /* listen done */
#define	SS_HASCONNIND		0x00000400 /* T_CONN_IND for poll */
#define	SS_SAVEDEOR		0x00000800 /* Saved MSG_EOR rcv side state */

#define	SS_RCVATMARK		0x00001000 /* at mark on input */
#define	SS_OOBPEND		0x00002000 /* OOB pending or present - poll */
#define	SS_HAVEOOBDATA		0x00004000 /* OOB data present */
#define	SS_HADOOBDATA		0x00008000 /* OOB data consumed */

#define	SS_FADDR_NOXLATE	0x00020000 /* No xlation of faddr for AF_UNIX */

#define	SS_HASDATA		0x00040000 /* NCAfs: data available */
#define	SS_DONEREAD		0x00080000 /* NCAfs: all data read */
#define	SS_MOREDATA		0x00100000 /* NCAfs: NCA has more data */

#define	SS_DIRECT		0x00200000 /* transport is directly below */
#define	SS_SODIRECT		0x00400000 /* transport supports sodirect */

#define	SS_LADDR_VALID		0x01000000	/* so_laddr valid for user */
#define	SS_FADDR_VALID		0x02000000	/* so_faddr valid for user */

/* Set of states when the socket can't be rebound */
#define	SS_CANTREBIND	(SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING|\
			    SS_CANTSENDMORE|SS_CANTRCVMORE|SS_ACCEPTCONN)
388 
/*
 * Characteristics of sockets. Not changed after the socket is created.
 * These are the SM_* bits held in the so_mode member of struct sonode.
 */
#define	SM_PRIV			0x001	/* privileged for broadcast, raw... */
#define	SM_ATOMIC		0x002	/* atomic data transmission */
#define	SM_ADDR			0x004	/* addresses given with messages */
#define	SM_CONNREQUIRED		0x008	/* connection required by protocol */

#define	SM_FDPASSING		0x010	/* passes file descriptors */
#define	SM_EXDATA		0x020	/* Can handle T_EXDATA_REQ */
#define	SM_OPTDATA		0x040	/* Can handle T_OPTDATA_REQ */
#define	SM_BYTESTREAM		0x080	/* Byte stream - can use M_DATA */

#define	SM_ACCEPTOR_ID		0x100	/* so_acceptor_id is valid */

/*
 * Socket versions. Used by the socket library when calling _so_socket().
 * Unlike the flag sets above these are plain enumerated values, not bits.
 */
#define	SOV_STREAM	0	/* Not a socket - just a stream */
#define	SOV_DEFAULT	1	/* Select based on so_default_version */
#define	SOV_SOCKSTREAM	2	/* Socket plus streams operations */
#define	SOV_SOCKBSD	3	/* Socket with no streams operations */
#define	SOV_XPG4_2	4	/* Xnet socket */
412 
413 #if defined(_KERNEL) || defined(_KMEMUSER)
414 /*
415  * Used for mapping family/type/protocol to vnode.
416  * Defined here so that crash can use it.
417  */
struct sockparams {
	int	sp_domain;	/* lookup key: socket family */
	int	sp_type;	/* lookup key: socket type */
	int	sp_protocol;	/* lookup key: protocol */
	char	*sp_devpath;	/* device path; see sp_devpathlen */
	int	sp_devpathlen;	/* Is 0 if sp_devpath is a static string */
	vnode_t	*sp_vnode;	/* the vnode this entry maps to */
	struct sockparams *sp_next;	/* next entry in the singly linked list */
};

/* NOTE(review): presumably the head of the sp_next chain - confirm. */
extern struct sockparams *sphead;
429 
430 /*
431  * Used to traverse the list of AF_UNIX sockets to construct the kstat
432  * for netstat(1m).
433  */
struct socklist {
	kmutex_t	sl_lock;	/* NOTE(review): presumably guards sl_list */
	struct sonode	*sl_list;	/* sonodes, chained via so_next/so_prev */
};

/* The single global socklist instance; defined outside this header. */
extern struct socklist socklist;
440 /*
441  * ss_full_waits is the number of times the reader thread
442  * waits when the queue is full and ss_empty_waits is the number
443  * of times the consumer thread waits when the queue is empty.
444  * No locks for these as they are just indicators of whether
445  * disk or network or both is slow or fast.
446  */
struct sendfile_stats {
	/*
	 * NOTE(review): only ss_full_waits and ss_empty_waits are described
	 * by the comment preceding this structure; the meaning of the other
	 * three counters is inferred from their names and should be confirmed
	 * against the sendfile implementation.
	 */
	uint32_t ss_file_cached;
	uint32_t ss_file_not_cached;
	uint32_t ss_full_waits;		/* reader waited on a full queue */
	uint32_t ss_empty_waits;	/* consumer waited on an empty queue */
	uint32_t ss_file_segmap;
};
454 
455 /*
456  * A single sendfile request is represented by snf_req.
457  */
typedef struct snf_req {
	struct snf_req	*sr_next;	/* link to another request */
	mblk_t		*sr_mp_head;	/* head of the request's mblk list */
	mblk_t		*sr_mp_tail;	/* tail of the request's mblk list */
	kmutex_t	sr_lock;
	kcondvar_t	sr_cv;
	uint_t		sr_qlen;
	int		sr_hiwat;
	int		sr_lowat;
	int		sr_operation;	/* presumably READ_OP - confirm */
	struct vnode	*sr_vp;
	file_t 		*sr_fp;
	ssize_t		sr_maxpsz;
	u_offset_t	sr_file_off;
	u_offset_t	sr_file_size;
	/*
	 * NOTE(review): SR_READ_DONE is declared next to sr_read_error and
	 * is presumably a marker value for that member; note that it is an
	 * unsigned constant (0x80000000) while sr_read_error is an int.
	 */
#define	SR_READ_DONE	0x80000000
	int		sr_read_error;
	int		sr_write_error;
} snf_req_t;
477 
/* A queue of sendfile requests */
struct sendfile_queue {
	snf_req_t	*snfq_req_head;		/* first queued request */
	snf_req_t	*snfq_req_tail;		/* last queued request */
	kmutex_t	snfq_lock;
	kcondvar_t	snfq_cv;
	int		snfq_svc_threads;	/* # of service threads */
	int		snfq_idle_cnt;		/* # of idling threads */
	int		snfq_max_threads;
	int		snfq_req_cnt;		/* Number of requests */
};

#define	READ_OP			1
/*
 * Expands to an expression using `hz', so that variable must be visible
 * wherever SNFQ_TIMEOUT is used.
 */
#define	SNFQ_TIMEOUT		(60 * 5 * hz)	/* 5 minutes */
492 
/*
 * Socket network operations switch.
 *
 * One function pointer per socket operation; every entry takes the sonode
 * as its first argument and returns an int.  A sonode reaches its vector
 * through so_ops, and the SOP_*() macros are the intended way to call it.
 */
struct sonodeops {
	int	(*sop_accept)(struct sonode *, int, struct sonode **);
	int	(*sop_bind)(struct sonode *, struct sockaddr *, socklen_t,
		    int);
	int	(*sop_listen)(struct sonode *, int);
	int	(*sop_connect)(struct sonode *, const struct sockaddr *,
		    socklen_t, int, int);
	int	(*sop_recvmsg)(struct sonode *, struct msghdr *,
		    struct uio *);
	int	(*sop_sendmsg)(struct sonode *, struct msghdr *,
		    struct uio *);
	int	(*sop_getpeername)(struct sonode *);
	int	(*sop_getsockname)(struct sonode *);
	int	(*sop_shutdown)(struct sonode *, int);
	int	(*sop_getsockopt)(struct sonode *, int, int, void *,
		    socklen_t *, int);
	int 	(*sop_setsockopt)(struct sonode *, int, int, const void *,
		    socklen_t);
};
513 
/*
 * Dispatch wrappers around the sonode operations switch.  Each SOP_*()
 * macro calls the matching so_ops entry with the sonode as first argument.
 * The `so' argument is expanded twice, so it must have no side effects.
 */
#define	SOP_ACCEPT(so, fflag, nsop)					\
	((*(so)->so_ops->sop_accept)((so), (fflag), (nsop)))
#define	SOP_BIND(so, name, namelen, flags)				\
	((*(so)->so_ops->sop_bind)((so), (name), (namelen), (flags)))
#define	SOP_LISTEN(so, backlog)						\
	((*(so)->so_ops->sop_listen)((so), (backlog)))
#define	SOP_CONNECT(so, name, namelen, fflag, flags)			\
	((*(so)->so_ops->sop_connect)((so), (name), (namelen),		\
	    (fflag), (flags)))
#define	SOP_RECVMSG(so, msg, uiop)					\
	((*(so)->so_ops->sop_recvmsg)((so), (msg), (uiop)))
#define	SOP_SENDMSG(so, msg, uiop)					\
	((*(so)->so_ops->sop_sendmsg)((so), (msg), (uiop)))
#define	SOP_GETPEERNAME(so)						\
	((*(so)->so_ops->sop_getpeername)((so)))
#define	SOP_GETSOCKNAME(so)						\
	((*(so)->so_ops->sop_getsockname)((so)))
#define	SOP_SHUTDOWN(so, how)						\
	((*(so)->so_ops->sop_shutdown)((so), (how)))
#define	SOP_GETSOCKOPT(so, level, optionname, optval, optlenp, flags)	\
	((*(so)->so_ops->sop_getsockopt)((so), (level), (optionname),	\
	    (optval), (optlenp), (flags)))
#define	SOP_SETSOCKOPT(so, level, optionname, optval, optlen)		\
	((*(so)->so_ops->sop_setsockopt)((so), (level), (optionname),	\
	    (optval), (optlen)))
538 
539 #endif /* defined(_KERNEL) || defined(_KMEMUSER) */
540 
541 #ifdef _KERNEL
542 
/*
 * Alignment helpers for control message headers.  The mask arithmetic
 * assumes _CMSG_HDR_ALIGNMENT is a power of two.
 */
#define	ISALIGNED_cmsghdr(addr) \
		(((uintptr_t)(addr) & (_CMSG_HDR_ALIGNMENT - 1)) == 0)

#define	ROUNDUP_cmsglen(len) \
	(((len) + _CMSG_HDR_ALIGNMENT - 1) & ~(_CMSG_HDR_ALIGNMENT - 1))

/*
 * Macros that operate on struct cmsghdr.
 * Used in parsing msg_control.
 *
 * CMSG_NEXT(cmsg)	header following cmsg, i.e. cmsg advanced by its
 *			cmsg_len rounded up to the header alignment.  The
 *			whole expansion is parenthesized so the result can
 *			be used directly, e.g. CMSG_NEXT(c)->cmsg_len.
 * CMSG_CONTENT(cmsg)	address of the data following the header.
 * CMSG_CONTENTLEN(cmsg) number of data bytes, i.e. cmsg_len minus the
 *			header; only meaningful once CMSG_VALID() holds.
 * CMSG_VALID(cmsg, start, end)
 *			true when cmsg is aligned, lies within [start, end),
 *			has a cmsg_len of at least one header, and ends no
 *			later than `end'.
 *
 * The CMSG_VALID macro does not assume that the last option buffer is padded.
 * Its final test compares cmsg_len with the space left before `end' instead
 * of forming cmsg + cmsg_len, so a huge cmsg_len cannot wrap the address
 * computation and slip through.  All arguments are evaluated more than once.
 */
#define	CMSG_NEXT(cmsg)						\
	((struct cmsghdr *)((uintptr_t)(cmsg) +			\
	    ROUNDUP_cmsglen((cmsg)->cmsg_len)))
#define	CMSG_CONTENT(cmsg)	(&((cmsg)[1]))
#define	CMSG_CONTENTLEN(cmsg)	((cmsg)->cmsg_len - sizeof (struct cmsghdr))
#define	CMSG_VALID(cmsg, start, end)					\
	(ISALIGNED_cmsghdr(cmsg) &&					\
	((uintptr_t)(cmsg) >= (uintptr_t)(start)) &&			\
	((uintptr_t)(cmsg) < (uintptr_t)(end)) &&			\
	((size_t)(cmsg)->cmsg_len >= sizeof (struct cmsghdr)) &&	\
	((size_t)(cmsg)->cmsg_len <=					\
	    (uintptr_t)(end) - (uintptr_t)(cmsg)))
565 
/*
 * Maximum size of any argument that is copied in (addresses, options,
 * access rights). MUST be at least MAXPATHLEN + 3.
 * BSD and SunOS 4.X limited this to MLEN or MCLBYTES.
 */
#define	SO_MAXARGSIZE	8192

/*
 * Convert between vnode and sonode
 *
 * VTOSO() reinterprets the vnode's v_data pointer as a sonode, so it is
 * only meaningful for vnodes whose v_data actually holds one.
 */
#define	VTOSO(vp)	((struct sonode *)((vp)->v_data))
#define	SOTOV(sp)	((sp)->so_vnode)

/*
 * Internal flags for sobind()
 *
 * NOTE(review): "so_excl_lock" is not declared anywhere in this header;
 * presumably it refers to the SOLOCKED serialization taken through
 * so_lock_single() - confirm.
 */
#define	_SOBIND_REBIND		0x01	/* Bind to existing local address */
#define	_SOBIND_UNSPEC		0x02	/* Bind to unspecified address */
#define	_SOBIND_LOCK_HELD	0x04	/* so_excl_lock held by caller */
#define	_SOBIND_NOXLATE		0x08	/* No addr translation for AF_UNIX */
#define	_SOBIND_XPG4_2		0x10	/* xpg4.2 semantics */
#define	_SOBIND_SOCKBSD		0x20	/* BSD semantics */
#define	_SOBIND_LISTEN		0x40	/* Make into SS_ACCEPTCONN */
#define	_SOBIND_SOCKETPAIR	0x80	/* Internal flag for so_socketpair() */
					/* to enable listen with backlog = 1 */

/*
 * Internal flags for sounbind()
 */
#define	_SOUNBIND_REBIND	0x01	/* Don't clear fields - will rebind */

/*
 * Internal flags for soconnect()
 */
#define	_SOCONNECT_NOXLATE	0x01	/* No addr translation for AF_UNIX */
#define	_SOCONNECT_DID_BIND	0x02	/* Unbind when connect fails */
#define	_SOCONNECT_XPG4_2	0x04	/* xpg4.2 semantics */

/*
 * Internal flags for sodisconnect()
 */
#define	_SODISCONNECT_LOCK_HELD	0x01	/* so_excl_lock held by caller */

/*
 * Internal flags for sotpi_getsockopt().
 */
#define	_SOGETSOCKOPT_XPG4_2	0x01	/* xpg4.2 semantics */

/*
 * Internal flags for soallocproto*()
 * These are mutually exclusive values, not bits to be or'ed together.
 */
#define	_ALLOC_NOSLEEP		0	/* Don't sleep for memory */
#define	_ALLOC_INTR		1	/* Sleep until interrupt */
#define	_ALLOC_SLEEP		2	/* Sleep forever */
620 
/*
 * Internal structure for handling AF_UNIX file descriptor passing
 *
 * fd_fds is declared with one element but holds fd_numfd entries, so the
 * structure is variable length.  FDBUF_HDRSIZE is the size of everything
 * that precedes the first fd_fds slot (sizeof the struct less one pointer).
 */
struct fdbuf {
	int		fd_size;	/* In bytes, for kmem_free */
	int		fd_numfd;	/* Number of elements below */
	char		*fd_ebuf;	/* Extra buffer to free  */
	int		fd_ebuflen;	/* length of fd_ebuf - TODO confirm */
	frtn_t		fd_frtn;
	struct file	*fd_fds[1];	/* One or more */
};
#define	FDBUF_HDRSIZE	(sizeof (struct fdbuf) - sizeof (struct file *))
633 
634 /*
635  * Variable that can be patched to set what version of socket socket()
636  * will create.
637  */
638 extern int so_default_version;
639 
640 #ifdef DEBUG
641 /* Turn on extra testing capabilities */
642 #define	SOCK_TEST
643 #endif /* DEBUG */
644 
645 #ifdef DEBUG
646 char	*pr_state(uint_t, uint_t);
647 char	*pr_addr(int, struct sockaddr *, t_uscalar_t);
648 int	so_verify_oobstate(struct sonode *);
649 #endif /* DEBUG */
650 
651 /*
652  * DEBUG macros
653  */
/*
 * With DEBUG defined (and not under lint) the macros below print through
 * printf, gated by the sockdebug/sockprinterr tunables and, for the *so
 * variants, by SO_DEBUG being set in the socket's so_options.  Otherwise
 * they all expand to an empty block.
 *
 * `args' is a complete parenthesized printf argument list, e.g.
 *	dprint(1, ("value %d\n", v));
 *
 * The macros expand to brace blocks, so an invocation followed by ';' can
 * not be the unbraced body of an if that has an else branch.
 */
#if defined(DEBUG) && !defined(__lint)
#define	SOCK_DEBUG

extern int sockdebug;
extern int sockprinterr;

#define	eprint(args)	printf args
#define	eprintso(so, args) \
{ if (sockprinterr && ((so)->so_options & SO_DEBUG)) printf args; }
/* `error' is parenthesized so any expression can be passed safely. */
#define	eprintline(error)					\
{								\
	if ((error) != EINTR && (sockprinterr || sockdebug > 0))	\
		printf("socket error %d: line %d file %s\n",	\
			(error), __LINE__, __FILE__);		\
}

/* %p requires a void pointer, hence the explicit cast of `so'. */
#define	eprintsoline(so, error)					\
{ if (sockprinterr && ((so)->so_options & SO_DEBUG))		\
	printf("socket(%p) error %d: line %d file %s\n",	\
		(void *)(so), (error), __LINE__, __FILE__);	\
}
#define	dprint(level, args)	{ if (sockdebug > (level)) printf args; }
#define	dprintso(so, level, args) \
{ if (sockdebug > (level) && ((so)->so_options & SO_DEBUG)) printf args; }

#else /* defined(DEBUG) && !defined(__lint) */

#define	eprint(args)		{}
#define	eprintso(so, args)	{}
#define	eprintline(error)	{}
#define	eprintsoline(so, error)	{}
#define	dprint(level, args)	{}
#define	dprintso(so, level, args) {}
/* Reached with DEBUG defined only under lint: drop DEBUG from here on. */
#ifdef DEBUG
#undef DEBUG
#endif

#endif /* defined(DEBUG) && !defined(__lint) */
692 
693 extern struct vfsops			sock_vfsops;
694 extern struct vnodeops			*socktpi_vnodeops;
695 extern const struct fs_operation_def	socktpi_vnodeops_template[];
696 
697 extern sonodeops_t			sotpi_sonodeops;
698 
699 extern dev_t				sockdev;
700 
701 /*
702  * sockfs functions
703  */
704 extern int	sock_getmsg(vnode_t *, struct strbuf *, struct strbuf *,
705 			uchar_t *, int *, int, rval_t *);
706 extern int	sock_putmsg(vnode_t *, struct strbuf *, struct strbuf *,
707 			uchar_t, int, int);
708 struct sonode	*sotpi_create(vnode_t *, int, int, int, int, struct sonode *,
709 			int *);
710 extern int	socktpi_open(struct vnode **, int, struct cred *,
711 			caller_context_t *);
712 extern int	so_sock2stream(struct sonode *);
713 extern void	so_stream2sock(struct sonode *);
714 extern int	sockinit(int, char *);
715 extern struct vnode
716 		*makesockvp(struct vnode *, int, int, int);
717 extern void	sockfree(struct sonode *);
718 extern void	so_update_attrs(struct sonode *, int);
719 extern int	soconfig(int, int, int,	char *, int);
720 extern struct vnode
721 		*solookup(int, int, int, char *, int *);
722 extern void	so_lock_single(struct sonode *);
723 extern void	so_unlock_single(struct sonode *, int);
724 extern int	so_lock_read(struct sonode *, int);
725 extern int	so_lock_read_intr(struct sonode *, int);
726 extern void	so_unlock_read(struct sonode *);
727 extern void	*sogetoff(mblk_t *, t_uscalar_t, t_uscalar_t, uint_t);
728 extern void	so_getopt_srcaddr(void *, t_uscalar_t,
729 			void **, t_uscalar_t *);
730 extern int	so_getopt_unix_close(void *, t_uscalar_t);
731 extern int	so_addr_verify(struct sonode *, const struct sockaddr *,
732 			socklen_t);
733 extern int	so_ux_addr_xlate(struct sonode *, struct sockaddr *,
734 			socklen_t, int, void **, socklen_t *);
735 extern void	fdbuf_free(struct fdbuf *);
736 extern mblk_t	*fdbuf_allocmsg(int, struct fdbuf *);
737 extern int	fdbuf_create(void *, int, struct fdbuf **);
738 extern void	so_closefds(void *, t_uscalar_t, int, int);
739 extern int	so_getfdopt(void *, t_uscalar_t, int, void **, int *);
740 t_uscalar_t	so_optlen(void *, t_uscalar_t, int);
741 extern void	so_cmsg2opt(void *, t_uscalar_t, int, mblk_t *);
742 extern t_uscalar_t
743 		so_cmsglen(mblk_t *, void *, t_uscalar_t, int);
744 extern int	so_opt2cmsg(mblk_t *, void *, t_uscalar_t, int,
745 			void *, t_uscalar_t);
746 extern void	soisconnecting(struct sonode *);
747 extern void	soisconnected(struct sonode *);
748 extern void	soisdisconnected(struct sonode *, int);
749 extern void	socantsendmore(struct sonode *);
750 extern void	socantrcvmore(struct sonode *);
751 extern void	soseterror(struct sonode *, int);
752 extern int	sogeterr(struct sonode *);
753 extern int	sogetrderr(vnode_t *, int, int *);
754 extern int	sogetwrerr(vnode_t *, int, int *);
755 extern void	so_unix_close(struct sonode *);
756 extern mblk_t	*soallocproto(size_t, int);
757 extern mblk_t	*soallocproto1(const void *, ssize_t, ssize_t, int);
758 extern void	soappendmsg(mblk_t *, const void *, ssize_t);
759 extern mblk_t	*soallocproto2(const void *, ssize_t, const void *, ssize_t,
760 			ssize_t, int);
761 extern mblk_t	*soallocproto3(const void *, ssize_t, const void *, ssize_t,
762 			const void *, ssize_t, ssize_t, int);
763 extern int	sowaitprim(struct sonode *, t_scalar_t, t_scalar_t,
764 			t_uscalar_t, mblk_t **, clock_t);
765 extern int	sowaitokack(struct sonode *, t_scalar_t);
766 extern int	sowaitack(struct sonode *, mblk_t **, clock_t);
767 extern void	soqueueack(struct sonode *, mblk_t *);
768 extern int	sowaitconnind(struct sonode *, int, mblk_t **);
769 extern void	soqueueconnind(struct sonode *, mblk_t *);
770 extern int	soflushconnind(struct sonode *, t_scalar_t);
771 extern void	so_drain_discon_ind(struct sonode *);
772 extern void	so_flush_discon_ind(struct sonode *);
773 extern int	sowaitconnected(struct sonode *, int, int);
774 
775 extern int	sostream_direct(struct sonode *, struct uio *,
776 		    mblk_t *, cred_t *);
777 extern int	sosend_dgram(struct sonode *, struct sockaddr *,
778 		    socklen_t, struct uio *, int);
779 extern int	sosend_svc(struct sonode *, struct uio *, t_scalar_t, int, int);
780 extern void	so_installhooks(struct sonode *);
781 extern int	so_strinit(struct sonode *, struct sonode *);
782 extern int	sotpi_recvmsg(struct sonode *, struct nmsghdr *,
783 		    struct uio *);
784 extern int	sotpi_getpeername(struct sonode *);
785 extern int	sotpi_getsockopt(struct sonode *, int, int, void *,
786 		    socklen_t *, int);
787 extern int	sotpi_setsockopt(struct sonode *, int, int, const void *,
788 		    socklen_t);
789 extern int	socktpi_ioctl(struct vnode *, int, intptr_t, int,
790 		    struct cred *, int *, caller_context_t *);
791 extern int	sodisconnect(struct sonode *, t_scalar_t, int);
792 extern ssize_t	soreadfile(file_t *, uchar_t *, u_offset_t, int *, size_t);
793 extern int	so_set_asyncsigs(vnode_t *, pid_t, int, int, cred_t *);
794 extern int	so_set_events(struct sonode *, vnode_t *, cred_t *);
795 extern int	so_flip_async(struct sonode *, vnode_t *, int, cred_t *);
796 extern int	so_set_siggrp(struct sonode *, vnode_t *, pid_t, int, cred_t *);
797 extern void	*sock_kstat_init(zoneid_t);
798 extern void	sock_kstat_fini(zoneid_t, void *);
799 extern struct sonode *getsonode(int, int *, file_t **);
800 
801 /*
802  * Function wrappers (mostly around the sonode switch) for
803  * backward compatibility.
804  */
805 extern int	soaccept(struct sonode *, int, struct sonode **);
806 extern int	sobind(struct sonode *, struct sockaddr *, socklen_t,
807 		    int, int);
808 extern int	solisten(struct sonode *, int);
809 extern int	soconnect(struct sonode *, const struct sockaddr *, socklen_t,
810 		    int, int);
811 extern int	sorecvmsg(struct sonode *, struct nmsghdr *, struct uio *);
812 extern int	sosendmsg(struct sonode *, struct nmsghdr *, struct uio *);
813 extern int	sogetpeername(struct sonode *);
814 extern int	sogetsockname(struct sonode *);
815 extern int	soshutdown(struct sonode *, int);
816 extern int	sogetsockopt(struct sonode *, int, int, void *, socklen_t *,
817 		    int);
818 extern int	sosetsockopt(struct sonode *, int, int, const void *,
819 		    t_uscalar_t);
820 
821 extern struct sonode	*socreate(vnode_t *, int, int, int, int,
822 			    struct sonode *, int *);
823 
824 extern int	so_copyin(const void *, void *, size_t, int);
825 extern int	so_copyout(const void *, void *, size_t, int);
826 
827 extern int	socktpi_access(struct vnode *, int, int, struct cred *,
828 		    caller_context_t *);
829 extern int	socktpi_fid(struct vnode *, struct fid *, caller_context_t *);
830 extern int	socktpi_fsync(struct vnode *, int, struct cred *,
831 		    caller_context_t *);
832 extern int	socktpi_getattr(struct vnode *, struct vattr *, int,
833 		    struct cred *, caller_context_t *);
834 extern int	socktpi_seek(struct vnode *, offset_t, offset_t *,
835 		    caller_context_t *);
836 extern int	socktpi_setattr(struct vnode *, struct vattr *, int,
837 		    struct cred *, caller_context_t *);
838 extern int	socktpi_setfl(vnode_t *, int, int, cred_t *,
839 		    caller_context_t *);
840 
/* SCTP sockfs */
extern struct sonode	*sosctp_create(vnode_t *, int, int, int, int,
			    struct sonode *, int *);
extern int sosctp_init(void);

/* SDP sockfs */
extern struct sonode    *sosdp_create(vnode_t *, int, int, int, int,
			    struct sonode *, int *);
extern int sosdp_init(void);

#endif	/* _KERNEL */
852 
853 /*
854  * Internal structure for obtaining sonode information from the socklist.
855  * These types match those corresponding in the sonode structure.
856  * This is not a published interface, and may change at any time.
857  */
struct sockinfo {
	uint_t		si_size;		/* real length of this struct */
	short		si_family;		/* cf. so_family */
	short		si_type;		/* cf. so_type */
	ushort_t	si_flag;		/* cf. so_flag */
	uint_t		si_state;		/* cf. so_state */
	uint_t		si_ux_laddr_sou_magic;	/* cf. so_ux_laddr.soua_magic */
	uint_t		si_ux_faddr_sou_magic;	/* cf. so_ux_faddr.soua_magic */
	t_scalar_t	si_serv_type;		/* cf. so_serv_type */
	t_uscalar_t	si_laddr_soa_len;	/* cf. so_laddr.soa_len */
	t_uscalar_t	si_faddr_soa_len;	/* cf. so_faddr.soa_len */
	uint16_t	si_laddr_family;
	uint16_t	si_faddr_family;
	char		si_laddr_sun_path[MAXPATHLEN + 1]; /* NUL terminated */
	char		si_faddr_sun_path[MAXPATHLEN + 1]; /* NUL terminated */
	zoneid_t	si_szoneid;		/* presumably so_zoneid - confirm */
};
875 
876 
877 #ifdef	__cplusplus
878 }
879 #endif
880 
881 #endif	/* _SYS_SOCKETVAR_H */
882