1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright 2015 Joyent, Inc. 25 */ 26 27 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 /* 31 * University Copyright- Copyright (c) 1982, 1986, 1988 32 * The Regents of the University of California 33 * All Rights Reserved 34 * 35 * University Acknowledgment- Portions of this document are derived from 36 * software developed by the University of California, Berkeley, and its 37 * contributors. 38 */ 39 /* 40 * Copyright 2015 Nexenta Systems, Inc. All rights reserved. 41 * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. 42 */ 43 44 #ifndef _SYS_SOCKETVAR_H 45 #define _SYS_SOCKETVAR_H 46 47 #include <sys/types.h> 48 #include <sys/stream.h> 49 #include <sys/t_lock.h> 50 #include <sys/cred.h> 51 #include <sys/vnode.h> 52 #include <sys/file.h> 53 #include <sys/param.h> 54 #include <sys/zone.h> 55 #include <sys/sdt.h> 56 #include <sys/modctl.h> 57 #include <sys/atomic.h> 58 #include <sys/socket.h> 59 #include <sys/ksocket.h> 60 #include <sys/kstat.h> 61 62 #ifdef _KERNEL 63 #include <sys/vfs_opreg.h> 64 #endif 65 66 #ifdef __cplusplus 67 extern "C" { 68 #endif 69 70 /* 71 * Internal representation of the address used to represent addresses 72 * in the loopback transport for AF_UNIX. While the sockaddr_un is used 73 * as the sockfs layer address for AF_UNIX the pathnames contained in 74 * these addresses are not unique (due to relative pathnames) thus can not 75 * be used in the transport. 76 * 77 * The transport level address consists of a magic number (used to separate the 78 * name space for specific and implicit binds). For a specific bind 79 * this is followed by a "vnode *" which ensures that all specific binds 80 * have a unique transport level address. For implicit binds the latter 81 * part of the address is a byte string (of the same length as a pointer) 82 * that is assigned by the loopback transport. 83 * 84 * The uniqueness assumes that the loopback transport has a separate namespace 85 * for sockets in order to avoid name conflicts with e.g. TLI use of the 86 * same transport. 87 */ 88 struct so_ux_addr { 89 void *soua_vp; /* vnode pointer or assigned by tl */ 90 uint_t soua_magic; /* See below */ 91 }; 92 93 #define SOU_MAGIC_EXPLICIT 0x75787670 /* "uxvp" */ 94 #define SOU_MAGIC_IMPLICIT 0x616e6f6e /* "anon" */ 95 96 struct sockaddr_ux { 97 sa_family_t sou_family; /* AF_UNIX */ 98 struct so_ux_addr sou_addr; 99 }; 100 101 #if defined(_KERNEL) || defined(_KMEMUSER) 102 103 #include <sys/socket_proto.h> 104 105 typedef struct sonodeops sonodeops_t; 106 typedef struct sonode sonode_t; 107 typedef boolean_t (*so_krecv_f)(sonode_t *, mblk_t *, size_t, int, void *); 108 109 struct sodirect_s; 110 111 /* 112 * The sonode represents a socket. A sonode never exist in the file system 113 * name space and can not be opened using open() - only the socket, socketpair 114 * and accept calls create sonodes. 115 * 116 * The locking of sockfs uses the so_lock mutex plus the SOLOCKED and 117 * SOREADLOCKED flags in so_flag. The mutex protects all the state in the 118 * sonode. It is expected that the underlying transport protocol serializes 119 * socket operations, so sockfs will not normally not single-thread 120 * operations. However, certain sockets, including TPI based ones, can only 121 * handle one control operation at a time. The SOLOCKED flag is used to 122 * single-thread operations from sockfs users to prevent e.g. multiple bind() 123 * calls to operate on the same sonode concurrently. The SOREADLOCKED flag is 124 * used to ensure that only one thread sleeps in kstrgetmsg for a given 125 * sonode. This is needed to ensure atomic operation for things like 126 * MSG_WAITALL. 127 * 128 * The so_fallback_rwlock is used to ensure that for sockets that can 129 * fall back to TPI, the fallback is not initiated until all pending 130 * operations have completed. 131 * 132 * Note that so_lock is sometimes held across calls that might go to sleep 133 * (kmem_alloc and soallocproto*). This implies that no other lock in 134 * the system should be held when calling into sockfs; from the system call 135 * side or from strrput (in case of TPI based sockets). If locks are held 136 * while calling into sockfs the system might hang when running low on memory. 137 */ 138 struct sonode { 139 struct vnode *so_vnode; /* vnode associated with this sonode */ 140 141 sonodeops_t *so_ops; /* operations vector for this sonode */ 142 void *so_priv; /* sonode private data */ 143 144 krwlock_t so_fallback_rwlock; 145 kmutex_t so_lock; /* protects sonode fields */ 146 147 kcondvar_t so_state_cv; /* synchronize state changes */ 148 kcondvar_t so_single_cv; /* wait due to SOLOCKED */ 149 kcondvar_t so_read_cv; /* wait due to SOREADLOCKED */ 150 151 /* These fields are protected by so_lock */ 152 153 uint_t so_state; /* internal state flags SS_*, below */ 154 uint_t so_mode; /* characteristics on socket. SM_* */ 155 ushort_t so_flag; /* flags, see below */ 156 int so_count; /* count of opened references */ 157 158 sock_connid_t so_proto_connid; /* protocol generation number */ 159 160 ushort_t so_error; /* error affecting connection */ 161 162 struct sockparams *so_sockparams; /* vnode or socket module */ 163 /* Needed to recreate the same socket for accept */ 164 short so_family; 165 short so_type; 166 short so_protocol; 167 short so_version; /* From so_socket call */ 168 169 /* Accept queue */ 170 kmutex_t so_acceptq_lock; /* protects accept queue */ 171 list_t so_acceptq_list; /* pending conns */ 172 list_t so_acceptq_defer; /* deferred conns */ 173 list_node_t so_acceptq_node; /* acceptq list node */ 174 unsigned int so_acceptq_len; /* # of conns (both lists) */ 175 unsigned int so_backlog; /* Listen backlog */ 176 kcondvar_t so_acceptq_cv; /* wait for new conn. */ 177 struct sonode *so_listener; /* parent socket */ 178 179 /* Options */ 180 short so_options; /* From socket call, see socket.h */ 181 struct linger so_linger; /* SO_LINGER value */ 182 #define so_sndbuf so_proto_props.sopp_txhiwat /* SO_SNDBUF value */ 183 #define so_sndlowat so_proto_props.sopp_txlowat /* tx low water mark */ 184 #define so_rcvbuf so_proto_props.sopp_rxhiwat /* SO_RCVBUF value */ 185 #define so_rcvlowat so_proto_props.sopp_rxlowat /* rx low water mark */ 186 #define so_max_addr_len so_proto_props.sopp_maxaddrlen 187 #define so_minpsz so_proto_props.sopp_minpsz 188 #define so_maxpsz so_proto_props.sopp_maxpsz 189 190 int so_xpg_rcvbuf; /* SO_RCVBUF value for XPG4 socket */ 191 clock_t so_sndtimeo; /* send timeout */ 192 clock_t so_rcvtimeo; /* recv timeout */ 193 194 mblk_t *so_oobmsg; /* outofline oob data */ 195 ssize_t so_oobmark; /* offset of the oob data */ 196 197 pid_t so_pgrp; /* pgrp for signals */ 198 199 cred_t *so_peercred; /* connected socket peer cred */ 200 pid_t so_cpid; /* connected socket peer cached pid */ 201 zoneid_t so_zoneid; /* opener's zoneid */ 202 203 struct pollhead so_poll_list; /* common pollhead */ 204 short so_pollev; /* events that should be generated */ 205 206 /* Receive */ 207 unsigned int so_rcv_queued; /* # bytes on both rcv lists */ 208 mblk_t *so_rcv_q_head; /* processing/copyout rcv queue */ 209 mblk_t *so_rcv_q_last_head; 210 mblk_t *so_rcv_head; /* protocol prequeue */ 211 mblk_t *so_rcv_last_head; /* last mblk in b_next chain */ 212 kcondvar_t so_rcv_cv; /* wait for data */ 213 uint_t so_rcv_wanted; /* # of bytes wanted by app */ 214 timeout_id_t so_rcv_timer_tid; 215 216 #define so_rcv_thresh so_proto_props.sopp_rcvthresh 217 #define so_rcv_timer_interval so_proto_props.sopp_rcvtimer 218 219 kcondvar_t so_snd_cv; /* wait for snd buffers */ 220 uint32_t 221 so_snd_qfull: 1, /* Transmit full */ 222 so_rcv_wakeup: 1, 223 so_snd_wakeup: 1, 224 so_not_str: 1, /* B_TRUE if not streams based socket */ 225 so_pad_to_bit_31: 28; 226 227 /* Communication channel with protocol */ 228 sock_lower_handle_t so_proto_handle; 229 sock_downcalls_t *so_downcalls; 230 231 struct sock_proto_props so_proto_props; /* protocol settings */ 232 boolean_t so_flowctrld; /* Flow controlled */ 233 uint_t so_copyflag; /* Copy related flag */ 234 kcondvar_t so_copy_cv; /* Copy cond variable */ 235 236 /* kernel sockets */ 237 ksocket_callbacks_t so_ksock_callbacks; 238 void *so_ksock_cb_arg; /* callback argument */ 239 kcondvar_t so_closing_cv; 240 241 /* != NULL for sodirect enabled socket */ 242 struct sodirect_s *so_direct; 243 244 /* socket filters */ 245 uint_t so_filter_active; /* # of active fil */ 246 uint_t so_filter_tx; /* pending tx ops */ 247 struct sof_instance *so_filter_top; /* top of stack */ 248 struct sof_instance *so_filter_bottom; /* bottom of stack */ 249 clock_t so_filter_defertime; /* time when deferred */ 250 251 /* Kernel direct receive callbacks */ 252 so_krecv_f so_krecv_cb; /* recv callback */ 253 void *so_krecv_arg; /* recv cb arg */ 254 }; 255 256 #define SO_HAVE_DATA(so) \ 257 /* \ 258 * For the (tid == 0) case we must check so_rcv_{q_,}head \ 259 * rather than (so_rcv_queued > 0), since the latter does not \ 260 * take into account mblks with only control/name information. \ 261 */ \ 262 ((so)->so_rcv_timer_tid == 0 && ((so)->so_rcv_head != NULL || \ 263 (so)->so_rcv_q_head != NULL)) || \ 264 ((so)->so_state & SS_CANTRCVMORE) 265 266 /* 267 * Events handled by the protocol (in case sd_poll is set) 268 */ 269 #define SO_PROTO_POLLEV (POLLIN|POLLRDNORM|POLLRDBAND) 270 271 272 #endif /* _KERNEL || _KMEMUSER */ 273 274 /* flags */ 275 #define SOMOD 0x0001 /* update socket modification time */ 276 #define SOACC 0x0002 /* update socket access time */ 277 278 #define SOLOCKED 0x0010 /* use to serialize open/closes */ 279 #define SOREADLOCKED 0x0020 /* serialize kstrgetmsg calls */ 280 #define SOCLONE 0x0040 /* child of clone driver */ 281 #define SOASYNC_UNBIND 0x0080 /* wait for ACK of async unbind */ 282 283 #define SOCK_IS_NONSTR(so) ((so)->so_not_str) 284 285 /* 286 * Socket state bits. 287 */ 288 #define SS_ISCONNECTED 0x00000001 /* socket connected to a peer */ 289 #define SS_ISCONNECTING 0x00000002 /* in process, connecting to peer */ 290 #define SS_ISDISCONNECTING 0x00000004 /* in process of disconnecting */ 291 #define SS_CANTSENDMORE 0x00000008 /* can't send more data to peer */ 292 293 #define SS_CANTRCVMORE 0x00000010 /* can't receive more data */ 294 #define SS_ISBOUND 0x00000020 /* socket is bound */ 295 #define SS_NDELAY 0x00000040 /* FNDELAY non-blocking */ 296 #define SS_NONBLOCK 0x00000080 /* O_NONBLOCK non-blocking */ 297 298 #define SS_ASYNC 0x00000100 /* async i/o notify */ 299 #define SS_ACCEPTCONN 0x00000200 /* listen done */ 300 /* unused 0x00000400 */ /* was SS_HASCONNIND */ 301 #define SS_SAVEDEOR 0x00000800 /* Saved MSG_EOR rcv side state */ 302 303 #define SS_RCVATMARK 0x00001000 /* at mark on input */ 304 #define SS_OOBPEND 0x00002000 /* OOB pending or present - poll */ 305 #define SS_HAVEOOBDATA 0x00004000 /* OOB data present */ 306 #define SS_HADOOBDATA 0x00008000 /* OOB data consumed */ 307 #define SS_CLOSING 0x00010000 /* in process of closing */ 308 309 #define SS_FIL_DEFER 0x00020000 /* filter deferred notification */ 310 #define SS_FILOP_OK 0x00040000 /* socket can attach filters */ 311 #define SS_FIL_RCV_FLOWCTRL 0x00080000 /* filter asserted rcv flow ctrl */ 312 #define SS_FIL_SND_FLOWCTRL 0x00100000 /* filter asserted snd flow ctrl */ 313 #define SS_FIL_STOP 0x00200000 /* no more filter actions */ 314 315 #define SS_SODIRECT 0x00400000 /* transport supports sodirect */ 316 317 #define SS_SENTLASTREADSIG 0x01000000 /* last rx signal has been sent */ 318 #define SS_SENTLASTWRITESIG 0x02000000 /* last tx signal has been sent */ 319 320 #define SS_FALLBACK_DRAIN 0x20000000 /* data was/is being drained */ 321 #define SS_FALLBACK_PENDING 0x40000000 /* fallback is pending */ 322 #define SS_FALLBACK_COMP 0x80000000 /* fallback has completed */ 323 324 325 /* Set of states when the socket can't be rebound */ 326 #define SS_CANTREBIND (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING|\ 327 SS_CANTSENDMORE|SS_CANTRCVMORE|SS_ACCEPTCONN) 328 329 /* 330 * Sockets that can fall back to TPI must ensure that fall back is not 331 * initiated while a thread is using a socket. 332 */ 333 #define SO_BLOCK_FALLBACK(so, fn) \ 334 ASSERT(MUTEX_NOT_HELD(&(so)->so_lock)); \ 335 rw_enter(&(so)->so_fallback_rwlock, RW_READER); \ 336 if ((so)->so_state & (SS_FALLBACK_COMP|SS_FILOP_OK)) { \ 337 if ((so)->so_state & SS_FALLBACK_COMP) { \ 338 rw_exit(&(so)->so_fallback_rwlock); \ 339 return (fn); \ 340 } else { \ 341 mutex_enter(&(so)->so_lock); \ 342 (so)->so_state &= ~SS_FILOP_OK; \ 343 mutex_exit(&(so)->so_lock); \ 344 } \ 345 } 346 347 #define SO_UNBLOCK_FALLBACK(so) { \ 348 rw_exit(&(so)->so_fallback_rwlock); \ 349 } 350 351 #define SO_SND_FLOWCTRLD(so) \ 352 ((so)->so_snd_qfull || (so)->so_state & SS_FIL_SND_FLOWCTRL) 353 354 /* Poll events */ 355 #define SO_POLLEV_IN 0x1 /* POLLIN wakeup needed */ 356 #define SO_POLLEV_ALWAYS 0x2 /* wakeups */ 357 358 /* 359 * Characteristics of sockets. Not changed after the socket is created. 360 */ 361 #define SM_PRIV 0x001 /* privileged for broadcast, raw... */ 362 #define SM_ATOMIC 0x002 /* atomic data transmission */ 363 #define SM_ADDR 0x004 /* addresses given with messages */ 364 #define SM_CONNREQUIRED 0x008 /* connection required by protocol */ 365 366 #define SM_FDPASSING 0x010 /* passes file descriptors */ 367 #define SM_EXDATA 0x020 /* Can handle T_EXDATA_REQ */ 368 #define SM_OPTDATA 0x040 /* Can handle T_OPTDATA_REQ */ 369 #define SM_BYTESTREAM 0x080 /* Byte stream - can use M_DATA */ 370 371 #define SM_ACCEPTOR_ID 0x100 /* so_acceptor_id is valid */ 372 373 #define SM_KERNEL 0x200 /* kernel socket */ 374 375 /* The modes below are only for non-streams sockets */ 376 #define SM_ACCEPTSUPP 0x400 /* can handle accept() */ 377 #define SM_SENDFILESUPP 0x800 /* Private: proto supp sendfile */ 378 379 /* 380 * Socket versions. Used by the socket library when calling _so_socket(). 381 */ 382 #define SOV_STREAM 0 /* Not a socket - just a stream */ 383 #define SOV_DEFAULT 1 /* Select based on so_default_version */ 384 #define SOV_SOCKSTREAM 2 /* Socket plus streams operations */ 385 #define SOV_SOCKBSD 3 /* Socket with no streams operations */ 386 #define SOV_XPG4_2 4 /* Xnet socket */ 387 388 #if defined(_KERNEL) || defined(_KMEMUSER) 389 390 /* 391 * sonode create and destroy functions. 392 */ 393 typedef struct sonode *(*so_create_func_t)(struct sockparams *, 394 int, int, int, int, int, int *, cred_t *); 395 typedef void (*so_destroy_func_t)(struct sonode *); 396 397 /* STREAM device information */ 398 typedef struct sdev_info { 399 char *sd_devpath; 400 int sd_devpathlen; /* Is 0 if sp_devpath is a static string */ 401 vnode_t *sd_vnode; 402 } sdev_info_t; 403 404 #define SOCKMOD_VERSION_1 1 405 #define SOCKMOD_VERSION 2 406 407 /* name of the TPI pseudo socket module */ 408 #define SOTPI_SMOD_NAME "socktpi" 409 410 typedef struct __smod_priv_s { 411 so_create_func_t smodp_sock_create_func; 412 so_destroy_func_t smodp_sock_destroy_func; 413 so_proto_fallback_func_t smodp_proto_fallback_func; 414 const char *smodp_fallback_devpath_v4; 415 const char *smodp_fallback_devpath_v6; 416 } __smod_priv_t; 417 418 /* 419 * Socket module register information 420 */ 421 typedef struct smod_reg_s { 422 int smod_version; 423 char *smod_name; 424 size_t smod_uc_version; 425 size_t smod_dc_version; 426 so_proto_create_func_t smod_proto_create_func; 427 428 /* __smod_priv_data must be NULL */ 429 __smod_priv_t *__smod_priv; 430 } smod_reg_t; 431 432 /* 433 * Socket module information 434 */ 435 typedef struct smod_info { 436 int smod_version; 437 char *smod_name; 438 uint_t smod_refcnt; /* # of entries */ 439 size_t smod_uc_version; /* upcall version */ 440 size_t smod_dc_version; /* down call version */ 441 so_proto_create_func_t smod_proto_create_func; 442 so_proto_fallback_func_t smod_proto_fallback_func; 443 const char *smod_fallback_devpath_v4; 444 const char *smod_fallback_devpath_v6; 445 so_create_func_t smod_sock_create_func; 446 so_destroy_func_t smod_sock_destroy_func; 447 list_node_t smod_node; 448 } smod_info_t; 449 450 typedef struct sockparams_stats { 451 kstat_named_t sps_nfallback; /* # of fallbacks to TPI */ 452 kstat_named_t sps_nactive; /* # of active sockets */ 453 kstat_named_t sps_ncreate; /* total # of created sockets */ 454 } sockparams_stats_t; 455 456 /* 457 * sockparams 458 * 459 * Used for mapping family/type/protocol to a socket module or STREAMS device 460 */ 461 struct sockparams { 462 /* 463 * The family, type, protocol, sdev_info and smod_name are 464 * set when the entry is created, and they will never change 465 * thereafter. 466 */ 467 int sp_family; 468 int sp_type; 469 int sp_protocol; 470 471 sdev_info_t sp_sdev_info; /* STREAM device */ 472 char *sp_smod_name; /* socket module name */ 473 474 kmutex_t sp_lock; /* lock for refcnt and smod_info */ 475 uint64_t sp_refcnt; /* entry reference count */ 476 smod_info_t *sp_smod_info; /* socket module */ 477 478 sockparams_stats_t sp_stats; 479 kstat_t *sp_kstat; 480 481 /* 482 * The entries below are only modified while holding 483 * sockconf_lock as a writer. 484 */ 485 int sp_flags; /* see below */ 486 list_node_t sp_node; 487 488 list_t sp_auto_filters; /* list of automatic filters */ 489 list_t sp_prog_filters; /* list of programmatic filters */ 490 }; 491 492 struct sof_entry; 493 494 typedef struct sp_filter { 495 struct sof_entry *spf_filter; 496 list_node_t spf_node; 497 } sp_filter_t; 498 499 500 /* 501 * sockparams flags 502 */ 503 #define SOCKPARAMS_EPHEMERAL 0x1 /* temp. entry, not on global list */ 504 505 extern void sockparams_init(void); 506 extern struct sockparams *sockparams_hold_ephemeral_bydev(int, int, int, 507 const char *, int, int *); 508 extern struct sockparams *sockparams_hold_ephemeral_bymod(int, int, int, 509 const char *, int, int *); 510 extern void sockparams_ephemeral_drop_last_ref(struct sockparams *); 511 512 extern struct sockparams *sockparams_create(int, int, int, char *, char *, int, 513 int, int, int *); 514 extern void sockparams_destroy(struct sockparams *); 515 extern int sockparams_add(struct sockparams *); 516 extern int sockparams_delete(int, int, int); 517 extern int sockparams_new_filter(struct sof_entry *); 518 extern void sockparams_filter_cleanup(struct sof_entry *); 519 extern int sockparams_copyout_socktable(uintptr_t); 520 521 extern void smod_init(void); 522 extern void smod_add(smod_info_t *); 523 extern int smod_register(const smod_reg_t *); 524 extern int smod_unregister(const char *); 525 extern smod_info_t *smod_lookup_byname(const char *); 526 527 #define SOCKPARAMS_HAS_DEVICE(sp) \ 528 ((sp)->sp_sdev_info.sd_devpath != NULL) 529 530 /* Increase the smod_info_t reference count */ 531 #define SMOD_INC_REF(smodp) { \ 532 ASSERT((smodp) != NULL); \ 533 DTRACE_PROBE1(smodinfo__inc__ref, struct smod_info *, (smodp)); \ 534 atomic_inc_uint(&(smodp)->smod_refcnt); \ 535 } 536 537 /* 538 * Decreace the socket module entry reference count. 539 * When no one mapping to the entry, we try to unload the module from the 540 * kernel. If the module can't unload, just leave the module entry with 541 * a zero refcnt. 542 */ 543 #define SMOD_DEC_REF(smodp, modname) { \ 544 ASSERT((smodp) != NULL); \ 545 ASSERT((smodp)->smod_refcnt != 0); \ 546 atomic_dec_uint(&(smodp)->smod_refcnt); \ 547 /* \ 548 * No need to atomically check the return value because the \ 549 * socket module framework will verify that no one is using \ 550 * the module before unloading. Worst thing that can happen \ 551 * here is multiple calls to mod_remove_by_name(), which is OK. \ 552 */ \ 553 if ((smodp)->smod_refcnt == 0) \ 554 (void) mod_remove_by_name(modname); \ 555 } 556 557 /* Increase the reference count */ 558 #define SOCKPARAMS_INC_REF(sp) { \ 559 ASSERT((sp) != NULL); \ 560 DTRACE_PROBE1(sockparams__inc__ref, struct sockparams *, (sp)); \ 561 mutex_enter(&(sp)->sp_lock); \ 562 (sp)->sp_refcnt++; \ 563 ASSERT((sp)->sp_refcnt != 0); \ 564 mutex_exit(&(sp)->sp_lock); \ 565 } 566 567 /* 568 * Decrease the reference count. 569 * 570 * If the sockparams is ephemeral, then the thread dropping the last ref 571 * count will destroy the entry. 572 */ 573 #define SOCKPARAMS_DEC_REF(sp) { \ 574 ASSERT((sp) != NULL); \ 575 DTRACE_PROBE1(sockparams__dec__ref, struct sockparams *, (sp)); \ 576 mutex_enter(&(sp)->sp_lock); \ 577 ASSERT((sp)->sp_refcnt > 0); \ 578 if ((sp)->sp_refcnt == 1) { \ 579 if ((sp)->sp_flags & SOCKPARAMS_EPHEMERAL) { \ 580 mutex_exit(&(sp)->sp_lock); \ 581 sockparams_ephemeral_drop_last_ref((sp)); \ 582 } else { \ 583 (sp)->sp_refcnt--; \ 584 if ((sp)->sp_smod_info != NULL) { \ 585 SMOD_DEC_REF((sp)->sp_smod_info, \ 586 (sp)->sp_smod_name); \ 587 } \ 588 (sp)->sp_smod_info = NULL; \ 589 mutex_exit(&(sp)->sp_lock); \ 590 } \ 591 } else { \ 592 (sp)->sp_refcnt--; \ 593 mutex_exit(&(sp)->sp_lock); \ 594 } \ 595 } 596 597 /* 598 * Used to traverse the list of AF_UNIX sockets to construct the kstat 599 * for netstat(8). 600 */ 601 struct socklist { 602 kmutex_t sl_lock; 603 struct sonode *sl_list; 604 }; 605 606 extern struct socklist socklist; 607 /* 608 * ss_full_waits is the number of times the reader thread 609 * waits when the queue is full and ss_empty_waits is the number 610 * of times the consumer thread waits when the queue is empty. 611 * No locks for these as they are just indicators of whether 612 * disk or network or both is slow or fast. 613 */ 614 struct sendfile_stats { 615 uint32_t ss_file_cached; 616 uint32_t ss_file_not_cached; 617 uint32_t ss_full_waits; 618 uint32_t ss_empty_waits; 619 uint32_t ss_file_segmap; 620 }; 621 622 /* 623 * A single sendfile request is represented by snf_req. 624 */ 625 typedef struct snf_req { 626 struct snf_req *sr_next; 627 mblk_t *sr_mp_head; 628 mblk_t *sr_mp_tail; 629 kmutex_t sr_lock; 630 kcondvar_t sr_cv; 631 uint_t sr_qlen; 632 int sr_hiwat; 633 int sr_lowat; 634 int sr_operation; 635 struct vnode *sr_vp; 636 file_t *sr_fp; 637 ssize_t sr_maxpsz; 638 u_offset_t sr_file_off; 639 u_offset_t sr_file_size; 640 #define SR_READ_DONE 0x80000000 641 int sr_read_error; 642 int sr_write_error; 643 } snf_req_t; 644 645 /* A queue of sendfile requests */ 646 struct sendfile_queue { 647 snf_req_t *snfq_req_head; 648 snf_req_t *snfq_req_tail; 649 kmutex_t snfq_lock; 650 kcondvar_t snfq_cv; 651 int snfq_svc_threads; /* # of service threads */ 652 int snfq_idle_cnt; /* # of idling threads */ 653 int snfq_max_threads; 654 int snfq_req_cnt; /* Number of requests */ 655 }; 656 657 #define READ_OP 1 658 #define SNFQ_TIMEOUT (60 * 5 * hz) /* 5 minutes */ 659 660 /* Socket network operations switch */ 661 struct sonodeops { 662 int (*sop_init)(struct sonode *, struct sonode *, cred_t *, 663 int); 664 int (*sop_accept)(struct sonode *, int, cred_t *, struct sonode **); 665 int (*sop_bind)(struct sonode *, struct sockaddr *, socklen_t, 666 int, cred_t *); 667 int (*sop_listen)(struct sonode *, int, cred_t *); 668 int (*sop_connect)(struct sonode *, struct sockaddr *, 669 socklen_t, int, int, cred_t *); 670 int (*sop_recvmsg)(struct sonode *, struct msghdr *, 671 struct uio *, cred_t *); 672 int (*sop_sendmsg)(struct sonode *, struct msghdr *, 673 struct uio *, cred_t *); 674 int (*sop_sendmblk)(struct sonode *, struct msghdr *, int, 675 cred_t *, mblk_t **); 676 int (*sop_getpeername)(struct sonode *, struct sockaddr *, 677 socklen_t *, boolean_t, cred_t *); 678 int (*sop_getsockname)(struct sonode *, struct sockaddr *, 679 socklen_t *, cred_t *); 680 int (*sop_shutdown)(struct sonode *, int, cred_t *); 681 int (*sop_getsockopt)(struct sonode *, int, int, void *, 682 socklen_t *, int, cred_t *); 683 int (*sop_setsockopt)(struct sonode *, int, int, const void *, 684 socklen_t, cred_t *); 685 int (*sop_ioctl)(struct sonode *, int, intptr_t, int, 686 cred_t *, int32_t *); 687 int (*sop_poll)(struct sonode *, short, int, short *, 688 struct pollhead **); 689 int (*sop_close)(struct sonode *, int, cred_t *); 690 }; 691 692 #define SOP_INIT(so, flag, cr, flags) \ 693 ((so)->so_ops->sop_init((so), (flag), (cr), (flags))) 694 #define SOP_ACCEPT(so, fflag, cr, nsop) \ 695 ((so)->so_ops->sop_accept((so), (fflag), (cr), (nsop))) 696 #define SOP_BIND(so, name, namelen, flags, cr) \ 697 ((so)->so_ops->sop_bind((so), (name), (namelen), (flags), (cr))) 698 #define SOP_LISTEN(so, backlog, cr) \ 699 ((so)->so_ops->sop_listen((so), (backlog), (cr))) 700 #define SOP_CONNECT(so, name, namelen, fflag, flags, cr) \ 701 ((so)->so_ops->sop_connect((so), (name), (namelen), (fflag), (flags), \ 702 (cr))) 703 #define SOP_RECVMSG(so, msg, uiop, cr) \ 704 ((so)->so_ops->sop_recvmsg((so), (msg), (uiop), (cr))) 705 #define SOP_SENDMSG(so, msg, uiop, cr) \ 706 ((so)->so_ops->sop_sendmsg((so), (msg), (uiop), (cr))) 707 #define SOP_SENDMBLK(so, msg, size, cr, mpp) \ 708 ((so)->so_ops->sop_sendmblk((so), (msg), (size), (cr), (mpp))) 709 #define SOP_GETPEERNAME(so, addr, addrlen, accept, cr) \ 710 ((so)->so_ops->sop_getpeername((so), (addr), (addrlen), (accept), (cr))) 711 #define SOP_GETSOCKNAME(so, addr, addrlen, cr) \ 712 ((so)->so_ops->sop_getsockname((so), (addr), (addrlen), (cr))) 713 #define SOP_SHUTDOWN(so, how, cr) \ 714 ((so)->so_ops->sop_shutdown((so), (how), (cr))) 715 #define SOP_GETSOCKOPT(so, level, optionname, optval, optlenp, flags, cr) \ 716 ((so)->so_ops->sop_getsockopt((so), (level), (optionname), \ 717 (optval), (optlenp), (flags), (cr))) 718 #define SOP_SETSOCKOPT(so, level, optionname, optval, optlen, cr) \ 719 ((so)->so_ops->sop_setsockopt((so), (level), (optionname), \ 720 (optval), (optlen), (cr))) 721 #define SOP_IOCTL(so, cmd, arg, mode, cr, rvalp) \ 722 ((so)->so_ops->sop_ioctl((so), (cmd), (arg), (mode), (cr), (rvalp))) 723 #define SOP_POLL(so, events, anyyet, reventsp, phpp) \ 724 ((so)->so_ops->sop_poll((so), (events), (anyyet), (reventsp), (phpp))) 725 #define SOP_CLOSE(so, flag, cr) \ 726 ((so)->so_ops->sop_close((so), (flag), (cr))) 727 728 #endif /* defined(_KERNEL) || defined(_KMEMUSER) */ 729 730 #ifdef _KERNEL 731 732 #define ISALIGNED_cmsghdr(addr) \ 733 (((uintptr_t)(addr) & (_CMSG_HDR_ALIGNMENT - 1)) == 0) 734 735 #define ROUNDUP_cmsglen(len) \ 736 (((len) + _CMSG_HDR_ALIGNMENT - 1) & ~(_CMSG_HDR_ALIGNMENT - 1)) 737 738 #define IS_NON_STREAM_SOCK(vp) \ 739 ((vp)->v_type == VSOCK && (vp)->v_stream == NULL) 740 /* 741 * Macros that operate on struct cmsghdr. 742 * Used in parsing msg_control. 743 * The CMSG_VALID macro does not assume that the last option buffer is padded. 744 */ 745 #define CMSG_NEXT(cmsg) \ 746 (struct cmsghdr *)((uintptr_t)(cmsg) + \ 747 ROUNDUP_cmsglen((cmsg)->cmsg_len)) 748 #define CMSG_CONTENT(cmsg) (&((cmsg)[1])) 749 #define CMSG_CONTENTLEN(cmsg) ((cmsg)->cmsg_len - sizeof (struct cmsghdr)) 750 #define CMSG_VALID(cmsg, start, end) \ 751 (ISALIGNED_cmsghdr(cmsg) && \ 752 ((uintptr_t)(cmsg) >= (uintptr_t)(start)) && \ 753 ((uintptr_t)(cmsg) < (uintptr_t)(end)) && \ 754 ((ssize_t)(cmsg)->cmsg_len >= sizeof (struct cmsghdr)) && \ 755 ((uintptr_t)(cmsg) + (cmsg)->cmsg_len <= (uintptr_t)(end))) 756 757 /* 758 * Maximum size of any argument that is copied in (addresses, options, 759 * access rights). MUST be at least MAXPATHLEN + 3. 760 * BSD and SunOS 4.X limited this to MLEN or MCLBYTES. 761 */ 762 #define SO_MAXARGSIZE 8192 763 764 /* 765 * Convert between vnode and sonode 766 */ 767 #define VTOSO(vp) ((struct sonode *)((vp)->v_data)) 768 #define SOTOV(sp) ((sp)->so_vnode) 769 770 /* 771 * Internal flags for sobind() 772 */ 773 #define _SOBIND_REBIND 0x01 /* Bind to existing local address */ 774 #define _SOBIND_UNSPEC 0x02 /* Bind to unspecified address */ 775 #define _SOBIND_LOCK_HELD 0x04 /* so_excl_lock held by caller */ 776 #define _SOBIND_NOXLATE 0x08 /* No addr translation for AF_UNIX */ 777 #define _SOBIND_XPG4_2 0x10 /* xpg4.2 semantics */ 778 #define _SOBIND_SOCKBSD 0x20 /* BSD semantics */ 779 #define _SOBIND_LISTEN 0x40 /* Make into SS_ACCEPTCONN */ 780 #define _SOBIND_SOCKETPAIR 0x80 /* Internal flag for so_socketpair() */ 781 /* to enable listen with backlog = 1 */ 782 783 /* 784 * Internal flags for sounbind() 785 */ 786 #define _SOUNBIND_REBIND 0x01 /* Don't clear fields - will rebind */ 787 788 /* 789 * Internal flags for soconnect() 790 */ 791 #define _SOCONNECT_NOXLATE 0x01 /* No addr translation for AF_UNIX */ 792 #define _SOCONNECT_DID_BIND 0x02 /* Unbind when connect fails */ 793 #define _SOCONNECT_XPG4_2 0x04 /* xpg4.2 semantics */ 794 795 /* 796 * Internal flags for sodisconnect() 797 */ 798 #define _SODISCONNECT_LOCK_HELD 0x01 /* so_excl_lock held by caller */ 799 800 /* 801 * Internal flags for sotpi_getsockopt(). 802 */ 803 #define _SOGETSOCKOPT_XPG4_2 0x01 /* xpg4.2 semantics */ 804 805 /* 806 * Internal flags for soallocproto*() 807 */ 808 #define _ALLOC_NOSLEEP 0 /* Don't sleep for memory */ 809 #define _ALLOC_INTR 1 /* Sleep until interrupt */ 810 #define _ALLOC_SLEEP 2 /* Sleep forever */ 811 812 /* 813 * Internal structure for handling AF_UNIX file descriptor passing 814 */ 815 struct fdbuf { 816 int fd_size; /* In bytes, for kmem_free */ 817 int fd_numfd; /* Number of elements below */ 818 char *fd_ebuf; /* Extra buffer to free */ 819 int fd_ebuflen; 820 frtn_t fd_frtn; 821 struct file *fd_fds[1]; /* One or more */ 822 }; 823 #define FDBUF_HDRSIZE (sizeof (struct fdbuf) - sizeof (struct file *)) 824 825 /* 826 * Variable that can be patched to set what version of socket socket() 827 * will create. 828 */ 829 extern int so_default_version; 830 831 #ifdef DEBUG 832 /* Turn on extra testing capabilities */ 833 #define SOCK_TEST 834 #endif /* DEBUG */ 835 836 #ifdef DEBUG 837 char *pr_state(uint_t, uint_t); 838 char *pr_addr(int, struct sockaddr *, t_uscalar_t); 839 int so_verify_oobstate(struct sonode *); 840 #endif /* DEBUG */ 841 842 /* 843 * DEBUG macros 844 */ 845 #if defined(DEBUG) 846 #define SOCK_DEBUG 847 848 extern int sockdebug; 849 extern int sockprinterr; 850 851 #define eprint(args) printf args 852 #define eprintso(so, args) \ 853 { if (sockprinterr && ((so)->so_options & SO_DEBUG)) printf args; } 854 #define eprintline(error) \ 855 { \ 856 if (error != EINTR && (sockprinterr || sockdebug > 0)) \ 857 printf("socket error %d: line %d file %s\n", \ 858 (error), __LINE__, __FILE__); \ 859 } 860 861 #define eprintsoline(so, error) \ 862 { if (sockprinterr && ((so)->so_options & SO_DEBUG)) \ 863 printf("socket(%p) error %d: line %d file %s\n", \ 864 (void *)(so), (error), __LINE__, __FILE__); \ 865 } 866 #define dprint(level, args) { if (sockdebug > (level)) printf args; } 867 #define dprintso(so, level, args) \ 868 { if (sockdebug > (level) && ((so)->so_options & SO_DEBUG)) printf args; } 869 870 #else /* define(DEBUG) */ 871 872 #define eprint(args) {} 873 #define eprintso(so, args) {} 874 #define eprintline(error) {} 875 #define eprintsoline(so, error) {} 876 #define dprint(level, args) {} 877 #define dprintso(so, level, args) {} 878 879 #endif /* defined(DEBUG) */ 880 881 extern struct vnodeops *socket_vnodeops; 882 extern const struct fs_operation_def socket_vnodeops_template[]; 883 884 extern vfs_t *sock_vfsp; 885 extern dev_t sockdev; 886 887 extern krwlock_t sockconf_lock; 888 889 /* 890 * sockfs vfsops 891 */ 892 extern int sockfs_statvfs(vfs_t *, struct statvfs64 *); 893 894 /* 895 * sockfs functions 896 */ 897 extern int sock_getmsg(vnode_t *, struct strbuf *, struct strbuf *, 898 uchar_t *, int *, int, rval_t *); 899 extern int sock_putmsg(vnode_t *, struct strbuf *, struct strbuf *, 900 uchar_t, int, int); 901 extern int sogetvp(char *, vnode_t **, int); 902 extern int sockinit(int, char *); 903 extern int solookup(int, int, int, struct sockparams **); 904 extern void so_lock_single(struct sonode *); 905 extern void so_unlock_single(struct sonode *, int); 906 extern int so_lock_read(struct sonode *, int); 907 extern int so_lock_read_intr(struct sonode *, int); 908 extern void so_unlock_read(struct sonode *); 909 extern void *sogetoff(mblk_t *, t_uscalar_t, t_uscalar_t, uint_t); 910 extern void so_getopt_srcaddr(void *, t_uscalar_t, 911 void **, t_uscalar_t *); 912 extern int so_getopt_unix_close(void *, t_uscalar_t); 913 extern void fdbuf_free(struct fdbuf *); 914 extern mblk_t *fdbuf_allocmsg(int, struct fdbuf *); 915 extern int fdbuf_create(void *, int, struct fdbuf **); 916 extern void so_closefds(void *, t_uscalar_t, int, int); 917 extern void so_truncatecmsg(void *, t_uscalar_t, uint_t); 918 919 extern int so_getfdopt(void *, t_uscalar_t, int, void **, int *); 920 t_uscalar_t so_optlen(void *, t_uscalar_t, int); 921 extern void so_cmsg2opt(void *, t_uscalar_t, int, mblk_t *); 922 extern t_uscalar_t 923 so_cmsglen(mblk_t *, void *, t_uscalar_t, int); 924 extern int so_opt2cmsg(mblk_t *, void *, t_uscalar_t, int, 925 void *, t_uscalar_t); 926 extern void soisconnecting(struct sonode *); 927 extern void soisconnected(struct sonode *); 928 extern void soisdisconnected(struct sonode *, int); 929 extern void socantsendmore(struct sonode *); 930 extern void socantrcvmore(struct sonode *); 931 extern void soseterror(struct sonode *, int); 932 extern int sogeterr(struct sonode *, boolean_t); 933 extern int sowaitconnected(struct sonode *, int, int); 934 935 extern ssize_t soreadfile(file_t *, uchar_t *, u_offset_t, int *, size_t); 936 extern void *sock_kstat_init(zoneid_t); 937 extern void sock_kstat_fini(zoneid_t, void *); 938 extern struct sonode *getsonode(int, int *, file_t **); 939 /* 940 * Function wrappers (mostly around the sonode switch) for 941 * backward compatibility. 942 */ 943 extern int soaccept(struct sonode *, int, struct sonode **); 944 extern int sobind(struct sonode *, struct sockaddr *, socklen_t, 945 int, int); 946 extern int solisten(struct sonode *, int); 947 extern int soconnect(struct sonode *, struct sockaddr *, socklen_t, 948 int, int); 949 extern int sorecvmsg(struct sonode *, struct nmsghdr *, struct uio *); 950 extern int sosendmsg(struct sonode *, struct nmsghdr *, struct uio *); 951 extern int soshutdown(struct sonode *, int); 952 extern int sogetsockopt(struct sonode *, int, int, void *, socklen_t *, 953 int); 954 extern int sosetsockopt(struct sonode *, int, int, const void *, 955 t_uscalar_t); 956 957 extern struct sonode *socreate(struct sockparams *, int, int, int, int, 958 int *); 959 960 extern int so_copyin(const void *, void *, size_t, int); 961 extern int so_copyout(const void *, void *, size_t, int); 962 963 /* 964 * Functions to manipulate the use of direct receive callbacks. This should not 965 * be used outside of sockfs and ksocket. These are generally considered a use 966 * once interface for a socket and will cause all outstanding data on the socket 967 * to be flushed. 968 */ 969 extern int so_krecv_set(sonode_t *, so_krecv_f, void *); 970 extern void so_krecv_unblock(sonode_t *); 971 972 #endif 973 974 /* 975 * Internal structure for obtaining sonode information from the socklist. 976 * These types match those corresponding in the sonode structure. 977 * This is not a published interface, and may change at any time. It is 978 * used for passing information back up to the kstat consumers. By converting 979 * kernel addresses to strings, we should be able to pass information from 980 * the kernel to userland regardless of n-bit kernel we are using. 981 */ 982 983 #define ADRSTRLEN (2 * sizeof (uint64_t) + 1) 984 985 struct sockinfo { 986 uint_t si_size; /* real length of this struct */ 987 short si_family; 988 short si_type; 989 ushort_t si_flag; 990 uint_t si_state; 991 uint_t si_ux_laddr_sou_magic; 992 uint_t si_ux_faddr_sou_magic; 993 t_scalar_t si_serv_type; 994 t_uscalar_t si_laddr_soa_len; 995 t_uscalar_t si_faddr_soa_len; 996 uint16_t si_laddr_family; 997 uint16_t si_faddr_family; 998 char si_laddr_sun_path[MAXPATHLEN + 1]; /* NULL terminated */ 999 char si_faddr_sun_path[MAXPATHLEN + 1]; 1000 boolean_t si_faddr_noxlate; 1001 zoneid_t si_szoneid; 1002 char si_son_straddr[ADRSTRLEN]; 1003 char si_lvn_straddr[ADRSTRLEN]; 1004 char si_fvn_straddr[ADRSTRLEN]; 1005 uint64_t si_inode; 1006 }; 1007 1008 /* 1009 * Subcodes for sockconf() system call 1010 */ 1011 #define SOCKCONFIG_ADD_SOCK 0 1012 #define SOCKCONFIG_REMOVE_SOCK 1 1013 #define SOCKCONFIG_ADD_FILTER 2 1014 #define SOCKCONFIG_REMOVE_FILTER 3 1015 #define SOCKCONFIG_GET_SOCKTABLE 4 1016 1017 /* 1018 * Data structures for configuring socket filters. 1019 */ 1020 1021 /* 1022 * Placement hint for automatic filters 1023 */ 1024 typedef enum { 1025 SOF_HINT_NONE, 1026 SOF_HINT_TOP, 1027 SOF_HINT_BOTTOM, 1028 SOF_HINT_BEFORE, 1029 SOF_HINT_AFTER 1030 } sof_hint_t; 1031 1032 /* 1033 * Socket tuple. Used by sockconfig_filter_props to list socket 1034 * types of interest. 1035 */ 1036 typedef struct sof_socktuple { 1037 int sofst_family; 1038 int sofst_type; 1039 int sofst_protocol; 1040 } sof_socktuple_t; 1041 1042 /* 1043 * Socket filter properties used by sockconfig() system call. 1044 */ 1045 struct sockconfig_filter_props { 1046 char *sfp_modname; 1047 boolean_t sfp_autoattach; 1048 sof_hint_t sfp_hint; 1049 char *sfp_hintarg; 1050 uint_t sfp_socktuple_cnt; 1051 sof_socktuple_t *sfp_socktuple; 1052 }; 1053 1054 /* 1055 * Data structures for the in-kernel socket configuration table. 1056 */ 1057 typedef struct sockconfig_socktable_entry { 1058 int se_family; 1059 int se_type; 1060 int se_protocol; 1061 int se_refcnt; 1062 int se_flags; 1063 char se_modname[MODMAXNAMELEN]; 1064 char se_strdev[MAXPATHLEN]; 1065 } sockconfig_socktable_entry_t; 1066 1067 typedef struct sockconfig_socktable { 1068 uint_t num_of_entries; 1069 sockconfig_socktable_entry_t *st_entries; 1070 } sockconfig_socktable_t; 1071 1072 #ifdef _SYSCALL32 1073 1074 typedef struct sof_socktuple32 { 1075 int32_t sofst_family; 1076 int32_t sofst_type; 1077 int32_t sofst_protocol; 1078 } sof_socktuple32_t; 1079 1080 struct sockconfig_filter_props32 { 1081 caddr32_t sfp_modname; 1082 boolean_t sfp_autoattach; 1083 sof_hint_t sfp_hint; 1084 caddr32_t sfp_hintarg; 1085 uint32_t sfp_socktuple_cnt; 1086 caddr32_t sfp_socktuple; 1087 }; 1088 1089 typedef struct sockconfig_socktable32 { 1090 uint_t num_of_entries; 1091 caddr32_t st_entries; 1092 } sockconfig_socktable32_t; 1093 1094 #endif /* _SYSCALL32 */ 1095 1096 #define SOCKMOD_PATH "socketmod" /* dir where sockmods are stored */ 1097 1098 #ifdef __cplusplus 1099 } 1100 #endif 1101 1102 #endif /* _SYS_SOCKETVAR_H */ 1103