1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 /* 31 * University Copyright- Copyright (c) 1982, 1986, 1988 32 * The Regents of the University of California 33 * All Rights Reserved 34 * 35 * University Acknowledgment- Portions of this document are derived from 36 * software developed by the University of California, Berkeley, and its 37 * contributors. 38 */ 39 40 #ifndef _SYS_SOCKETVAR_H 41 #define _SYS_SOCKETVAR_H 42 43 #include <sys/types.h> 44 #include <sys/stream.h> 45 #include <sys/t_lock.h> 46 #include <sys/cred.h> 47 #include <sys/vnode.h> 48 #include <sys/file.h> 49 #include <sys/param.h> 50 #include <sys/zone.h> 51 #include <sys/sdt.h> 52 #include <sys/modctl.h> 53 #include <sys/atomic.h> 54 #include <sys/socket.h> 55 #include <sys/ksocket.h> 56 #include <sys/sodirect.h> 57 #include <sys/kstat.h> 58 59 #ifdef __cplusplus 60 extern "C" { 61 #endif 62 63 /* 64 * Internal representation of the address used to represent addresses 65 * in the loopback transport for AF_UNIX. While the sockaddr_un is used 66 * as the sockfs layer address for AF_UNIX the pathnames contained in 67 * these addresses are not unique (due to relative pathnames) thus can not 68 * be used in the transport. 69 * 70 * The transport level address consists of a magic number (used to separate the 71 * name space for specific and implicit binds). For a specific bind 72 * this is followed by a "vnode *" which ensures that all specific binds 73 * have a unique transport level address. For implicit binds the latter 74 * part of the address is a byte string (of the same length as a pointer) 75 * that is assigned by the loopback transport. 76 * 77 * The uniqueness assumes that the loopback transport has a separate namespace 78 * for sockets in order to avoid name conflicts with e.g. TLI use of the 79 * same transport. 80 */ 81 struct so_ux_addr { 82 void *soua_vp; /* vnode pointer or assigned by tl */ 83 uint_t soua_magic; /* See below */ 84 }; 85 86 #define SOU_MAGIC_EXPLICIT 0x75787670 /* "uxvp" */ 87 #define SOU_MAGIC_IMPLICIT 0x616e6f6e /* "anon" */ 88 89 struct sockaddr_ux { 90 sa_family_t sou_family; /* AF_UNIX */ 91 struct so_ux_addr sou_addr; 92 }; 93 94 #if defined(_KERNEL) || defined(_KMEMUSER) 95 96 #include <sys/socket_proto.h> 97 98 typedef struct sonodeops sonodeops_t; 99 typedef struct sonode sonode_t; 100 101 /* 102 * The sonode represents a socket. A sonode never exist in the file system 103 * name space and can not be opened using open() - only the socket, socketpair 104 * and accept calls create sonodes. 105 * 106 * The locking of sockfs uses the so_lock mutex plus the SOLOCKED and 107 * SOREADLOCKED flags in so_flag. The mutex protects all the state in the 108 * sonode. It is expected that the underlying transport protocol serializes 109 * socket operations, so sockfs will not normally not single-thread 110 * operations. However, certain sockets, including TPI based ones, can only 111 * handle one control operation at a time. The SOLOCKED flag is used to 112 * single-thread operations from sockfs users to prevent e.g. multiple bind() 113 * calls to operate on the same sonode concurrently. The SOREADLOCKED flag is 114 * used to ensure that only one thread sleeps in kstrgetmsg for a given 115 * sonode. This is needed to ensure atomic operation for things like 116 * MSG_WAITALL. 117 * 118 * The so_fallback_rwlock is used to ensure that for sockets that can 119 * fall back to TPI, the fallback is not initiated until all pending 120 * operations have completed. 121 * 122 * Note that so_lock is sometimes held across calls that might go to sleep 123 * (kmem_alloc and soallocproto*). This implies that no other lock in 124 * the system should be held when calling into sockfs; from the system call 125 * side or from strrput (in case of TPI based sockets). If locks are held 126 * while calling into sockfs the system might hang when running low on memory. 127 */ 128 struct sonode { 129 struct vnode *so_vnode; /* vnode associated with this sonode */ 130 131 sonodeops_t *so_ops; /* operations vector for this sonode */ 132 void *so_priv; /* sonode private data */ 133 134 krwlock_t so_fallback_rwlock; 135 kmutex_t so_lock; /* protects sonode fields */ 136 137 kcondvar_t so_state_cv; /* synchronize state changes */ 138 kcondvar_t so_want_cv; /* wait due to SOLOCKED */ 139 140 /* These fields are protected by so_lock */ 141 142 uint_t so_state; /* internal state flags SS_*, below */ 143 uint_t so_mode; /* characteristics on socket. SM_* */ 144 ushort_t so_flag; /* flags, see below */ 145 int so_count; /* count of opened references */ 146 147 sock_connid_t so_proto_connid; /* protocol generation number */ 148 149 ushort_t so_error; /* error affecting connection */ 150 151 struct sockparams *so_sockparams; /* vnode or socket module */ 152 /* Needed to recreate the same socket for accept */ 153 short so_family; 154 short so_type; 155 short so_protocol; 156 short so_version; /* From so_socket call */ 157 158 /* Accept queue */ 159 kmutex_t so_acceptq_lock; /* protects accept queue */ 160 struct sonode *so_acceptq_next; /* acceptq list node */ 161 struct sonode *so_acceptq_head; 162 struct sonode **so_acceptq_tail; 163 unsigned int so_acceptq_len; 164 unsigned int so_backlog; /* Listen backlog */ 165 kcondvar_t so_acceptq_cv; /* wait for new conn. */ 166 167 /* Options */ 168 short so_options; /* From socket call, see socket.h */ 169 struct linger so_linger; /* SO_LINGER value */ 170 #define so_sndbuf so_proto_props.sopp_txhiwat /* SO_SNDBUF value */ 171 #define so_sndlowat so_proto_props.sopp_txlowat /* tx low water mark */ 172 #define so_rcvbuf so_proto_props.sopp_rxhiwat /* SO_RCVBUF value */ 173 #define so_rcvlowat so_proto_props.sopp_rxlowat /* rx low water mark */ 174 #define so_max_addr_len so_proto_props.sopp_maxaddrlen 175 #define so_minpsz so_proto_props.sopp_minpsz 176 #define so_maxpsz so_proto_props.sopp_maxpsz 177 178 int so_xpg_rcvbuf; /* SO_RCVBUF value for XPG4 socket */ 179 clock_t so_sndtimeo; /* send timeout */ 180 clock_t so_rcvtimeo; /* recv timeout */ 181 182 mblk_t *so_oobmsg; /* outofline oob data */ 183 ssize_t so_oobmark; /* offset of the oob data */ 184 185 pid_t so_pgrp; /* pgrp for signals */ 186 187 cred_t *so_peercred; /* connected socket peer cred */ 188 pid_t so_cpid; /* connected socket peer cached pid */ 189 zoneid_t so_zoneid; /* opener's zoneid */ 190 191 struct pollhead so_poll_list; /* common pollhead */ 192 short so_pollev; /* events that should be generated */ 193 194 /* Receive */ 195 unsigned int so_rcv_queued; /* # bytes on both rcv lists */ 196 mblk_t *so_rcv_q_head; /* processing/copyout rcv queue */ 197 mblk_t *so_rcv_q_last_head; 198 mblk_t *so_rcv_head; /* protocol prequeue */ 199 mblk_t *so_rcv_last_head; /* last mblk in b_next chain */ 200 kcondvar_t so_rcv_cv; /* wait for data */ 201 uint_t so_rcv_wanted; /* # of bytes wanted by app */ 202 timeout_id_t so_rcv_timer_tid; 203 204 #define so_rcv_thresh so_proto_props.sopp_rcvthresh 205 #define so_rcv_timer_interval so_proto_props.sopp_rcvtimer 206 207 kcondvar_t so_snd_cv; /* wait for snd buffers */ 208 uint32_t 209 so_snd_qfull: 1, /* Transmit full */ 210 so_rcv_wakeup: 1, 211 so_snd_wakeup: 1, 212 so_not_str: 1, /* B_TRUE if not streams based socket */ 213 so_pad_to_bit_31: 28; 214 215 /* Communication channel with protocol */ 216 sock_lower_handle_t so_proto_handle; 217 sock_downcalls_t *so_downcalls; 218 219 struct sock_proto_props so_proto_props; /* protocol settings */ 220 boolean_t so_flowctrld; /* Flow controlled */ 221 uint_t so_copyflag; /* Copy related flag */ 222 kcondvar_t so_copy_cv; /* Copy cond variable */ 223 224 /* kernel sockets */ 225 ksocket_callbacks_t so_ksock_callbacks; 226 void *so_ksock_cb_arg; /* callback argument */ 227 kcondvar_t so_closing_cv; 228 229 /* != NULL for sodirect_t enabled socket */ 230 sodirect_t *so_direct; 231 }; 232 233 #define SO_HAVE_DATA(so) \ 234 /* \ 235 * For the (tid == 0) case we must check so_rcv_{q_,}head \ 236 * rather than (so_rcv_queued > 0), since the latter does not \ 237 * take into account mblks with only control/name information. \ 238 */ \ 239 ((so)->so_rcv_timer_tid == 0 && ((so)->so_rcv_head != NULL || \ 240 (so)->so_rcv_q_head != NULL)) || \ 241 ((so)->so_state & SS_CANTRCVMORE) 242 243 /* 244 * Events handled by the protocol (in case sd_poll is set) 245 */ 246 #define SO_PROTO_POLLEV (POLLIN|POLLRDNORM|POLLRDBAND) 247 248 249 #endif /* _KERNEL || _KMEMUSER */ 250 251 /* flags */ 252 #define SOMOD 0x0001 /* update socket modification time */ 253 #define SOACC 0x0002 /* update socket access time */ 254 255 #define SOLOCKED 0x0010 /* use to serialize open/closes */ 256 #define SOREADLOCKED 0x0020 /* serialize kstrgetmsg calls */ 257 #define SOWANT 0x0040 /* some process waiting on lock */ 258 #define SOCLONE 0x0080 /* child of clone driver */ 259 #define SOASYNC_UNBIND 0x0100 /* wait for ACK of async unbind */ 260 261 #define SOCK_IS_NONSTR(so) ((so)->so_not_str) 262 263 /* 264 * Socket state bits. 265 */ 266 #define SS_ISCONNECTED 0x00000001 /* socket connected to a peer */ 267 #define SS_ISCONNECTING 0x00000002 /* in process, connecting to peer */ 268 #define SS_ISDISCONNECTING 0x00000004 /* in process of disconnecting */ 269 #define SS_CANTSENDMORE 0x00000008 /* can't send more data to peer */ 270 271 #define SS_CANTRCVMORE 0x00000010 /* can't receive more data */ 272 #define SS_ISBOUND 0x00000020 /* socket is bound */ 273 #define SS_NDELAY 0x00000040 /* FNDELAY non-blocking */ 274 #define SS_NONBLOCK 0x00000080 /* O_NONBLOCK non-blocking */ 275 276 #define SS_ASYNC 0x00000100 /* async i/o notify */ 277 #define SS_ACCEPTCONN 0x00000200 /* listen done */ 278 /* unused 0x00000400 */ /* was SS_HASCONNIND */ 279 #define SS_SAVEDEOR 0x00000800 /* Saved MSG_EOR rcv side state */ 280 281 #define SS_RCVATMARK 0x00001000 /* at mark on input */ 282 #define SS_OOBPEND 0x00002000 /* OOB pending or present - poll */ 283 #define SS_HAVEOOBDATA 0x00004000 /* OOB data present */ 284 #define SS_HADOOBDATA 0x00008000 /* OOB data consumed */ 285 #define SS_CLOSING 0x00010000 /* in process of closing */ 286 287 /* unused 0x00020000 */ /* was SS_FADDR_NOXLATE */ 288 /* unused 0x00040000 */ /* was SS_HASDATA */ 289 /* unused 0x00080000 */ /* was SS_DONEREAD */ 290 /* unused 0x00100000 */ /* was SS_MOREDATA */ 291 /* unused 0x00200000 */ /* was SS_DIRECT */ 292 293 #define SS_SODIRECT 0x00400000 /* transport supports sodirect */ 294 295 #define SS_SENTLASTREADSIG 0x01000000 /* last rx signal has been sent */ 296 #define SS_SENTLASTWRITESIG 0x02000000 /* last tx signal has been sent */ 297 298 #define SS_FALLBACK_DRAIN 0x20000000 /* data was/is being drained */ 299 #define SS_FALLBACK_PENDING 0x40000000 /* fallback is pending */ 300 #define SS_FALLBACK_COMP 0x80000000 /* fallback has completed */ 301 302 303 /* Set of states when the socket can't be rebound */ 304 #define SS_CANTREBIND (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING|\ 305 SS_CANTSENDMORE|SS_CANTRCVMORE|SS_ACCEPTCONN) 306 307 /* 308 * Sockets that can fall back to TPI must ensure that fall back is not 309 * initiated while a thread is using a socket. 310 */ 311 #define SO_BLOCK_FALLBACK(so, fn) { \ 312 ASSERT(MUTEX_NOT_HELD(&(so)->so_lock)); \ 313 rw_enter(&(so)->so_fallback_rwlock, RW_READER); \ 314 if ((so)->so_state & SS_FALLBACK_COMP) { \ 315 rw_exit(&(so)->so_fallback_rwlock); \ 316 return (fn); \ 317 } \ 318 } 319 320 #define SO_UNBLOCK_FALLBACK(so) { \ 321 rw_exit(&(so)->so_fallback_rwlock); \ 322 } 323 324 /* Poll events */ 325 #define SO_POLLEV_IN 0x1 /* POLLIN wakeup needed */ 326 #define SO_POLLEV_ALWAYS 0x2 /* wakeups */ 327 328 /* 329 * Characteristics of sockets. Not changed after the socket is created. 330 */ 331 #define SM_PRIV 0x001 /* privileged for broadcast, raw... */ 332 #define SM_ATOMIC 0x002 /* atomic data transmission */ 333 #define SM_ADDR 0x004 /* addresses given with messages */ 334 #define SM_CONNREQUIRED 0x008 /* connection required by protocol */ 335 336 #define SM_FDPASSING 0x010 /* passes file descriptors */ 337 #define SM_EXDATA 0x020 /* Can handle T_EXDATA_REQ */ 338 #define SM_OPTDATA 0x040 /* Can handle T_OPTDATA_REQ */ 339 #define SM_BYTESTREAM 0x080 /* Byte stream - can use M_DATA */ 340 341 #define SM_ACCEPTOR_ID 0x100 /* so_acceptor_id is valid */ 342 343 #define SM_KERNEL 0x200 /* kernel socket */ 344 345 /* The modes below are only for non-streams sockets */ 346 #define SM_ACCEPTSUPP 0x400 /* can handle accept() */ 347 #define SM_SENDFILESUPP 0x800 /* Private: proto supp sendfile */ 348 349 /* 350 * Socket versions. Used by the socket library when calling _so_socket(). 351 */ 352 #define SOV_STREAM 0 /* Not a socket - just a stream */ 353 #define SOV_DEFAULT 1 /* Select based on so_default_version */ 354 #define SOV_SOCKSTREAM 2 /* Socket plus streams operations */ 355 #define SOV_SOCKBSD 3 /* Socket with no streams operations */ 356 #define SOV_XPG4_2 4 /* Xnet socket */ 357 358 #if defined(_KERNEL) || defined(_KMEMUSER) 359 360 /* 361 * sonode create and destroy functions. 362 */ 363 typedef struct sonode *(*so_create_func_t)(struct sockparams *, 364 int, int, int, int, int, int *, cred_t *); 365 typedef void (*so_destroy_func_t)(struct sonode *); 366 367 /* STREAM device information */ 368 typedef struct sdev_info { 369 char *sd_devpath; 370 int sd_devpathlen; /* Is 0 if sp_devpath is a static string */ 371 vnode_t *sd_vnode; 372 } sdev_info_t; 373 374 #define SOCKMOD_VERSION 1 375 /* name of the TPI pseudo socket module */ 376 #define SOTPI_SMOD_NAME "socktpi" 377 378 typedef struct __smod_priv_s { 379 so_create_func_t smodp_sock_create_func; 380 so_destroy_func_t smodp_sock_destroy_func; 381 so_proto_fallback_func_t smodp_proto_fallback_func; 382 } __smod_priv_t; 383 384 /* 385 * Socket module register information 386 */ 387 typedef struct smod_reg_s { 388 int smod_version; 389 char *smod_name; 390 size_t smod_uc_version; 391 size_t smod_dc_version; 392 so_proto_create_func_t smod_proto_create_func; 393 394 /* __smod_priv_data must be NULL */ 395 __smod_priv_t *__smod_priv; 396 } smod_reg_t; 397 398 /* 399 * Socket module information 400 */ 401 typedef struct smod_info { 402 int smod_version; 403 char *smod_name; 404 uint_t smod_refcnt; /* # of entries */ 405 size_t smod_uc_version; /* upcall version */ 406 size_t smod_dc_version; /* down call version */ 407 so_proto_create_func_t smod_proto_create_func; 408 so_proto_fallback_func_t smod_proto_fallback_func; 409 so_create_func_t smod_sock_create_func; 410 so_destroy_func_t smod_sock_destroy_func; 411 list_node_t smod_node; 412 } smod_info_t; 413 414 typedef struct sockparams_stats { 415 kstat_named_t sps_nfallback; /* # of fallbacks to TPI */ 416 kstat_named_t sps_nactive; /* # of active sockets */ 417 kstat_named_t sps_ncreate; /* total # of created sockets */ 418 } sockparams_stats_t; 419 420 /* 421 * sockparams 422 * 423 * Used for mapping family/type/protocol to module 424 */ 425 struct sockparams { 426 /* 427 * The family, type, protocol, sdev_info and smod_info are 428 * set when the entry is created, and they will never change 429 * thereafter. 430 */ 431 int sp_family; 432 int sp_type; 433 int sp_protocol; 434 435 sdev_info_t sp_sdev_info; /* STREAM device */ 436 char *sp_smod_name; /* socket module name */ 437 smod_info_t *sp_smod_info; /* socket module */ 438 439 kmutex_t sp_lock; /* lock for refcnt */ 440 uint64_t sp_refcnt; /* entry reference count */ 441 sockparams_stats_t sp_stats; 442 kstat_t *sp_kstat; 443 444 /* 445 * The entries below are only modified while holding 446 * splist_lock as a writer. 447 */ 448 int sp_flags; /* see below */ 449 list_node_t sp_node; 450 }; 451 452 453 /* 454 * sockparams flags 455 */ 456 #define SOCKPARAMS_EPHEMERAL 0x1 /* temp. entry, not on global list */ 457 458 extern void sockparams_init(void); 459 extern struct sockparams *sockparams_hold_ephemeral_bydev(int, int, int, 460 const char *, int, int *); 461 extern struct sockparams *sockparams_hold_ephemeral_bymod(int, int, int, 462 const char *, int, int *); 463 extern void sockparams_ephemeral_drop_last_ref(struct sockparams *); 464 465 extern void smod_init(void); 466 extern void smod_add(smod_info_t *); 467 extern int smod_register(const smod_reg_t *); 468 extern int smod_unregister(const char *); 469 extern smod_info_t *smod_lookup_byname(const char *); 470 471 #define SOCKPARAMS_HAS_DEVICE(sp) \ 472 ((sp)->sp_sdev_info.sd_devpath != NULL) 473 474 /* Increase the smod_info_t reference count */ 475 #define SMOD_INC_REF(smodp) { \ 476 ASSERT((smodp) != NULL); \ 477 DTRACE_PROBE1(smodinfo__inc__ref, struct smod_info *, (smodp)); \ 478 atomic_inc_uint(&(smodp)->smod_refcnt); \ 479 } 480 481 /* 482 * Decreace the socket module entry reference count. 483 * When no one mapping to the entry, we try to unload the module from the 484 * kernel. If the module can't unload, just leave the module entry with 485 * a zero refcnt. 486 */ 487 #define SMOD_DEC_REF(sp, smodp) { \ 488 ASSERT((smodp) != NULL); \ 489 ASSERT((smodp)->smod_refcnt != 0); \ 490 atomic_dec_uint(&(smodp)->smod_refcnt); \ 491 /* \ 492 * No need to atomically check the return value because the \ 493 * socket module framework will verify that no one is using \ 494 * the module before unloading. Worst thing that can happen \ 495 * here is multiple calls to mod_remove_by_name(), which is OK. \ 496 */ \ 497 if ((smodp)->smod_refcnt == 0) \ 498 (void) mod_remove_by_name((sp)->sp_smod_name); \ 499 } 500 501 /* Increase the reference count */ 502 #define SOCKPARAMS_INC_REF(sp) { \ 503 ASSERT((sp) != NULL); \ 504 DTRACE_PROBE1(sockparams__inc__ref, struct sockparams *, (sp)); \ 505 mutex_enter(&(sp)->sp_lock); \ 506 (sp)->sp_refcnt++; \ 507 ASSERT((sp)->sp_refcnt != 0); \ 508 mutex_exit(&(sp)->sp_lock); \ 509 } 510 511 /* 512 * Decrease the reference count. 513 * 514 * If the sockparams is ephemeral, then the thread dropping the last ref 515 * count will destroy the entry. 516 */ 517 #define SOCKPARAMS_DEC_REF(sp) { \ 518 ASSERT((sp) != NULL); \ 519 DTRACE_PROBE1(sockparams__dec__ref, struct sockparams *, (sp)); \ 520 mutex_enter(&(sp)->sp_lock); \ 521 ASSERT((sp)->sp_refcnt > 0); \ 522 if ((sp)->sp_refcnt == 1) { \ 523 if ((sp)->sp_flags & SOCKPARAMS_EPHEMERAL) { \ 524 mutex_exit(&(sp)->sp_lock); \ 525 sockparams_ephemeral_drop_last_ref((sp)); \ 526 } else { \ 527 (sp)->sp_refcnt--; \ 528 if ((sp)->sp_smod_info != NULL) \ 529 SMOD_DEC_REF(sp, (sp)->sp_smod_info); \ 530 (sp)->sp_smod_info = NULL; \ 531 mutex_exit(&(sp)->sp_lock); \ 532 } \ 533 } else { \ 534 (sp)->sp_refcnt--; \ 535 mutex_exit(&(sp)->sp_lock); \ 536 } \ 537 } 538 539 /* 540 * Used to traverse the list of AF_UNIX sockets to construct the kstat 541 * for netstat(1m). 542 */ 543 struct socklist { 544 kmutex_t sl_lock; 545 struct sonode *sl_list; 546 }; 547 548 extern struct socklist socklist; 549 /* 550 * ss_full_waits is the number of times the reader thread 551 * waits when the queue is full and ss_empty_waits is the number 552 * of times the consumer thread waits when the queue is empty. 553 * No locks for these as they are just indicators of whether 554 * disk or network or both is slow or fast. 555 */ 556 struct sendfile_stats { 557 uint32_t ss_file_cached; 558 uint32_t ss_file_not_cached; 559 uint32_t ss_full_waits; 560 uint32_t ss_empty_waits; 561 uint32_t ss_file_segmap; 562 }; 563 564 /* 565 * A single sendfile request is represented by snf_req. 566 */ 567 typedef struct snf_req { 568 struct snf_req *sr_next; 569 mblk_t *sr_mp_head; 570 mblk_t *sr_mp_tail; 571 kmutex_t sr_lock; 572 kcondvar_t sr_cv; 573 uint_t sr_qlen; 574 int sr_hiwat; 575 int sr_lowat; 576 int sr_operation; 577 struct vnode *sr_vp; 578 file_t *sr_fp; 579 ssize_t sr_maxpsz; 580 u_offset_t sr_file_off; 581 u_offset_t sr_file_size; 582 #define SR_READ_DONE 0x80000000 583 int sr_read_error; 584 int sr_write_error; 585 } snf_req_t; 586 587 /* A queue of sendfile requests */ 588 struct sendfile_queue { 589 snf_req_t *snfq_req_head; 590 snf_req_t *snfq_req_tail; 591 kmutex_t snfq_lock; 592 kcondvar_t snfq_cv; 593 int snfq_svc_threads; /* # of service threads */ 594 int snfq_idle_cnt; /* # of idling threads */ 595 int snfq_max_threads; 596 int snfq_req_cnt; /* Number of requests */ 597 }; 598 599 #define READ_OP 1 600 #define SNFQ_TIMEOUT (60 * 5 * hz) /* 5 minutes */ 601 602 /* Socket network operations switch */ 603 struct sonodeops { 604 int (*sop_init)(struct sonode *, struct sonode *, cred_t *, 605 int); 606 int (*sop_accept)(struct sonode *, int, cred_t *, struct sonode **); 607 int (*sop_bind)(struct sonode *, struct sockaddr *, socklen_t, 608 int, cred_t *); 609 int (*sop_listen)(struct sonode *, int, cred_t *); 610 int (*sop_connect)(struct sonode *, const struct sockaddr *, 611 socklen_t, int, int, cred_t *); 612 int (*sop_recvmsg)(struct sonode *, struct msghdr *, 613 struct uio *, cred_t *); 614 int (*sop_sendmsg)(struct sonode *, struct msghdr *, 615 struct uio *, cred_t *); 616 int (*sop_sendmblk)(struct sonode *, struct msghdr *, int, 617 cred_t *, mblk_t **); 618 int (*sop_getpeername)(struct sonode *, struct sockaddr *, 619 socklen_t *, boolean_t, cred_t *); 620 int (*sop_getsockname)(struct sonode *, struct sockaddr *, 621 socklen_t *, cred_t *); 622 int (*sop_shutdown)(struct sonode *, int, cred_t *); 623 int (*sop_getsockopt)(struct sonode *, int, int, void *, 624 socklen_t *, int, cred_t *); 625 int (*sop_setsockopt)(struct sonode *, int, int, const void *, 626 socklen_t, cred_t *); 627 int (*sop_ioctl)(struct sonode *, int, intptr_t, int, 628 cred_t *, int32_t *); 629 int (*sop_poll)(struct sonode *, short, int, short *, 630 struct pollhead **); 631 int (*sop_close)(struct sonode *, int, cred_t *); 632 }; 633 634 #define SOP_INIT(so, flag, cr, flags) \ 635 ((so)->so_ops->sop_init((so), (flag), (cr), (flags))) 636 #define SOP_ACCEPT(so, fflag, cr, nsop) \ 637 ((so)->so_ops->sop_accept((so), (fflag), (cr), (nsop))) 638 #define SOP_BIND(so, name, namelen, flags, cr) \ 639 ((so)->so_ops->sop_bind((so), (name), (namelen), (flags), (cr))) 640 #define SOP_LISTEN(so, backlog, cr) \ 641 ((so)->so_ops->sop_listen((so), (backlog), (cr))) 642 #define SOP_CONNECT(so, name, namelen, fflag, flags, cr) \ 643 ((so)->so_ops->sop_connect((so), (name), (namelen), (fflag), (flags), \ 644 (cr))) 645 #define SOP_RECVMSG(so, msg, uiop, cr) \ 646 ((so)->so_ops->sop_recvmsg((so), (msg), (uiop), (cr))) 647 #define SOP_SENDMSG(so, msg, uiop, cr) \ 648 ((so)->so_ops->sop_sendmsg((so), (msg), (uiop), (cr))) 649 #define SOP_SENDMBLK(so, msg, size, cr, mpp) \ 650 ((so)->so_ops->sop_sendmblk((so), (msg), (size), (cr), (mpp))) 651 #define SOP_GETPEERNAME(so, addr, addrlen, accept, cr) \ 652 ((so)->so_ops->sop_getpeername((so), (addr), (addrlen), (accept), (cr))) 653 #define SOP_GETSOCKNAME(so, addr, addrlen, cr) \ 654 ((so)->so_ops->sop_getsockname((so), (addr), (addrlen), (cr))) 655 #define SOP_SHUTDOWN(so, how, cr) \ 656 ((so)->so_ops->sop_shutdown((so), (how), (cr))) 657 #define SOP_GETSOCKOPT(so, level, optionname, optval, optlenp, flags, cr) \ 658 ((so)->so_ops->sop_getsockopt((so), (level), (optionname), \ 659 (optval), (optlenp), (flags), (cr))) 660 #define SOP_SETSOCKOPT(so, level, optionname, optval, optlen, cr) \ 661 ((so)->so_ops->sop_setsockopt((so), (level), (optionname), \ 662 (optval), (optlen), (cr))) 663 #define SOP_IOCTL(so, cmd, arg, mode, cr, rvalp) \ 664 ((so)->so_ops->sop_ioctl((so), (cmd), (arg), (mode), (cr), (rvalp))) 665 #define SOP_POLL(so, events, anyyet, reventsp, phpp) \ 666 ((so)->so_ops->sop_poll((so), (events), (anyyet), (reventsp), (phpp))) 667 #define SOP_CLOSE(so, flag, cr) \ 668 ((so)->so_ops->sop_close((so), (flag), (cr))) 669 670 #endif /* defined(_KERNEL) || defined(_KMEMUSER) */ 671 672 #ifdef _KERNEL 673 674 #define ISALIGNED_cmsghdr(addr) \ 675 (((uintptr_t)(addr) & (_CMSG_HDR_ALIGNMENT - 1)) == 0) 676 677 #define ROUNDUP_cmsglen(len) \ 678 (((len) + _CMSG_HDR_ALIGNMENT - 1) & ~(_CMSG_HDR_ALIGNMENT - 1)) 679 680 #define IS_NON_STREAM_SOCK(vp) \ 681 ((vp)->v_type == VSOCK && (vp)->v_stream == NULL) 682 /* 683 * Macros that operate on struct cmsghdr. 684 * Used in parsing msg_control. 685 * The CMSG_VALID macro does not assume that the last option buffer is padded. 686 */ 687 #define CMSG_NEXT(cmsg) \ 688 (struct cmsghdr *)((uintptr_t)(cmsg) + \ 689 ROUNDUP_cmsglen((cmsg)->cmsg_len)) 690 #define CMSG_CONTENT(cmsg) (&((cmsg)[1])) 691 #define CMSG_CONTENTLEN(cmsg) ((cmsg)->cmsg_len - sizeof (struct cmsghdr)) 692 #define CMSG_VALID(cmsg, start, end) \ 693 (ISALIGNED_cmsghdr(cmsg) && \ 694 ((uintptr_t)(cmsg) >= (uintptr_t)(start)) && \ 695 ((uintptr_t)(cmsg) < (uintptr_t)(end)) && \ 696 ((ssize_t)(cmsg)->cmsg_len >= sizeof (struct cmsghdr)) && \ 697 ((uintptr_t)(cmsg) + (cmsg)->cmsg_len <= (uintptr_t)(end))) 698 699 /* 700 * Maximum size of any argument that is copied in (addresses, options, 701 * access rights). MUST be at least MAXPATHLEN + 3. 702 * BSD and SunOS 4.X limited this to MLEN or MCLBYTES. 703 */ 704 #define SO_MAXARGSIZE 8192 705 706 /* 707 * Convert between vnode and sonode 708 */ 709 #define VTOSO(vp) ((struct sonode *)((vp)->v_data)) 710 #define SOTOV(sp) ((sp)->so_vnode) 711 712 /* 713 * Internal flags for sobind() 714 */ 715 #define _SOBIND_REBIND 0x01 /* Bind to existing local address */ 716 #define _SOBIND_UNSPEC 0x02 /* Bind to unspecified address */ 717 #define _SOBIND_LOCK_HELD 0x04 /* so_excl_lock held by caller */ 718 #define _SOBIND_NOXLATE 0x08 /* No addr translation for AF_UNIX */ 719 #define _SOBIND_XPG4_2 0x10 /* xpg4.2 semantics */ 720 #define _SOBIND_SOCKBSD 0x20 /* BSD semantics */ 721 #define _SOBIND_LISTEN 0x40 /* Make into SS_ACCEPTCONN */ 722 #define _SOBIND_SOCKETPAIR 0x80 /* Internal flag for so_socketpair() */ 723 /* to enable listen with backlog = 1 */ 724 725 /* 726 * Internal flags for sounbind() 727 */ 728 #define _SOUNBIND_REBIND 0x01 /* Don't clear fields - will rebind */ 729 730 /* 731 * Internal flags for soconnect() 732 */ 733 #define _SOCONNECT_NOXLATE 0x01 /* No addr translation for AF_UNIX */ 734 #define _SOCONNECT_DID_BIND 0x02 /* Unbind when connect fails */ 735 #define _SOCONNECT_XPG4_2 0x04 /* xpg4.2 semantics */ 736 737 /* 738 * Internal flags for sodisconnect() 739 */ 740 #define _SODISCONNECT_LOCK_HELD 0x01 /* so_excl_lock held by caller */ 741 742 /* 743 * Internal flags for sotpi_getsockopt(). 744 */ 745 #define _SOGETSOCKOPT_XPG4_2 0x01 /* xpg4.2 semantics */ 746 747 /* 748 * Internal flags for soallocproto*() 749 */ 750 #define _ALLOC_NOSLEEP 0 /* Don't sleep for memory */ 751 #define _ALLOC_INTR 1 /* Sleep until interrupt */ 752 #define _ALLOC_SLEEP 2 /* Sleep forever */ 753 754 /* 755 * Internal structure for handling AF_UNIX file descriptor passing 756 */ 757 struct fdbuf { 758 int fd_size; /* In bytes, for kmem_free */ 759 int fd_numfd; /* Number of elements below */ 760 char *fd_ebuf; /* Extra buffer to free */ 761 int fd_ebuflen; 762 frtn_t fd_frtn; 763 struct file *fd_fds[1]; /* One or more */ 764 }; 765 #define FDBUF_HDRSIZE (sizeof (struct fdbuf) - sizeof (struct file *)) 766 767 /* 768 * Variable that can be patched to set what version of socket socket() 769 * will create. 770 */ 771 extern int so_default_version; 772 773 #ifdef DEBUG 774 /* Turn on extra testing capabilities */ 775 #define SOCK_TEST 776 #endif /* DEBUG */ 777 778 #ifdef DEBUG 779 char *pr_state(uint_t, uint_t); 780 char *pr_addr(int, struct sockaddr *, t_uscalar_t); 781 int so_verify_oobstate(struct sonode *); 782 #endif /* DEBUG */ 783 784 /* 785 * DEBUG macros 786 */ 787 #if defined(DEBUG) 788 #define SOCK_DEBUG 789 790 extern int sockdebug; 791 extern int sockprinterr; 792 793 #define eprint(args) printf args 794 #define eprintso(so, args) \ 795 { if (sockprinterr && ((so)->so_options & SO_DEBUG)) printf args; } 796 #define eprintline(error) \ 797 { \ 798 if (error != EINTR && (sockprinterr || sockdebug > 0)) \ 799 printf("socket error %d: line %d file %s\n", \ 800 (error), __LINE__, __FILE__); \ 801 } 802 803 #define eprintsoline(so, error) \ 804 { if (sockprinterr && ((so)->so_options & SO_DEBUG)) \ 805 printf("socket(%p) error %d: line %d file %s\n", \ 806 (void *)(so), (error), __LINE__, __FILE__); \ 807 } 808 #define dprint(level, args) { if (sockdebug > (level)) printf args; } 809 #define dprintso(so, level, args) \ 810 { if (sockdebug > (level) && ((so)->so_options & SO_DEBUG)) printf args; } 811 812 #else /* define(DEBUG) */ 813 814 #define eprint(args) {} 815 #define eprintso(so, args) {} 816 #define eprintline(error) {} 817 #define eprintsoline(so, error) {} 818 #define dprint(level, args) {} 819 #define dprintso(so, level, args) {} 820 821 #endif /* defined(DEBUG) */ 822 823 extern struct vfsops sock_vfsops; 824 extern struct vnodeops *socket_vnodeops; 825 extern const struct fs_operation_def socket_vnodeops_template[]; 826 827 extern dev_t sockdev; 828 829 /* 830 * sockfs functions 831 */ 832 extern int sock_getmsg(vnode_t *, struct strbuf *, struct strbuf *, 833 uchar_t *, int *, int, rval_t *); 834 extern int sock_putmsg(vnode_t *, struct strbuf *, struct strbuf *, 835 uchar_t, int, int); 836 extern int sogetvp(char *, vnode_t **, int); 837 extern int sockinit(int, char *); 838 extern int soconfig(int, int, int, char *, int, char *); 839 extern int solookup(int, int, int, struct sockparams **); 840 extern void so_lock_single(struct sonode *); 841 extern void so_unlock_single(struct sonode *, int); 842 extern int so_lock_read(struct sonode *, int); 843 extern int so_lock_read_intr(struct sonode *, int); 844 extern void so_unlock_read(struct sonode *); 845 extern void *sogetoff(mblk_t *, t_uscalar_t, t_uscalar_t, uint_t); 846 extern void so_getopt_srcaddr(void *, t_uscalar_t, 847 void **, t_uscalar_t *); 848 extern int so_getopt_unix_close(void *, t_uscalar_t); 849 extern void fdbuf_free(struct fdbuf *); 850 extern mblk_t *fdbuf_allocmsg(int, struct fdbuf *); 851 extern int fdbuf_create(void *, int, struct fdbuf **); 852 extern void so_closefds(void *, t_uscalar_t, int, int); 853 extern int so_getfdopt(void *, t_uscalar_t, int, void **, int *); 854 t_uscalar_t so_optlen(void *, t_uscalar_t, int); 855 extern void so_cmsg2opt(void *, t_uscalar_t, int, mblk_t *); 856 extern t_uscalar_t 857 so_cmsglen(mblk_t *, void *, t_uscalar_t, int); 858 extern int so_opt2cmsg(mblk_t *, void *, t_uscalar_t, int, 859 void *, t_uscalar_t); 860 extern void soisconnecting(struct sonode *); 861 extern void soisconnected(struct sonode *); 862 extern void soisdisconnected(struct sonode *, int); 863 extern void socantsendmore(struct sonode *); 864 extern void socantrcvmore(struct sonode *); 865 extern void soseterror(struct sonode *, int); 866 extern int sogeterr(struct sonode *, boolean_t); 867 extern int sowaitconnected(struct sonode *, int, int); 868 869 extern ssize_t soreadfile(file_t *, uchar_t *, u_offset_t, int *, size_t); 870 extern void *sock_kstat_init(zoneid_t); 871 extern void sock_kstat_fini(zoneid_t, void *); 872 extern struct sonode *getsonode(int, int *, file_t **); 873 /* 874 * Function wrappers (mostly around the sonode switch) for 875 * backward compatibility. 876 */ 877 extern int soaccept(struct sonode *, int, struct sonode **); 878 extern int sobind(struct sonode *, struct sockaddr *, socklen_t, 879 int, int); 880 extern int solisten(struct sonode *, int); 881 extern int soconnect(struct sonode *, const struct sockaddr *, socklen_t, 882 int, int); 883 extern int sorecvmsg(struct sonode *, struct nmsghdr *, struct uio *); 884 extern int sosendmsg(struct sonode *, struct nmsghdr *, struct uio *); 885 extern int soshutdown(struct sonode *, int); 886 extern int sogetsockopt(struct sonode *, int, int, void *, socklen_t *, 887 int); 888 extern int sosetsockopt(struct sonode *, int, int, const void *, 889 t_uscalar_t); 890 891 extern struct sonode *socreate(struct sockparams *, int, int, int, int, 892 int *); 893 894 extern int so_copyin(const void *, void *, size_t, int); 895 extern int so_copyout(const void *, void *, size_t, int); 896 897 #endif 898 899 /* 900 * Internal structure for obtaining sonode information from the socklist. 901 * These types match those corresponding in the sonode structure. 902 * This is not a published interface, and may change at any time. 903 */ 904 struct sockinfo { 905 uint_t si_size; /* real length of this struct */ 906 short si_family; 907 short si_type; 908 ushort_t si_flag; 909 uint_t si_state; 910 uint_t si_ux_laddr_sou_magic; 911 uint_t si_ux_faddr_sou_magic; 912 t_scalar_t si_serv_type; 913 t_uscalar_t si_laddr_soa_len; 914 t_uscalar_t si_faddr_soa_len; 915 uint16_t si_laddr_family; 916 uint16_t si_faddr_family; 917 char si_laddr_sun_path[MAXPATHLEN + 1]; /* NULL terminated */ 918 char si_faddr_sun_path[MAXPATHLEN + 1]; 919 boolean_t si_faddr_noxlate; 920 zoneid_t si_szoneid; 921 }; 922 923 #define SOCKMOD_PATH "socketmod" /* dir where sockmods are stored */ 924 925 #ifdef __cplusplus 926 } 927 #endif 928 929 #endif /* _SYS_SOCKETVAR_H */ 930