1.\"- 2.\" Copyright (c) 2006 Robert N. M. Watson 3.\" Copyright (c) 2014 Benjamin J. Kaduk 4.\" All rights reserved. 5.\" 6.\" Redistribution and use in source and binary forms, with or without 7.\" modification, are permitted provided that the following conditions 8.\" are met: 9.\" 1. Redistributions of source code must retain the above copyright 10.\" notice, this list of conditions and the following disclaimer. 11.\" 2. Redistributions in binary form must reproduce the above copyright 12.\" notice, this list of conditions and the following disclaimer in the 13.\" documentation and/or other materials provided with the distribution. 14.\" 15.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25.\" SUCH DAMAGE. 
26.\" 27.\" $FreeBSD$ 28.\" 29.Dd October 18, 2018 30.Dt SOCKET 9 31.Os 32.Sh NAME 33.Nm socket 34.Nd "kernel socket interface" 35.Sh SYNOPSIS 36.In sys/socket.h 37.In sys/socketvar.h 38.Ft void 39.Fn soabort "struct socket *so" 40.Ft int 41.Fn soaccept "struct socket *so" "struct sockaddr **nam" 42.Ft int 43.Fn socheckuid "struct socket *so" "uid_t uid" 44.Ft int 45.Fn sobind "struct socket *so" "struct sockaddr *nam" "struct thread *td" 46.Ft void 47.Fn soclose "struct socket *so" 48.Ft int 49.Fn soconnect "struct socket *so" "struct sockaddr *nam" "struct thread *td" 50.Ft int 51.Fo socreate 52.Fa "int dom" "struct socket **aso" "int type" "int proto" 53.Fa "struct ucred *cred" "struct thread *td" 54.Fc 55.Ft int 56.Fn sodisconnect "struct socket *so" 57.Ft void 58.Fo sodtor_set 59.Fa "struct socket *so" 60.Fa "void (*func)(struct socket *)" 61.Fc 62.Ft struct sockaddr * 63.Fn sodupsockaddr "const struct sockaddr *sa" "int mflags" 64.Ft void 65.Fn sofree "struct socket *so" 66.Ft void 67.Fn sohasoutofband "struct socket *so" 68.Ft int 69.Fn solisten "struct socket *so" "int backlog" "struct thread *td" 70.Ft void 71.Fn solisten_proto "struct socket *so" "int backlog" 72.Ft int 73.Fn solisten_proto_check "struct socket *so" 74.Ft struct socket * 75.Fn sonewconn "struct socket *head" "int connstatus" 76.Ft int 77.Fo sopoll 78.Fa "struct socket *so" "int events" "struct ucred *active_cred" 79.Fa "struct thread *td" 80.Fc 81.Ft int 82.Fo sopoll_generic 83.Fa "struct socket *so" "int events" "struct ucred *active_cred" 84.Fa "struct thread *td" 85.Fc 86.Ft int 87.Fo soreceive 88.Fa "struct socket *so" "struct sockaddr **psa" "struct uio *uio" 89.Fa "struct mbuf **mp0" "struct mbuf **controlp" "int *flagsp" 90.Fc 91.Ft int 92.Fo soreceive_stream 93.Fa "struct socket *so" "struct sockaddr **paddr" 94.Fa "struct uio *uio" "struct mbuf **mp0" "struct mbuf **controlp" 95.Fa "int *flagsp" 96.Fc 97.Ft int 98.Fo soreceive_dgram 99.Fa "struct socket *so" "struct sockaddr 
**paddr" 100.Fa "struct uio *uio" "struct mbuf **mp0" "struct mbuf **controlp" 101.Fa "int *flagsp" 102.Fc 103.Ft int 104.Fo soreceive_generic 105.Fa "struct socket *so" "struct sockaddr **paddr" 106.Fa "struct uio *uio" "struct mbuf **mp0" "struct mbuf **controlp" 107.Fa "int *flagsp" 108.Fc 109.Ft int 110.Fn soreserve "struct socket *so" "u_long sndcc" "u_long rcvcc" 111.Ft void 112.Fn sorflush "struct socket *so" 113.Ft int 114.Fo sosend 115.Fa "struct socket *so" "struct sockaddr *addr" "struct uio *uio" 116.Fa "struct mbuf *top" "struct mbuf *control" "int flags" "struct thread *td" 117.Fc 118.Ft int 119.Fo sosend_dgram 120.Fa "struct socket *so" "struct sockaddr *addr" 121.Fa "struct uio *uio" "struct mbuf *top" "struct mbuf *control" 122.Fa "int flags" "struct thread *td" 123.Fc 124.Ft int 125.Fo sosend_generic 126.Fa "struct socket *so" "struct sockaddr *addr" 127.Fa "struct uio *uio" "struct mbuf *top" "struct mbuf *control" 128.Fa "int flags" "struct thread *td" 129.Fc 130.Ft int 131.Fn soshutdown "struct socket *so" "int how" 132.Ft void 133.Fn sotoxsocket "struct socket *so" "struct xsocket *xso" 134.Ft void 135.Fn soupcall_clear "struct socket *so" "int which" 136.Ft void 137.Fo soupcall_set 138.Fa "struct socket *so" "int which" 139.Fa "int (*func)(struct socket *, void *, int)" "void *arg" 140.Fc 141.Ft void 142.Fn sowakeup "struct socket *so" "struct sockbuf *sb" 143.In sys/sockopt.h 144.Ft int 145.Fn sosetopt "struct socket *so" "struct sockopt *sopt" 146.Ft int 147.Fn sogetopt "struct socket *so" "struct sockopt *sopt" 148.Ft int 149.Fn sooptcopyin "struct sockopt *sopt" "void *buf" "size_t len" "size_t minlen" 150.Ft int 151.Fn sooptcopyout "struct sockopt *sopt" "const void *buf" "size_t len" 152.Sh DESCRIPTION 153The kernel 154.Nm 155programming interface permits in-kernel consumers to interact with 156local and network socket objects in a manner similar to that permitted using 157the 158.Xr socket 2 159user API. 
160These interfaces are appropriate for use by distributed file systems and 161other network-aware kernel services. 162While the user API operates on file descriptors, the kernel interfaces 163operate directly on 164.Vt "struct socket" 165pointers. 166Some portions of the kernel API exist only to implement the user API, 167and are not expected to be used by kernel code. 168The portions of the socket API used by socket consumers and 169implementations of network protocols will differ; some routines 170are only useful for protocol implementors. 171.Pp 172Except where otherwise indicated, 173.Nm 174functions may sleep, and are not appropriate for use in an 175.Xr ithread 9 176context or while holding non-sleepable kernel locks. 177.Ss Creating and Destroying Sockets 178A new socket may be created using 179.Fn socreate . 180As with 181.Xr socket 2 , 182arguments specify the requested domain, type, and protocol via 183.Fa dom , type , 184and 185.Fa proto . 186The socket is returned via 187.Fa aso 188on success. 189In addition, the credential used to authorize operations associated with the 190socket will be passed via 191.Fa cred 192(and will be cached for the lifetime of the socket), and the thread 193performing the operation via 194.Fa td . 195.Em Warning : 196authorization of the socket creation operation will be performed 197using the thread credential for some protocols (such as raw sockets). 198.Pp 199Sockets may be closed and freed using 200.Fn soclose , 201which has similar semantics to 202.Xr close 2 . 203.Pp 204In certain circumstances, it is appropriate to destroy a socket without 205waiting for it to disconnect, for which 206.Fn soabort 207is used. 208This is only appropriate for incoming connections which are in a 209partially connected state. 210It must be called on an unreferenced socket, by the thread which 211removed the socket from its listen queue, to prevent races. 212It will call into protocol code, so no socket locks may be held 213over the call. 
214The caller of 215.Fn soabort 216is responsible for setting the VNET context. 217The normal path to freeing a socket is 218.Fn sofree , 219which handles reference counting on the socket. 220It should be called whenever a reference is released, and also whenever 221reference flags are cleared in socket or protocol code. 222Calls to 223.Fn sofree 224should not be made from outside the socket layer; outside callers 225should use 226.Fn soclose 227instead. 228.Ss Connections and Addresses 229The 230.Fn sobind 231function is equivalent to the 232.Xr bind 2 233system call, and binds the socket 234.Fa so 235to the address 236.Fa nam . 237The operation will be authorized using the credential on thread 238.Fa td . 239.Pp 240The 241.Fn soconnect 242function is equivalent to the 243.Xr connect 2 244system call, and initiates a connection on the socket 245.Fa so 246to the address 247.Fa nam . 248The operation will be authorized using the credential on thread 249.Fa td . 250Unlike the user system call, 251.Fn soconnect 252returns immediately; the caller may 253.Xr msleep 9 254on 255.Fa so->so_timeo 256while holding the socket mutex and waiting for the 257.Dv SS_ISCONNECTING 258flag to clear or 259.Fa so->so_error 260to become non-zero. 261If 262.Fn soconnect 263fails, the caller must manually clear the 264.Dv SS_ISCONNECTING 265flag. 266.Pp 267A call to 268.Fn sodisconnect 269disconnects the socket without closing it. 270.Pp 271The 272.Fn soshutdown 273function is equivalent to the 274.Xr shutdown 2 275system call, and causes part or all of a connection on a socket to be closed 276down. 277.Pp 278Sockets are transitioned from non-listening status to listening with 279.Fn solisten . 280.Ss Socket Options 281The 282.Fn sogetopt 283function is equivalent to the 284.Xr getsockopt 2 285system call, and retrieves a socket option on socket 286.Fa so . 
287The 288.Fn sosetopt 289function is equivalent to the 290.Xr setsockopt 2 291system call, and sets a socket option on socket 292.Fa so . 293.Pp 294The second argument in both 295.Fn sogetopt 296and 297.Fn sosetopt 298is the 299.Fa sopt 300pointer to a 301.Vt "struct sockopt" 302describing the socket option operation. 303The caller-allocated structure must be zeroed, and then have its fields 304initialized to specify socket option operation arguments: 305.Bl -tag -width ".Va sopt_valsize" 306.It Va sopt_dir 307Set to 308.Dv SOPT_SET 309or 310.Dv SOPT_GET 311depending on whether this is a get or set operation. 312.It Va sopt_level 313Specify the level in the network stack the operation is targeted at; for 314example, 315.Dv SOL_SOCKET . 316.It Va sopt_name 317Specify the name of the socket option to get or set. 318.It Va sopt_val 319Kernel space pointer to the argument value for the socket option. 320.It Va sopt_valsize 321Size of the argument value in bytes. 322.El 323.Ss Socket Upcalls 324In order for the owner of a socket to be notified when the socket 325is ready to send or receive data, an upcall may be registered on 326the socket. 327The upcall is a function that will be called by the socket framework 328when a socket buffer associated with the given socket is ready for 329reading or writing. 330.Fn soupcall_set 331is used to register a socket upcall. 332The function 333.Va func 334is registered, and the pointer 335.Va arg 336will be passed as its second argument when it is called by the framework. 337The possible values for 338.Va which 339are 340.Dv SO_RCV 341and 342.Dv SO_SND , 343which register upcalls for receive and send events, respectively. 344The upcall function 345.Fn func 346must return either 347.Dv SU_OK 348or 349.Dv SU_ISCONNECTED , 350depending on whether or not a call to 351.Fn soisconnected 352should be made by the socket framework after the upcall returns. 
353The upcall 354.Va func 355cannot call 356.Fn soisconnected 357itself due to lock ordering with the socket buffer lock. 358Only 359.Dv SO_RCV 360upcalls should return 361.Dv SU_ISCONNECTED . 362When a 363.Dv SO_RCV 364upcall returns 365.Dv SU_ISCONNECTED , 366the upcall will be removed from the socket. 367.Pp 368Upcalls are removed from their socket by 369.Fn soupcall_clear . 370The 371.Va which 372argument again specifies whether the sending or receiving upcall is to 373be cleared, with 374.Dv SO_RCV 375or 376.Dv SO_SND . 377.Ss Socket Destructor Callback 378A kernel system can use the 379.Fn sodtor_set 380function to set a destructor for a socket. 381The destructor is called when the socket is about to be freed. 382The destructor is called before the protocol detach routine. 383The destructor can serve as a callback to initiate additional cleanup actions. 384.Ss Socket I/O 385The 386.Fn soreceive 387function is equivalent to the 388.Xr recvmsg 2 389system call, and attempts to receive bytes of data from the socket 390.Fa so , 391optionally blocking while waiting for data if none is ready to read. 392Data may be retrieved directly to kernel or user memory via the 393.Fa uio 394argument, or as an mbuf chain returned to the caller via 395.Fa mp0 , 396avoiding a data copy. 397The 398.Fa uio 399must always be 400.Pf non- Dv NULL . 401If 402.Fa mp0 403is 404.Pf non- Dv NULL , 405only the 406.Fa uio_resid 407of 408.Fa uio 409is used. 410The caller may optionally retrieve a socket address on a protocol with the 411.Dv PR_ADDR 412capability by providing storage via a 413.Pf non- Dv NULL 414.Fa psa 415argument. 416The caller may optionally retrieve control data mbufs via a 417.Pf non- Dv NULL 418.Fa controlp 419argument. 420Optional flags may be passed to 421.Fn soreceive 422via a 423.Pf non- Dv NULL 424.Fa flagsp 425argument, and use the same flag name space as the 426.Xr recvmsg 2 427system call. 
428.Pp 429The 430.Fn sosend 431function is equivalent to the 432.Xr sendmsg 2 433system call, and attempts to send bytes of data via the socket 434.Fa so , 435optionally blocking if data cannot be immediately sent. 436Data may be sent directly from kernel or user memory via the 437.Fa uio 438argument, or as an mbuf chain via 439.Fa top , 440avoiding a data copy. 441Only one of the 442.Fa uio 443or 444.Fa top 445pointers may be 446.Pf non- Dv NULL . 447An optional destination address may be specified via a 448.Pf non- Dv NULL 449.Fa addr 450argument, which may result in an implicit connect if supported by the 451protocol. 452The caller may optionally send control data mbufs via a 453.Pf non- Dv NULL 454.Fa control 455argument. 456Flags may be passed to 457.Fn sosend 458using the 459.Fa flags 460argument, and use the same flag name space as the 461.Xr sendmsg 2 462system call. 463.Pp 464Kernel callers running in 465.Xr ithread 9 466context, or with a mutex held, will wish to use non-blocking sockets and pass 467the 468.Dv MSG_DONTWAIT 469flag in order to prevent these functions from sleeping. 470.Pp 471A socket can be queried for readability, writability, out-of-band data, 472or end-of-file using 473.Fn sopoll . 474The possible values for 475.Va events 476are as for 477.Xr poll 2 , 478with symbolic values 479.Dv POLLIN , 480.Dv POLLPRI , 481.Dv POLLOUT , 482.Dv POLLRDNORM , 483.Dv POLLWRNORM , 484.Dv POLLRDBAND , 485and 486.Dv POLLINIGNEOF 487taken from 488.In sys/poll.h . 489.Pp 490Calls to 491.Fn soaccept 492pass through to the protocol's accept routine to accept an incoming connection. 493.Ss Socket Utility Functions 494The uid of a socket's credential may be compared against a 495.Va uid 496with 497.Fn socheckuid . 498.Pp 499A copy of an existing 500.Vt struct sockaddr 501may be made using 502.Fn sodupsockaddr . 
503.Pp 504Protocol implementations notify the socket layer of the arrival of 505out-of-band data using 506.Fn sohasoutofband , 507so that the socket layer can notify socket consumers of the available data. 508.Pp 509An 510.Dq external-format 511version of a 512.Vt struct socket 513can be created using 514.Fn sotoxsocket , 515suitable for isolating user code from changes in the kernel structure. 516.Ss Protocol Implementations 517Protocols must supply an implementation for 518.Fn solisten ; 519such protocol implementations can call back into the socket layer using 520.Fn solisten_proto_check 521and 522.Fn solisten_proto 523to check and set the socket-layer listen state. 524These callbacks are provided so that the protocol implementation 525can order the socket layer and protocol locks as necessary. 526Protocols must supply an implementation of 527.Fn soreceive ; 528the functions 529.Fn soreceive_stream , 530.Fn soreceive_dgram , 531and 532.Fn soreceive_generic 533are supplied for use by such implementations. 534.Pp 535Protocol implementations can use 536.Fn sonewconn 537to create a socket and attach protocol state to that socket. 538This can be used to create new sockets available for 539.Fn soaccept 540on a listen socket. 541The returned socket has a reference count of zero. 542.Pp 543Protocols must supply an implementation for 544.Fn sopoll ; 545.Fn sopoll_generic 546is provided for use by protocol implementations. 547.Pp 548The functions 549.Fn sosend_dgram 550and 551.Fn sosend_generic 552are supplied to assist in protocol implementations of 553.Fn sosend . 554.Pp 555When a protocol creates a new socket structure, it is necessary to 556reserve socket buffer space for that socket, by calling 557.Fn soreserve . 558The rough inverse of this reservation is performed by 559.Fn sorflush , 560which is called automatically by the socket framework. 
561.Pp 562When a protocol needs to wake up threads waiting for the socket to 563become ready to read or write, variants of 564.Fn sowakeup 565are used. 566The 567.Fn sowakeup 568function should not be called directly by protocol code, instead use the 569wrappers 570.Fn sorwakeup , 571.Fn sorwakeup_locked , 572.Fn sowwakeup , 573and 574.Fn sowwakeup_locked 575for readers and writers, with the corresponding socket buffer lock 576not already locked, or already held, respectively. 577.Pp 578The functions 579.Fn sooptcopyin 580and 581.Fn sooptcopyout 582are useful for transferring 583.Vt struct sockopt 584data between user and kernel code. 585.Sh SEE ALSO 586.Xr bind 2 , 587.Xr close 2 , 588.Xr connect 2 , 589.Xr getsockopt 2 , 590.Xr recv 2 , 591.Xr send 2 , 592.Xr setsockopt 2 , 593.Xr shutdown 2 , 594.Xr socket 2 , 595.Xr ng_ksocket 4 , 596.Xr ithread 9 , 597.Xr msleep 9 , 598.Xr ucred 9 599.Sh HISTORY 600The 601.Xr socket 2 602system call appeared in 603.Bx 4.2 . 604This manual page was introduced in 605.Fx 7.0 . 606.Sh AUTHORS 607This manual page was written by 608.An Robert Watson 609and 610.An Benjamin Kaduk . 611.Sh BUGS 612The use of explicitly passed credentials, credentials hung from explicitly 613passed threads, the credential on 614.Dv curthread , 615and the cached credential from 616socket creation time is inconsistent, and may lead to unexpected behaviour. 617It is possible that several of the 618.Fa td 619arguments should be 620.Fa cred 621arguments, or simply not be present at all. 622.Pp 623The caller may need to manually clear 624.Dv SS_ISCONNECTING 625if 626.Fn soconnect 627returns an error. 628.Pp 629The 630.Dv MSG_DONTWAIT 631flag is not implemented for 632.Fn sosend , 633and may not always work with 634.Fn soreceive 635when zero copy sockets are enabled. 636.Pp 637This manual page does not describe how to register socket upcalls or monitor 638a socket for readability/writability without using blocking I/O. 
639.Pp 640The 641.Fn soref 642and 643.Fn sorele 644functions are not described, and in most cases should not be used, due to 645confusing and potentially incorrect interactions when 646.Fn sorele 647is last called after 648.Fn soclose . 649