1.\"- 2.\" Copyright (c) 2006 Robert N. M. Watson 3.\" Copyright (c) 2014 Benjamin J. Kaduk 4.\" All rights reserved. 5.\" 6.\" Redistribution and use in source and binary forms, with or without 7.\" modification, are permitted provided that the following conditions 8.\" are met: 9.\" 1. Redistributions of source code must retain the above copyright 10.\" notice, this list of conditions and the following disclaimer. 11.\" 2. Redistributions in binary form must reproduce the above copyright 12.\" notice, this list of conditions and the following disclaimer in the 13.\" documentation and/or other materials provided with the distribution. 14.\" 15.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25.\" SUCH DAMAGE. 
26.\" 27.\" $FreeBSD$ 28.\" 29.Dd September 6, 2022 30.Dt SOCKET 9 31.Os 32.Sh NAME 33.Nm socket 34.Nd "kernel socket interface" 35.Sh SYNOPSIS 36.In sys/socket.h 37.In sys/socketvar.h 38.Ft void 39.Fn soabort "struct socket *so" 40.Ft int 41.Fn soaccept "struct socket *so" "struct sockaddr **nam" 42.Ft int 43.Fn socheckuid "struct socket *so" "uid_t uid" 44.Ft int 45.Fn sobind "struct socket *so" "struct sockaddr *nam" "struct thread *td" 46.Ft void 47.Fn soclose "struct socket *so" 48.Ft int 49.Fn soconnect "struct socket *so" "struct sockaddr *nam" "struct thread *td" 50.Ft int 51.Fo socreate 52.Fa "int dom" "struct socket **aso" "int type" "int proto" 53.Fa "struct ucred *cred" "struct thread *td" 54.Fc 55.Ft int 56.Fn sodisconnect "struct socket *so" 57.Ft void 58.Fo sodtor_set 59.Fa "struct socket *so" 60.Fa "void (*func)(struct socket *)" 61.Fc 62.Ft struct sockaddr * 63.Fn sodupsockaddr "const struct sockaddr *sa" "int mflags" 64.Ft void 65.Fn sofree "struct socket *so" 66.Ft void 67.Fn sohasoutofband "struct socket *so" 68.Ft int 69.Fn solisten "struct socket *so" "int backlog" "struct thread *td" 70.Ft void 71.Fn solisten_proto "struct socket *so" "int backlog" 72.Ft int 73.Fn solisten_proto_check "struct socket *so" 74.Ft struct socket * 75.Fn sonewconn "struct socket *head" "int connstatus" 76.Ft int 77.Fo sopoll 78.Fa "struct socket *so" "int events" "struct ucred *active_cred" 79.Fa "struct thread *td" 80.Fc 81.Ft int 82.Fo sopoll_generic 83.Fa "struct socket *so" "int events" "struct ucred *active_cred" 84.Fa "struct thread *td" 85.Fc 86.Ft int 87.Fo soreceive 88.Fa "struct socket *so" "struct sockaddr **psa" "struct uio *uio" 89.Fa "struct mbuf **mp0" "struct mbuf **controlp" "int *flagsp" 90.Fc 91.Ft int 92.Fo soreceive_stream 93.Fa "struct socket *so" "struct sockaddr **paddr" 94.Fa "struct uio *uio" "struct mbuf **mp0" "struct mbuf **controlp" 95.Fa "int *flagsp" 96.Fc 97.Ft int 98.Fo soreceive_dgram 99.Fa "struct socket *so" "struct sockaddr 
**paddr" 100.Fa "struct uio *uio" "struct mbuf **mp0" "struct mbuf **controlp" 101.Fa "int *flagsp" 102.Fc 103.Ft int 104.Fo soreceive_generic 105.Fa "struct socket *so" "struct sockaddr **paddr" 106.Fa "struct uio *uio" "struct mbuf **mp0" "struct mbuf **controlp" 107.Fa "int *flagsp" 108.Fc 109.Ft int 110.Fn soreserve "struct socket *so" "u_long sndcc" "u_long rcvcc" 111.Ft void 112.Fn sorflush "struct socket *so" 113.Ft int 114.Fo sosend 115.Fa "struct socket *so" "struct sockaddr *addr" "struct uio *uio" 116.Fa "struct mbuf *top" "struct mbuf *control" "int flags" "struct thread *td" 117.Fc 118.Ft int 119.Fo sosend_dgram 120.Fa "struct socket *so" "struct sockaddr *addr" 121.Fa "struct uio *uio" "struct mbuf *top" "struct mbuf *control" 122.Fa "int flags" "struct thread *td" 123.Fc 124.Ft int 125.Fo sosend_generic 126.Fa "struct socket *so" "struct sockaddr *addr" 127.Fa "struct uio *uio" "struct mbuf *top" "struct mbuf *control" 128.Fa "int flags" "struct thread *td" 129.Fc 130.Ft int 131.Fn soshutdown "struct socket *so" "int how" 132.Ft void 133.Fn sotoxsocket "struct socket *so" "struct xsocket *xso" 134.Ft void 135.Fn soupcall_clear "struct socket *so" "int which" 136.Ft void 137.Fo soupcall_set 138.Fa "struct socket *so" "int which" 139.Fa "int (*func)(struct socket *, void *, int)" "void *arg" 140.Fc 141.Ft void 142.Fn sowakeup "struct socket *so" "struct sockbuf *sb" 143.In sys/sockopt.h 144.Ft int 145.Fn sosetopt "struct socket *so" "struct sockopt *sopt" 146.Ft int 147.Fn sogetopt "struct socket *so" "struct sockopt *sopt" 148.Ft int 149.Fn sooptcopyin "struct sockopt *sopt" "void *buf" "size_t len" "size_t minlen" 150.Ft int 151.Fn sooptcopyout "struct sockopt *sopt" "const void *buf" "size_t len" 152.Sh DESCRIPTION 153The kernel 154.Nm 155programming interface permits in-kernel consumers to interact with 156local and network socket objects in a manner similar to that permitted using 157the 158.Xr socket 2 159user API. 
160These interfaces are appropriate for use by distributed file systems and 161other network-aware kernel services. 162While the user API operates on file descriptors, the kernel interfaces 163operate directly on 164.Vt "struct socket" 165pointers. 166Some portions of the kernel API exist only to implement the user API, 167and are not expected to be used by kernel code. 168The portions of the socket API used by socket consumers and 169implementations of network protocols will differ; some routines 170are only useful for protocol implementors. 171.Pp 172Except where otherwise indicated, 173.Nm 174functions may sleep, and are not appropriate for use in an interrupt thread 175context or while holding non-sleepable kernel locks. 176.Ss Creating and Destroying Sockets 177A new socket may be created using 178.Fn socreate . 179As with 180.Xr socket 2 , 181arguments specify the requested domain, type, and protocol via 182.Fa dom , type , 183and 184.Fa proto . 185The socket is returned via 186.Fa aso 187on success. 188In addition, the credential used to authorize operations associated with the 189socket will be passed via 190.Fa cred 191(and will be cached for the lifetime of the socket), and the thread 192performing the operation via 193.Fa td . 194.Em Warning : 195authorization of the socket creation operation will be performed 196using the thread credential for some protocols (such as raw sockets). 197.Pp 198Sockets may be closed and freed using 199.Fn soclose , 200which has similar semantics to 201.Xr close 2 . 202.Pp 203In certain circumstances, it is appropriate to destroy a socket without 204waiting for it to disconnect, for which 205.Fn soabort 206is used. 207This is only appropriate for incoming connections which are in a 208partially connected state. 209It must be called on an unreferenced socket, by the thread which 210removed the socket from its listen queue, to prevent races. 211It will call into protocol code, so no socket locks may be held 212over the call. 
213The caller of 214.Fn soabort 215is responsible for setting the VNET context. 216The normal path to freeing a socket is 217.Fn sofree , 218which handles reference counting on the socket. 219It should be called whenever a reference is released, and also whenever 220reference flags are cleared in socket or protocol code. 221Calls to 222.Fn sofree 223should not be made from outside the socket layer; outside callers 224should use 225.Fn soclose 226instead. 227.Ss Connections and Addresses 228The 229.Fn sobind 230function is equivalent to the 231.Xr bind 2 232system call, and binds the socket 233.Fa so 234to the address 235.Fa nam . 236The operation will be authorized using the credential on thread 237.Fa td . 238.Pp 239The 240.Fn soconnect 241function is equivalent to the 242.Xr connect 2 243system call, and initiates a connection on the socket 244.Fa so 245to the address 246.Fa nam . 247The operation will be authorized using the credential on thread 248.Fa td . 249Unlike the user system call, 250.Fn soconnect 251returns immediately; the caller may 252.Xr msleep 9 253on 254.Fa so->so_timeo 255while holding the socket mutex and waiting for the 256.Dv SS_ISCONNECTING 257flag to clear or 258.Fa so->so_error 259to become non-zero. 260If 261.Fn soconnect 262fails, the caller must manually clear the 263.Dv SS_ISCONNECTING 264flag. 265.Pp 266A call to 267.Fn sodisconnect 268disconnects the socket without closing it. 269.Pp 270The 271.Fn soshutdown 272function is equivalent to the 273.Xr shutdown 2 274system call, and causes part or all of a connection on a socket to be closed 275down. 276.Pp 277Sockets are transitioned from non-listening status to listening with 278.Fn solisten . 279.Ss Socket Options 280The 281.Fn sogetopt 282function is equivalent to the 283.Xr getsockopt 2 284system call, and retrieves a socket option on socket 285.Fa so . 
286The 287.Fn sosetopt 288function is equivalent to the 289.Xr setsockopt 2 290system call, and sets a socket option on socket 291.Fa so . 292.Pp 293The second argument in both 294.Fn sogetopt 295and 296.Fn sosetopt 297is the 298.Fa sopt 299pointer to a 300.Vt "struct sockopt" 301describing the socket option operation. 302The caller-allocated structure must be zeroed, and then have its fields 303initialized to specify socket option operation arguments: 304.Bl -tag -width ".Va sopt_valsize" 305.It Va sopt_dir 306Set to 307.Dv SOPT_SET 308or 309.Dv SOPT_GET 310depending on whether this is a set or get operation. 311.It Va sopt_level 312Specify the level in the network stack the operation is targeted at; for 313example, 314.Dv SOL_SOCKET . 315.It Va sopt_name 316Specify the name of the socket option to get or set. 317.It Va sopt_val 318Kernel space pointer to the argument value for the socket option. 319.It Va sopt_valsize 320Size of the argument value in bytes. 321.El 322.Ss Socket Upcalls 323In order for the owner of a socket to be notified when the socket 324is ready to send or receive data, an upcall may be registered on 325the socket. 326The upcall is a function that will be called by the socket framework 327when a socket buffer associated with the given socket is ready for 328reading or writing. 329.Fn soupcall_set 330is used to register a socket upcall. 331The function 332.Va func 333is registered, and the pointer 334.Va arg 335will be passed as its second argument when it is called by the framework. 336The possible values for 337.Va which 338are 339.Dv SO_RCV 340and 341.Dv SO_SND , 342which register upcalls for receive and send events, respectively. 343The upcall function 344.Fn func 345must return either 346.Dv SU_OK 347or 348.Dv SU_ISCONNECTED , 349depending on whether or not a call to 350.Fn soisconnected 351should be made by the socket framework after the upcall returns. 
352The upcall 353.Va func 354cannot call 355.Fn soisconnected 356itself due to lock ordering with the socket buffer lock. 357Only 358.Dv SO_RCV 359upcalls should return 360.Dv SU_ISCONNECTED . 361When a 362.Dv SO_RCV 363upcall returns 364.Dv SU_ISCONNECTED , 365the upcall will be removed from the socket. 366.Pp 367Upcalls are removed from their socket by 368.Fn soupcall_clear . 369The 370.Va which 371argument again specifies whether the sending or receiving upcall is to 372be cleared, with 373.Dv SO_RCV 374or 375.Dv SO_SND . 376.Ss Socket Destructor Callback 377A kernel system can use the 378.Fn sodtor_set 379function to set a destructor for a socket. 380The destructor is called when the socket is about to be freed. 381The destructor is called before the protocol detach routine. 382The destructor can serve as a callback to initiate additional cleanup actions. 383.Ss Socket I/O 384The 385.Fn soreceive 386function is equivalent to the 387.Xr recvmsg 2 388system call, and attempts to receive bytes of data from the socket 389.Fa so , 390optionally blocking to wait for data if none is ready to read. 391Data may be retrieved directly to kernel or user memory via the 392.Fa uio 393argument, or as an mbuf chain returned to the caller via 394.Fa mp0 , 395avoiding a data copy. 396The 397.Fa uio 398must always be 399.Pf non- Dv NULL . 400If 401.Fa mp0 402is 403.Pf non- Dv NULL , 404only the 405.Fa uio_resid 406of 407.Fa uio 408is used. 409The caller may optionally retrieve a socket address on a protocol with the 410.Dv PR_ADDR 411capability by providing storage via a 412.Pf non- Dv NULL 413.Fa psa 414argument. 415The caller may optionally retrieve control data mbufs via a 416.Pf non- Dv NULL 417.Fa controlp 418argument. 419Optional flags may be passed to 420.Fn soreceive 421via a 422.Pf non- Dv NULL 423.Fa flagsp 424argument, and use the same flag name space as the 425.Xr recvmsg 2 426system call. 
427.Pp 428The 429.Fn sosend 430function is equivalent to the 431.Xr sendmsg 2 432system call, and attempts to send bytes of data via the socket 433.Fa so , 434optionally blocking if data cannot be immediately sent. 435Data may be sent directly from kernel or user memory via the 436.Fa uio 437argument, or as an mbuf chain via 438.Fa top , 439avoiding a data copy. 440Only one of the 441.Fa uio 442or 443.Fa top 444pointers may be 445.Pf non- Dv NULL . 446An optional destination address may be specified via a 447.Pf non- Dv NULL 448.Fa addr 449argument, which may result in an implicit connect if supported by the 450protocol. 451The caller may optionally send control data mbufs via a 452.Pf non- Dv NULL 453.Fa control 454argument. 455Flags may be passed to 456.Fn sosend 457using the 458.Fa flags 459argument, and use the same flag name space as the 460.Xr sendmsg 2 461system call. 462.Pp 463Kernel callers running in an interrupt thread context, or with a mutex held, 464will wish to use non-blocking sockets and pass the 465.Dv MSG_DONTWAIT 466flag in order to prevent these functions from sleeping. 467.Pp 468A socket can be queried for readability, writability, out-of-band data, 469or end-of-file using 470.Fn sopoll . 471The possible values for 472.Va events 473are as for 474.Xr poll 2 , 475with symbolic values 476.Dv POLLIN , 477.Dv POLLPRI , 478.Dv POLLOUT , 479.Dv POLLRDNORM , 480.Dv POLLWRNORM , 481.Dv POLLRDBAND , 482and 483.Dv POLLINIGNEOF 484taken from 485.In sys/poll.h . 486.Pp 487Calls to 488.Fn soaccept 489pass through to the protocol's accept routine to accept an incoming connection. 490.Ss Socket Utility Functions 491The uid of a socket's credential may be compared against a 492.Va uid 493with 494.Fn socheckuid . 495.Pp 496A copy of an existing 497.Vt struct sockaddr 498may be made using 499.Fn sodupsockaddr . 
500.Pp 501Protocol implementations notify the socket layer of the arrival of 502out-of-band data using 503.Fn sohasoutofband , 504so that the socket layer can notify socket consumers of the available data. 505.Pp 506An 507.Dq external-format 508version of a 509.Vt struct socket 510can be created using 511.Fn sotoxsocket , 512suitable for isolating user code from changes in the kernel structure. 513.Ss Protocol Implementations 514Protocols must supply an implementation for 515.Fn solisten ; 516such protocol implementations can call back into the socket layer using 517.Fn solisten_proto_check 518and 519.Fn solisten_proto 520to check and set the socket-layer listen state. 521These callbacks are provided so that the protocol implementation 522can order the socket layer and protocol locks as necessary. 523Protocols must supply an implementation of 524.Fn soreceive ; 525the functions 526.Fn soreceive_stream , 527.Fn soreceive_dgram , 528and 529.Fn soreceive_generic 530are supplied for use by such implementations. 531.Pp 532Protocol implementations can use 533.Fn sonewconn 534to create a socket and attach protocol state to that socket. 535This can be used to create new sockets available for 536.Fn soaccept 537on a listen socket. 538The returned socket has a reference count of zero. 539.Pp 540Protocols must supply an implementation for 541.Fn sopoll ; 542.Fn sopoll_generic 543is provided for use by protocol implementations. 544.Pp 545The functions 546.Fn sosend_dgram 547and 548.Fn sosend_generic 549are supplied to assist in protocol implementations of 550.Fn sosend . 551.Pp 552When a protocol creates a new socket structure, it is necessary to 553reserve socket buffer space for that socket, by calling 554.Fn soreserve . 555The rough inverse of this reservation is performed by 556.Fn sorflush , 557which is called automatically by the socket framework. 
558.Pp 559When a protocol needs to wake up threads waiting for the socket to 560become ready to read or write, variants of 561.Fn sowakeup 562are used. 563The 564.Fn sowakeup 565function should not be called directly by protocol code; instead, use the 566wrappers 567.Fn sorwakeup , 568.Fn sorwakeup_locked , 569.Fn sowwakeup , 570and 571.Fn sowwakeup_locked 572for readers and writers, with the corresponding socket buffer lock 573not already locked, or already held, respectively. 574.Pp 575The functions 576.Fn sooptcopyin 577and 578.Fn sooptcopyout 579are useful for transferring 580.Vt struct sockopt 581data between user and kernel code. 582.Sh SEE ALSO 583.Xr bind 2 , 584.Xr close 2 , 585.Xr connect 2 , 586.Xr getsockopt 2 , 587.Xr recv 2 , 588.Xr send 2 , 589.Xr setsockopt 2 , 590.Xr shutdown 2 , 591.Xr socket 2 , 592.Xr ng_ksocket 4 , 593.Xr intr_event 9 , 594.Xr msleep 9 , 595.Xr ucred 9 596.Sh HISTORY 597The 598.Xr socket 2 599system call appeared in 600.Bx 4.2 . 601This manual page was introduced in 602.Fx 7.0 . 603.Sh AUTHORS 604This manual page was written by 605.An Robert Watson 606and 607.An Benjamin Kaduk . 608.Sh BUGS 609The use of explicitly passed credentials, credentials hung from explicitly 610passed threads, the credential on 611.Dv curthread , 612and the cached credential from 613socket creation time is inconsistent, and may lead to unexpected behaviour. 614It is possible that several of the 615.Fa td 616arguments should be 617.Fa cred 618arguments, or simply not be present at all. 619.Pp 620The caller may need to manually clear 621.Dv SS_ISCONNECTING 622if 623.Fn soconnect 624returns an error. 625.Pp 626The 627.Dv MSG_DONTWAIT 628flag is not implemented for 629.Fn sosend , 630and may not always work with 631.Fn soreceive 632when zero copy sockets are enabled. 633.Pp 634This manual page does not describe how to register socket upcalls or monitor 635a socket for readability/writability without using blocking I/O. 
636.Pp 637The 638.Fn soref 639and 640.Fn sorele 641functions are not described, and in most cases should not be used, due to 642confusing and potentially incorrect interactions when 643.Fn sorele 644is last called after 645.Fn soclose . 646