xref: /freebsd/share/man/man9/socket.9 (revision a0409676120c1e558d0ade943019934e0f15118d)
1.\"-
2.\" Copyright (c) 2006 Robert N. M. Watson
3.\" Copyright (c) 2014 Benjamin J. Kaduk
4.\" All rights reserved.
5.\"
6.\" Redistribution and use in source and binary forms, with or without
7.\" modification, are permitted provided that the following conditions
8.\" are met:
9.\" 1. Redistributions of source code must retain the above copyright
10.\"    notice, this list of conditions and the following disclaimer.
11.\" 2. Redistributions in binary form must reproduce the above copyright
12.\"    notice, this list of conditions and the following disclaimer in the
13.\"    documentation and/or other materials provided with the distribution.
14.\"
15.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25.\" SUCH DAMAGE.
26.\"
27.\" $FreeBSD$
28.\"
29.Dd October 18, 2018
30.Dt SOCKET 9
31.Os
32.Sh NAME
33.Nm socket
34.Nd "kernel socket interface"
35.Sh SYNOPSIS
36.In sys/socket.h
37.In sys/socketvar.h
38.Ft void
39.Fn soabort "struct socket *so"
40.Ft int
41.Fn soaccept "struct socket *so" "struct sockaddr **nam"
42.Ft int
43.Fn socheckuid "struct socket *so" "uid_t uid"
44.Ft int
45.Fn sobind "struct socket *so" "struct sockaddr *nam" "struct thread *td"
46.Ft void
47.Fn soclose "struct socket *so"
48.Ft int
49.Fn soconnect "struct socket *so" "struct sockaddr *nam" "struct thread *td"
50.Ft int
51.Fo socreate
52.Fa "int dom" "struct socket **aso" "int type" "int proto"
53.Fa "struct ucred *cred" "struct thread *td"
54.Fc
55.Ft int
56.Fn sodisconnect "struct socket *so"
57.Ft void
58.Fo sodtor_set
59.Fa "struct socket *so"
60.Fa "void (*func)(struct socket *)"
61.Fc
62.Ft struct sockaddr *
63.Fn sodupsockaddr "const struct sockaddr *sa" "int mflags"
64.Ft void
65.Fn sofree "struct socket *so"
66.Ft void
67.Fn sohasoutofband "struct socket *so"
68.Ft int
69.Fn solisten "struct socket *so" "int backlog" "struct thread *td"
70.Ft void
71.Fn solisten_proto "struct socket *so" "int backlog"
72.Ft int
73.Fn solisten_proto_check "struct socket *so"
74.Ft struct socket *
75.Fn sonewconn "struct socket *head" "int connstatus"
76.Ft int
77.Fo sopoll
78.Fa "struct socket *so" "int events" "struct ucred *active_cred"
79.Fa "struct thread *td"
80.Fc
81.Ft int
82.Fo sopoll_generic
83.Fa "struct socket *so" "int events" "struct ucred *active_cred"
84.Fa "struct thread *td"
85.Fc
86.Ft int
87.Fo soreceive
88.Fa "struct socket *so" "struct sockaddr **psa" "struct uio *uio"
89.Fa "struct mbuf **mp0" "struct mbuf **controlp" "int *flagsp"
90.Fc
91.Ft int
92.Fo soreceive_stream
93.Fa "struct socket *so" "struct sockaddr **paddr"
94.Fa "struct uio *uio" "struct mbuf **mp0" "struct mbuf **controlp"
95.Fa "int *flagsp"
96.Fc
97.Ft int
98.Fo soreceive_dgram
99.Fa "struct socket *so" "struct sockaddr **paddr"
100.Fa "struct uio *uio" "struct mbuf **mp0" "struct mbuf **controlp"
101.Fa "int *flagsp"
102.Fc
103.Ft int
104.Fo soreceive_generic
105.Fa "struct socket *so" "struct sockaddr **paddr"
106.Fa "struct uio *uio" "struct mbuf **mp0" "struct mbuf **controlp"
107.Fa "int *flagsp"
108.Fc
109.Ft int
110.Fn soreserve "struct socket *so" "u_long sndcc" "u_long rcvcc"
111.Ft void
112.Fn sorflush "struct socket *so"
113.Ft int
114.Fo sosend
115.Fa "struct socket *so" "struct sockaddr *addr" "struct uio *uio"
116.Fa "struct mbuf *top" "struct mbuf *control" "int flags" "struct thread *td"
117.Fc
118.Ft int
119.Fo sosend_dgram
120.Fa "struct socket *so" "struct sockaddr *addr"
121.Fa "struct uio *uio" "struct mbuf *top" "struct mbuf *control"
122.Fa "int flags" "struct thread *td"
123.Fc
124.Ft int
125.Fo sosend_generic
126.Fa "struct socket *so" "struct sockaddr *addr"
127.Fa "struct uio *uio" "struct mbuf *top" "struct mbuf *control"
128.Fa "int flags" "struct thread *td"
129.Fc
130.Ft int
131.Fn soshutdown "struct socket *so" "int how"
132.Ft void
133.Fn sotoxsocket "struct socket *so" "struct xsocket *xso"
134.Ft void
135.Fn soupcall_clear "struct socket *so" "int which"
136.Ft void
137.Fo soupcall_set
138.Fa "struct socket *so" "int which"
139.Fa "int (*func)(struct socket *, void *, int)" "void *arg"
140.Fc
141.Ft void
142.Fn sowakeup "struct socket *so" "struct sockbuf *sb"
143.In sys/sockopt.h
144.Ft int
145.Fn sosetopt "struct socket *so" "struct sockopt *sopt"
146.Ft int
147.Fn sogetopt "struct socket *so" "struct sockopt *sopt"
148.Ft int
149.Fn sooptcopyin "struct sockopt *sopt" "void *buf" "size_t len" "size_t minlen"
150.Ft int
151.Fn sooptcopyout "struct sockopt *sopt" "const void *buf" "size_t len"
152.Sh DESCRIPTION
153The kernel
154.Nm
155programming interface permits in-kernel consumers to interact with
156local and network socket objects in a manner similar to that permitted using
157the
158.Xr socket 2
159user API.
160These interfaces are appropriate for use by distributed file systems and
161other network-aware kernel services.
162While the user API operates on file descriptors, the kernel interfaces
163operate directly on
164.Vt "struct socket"
165pointers.
166Some portions of the kernel API exist only to implement the user API,
167and are not expected to be used by kernel code.
168The portions of the socket API used by socket consumers and
169implementations of network protocols will differ; some routines
170are only useful for protocol implementors.
171.Pp
172Except where otherwise indicated,
173.Nm
174functions may sleep, and are not appropriate for use in an
175.Xr ithread 9
176context or while holding non-sleepable kernel locks.
177.Ss Creating and Destroying Sockets
178A new socket may be created using
179.Fn socreate .
180As with
181.Xr socket 2 ,
182arguments specify the requested domain, type, and protocol via
183.Fa dom , type ,
184and
185.Fa proto .
186The socket is returned via
187.Fa aso
188on success.
189In addition, the credential used to authorize operations associated with the
190socket will be passed via
191.Fa cred
192(and will be cached for the lifetime of the socket), and the thread
193performing the operation via
194.Fa td .
195.Em Warning :
196authorization of the socket creation operation will be performed
197using the thread credential for some protocols (such as raw sockets).
198.Pp
199Sockets may be closed and freed using
200.Fn soclose ,
201which has similar semantics to
202.Xr close 2 .
203.Pp
204In certain circumstances, it is appropriate to destroy a socket without
205waiting for it to disconnect, for which
206.Fn soabort
207is used.
208This is only appropriate for incoming connections which are in a
209partially connected state.
210It must be called on an unreferenced socket, by the thread which
211removed the socket from its listen queue, to prevent races.
212It will call into protocol code, so no socket locks may be held
213over the call.
214The caller of
215.Fn soabort
216is responsible for setting the VNET context.
217The normal path to freeing a socket is
218.Fn sofree ,
219which handles reference counting on the socket.
220It should be called whenever a reference is released, and also whenever
221reference flags are cleared in socket or protocol code.
222Calls to
223.Fn sofree
224should not be made from outside the socket layer; outside callers
225should use
226.Fn soclose
227instead.
228.Ss Connections and Addresses
229The
230.Fn sobind
231function is equivalent to the
232.Xr bind 2
233system call, and binds the socket
234.Fa so
235to the address
236.Fa nam .
237The operation will be authorized using the credential on thread
238.Fa td .
239.Pp
240The
241.Fn soconnect
242function is equivalent to the
243.Xr connect 2
244system call, and initiates a connection on the socket
245.Fa so
246to the address
247.Fa nam .
248The operation will be authorized using the credential on thread
249.Fa td .
250Unlike the user system call,
251.Fn soconnect
252returns immediately; the caller may
253.Xr msleep 9
254on
255.Fa so->so_timeo
256while holding the socket mutex and waiting for the
257.Dv SS_ISCONNECTING
258flag to clear or
259.Fa so->so_error
260to become non-zero.
261If
262.Fn soconnect
263fails, the caller must manually clear the
264.Dv SS_ISCONNECTING
265flag.
266.Pp
267A call to
268.Fn sodisconnect
269disconnects the socket without closing it.
270.Pp
271The
272.Fn soshutdown
273function is equivalent to the
274.Xr shutdown 2
275system call, and causes part or all of a connection on a socket to be closed
276down.
277.Pp
278Sockets are transitioned from non-listening status to listening with
279.Fn solisten .
280.Ss Socket Options
281The
282.Fn sogetopt
283function is equivalent to the
284.Xr getsockopt 2
285system call, and retrieves a socket option on socket
286.Fa so .
287The
288.Fn sosetopt
289function is equivalent to the
290.Xr setsockopt 2
291system call, and sets a socket option on socket
292.Fa so .
293.Pp
294The second argument in both
295.Fn sogetopt
296and
297.Fn sosetopt
298is the
299.Fa sopt
300pointer to a
301.Vt "struct sockopt"
302describing the socket option operation.
303The caller-allocated structure must be zeroed, and then have its fields
304initialized to specify socket option operation arguments:
305.Bl -tag -width ".Va sopt_valsize"
306.It Va sopt_dir
307Set to
308.Dv SOPT_SET
309or
310.Dv SOPT_GET
311depending on whether this is a get or set operation.
312.It Va sopt_level
313Specify the level in the network stack the operation is targeted at; for
314example,
315.Dv SOL_SOCKET .
316.It Va sopt_name
317Specify the name of the socket option to get or set.
318.It Va sopt_val
319Kernel space pointer to the argument value for the socket option.
320.It Va sopt_valsize
321Size of the argument value in bytes.
322.El
323.Ss Socket Upcalls
324In order for the owner of a socket to be notified when the socket
325is ready to send or receive data, an upcall may be registered on
326the socket.
327The upcall is a function that will be called by the socket framework
328when a socket buffer associated with the given socket is ready for
329reading or writing.
330.Fn soupcall_set
331is used to register a socket upcall.
332The function
333.Fa func
334is registered, and the pointer
335.Fa arg
336will be passed as its second argument when it is called by the framework.
337The possible values for
338.Fa which
339are
340.Dv SO_RCV
341and
342.Dv SO_SND ,
343which register upcalls for receive and send events, respectively.
344The upcall function
345.Fn func
346must return either
347.Dv SU_OK
348or
349.Dv SU_ISCONNECTED ,
350depending on whether or not a call to
351.Fn soisconnected
352should be made by the socket framework after the upcall returns.
353The upcall
354.Fa func
355cannot call
356.Fn soisconnected
357itself due to lock ordering with the socket buffer lock.
358Only
359.Dv SO_RCV
360upcalls should return
361.Dv SU_ISCONNECTED .
362When a
363.Dv SO_RCV
364upcall returns
365.Dv SU_ISCONNECTED ,
366the upcall will be removed from the socket.
367.Pp
368Upcalls are removed from their socket by
369.Fn soupcall_clear .
370The
371.Fa which
372argument again specifies whether the sending or receiving upcall is to
373be cleared, with
374.Dv SO_RCV
375or
376.Dv SO_SND .
377.Ss Socket Destructor Callback
378A kernel system can use the
379.Fn sodtor_set
380function to set a destructor for a socket.
381The destructor is called when the socket is about to be freed.
382The destructor is called before the protocol detach routine.
383The destructor can serve as a callback to initiate additional cleanup actions.
384.Ss Socket I/O
385The
386.Fn soreceive
387function is equivalent to the
388.Xr recvmsg 2
389system call, and attempts to receive bytes of data from the socket
390.Fa so ,
391optionally blocking while waiting for data if none is ready to read.
392Data may be retrieved directly to kernel or user memory via the
393.Fa uio
394argument, or as an mbuf chain returned to the caller via
395.Fa mp0 ,
396avoiding a data copy.
397The
398.Fa uio
399must always be
400.Pf non- Dv NULL .
401If
402.Fa mp0
403is
404.Pf non- Dv NULL ,
405only the
406.Fa uio_resid
407of
408.Fa uio
409is used.
410The caller may optionally retrieve a socket address on a protocol with the
411.Dv PR_ADDR
412capability by providing storage via a
413.Pf non- Dv NULL
414.Fa psa
415argument.
416The caller may optionally retrieve control data mbufs via a
417.Pf non- Dv NULL
418.Fa controlp
419argument.
420Optional flags may be passed to
421.Fn soreceive
422via a
423.Pf non- Dv NULL
424.Fa flagsp
425argument, and use the same flag name space as the
426.Xr recvmsg 2
427system call.
428.Pp
429The
430.Fn sosend
431function is equivalent to the
432.Xr sendmsg 2
433system call, and attempts to send bytes of data via the socket
434.Fa so ,
435optionally blocking if data cannot be immediately sent.
436Data may be sent directly from kernel or user memory via the
437.Fa uio
438argument, or as an mbuf chain via
439.Fa top ,
440avoiding a data copy.
441Only one of the
442.Fa uio
443or
444.Fa top
445pointers may be
446.Pf non- Dv NULL .
447An optional destination address may be specified via a
448.Pf non- Dv NULL
449.Fa addr
450argument, which may result in an implicit connect if supported by the
451protocol.
452The caller may optionally send control data mbufs via a
453.Pf non- Dv NULL
454.Fa control
455argument.
456Flags may be passed to
457.Fn sosend
458using the
459.Fa flags
460argument, and use the same flag name space as the
461.Xr sendmsg 2
462system call.
463.Pp
464Kernel callers running in
465.Xr ithread 9
466context, or with a mutex held, will wish to use non-blocking sockets and pass
467the
468.Dv MSG_DONTWAIT
469flag in order to prevent these functions from sleeping.
470.Pp
471A socket can be queried for readability, writability, out-of-band data,
472or end-of-file using
473.Fn sopoll .
474The possible values for
475.Fa events
476are as for
477.Xr poll 2 ,
478with symbolic values
479.Dv POLLIN ,
480.Dv POLLPRI ,
481.Dv POLLOUT ,
482.Dv POLLRDNORM ,
483.Dv POLLWRNORM ,
484.Dv POLLRDBAND ,
485and
486.Dv POLLINGEOF
487taken from
488.In sys/poll.h .
489.Pp
490Calls to
491.Fn soaccept
492pass through to the protocol's accept routine to accept an incoming connection.
493.Ss Socket Utility Functions
494The uid of a socket's credential may be compared against a
495.Fa uid
496with
497.Fn socheckuid .
498.Pp
499A copy of an existing
500.Vt struct sockaddr
501may be made using
502.Fn sodupsockaddr .
503.Pp
504Protocol implementations notify the socket layer of the arrival of
505out-of-band data using
506.Fn sohasoutofband ,
507so that the socket layer can notify socket consumers of the available data.
508.Pp
509An
510.Dq external-format
511version of a
512.Vt struct socket
513can be created using
514.Fn sotoxsocket ,
515suitable for isolating user code from changes in the kernel structure.
516.Ss Protocol Implementations
517Protocols must supply an implementation for
518.Fn solisten ;
519such protocol implementations can call back into the socket layer using
520.Fn solisten_proto_check
521and
522.Fn solisten_proto
523to check and set the socket-layer listen state.
524These callbacks are provided so that the protocol implementation
525can order the socket layer and protocol locks as necessary.
526Protocols must supply an implementation of
527.Fn soreceive ;
528the functions
529.Fn soreceive_stream ,
530.Fn soreceive_dgram ,
531and
532.Fn soreceive_generic
533are supplied for use by such implementations.
534.Pp
535Protocol implementations can use
536.Fn sonewconn
537to create a socket and attach protocol state to that socket.
538This can be used to create new sockets available for
539.Fn soaccept
540on a listen socket.
541The returned socket has a reference count of zero.
542.Pp
543Protocols must supply an implementation for
544.Fn sopoll ;
545.Fn sopoll_generic
546is provided for use by protocol implementations.
547.Pp
548The functions
549.Fn sosend_dgram
550and
551.Fn sosend_generic
552are supplied to assist in protocol implementations of
553.Fn sosend .
554.Pp
555When a protocol creates a new socket structure, it is necessary to
556reserve socket buffer space for that socket, by calling
557.Fn soreserve .
558The rough inverse of this reservation is performed by
559.Fn sorflush ,
560which is called automatically by the socket framework.
561.Pp
562When a protocol needs to wake up threads waiting for the socket to
563become ready to read or write, variants of
564.Fn sowakeup
565are used.
566The
567.Fn sowakeup
568function should not be called directly by protocol code; instead, use the
569wrappers
570.Fn sorwakeup ,
571.Fn sorwakeup_locked ,
572.Fn sowwakeup ,
573and
574.Fn sowwakeup_locked
575for readers and writers; the plain forms are called without the corresponding
576socket buffer lock held, and the _locked forms with it already held.
577.Pp
578The functions
579.Fn sooptcopyin
580and
581.Fn sooptcopyout
582are useful for transferring
583.Vt struct sockopt
584data between user and kernel code.
585.Sh SEE ALSO
586.Xr bind 2 ,
587.Xr close 2 ,
588.Xr connect 2 ,
589.Xr getsockopt 2 ,
590.Xr recv 2 ,
591.Xr send 2 ,
592.Xr setsockopt 2 ,
593.Xr shutdown 2 ,
594.Xr socket 2 ,
595.Xr ng_ksocket 4 ,
596.Xr ithread 9 ,
597.Xr msleep 9 ,
598.Xr ucred 9
599.Sh HISTORY
600The
601.Xr socket 2
602system call appeared in
603.Bx 4.2 .
604This manual page was introduced in
605.Fx 7.0 .
606.Sh AUTHORS
607This manual page was written by
608.An Robert Watson
609and
610.An Benjamin Kaduk .
611.Sh BUGS
612The use of explicitly passed credentials, credentials hung from explicitly
613passed threads, the credential on
614.Dv curthread ,
615and the cached credential from
616socket creation time is inconsistent, and may lead to unexpected behaviour.
617It is possible that several of the
618.Fa td
619arguments should be
620.Fa cred
621arguments, or simply not be present at all.
622.Pp
623The caller may need to manually clear
624.Dv SS_ISCONNECTING
625if
626.Fn soconnect
627returns an error.
628.Pp
629The
630.Dv MSG_DONTWAIT
631flag is not implemented for
632.Fn sosend ,
633and may not always work with
634.Fn soreceive
635when zero copy sockets are enabled.
636.Pp
637This manual page does not describe how to register socket upcalls or monitor
638a socket for readability/writability without using blocking I/O.
639.Pp
640The
641.Fn soref
642and
643.Fn sorele
644functions are not described, and in most cases should not be used, due to
645confusing and potentially incorrect interactions when
646.Fn sorele
647is last called after
648.Fn soclose .
649