xref: /freebsd/share/man/man9/socket.9 (revision b3e7694832e81d7a904a10f525f8797b753bf0d3)
1.\"-
2.\" Copyright (c) 2006 Robert N. M. Watson
3.\" Copyright (c) 2014 Benjamin J. Kaduk
4.\" All rights reserved.
5.\"
6.\" Redistribution and use in source and binary forms, with or without
7.\" modification, are permitted provided that the following conditions
8.\" are met:
9.\" 1. Redistributions of source code must retain the above copyright
10.\"    notice, this list of conditions and the following disclaimer.
11.\" 2. Redistributions in binary form must reproduce the above copyright
12.\"    notice, this list of conditions and the following disclaimer in the
13.\"    documentation and/or other materials provided with the distribution.
14.\"
15.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25.\" SUCH DAMAGE.
26.\"
27.\" $FreeBSD$
28.\"
29.Dd September 6, 2022
30.Dt SOCKET 9
31.Os
32.Sh NAME
33.Nm socket
34.Nd "kernel socket interface"
35.Sh SYNOPSIS
36.In sys/socket.h
37.In sys/socketvar.h
38.Ft void
39.Fn soabort "struct socket *so"
40.Ft int
41.Fn soaccept "struct socket *so" "struct sockaddr **nam"
42.Ft int
43.Fn socheckuid "struct socket *so" "uid_t uid"
44.Ft int
45.Fn sobind "struct socket *so" "struct sockaddr *nam" "struct thread *td"
46.Ft void
47.Fn soclose "struct socket *so"
48.Ft int
49.Fn soconnect "struct socket *so" "struct sockaddr *nam" "struct thread *td"
50.Ft int
51.Fo socreate
52.Fa "int dom" "struct socket **aso" "int type" "int proto"
53.Fa "struct ucred *cred" "struct thread *td"
54.Fc
55.Ft int
56.Fn sodisconnect "struct socket *so"
57.Ft void
58.Fo sodtor_set
59.Fa "struct socket *so"
60.Fa "void (*func)(struct socket *)"
61.Fc
62.Ft struct  sockaddr *
63.Fn sodupsockaddr "const struct sockaddr *sa" "int mflags"
64.Ft void
65.Fn sofree "struct socket *so"
66.Ft void
67.Fn sohasoutofband "struct socket *so"
68.Ft int
69.Fn solisten "struct socket *so" "int backlog" "struct thread *td"
70.Ft void
71.Fn solisten_proto "struct socket *so" "int backlog"
72.Ft int
73.Fn solisten_proto_check "struct socket *so"
74.Ft struct socket *
75.Fn sonewconn "struct socket *head" "int connstatus"
76.Ft int
77.Fo sopoll
78.Fa "struct socket *so" "int events" "struct ucred *active_cred"
79.Fa "struct thread *td"
80.Fc
81.Ft int
82.Fo sopoll_generic
83.Fa "struct socket *so" "int events" "struct ucred *active_cred"
84.Fa "struct thread *td"
85.Fc
86.Ft int
87.Fo soreceive
88.Fa "struct socket *so" "struct sockaddr **psa" "struct uio *uio"
89.Fa "struct mbuf **mp0" "struct mbuf **controlp" "int *flagsp"
90.Fc
91.Ft int
92.Fo soreceive_stream
93.Fa "struct socket *so" "struct sockaddr **paddr"
94.Fa "struct uio *uio" "struct mbuf **mp0" "struct mbuf **controlp"
95.Fa "int *flagsp"
96.Fc
97.Ft int
98.Fo soreceive_dgram
99.Fa "struct socket *so" "struct sockaddr **paddr"
100.Fa "struct uio *uio" "struct mbuf **mp0" "struct mbuf **controlp"
101.Fa "int *flagsp"
102.Fc
103.Ft int
104.Fo soreceive_generic
105.Fa "struct socket *so" "struct sockaddr **paddr"
106.Fa "struct uio *uio" "struct mbuf **mp0" "struct mbuf **controlp"
107.Fa "int *flagsp"
108.Fc
109.Ft int
110.Fn soreserve "struct socket *so" "u_long sndcc" "u_long rcvcc"
111.Ft void
112.Fn sorflush "struct socket *so"
113.Ft int
114.Fo sosend
115.Fa "struct socket *so" "struct sockaddr *addr" "struct uio *uio"
116.Fa "struct mbuf *top" "struct mbuf *control" "int flags" "struct thread *td"
117.Fc
118.Ft int
119.Fo sosend_dgram
120.Fa "struct socket *so" "struct sockaddr *addr"
121.Fa "struct uio *uio" "struct mbuf *top" "struct mbuf *control"
122.Fa "int flags" "struct thread *td"
123.Fc
124.Ft int
125.Fo sosend_generic
126.Fa "struct socket *so" "struct sockaddr *addr"
127.Fa "struct uio *uio" "struct mbuf *top" "struct mbuf *control"
128.Fa "int flags" "struct thread *td"
129.Fc
130.Ft int
131.Fn soshutdown "struct socket *so" "int how"
132.Ft void
133.Fn sotoxsocket "struct socket *so" "struct xsocket *xso"
134.Ft void
135.Fn soupcall_clear "struct socket *so" "int which"
136.Ft void
137.Fo soupcall_set
138.Fa "struct socket *so" "int which"
139.Fa "int (*func)(struct socket *, void *, int)" "void *arg"
140.Fc
141.Ft void
142.Fn sowakeup "struct socket *so" "struct sockbuf *sb"
143.In sys/sockopt.h
144.Ft int
145.Fn sosetopt "struct socket *so" "struct sockopt *sopt"
146.Ft int
147.Fn sogetopt "struct socket *so" "struct sockopt *sopt"
148.Ft int
149.Fn sooptcopyin "struct sockopt *sopt" "void *buf" "size_t len" "size_t minlen"
150.Ft int
151.Fn sooptcopyout "struct sockopt *sopt" "const void *buf" "size_t len"
152.Sh DESCRIPTION
153The kernel
154.Nm
155programming interface permits in-kernel consumers to interact with
156local and network socket objects in a manner similar to that permitted using
157the
158.Xr socket 2
159user API.
160These interfaces are appropriate for use by distributed file systems and
161other network-aware kernel services.
162While the user API operates on file descriptors, the kernel interfaces
163operate directly on
164.Vt "struct socket"
165pointers.
166Some portions of the kernel API exist only to implement the user API,
167and are not expected to be used by kernel code.
168The portions of the socket API used by socket consumers and
169implementations of network protocols will differ; some routines
170are only useful for protocol implementors.
171.Pp
172Except where otherwise indicated,
173.Nm
174functions may sleep, and are not appropriate for use in an interrupt thread
175context or while holding non-sleepable kernel locks.
176.Ss Creating and Destroying Sockets
177A new socket may be created using
178.Fn socreate .
179As with
180.Xr socket 2 ,
181arguments specify the requested domain, type, and protocol via
182.Fa dom , type ,
183and
184.Fa proto .
185The socket is returned via
186.Fa aso
187on success.
188In addition, the credential used to authorize operations associated with the
189socket will be passed via
190.Fa cred
191(and will be cached for the lifetime of the socket), and the thread
192performing the operation via
193.Fa td .
194.Em Warning :
195authorization of the socket creation operation will be performed
196using the thread credential for some protocols (such as raw sockets).
197.Pp
198Sockets may be closed and freed using
199.Fn soclose ,
200which has similar semantics to
201.Xr close 2 .
202.Pp
203In certain circumstances, it is appropriate to destroy a socket without
204waiting for it to disconnect, for which
205.Fn soabort
206is used.
207This is only appropriate for incoming connections which are in a
208partially connected state.
209It must be called on an unreferenced socket, by the thread which
210removed the socket from its listen queue, to prevent races.
211It will call into protocol code, so no socket locks may be held
212over the call.
213The caller of
214.Fn soabort
215is responsible for setting the VNET context.
216The normal path to freeing a socket is
217.Fn sofree ,
218which handles reference counting on the socket.
219It should be called whenever a reference is released, and also whenever
220reference flags are cleared in socket or protocol code.
221Calls to
222.Fn sofree
223should not be made from outside the socket layer; outside callers
224should use
225.Fn soclose
226instead.
227.Ss Connections and Addresses
228The
229.Fn sobind
230function is equivalent to the
231.Xr bind 2
232system call, and binds the socket
233.Fa so
234to the address
235.Fa nam .
236The operation will be authorized using the credential on thread
237.Fa td .
238.Pp
239The
240.Fn soconnect
241function is equivalent to the
242.Xr connect 2
243system call, and initiates a connection on the socket
244.Fa so
245to the address
246.Fa nam .
247The operation will be authorized using the credential on thread
248.Fa td .
249Unlike the user system call,
250.Fn soconnect
251returns immediately; the caller may
252.Xr msleep 9
253on
254.Fa so->so_timeo
255while holding the socket mutex and waiting for the
256.Dv SS_ISCONNECTING
257flag to clear or
258.Fa so->so_error
259to become non-zero.
260If
261.Fn soconnect
262fails, the caller must manually clear the
263.Dv SS_ISCONNECTING
264flag.
265.Pp
266A call to
267.Fn sodisconnect
268disconnects the socket without closing it.
269.Pp
270The
271.Fn soshutdown
272function is equivalent to the
273.Xr shutdown 2
274system call, and causes part or all of a connection on a socket to be closed
275down.
276.Pp
277Sockets are transitioned from non-listening status to listening with
278.Fn solisten .
279.Ss Socket Options
280The
281.Fn sogetopt
282function is equivalent to the
283.Xr getsockopt 2
284system call, and retrieves a socket option on socket
285.Fa so .
286The
287.Fn sosetopt
288function is equivalent to the
289.Xr setsockopt 2
290system call, and sets a socket option on socket
291.Fa so .
292.Pp
293The second argument in both
294.Fn sogetopt
295and
296.Fn sosetopt
297is the
298.Fa sopt
299pointer to a
300.Vt "struct sockopt"
301describing the socket option operation.
302The caller-allocated structure must be zeroed, and then have its fields
303initialized to specify socket option operation arguments:
304.Bl -tag -width ".Va sopt_valsize"
305.It Va sopt_dir
306Set to
307.Dv SOPT_SET
308or
309.Dv SOPT_GET
310depending on whether this is a get or set operation.
311.It Va sopt_level
312Specify the level in the network stack the operation is targeted at; for
313example,
314.Dv SOL_SOCKET .
315.It Va sopt_name
316Specify the name of the socket option to get or set.
317.It Va sopt_val
318Kernel space pointer to the argument value for the socket option.
319.It Va sopt_valsize
320Size of the argument value in bytes.
321.El
322.Ss Socket Upcalls
323In order for the owner of a socket to be notified when the socket
324is ready to send or receive data, an upcall may be registered on
325the socket.
326The upcall is a function that will be called by the socket framework
327when a socket buffer associated with the given socket is ready for
328reading or writing.
329.Fn soupcall_set
330is used to register a socket upcall.
331The function
332.Va func
333is registered, and the pointer
334.Va arg
335will be passed as its second argument when it is called by the framework.
336The possible values for
337.Va which
338are
339.Dv SO_RCV
340and
341.Dv SO_SND ,
342which register upcalls for receive and send events, respectively.
343The upcall function
344.Fn func
345must return either
346.Dv SU_OK
347or
348.Dv SU_ISCONNECTED ,
349depending on whether or not a call to
350.Fn soisconnected
351should be made by the socket framework after the upcall returns.
352The upcall
353.Va func
354cannot call
355.Fn soisconnected
356itself due to lock ordering with the socket buffer lock.
357Only
358.Dv SO_RCV
359upcalls should return
360.Dv SU_ISCONNECTED .
361When a
362.Dv SO_RCV
363upcall returns
364.Dv SU_ISCONNECTED ,
365the upcall will be removed from the socket.
366.Pp
367Upcalls are removed from their socket by
368.Fn soupcall_clear .
369The
370.Va which
371argument again specifies whether the sending or receiving upcall is to
372be cleared, with
373.Dv SO_RCV
374or
375.Dv SO_SND .
376.Ss Socket Destructor Callback
377A kernel system can use the
378.Fn sodtor_set
379function to set a destructor for a socket.
380The destructor is called when the socket is about to be freed.
381The destructor is called before the protocol detach routine.
382The destructor can serve as a callback to initiate additional cleanup actions.
383.Ss Socket I/O
384The
385.Fn soreceive
386function is equivalent to the
387.Xr recvmsg 2
388system call, and attempts to receive bytes of data from the socket
389.Fa so ,
390optionally blocking to wait for data if none is ready to read.
391Data may be retrieved directly to kernel or user memory via the
392.Fa uio
393argument, or as an mbuf chain returned to the caller via
394.Fa mp0 ,
395avoiding a data copy.
396The
397.Fa uio
398must always be
399.Pf non- Dv NULL .
400If
401.Fa mp0
402is
403.Pf non- Dv NULL ,
404only the
405.Fa uio_resid
406of
407.Fa uio
408is used.
409The caller may optionally retrieve a socket address on a protocol with the
410.Dv PR_ADDR
411capability by providing storage via
412.Pf non- Dv NULL
413.Fa psa
414argument.
415The caller may optionally retrieve control data mbufs via a
416.Pf non- Dv NULL
417.Fa controlp
418argument.
419Optional flags may be passed to
420.Fn soreceive
421via a
422.Pf non- Dv NULL
423.Fa flagsp
424argument, and use the same flag name space as the
425.Xr recvmsg 2
426system call.
427.Pp
428The
429.Fn sosend
430function is equivalent to the
431.Xr sendmsg 2
432system call, and attempts to send bytes of data via the socket
433.Fa so ,
434optionally blocking if data cannot be immediately sent.
435Data may be sent directly from kernel or user memory via the
436.Fa uio
437argument, or as an mbuf chain via
438.Fa top ,
439avoiding a data copy.
440Only one of the
441.Fa uio
442or
443.Fa top
444pointers may be
445.Pf non- Dv NULL .
446An optional destination address may be specified via a
447.Pf non- Dv NULL
448.Fa addr
449argument, which may result in an implicit connect if supported by the
450protocol.
451The caller may optionally send control data mbufs via a
452.Pf non- Dv NULL
453.Fa control
454argument.
455Flags may be passed to
456.Fn sosend
457using the
458.Fa flags
459argument, and use the same flag name space as the
460.Xr sendmsg 2
461system call.
462.Pp
463Kernel callers running in an interrupt thread context, or with a mutex held,
464will wish to use non-blocking sockets and pass the
465.Dv MSG_DONTWAIT
466flag in order to prevent these functions from sleeping.
467.Pp
468A socket can be queried for readability, writability, out-of-band data,
469or end-of-file using
470.Fn sopoll .
471The possible values for
472.Va events
473are as for
474.Xr poll 2 ,
475with symbolic values
476.Dv POLLIN ,
477.Dv POLLPRI ,
478.Dv POLLOUT ,
479.Dv POLLRDNORM ,
480.Dv POLLWRNORM ,
481.Dv POLLRDBAND ,
482and
483.Dv POLLINIGNEOF
484taken from
485.In sys/poll.h .
486.Pp
487Calls to
488.Fn soaccept
489pass through to the protocol's accept routine to accept an incoming connection.
490.Ss Socket Utility Functions
491The uid of a socket's credential may be compared against a
492.Va uid
493with
494.Fn socheckuid .
495.Pp
496A copy of an existing
497.Vt struct sockaddr
498may be made using
499.Fn sodupsockaddr .
500.Pp
501Protocol implementations notify the socket layer of the arrival of
502out-of-band data using
503.Fn sohasoutofband ,
504so that the socket layer can notify socket consumers of the available data.
505.Pp
506An
507.Dq external-format
508version of a
509.Vt struct socket
510can be created using
511.Fn sotoxsocket ,
512suitable for isolating user code from changes in the kernel structure.
513.Ss Protocol Implementations
514Protocols must supply an implementation for
515.Fn solisten ;
516such protocol implementations can call back into the socket layer using
517.Fn solisten_proto_check
518and
519.Fn solisten_proto
520to check and set the socket-layer listen state.
521These callbacks are provided so that the protocol implementation
522can order the socket layer and protocol locks as necessary.
523Protocols must supply an implementation of
524.Fn soreceive ;
525the functions
526.Fn soreceive_stream ,
527.Fn soreceive_dgram ,
528and
529.Fn soreceive_generic
530are supplied for use by such implementations.
531.Pp
532Protocol implementations can use
533.Fn sonewconn
534to create a socket and attach protocol state to that socket.
535This can be used to create new sockets available for
536.Fn soaccept
537on a listen socket.
538The returned socket has a reference count of zero.
539.Pp
540Protocols must supply an implementation for
541.Fn sopoll ;
542.Fn sopoll_generic
543is provided for use by protocol implementations.
544.Pp
545The functions
546.Fn sosend_dgram
547and
548.Fn sosend_generic
549are supplied to assist in protocol implementations of
550.Fn sosend .
551.Pp
552When a protocol creates a new socket structure, it is necessary to
553reserve socket buffer space for that socket, by calling
554.Fn soreserve .
555The rough inverse of this reservation is performed by
556.Fn sorflush ,
557which is called automatically by the socket framework.
558.Pp
559When a protocol needs to wake up threads waiting for the socket to
560become ready to read or write, variants of
561.Fn sowakeup
562are used.
563The
564.Fn sowakeup
565function should not be called directly by protocol code; instead, use the
566wrappers
567.Fn sorwakeup ,
568.Fn sorwakeup_locked ,
569.Fn sowwakeup ,
570and
571.Fn sowwakeup_locked
572for readers and writers, with the corresponding socket buffer lock
573not already locked, or already held, respectively.
574.Pp
575The functions
576.Fn sooptcopyin
577and
578.Fn sooptcopyout
579are useful for transferring
580.Vt struct sockopt
581data between user and kernel code.
582.Sh SEE ALSO
583.Xr bind 2 ,
584.Xr close 2 ,
585.Xr connect 2 ,
586.Xr getsockopt 2 ,
587.Xr recv 2 ,
588.Xr send 2 ,
589.Xr setsockopt 2 ,
590.Xr shutdown 2 ,
591.Xr socket 2 ,
592.Xr ng_ksocket 4 ,
593.Xr intr_event 9 ,
594.Xr msleep 9 ,
595.Xr ucred 9
596.Sh HISTORY
597The
598.Xr socket 2
599system call appeared in
600.Bx 4.2 .
601This manual page was introduced in
602.Fx 7.0 .
603.Sh AUTHORS
604This manual page was written by
605.An Robert Watson
606and
607.An Benjamin Kaduk .
608.Sh BUGS
609The use of explicitly passed credentials, credentials hung from explicitly
610passed threads, the credential on
611.Dv curthread ,
612and the cached credential from
613socket creation time is inconsistent, and may lead to unexpected behaviour.
614It is possible that several of the
615.Fa td
616arguments should be
617.Fa cred
618arguments, or simply not be present at all.
619.Pp
620The caller may need to manually clear
621.Dv SS_ISCONNECTING
622if
623.Fn soconnect
624returns an error.
625.Pp
626The
627.Dv MSG_DONTWAIT
628flag is not implemented for
629.Fn sosend ,
630and may not always work with
631.Fn soreceive
632when zero copy sockets are enabled.
633.Pp
634This manual page does not describe how to register socket upcalls or monitor
635a socket for readability/writability without using blocking I/O.
636.Pp
637The
638.Fn soref
639and
640.Fn sorele
641functions are not described, and in most cases should not be used, due to
642confusing and potentially incorrect interactions when
643.Fn sorele
644is last called after
645.Fn soclose .
646