xref: /freebsd/share/man/man9/socket.9 (revision da7d7b9c861cf98e912c0bd1e549752d2dae4fb6)
1.\"-
2.\" Copyright (c) 2006 Robert N. M. Watson
3.\" Copyright (c) 2014 Benjamin J. Kaduk
4.\" All rights reserved.
5.\"
6.\" Redistribution and use in source and binary forms, with or without
7.\" modification, are permitted provided that the following conditions
8.\" are met:
9.\" 1. Redistributions of source code must retain the above copyright
10.\"    notice, this list of conditions and the following disclaimer.
11.\" 2. Redistributions in binary form must reproduce the above copyright
12.\"    notice, this list of conditions and the following disclaimer in the
13.\"    documentation and/or other materials provided with the distribution.
14.\"
15.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25.\" SUCH DAMAGE.
26.\"
27.\" $FreeBSD$
28.\"
29.Dd May 26, 2014
30.Dt SOCKET 9
31.Os
32.Sh NAME
33.Nm socket
34.Nd "kernel socket interface"
35.Sh SYNOPSIS
36.In sys/socket.h
37.In sys/socketvar.h
38.Ft void
39.Fn soabort "struct socket *so"
40.Ft int
41.Fn soaccept "struct socket *so" "struct sockaddr **nam"
42.Ft int
43.Fn socheckuid "struct socket *so" "uid_t uid"
44.Ft int
45.Fn sobind "struct socket *so" "struct sockaddr *nam" "struct thread *td"
46.Ft void
47.Fn soclose "struct socket *so"
48.Ft int
49.Fn soconnect "struct socket *so" "struct sockaddr *nam" "struct thread *td"
50.Ft int
51.Fo socreate
52.Fa "int dom" "struct socket **aso" "int type" "int proto"
53.Fa "struct ucred *cred" "struct thread *td"
54.Fc
55.Ft int
56.Fn sodisconnect "struct socket *so"
57.Ft struct  sockaddr *
58.Fn sodupsockaddr "const struct sockaddr *sa" "int mflags"
59.Ft void
60.Fn sofree "struct socket *so"
61.Ft void
62.Fn sohasoutofband "struct socket *so"
63.Ft int
64.Fn solisten "struct socket *so" "int backlog" "struct thread *td"
65.Ft void
66.Fn solisten_proto "struct socket *so" "int backlog"
67.Ft int
68.Fn solisten_proto_check "struct socket *so"
69.Ft struct socket *
70.Fn sonewconn "struct socket *head" "int connstatus"
71.Ft int
72.Fo sopoll
73.Fa "struct socket *so" "int events" "struct ucred *active_cred"
74.Fa "struct thread *td"
75.Fc
76.Ft int
77.Fo sopoll_generic
78.Fa "struct socket *so" "int events" "struct ucred *active_cred"
79.Fa "struct thread *td"
80.Fc
81.Ft int
82.Fo soreceive
83.Fa "struct socket *so" "struct sockaddr **psa" "struct uio *uio"
84.Fa "struct mbuf **mp0" "struct mbuf **controlp" "int *flagsp"
85.Fc
86.Ft int
87.Fo soreceive_stream
88.Fa "struct socket *so" "struct sockaddr **paddr"
89.Fa "struct uio *uio" "struct mbuf **mp0" "struct mbuf **controlp"
90.Fa "int *flagsp"
91.Fc
92.Ft int
93.Fo soreceive_dgram
94.Fa "struct socket *so" "struct sockaddr **paddr"
95.Fa "struct uio *uio" "struct mbuf **mp0" "struct mbuf **controlp"
96.Fa "int *flagsp"
97.Fc
98.Ft int
99.Fo soreceive_generic
100.Fa "struct socket *so" "struct sockaddr **paddr"
101.Fa "struct uio *uio" "struct mbuf **mp0" "struct mbuf **controlp"
102.Fa "int *flagsp"
103.Fc
104.Ft int
105.Fn soreserve "struct socket *so" "u_long sndcc" "u_long rcvcc"
106.Ft void
107.Fn sorflush "struct socket *so"
108.Ft int
109.Fo sosend
110.Fa "struct socket *so" "struct sockaddr *addr" "struct uio *uio"
111.Fa "struct mbuf *top" "struct mbuf *control" "int flags" "struct thread *td"
112.Fc
113.Ft int
114.Fo sosend_dgram
115.Fa "struct socket *so" "struct sockaddr *addr"
116.Fa "struct uio *uio" "struct mbuf *top" "struct mbuf *control"
117.Fa "int flags" "struct thread *td"
118.Fc
119.Ft int
120.Fo sosend_generic
121.Fa "struct socket *so" "struct sockaddr *addr"
122.Fa "struct uio *uio" "struct mbuf *top" "struct mbuf *control"
123.Fa "int flags" "struct thread *td"
124.Fc
125.Ft int
126.Fn soshutdown "struct socket *so" "int how"
127.Ft void
128.Fn sotoxsocket "struct socket *so" "struct xsocket *xso"
129.Ft void
130.Fn soupcall_clear "struct socket *so" "int which"
131.Ft void
132.Fo soupcall_set
133.Fa "struct socket *so" "int which"
134.Fa "int (*func)(struct socket *, void *, int)" "void *arg"
135.Fc
136.Ft void
137.Fn sowakeup "struct socket *so" "struct sockbuf *sb"
138.In sys/sockopt.h
139.Ft int
140.Fn sosetopt "struct socket *so" "struct sockopt *sopt"
141.Ft int
142.Fn sogetopt "struct socket *so" "struct sockopt *sopt"
143.Ft int
144.Fn sooptcopyin "struct sockopt *sopt" "void *buf" "size_t len" "size_t minlen"
145.Ft int
146.Fn sooptcopyout "struct sockopt *sopt" "const void *buf" "size_t len"
147.Sh DESCRIPTION
148The kernel
149.Nm
150programming interface permits in-kernel consumers to interact with
151local and network socket objects in a manner similar to that permitted using
152the
153.Xr socket 2
154user API.
155These interfaces are appropriate for use by distributed file systems and
156other network-aware kernel services.
157While the user API operates on file descriptors, the kernel interfaces
158operate directly on
159.Vt "struct socket"
160pointers.
161Some portions of the kernel API exist only to implement the user API,
162and are not expected to be used by kernel code.
163The portions of the socket API used by socket consumers and
164implementations of network protocols will differ; some routines
165are only useful for protocol implementors.
166.Pp
167Except where otherwise indicated,
168.Nm
169functions may sleep, and are not appropriate for use in an
170.Xr ithread 9
171context or while holding non-sleepable kernel locks.
172.Ss Creating and Destroying Sockets
173A new socket may be created using
174.Fn socreate .
175As with
176.Xr socket 2 ,
177arguments specify the requested domain, type, and protocol via
178.Fa dom , type ,
179and
180.Fa proto .
181The socket is returned via
182.Fa aso
183on success.
184In addition, the credential used to authorize operations associated with the
185socket will be passed via
186.Fa cred
187(and will be cached for the lifetime of the socket), and the thread
188performing the operation via
189.Fa td .
190.Em Warning :
191authorization of the socket creation operation will be performed
192using the thread credential for some protocols (such as raw sockets).
193.Pp
194Sockets may be closed and freed using
195.Fn soclose ,
196which has similar semantics to
197.Xr close 2 .
198.Pp
199In certain circumstances, it is appropriate to destroy a socket without
200waiting for it to disconnect, for which
201.Fn soabort
202is used.
203This is only appropriate for incoming connections which are in a
204partially connected state.
205It must be called on an unreferenced socket, by the thread which
206removed the socket from its listen queue, to prevent races.
207It will call into protocol code, so no socket locks may be held
208over the call.
209The caller of
210.Fn soabort
211is responsible for setting the VNET context.
212The normal path to freeing a socket is
213.Fn sofree ,
214which handles reference counting on the socket.
215It should be called whenever a reference is released, and also whenever
216reference flags are cleared in socket or protocol code.
217Calls to
218.Fn sofree
219should not be made from outside the socket layer; outside callers
220should use
221.Fn soclose
222instead.
223.Ss Connections and Addresses
224The
225.Fn sobind
226function is equivalent to the
227.Xr bind 2
228system call, and binds the socket
229.Fa so
230to the address
231.Fa nam .
232The operation will be authorized using the credential on thread
233.Fa td .
234.Pp
235The
236.Fn soconnect
237function is equivalent to the
238.Xr connect 2
239system call, and initiates a connection on the socket
240.Fa so
241to the address
242.Fa nam .
243The operation will be authorized using the credential on thread
244.Fa td .
245Unlike the user system call,
246.Fn soconnect
247returns immediately; the caller may
248.Xr msleep 9
249on
250.Fa so->so_timeo
251while holding the socket mutex and waiting for the
252.Dv SS_ISCONNECTING
253flag to clear or
254.Fa so->so_error
255to become non-zero.
256If
257.Fn soconnect
258fails, the caller must manually clear the
259.Dv SS_ISCONNECTING
260flag.
261.Pp
262A call to
263.Fn sodisconnect
264disconnects the socket without closing it.
265.Pp
266The
267.Fn soshutdown
268function is equivalent to the
269.Xr shutdown 2
270system call, and causes part or all of a connection on a socket to be closed
271down.
272.Pp
273Sockets are transitioned from non-listening status to listening with
274.Fn solisten .
275.Ss Socket Options
276The
277.Fn sogetopt
278function is equivalent to the
279.Xr getsockopt 2
280system call, and retrieves a socket option on socket
281.Fa so .
282The
283.Fn sosetopt
284function is equivalent to the
285.Xr setsockopt 2
286system call, and sets a socket option on socket
287.Fa so .
288.Pp
289The second argument in both
290.Fn sogetopt
291and
292.Fn sosetopt
293is the
294.Fa sopt
295pointer to a
296.Vt "struct sockopt"
297describing the socket option operation.
298The caller-allocated structure must be zeroed, and then have its fields
299initialized to specify socket option operation arguments:
300.Bl -tag -width ".Va sopt_valsize"
301.It Va sopt_dir
302Set to
303.Dv SOPT_SET
304or
305.Dv SOPT_GET
306depending on whether this is a get or set operation.
307.It Va sopt_level
308Specify the level in the network stack the operation is targeted at; for
309example,
310.Dv SOL_SOCKET .
311.It Va sopt_name
312Specify the name of the socket option to get or set.
313.It Va sopt_val
314Kernel space pointer to the argument value for the socket option.
315.It Va sopt_valsize
316Size of the argument value in bytes.
317.El
318.Ss Socket Upcalls
319In order for the owner of a socket to be notified when the socket
320is ready to send or receive data, an upcall may be registered on
321the socket.
322The upcall is a function that will be called by the socket framework
323when a socket buffer associated with the given socket is ready for
324reading or writing.
325.Fn soupcall_set
326is used to register a socket upcall.
327The function
328.Va func
329is registered, and the pointer
330.Va arg
331will be passed as its second argument when it is called by the framework.
332The possible values for
333.Va which
334are
335.Dv SO_RCV
336and
337.Dv SO_SND ,
338which register upcalls for receive and send events, respectively.
339The upcall function
340.Fn func
341must return either
342.Dv SU_OK
343or
344.Dv SU_ISCONNECTED ,
345depending on whether or not a call to
346.Fn soisconnected
347should be made by the socket framework after the upcall returns.
348The upcall
349.Va func
350cannot call
351.Fn soisconnected
352itself due to lock ordering with the socket buffer lock.
353Only
354.Dv SO_RCV
355upcalls should return
356.Dv SU_ISCONNECTED .
357When a
358.Dv SO_RCV
359upcall returns
360.Dv SU_ISCONNECTED ,
361the upcall will be removed from the socket.
362.Pp
363Upcalls are removed from their socket by
364.Fn soupcall_clear .
365The
366.Va which
367argument again specifies whether the sending or receiving upcall is to
368be cleared, with
369.Dv SO_RCV
370or
371.Dv SO_SND .
372.Ss Socket I/O
373The
374.Fn soreceive
375function is equivalent to the
376.Xr recvmsg 2
377system call, and attempts to receive bytes of data from the socket
378.Fa so ,
379optionally blocking while waiting for data if none is ready to read.
380Data may be retrieved directly to kernel or user memory via the
381.Fa uio
382argument, or as an mbuf chain returned to the caller via
383.Fa mp0 ,
384avoiding a data copy.
385The
386.Fa uio
387must always be
388.Pf non- Dv NULL .
389If
390.Fa mp0
391is
392.Pf non- Dv NULL ,
393only the
394.Fa uio_resid
395of
396.Fa uio
397is used.
398The caller may optionally retrieve a socket address on a protocol with the
399.Dv PR_ADDR
400capability by providing storage via a
401.Pf non- Dv NULL
402.Fa psa
403argument.
404The caller may optionally retrieve control data mbufs via a
405.Pf non- Dv NULL
406.Fa controlp
407argument.
408Optional flags may be passed to
409.Fn soreceive
410via a
411.Pf non- Dv NULL
412.Fa flagsp
413argument, and use the same flag name space as the
414.Xr recvmsg 2
415system call.
416.Pp
417The
418.Fn sosend
419function is equivalent to the
420.Xr sendmsg 2
421system call, and attempts to send bytes of data via the socket
422.Fa so ,
423optionally blocking if data cannot be immediately sent.
424Data may be sent directly from kernel or user memory via the
425.Fa uio
426argument, or as an mbuf chain via
427.Fa top ,
428avoiding a data copy.
429Only one of the
430.Fa uio
431or
432.Fa top
433pointers may be
434.Pf non- Dv NULL .
435An optional destination address may be specified via a
436.Pf non- Dv NULL
437.Fa addr
438argument, which may result in an implicit connect if supported by the
439protocol.
440The caller may optionally send control data mbufs via a
441.Pf non- Dv NULL
442.Fa control
443argument.
444Flags may be passed to
445.Fn sosend
446using the
447.Fa flags
448argument, and use the same flag name space as the
449.Xr sendmsg 2
450system call.
451.Pp
452Kernel callers running in
453.Xr ithread 9
454context, or with a mutex held, will wish to use non-blocking sockets and pass
455the
456.Dv MSG_DONTWAIT
457flag in order to prevent these functions from sleeping.
458.Pp
459A socket can be queried for readability, writability, out-of-band data,
460or end-of-file using
461.Fn sopoll .
462The possible values for
463.Va events
464are as for
465.Xr poll 2 ,
466with symbolic values
467.Dv POLLIN ,
468.Dv POLLPRI ,
469.Dv POLLOUT ,
470.Dv POLLRDNORM ,
471.Dv POLLWRNORM ,
472.Dv POLLRDBAND ,
473and
474.Dv POLLINIGNEOF
475taken from
476.In sys/poll.h .
477.Pp
478Calls to
479.Fn soaccept
480pass through to the protocol's accept routine to accept an incoming connection.
481.Ss Socket Utility Functions
482The uid of a socket's credential may be compared against a
483.Va uid
484with
485.Fn socheckuid .
486.Pp
487A copy of an existing
488.Vt struct sockaddr
489may be made using
490.Fn sodupsockaddr .
491.Pp
492Protocol implementations notify the socket layer of the arrival of
493out-of-band data using
494.Fn sohasoutofband ,
495so that the socket layer can notify socket consumers of the available data.
496.Pp
497An
498.Dq external-format
499version of a
500.Vt struct socket
501can be created using
502.Fn sotoxsocket ,
503suitable for isolating user code from changes in the kernel structure.
504.Ss Protocol Implementations
505Protocols must supply an implementation for
506.Fn solisten ;
507such protocol implementations can call back into the socket layer using
508.Fn solisten_proto_check
509and
510.Fn solisten_proto
511to check and set the socket-layer listen state.
512These callbacks are provided so that the protocol implementation
513can order the socket layer and protocol locks as necessary.
514Protocols must supply an implementation of
515.Fn soreceive ;
516the functions
517.Fn soreceive_stream ,
518.Fn soreceive_dgram ,
519and
520.Fn soreceive_generic
521are supplied for use by such implementations.
522.Pp
523Protocol implementations can use
524.Fn sonewconn
525to create a socket and attach protocol state to that socket.
526This can be used to create new sockets available for
527.Fn soaccept
528on a listen socket.
529The returned socket has a reference count of zero.
530.Pp
531Protocols must supply an implementation for
532.Fn sopoll ;
533.Fn sopoll_generic
534is provided for use by protocol implementations.
535.Pp
536The functions
537.Fn sosend_dgram
538and
539.Fn sosend_generic
540are supplied to assist in protocol implementations of
541.Fn sosend .
542.Pp
543When a protocol creates a new socket structure, it is necessary to
544reserve socket buffer space for that socket, by calling
545.Fn soreserve .
546The rough inverse of this reservation is performed by
547.Fn sorflush ,
548which is called automatically by the socket framework.
549.Pp
550When a protocol needs to wake up threads waiting for the socket to
551become ready to read or write, variants of
552.Fn sowakeup
553are used.
554The
555.Fn sowakeup
556function should not be called directly by protocol code, instead use the
557wrappers
558.Fn sorwakeup ,
559.Fn sorwakeup_locked ,
560.Fn sowwakeup ,
561and
562.Fn sowwakeup_locked
563for readers and writers, with the corresponding socket buffer lock
564not already locked, or already held, respectively.
565.Pp
566The functions
567.Fn sooptcopyin
568and
569.Fn sooptcopyout
570are useful for transferring
571.Vt struct sockopt
572data between user and kernel code.
573.Sh SEE ALSO
574.Xr bind 2 ,
575.Xr close 2 ,
576.Xr connect 2 ,
577.Xr getsockopt 2 ,
578.Xr recv 2 ,
579.Xr send 2 ,
580.Xr setsockopt 2 ,
581.Xr shutdown 2 ,
582.Xr socket 2 ,
583.Xr ng_ksocket 4 ,
584.Xr ithread 9 ,
585.Xr msleep 9 ,
586.Xr ucred 9
587.Sh HISTORY
588The
589.Xr socket 2
590system call appeared in
591.Bx 4.2 .
592This manual page was introduced in
593.Fx 7.0 .
594.Sh AUTHORS
595This manual page was written by
596.An Robert Watson
597and
598.An Benjamin Kaduk .
599.Sh BUGS
600The use of explicitly passed credentials, credentials hung from explicitly
601passed threads, the credential on
602.Dv curthread ,
603and the cached credential from
604socket creation time is inconsistent, and may lead to unexpected behaviour.
605It is possible that several of the
606.Fa td
607arguments should be
608.Fa cred
609arguments, or simply not be present at all.
610.Pp
611The caller may need to manually clear
612.Dv SS_ISCONNECTING
613if
614.Fn soconnect
615returns an error.
616.Pp
617The
618.Dv MSG_DONTWAIT
619flag is not implemented for
620.Fn sosend ,
621and may not always work with
622.Fn soreceive
623when zero copy sockets are enabled.
624.Pp
625This manual page does not describe how to register socket upcalls or monitor
626a socket for readability/writability without using blocking I/O.
627.Pp
628The
629.Fn soref
630and
631.Fn sorele
632functions are not described, and in most cases should not be used, due to
633confusing and potentially incorrect interactions when
634.Fn sorele
635is last called after
636.Fn soclose .
637