xref: /freebsd/share/man/man9/vnet.9 (revision f2d48b5e2c3b45850585e4d7aee324fe148afbf2)
1.\"-
2.\" Copyright (c) 2010 The FreeBSD Foundation
3.\" All rights reserved.
4.\"
5.\" This documentation was written by CK Software GmbH under sponsorship from
6.\" the FreeBSD Foundation.
7.\"
8.\" Redistribution and use in source and binary forms, with or without
9.\" modification, are permitted provided that the following conditions
10.\" are met:
11.\" 1. Redistributions of source code must retain the above copyright
12.\"    notice, this list of conditions and the following disclaimer.
13.\" 2. Redistributions in binary form must reproduce the above copyright
14.\"    notice, this list of conditions and the following disclaimer in the
15.\"    documentation and/or other materials provided with the distribution.
16.\"
17.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27.\" SUCH DAMAGE.
28.\"
29.\" $FreeBSD$
30.\"
31.Dd December 10, 2020
32.Dt VNET 9
33.Os
34.Sh NAME
35.Nm VNET
36.Nd "network subsystem virtualization infrastructure"
37.Sh SYNOPSIS
38.Cd "options VIMAGE"
39.Cd "options VNET_DEBUG"
40.Pp
41.In net/vnet.h
42.\"------------------------------------------------------------
43.Ss "Constants and Global Variables"
44.\"
45.Dv VNET_SETNAME
46.\"	"set_vnet"
47.Dv VNET_SYMPREFIX
48.\"	"vnet_entry_"
49.Vt extern struct vnet *vnet0;
50.\"------------------------------------------------------------
51.Ss "Variable Declaration"
52.Fo VNET
53.Fa "name"
54.Fc
55.\"
56.Fo VNET_NAME
57.Fa "name"
58.Fc
59.\"
60.Fo VNET_DECLARE
61.Fa "type" "name"
62.Fc
63.\"
64.Fo VNET_DEFINE
65.Fa "type" "name"
66.Fc
67.\"
68.Fo VNET_DEFINE_STATIC
69.Fa "type" "name"
70.Fc
71.\"
72.Bd -literal
73#define	V_name	VNET(name)
74.Ed
75.\" ------------------------------------------------------------
76.Ss "Virtual Instance Selection"
77.\"
78.Fo CRED_TO_VNET
79.Fa "struct ucred *"
80.Fc
81.\"
82.Fo TD_TO_VNET
83.Fa "struct thread *"
84.Fc
85.\"
86.Fo P_TO_VNET
87.Fa "struct proc *"
88.Fc
89.\"
90.Fo IS_DEFAULT_VNET
91.Fa "struct vnet *"
92.Fc
93.\"
94.Fo VNET_ASSERT
95.Fa exp msg
96.Fc
97.\"
98.Fo CURVNET_SET
99.Fa "struct vnet *"
100.Fc
101.\"
102.Fo CURVNET_SET_QUIET
103.Fa "struct vnet *"
104.Fc
105.\"
106.Fn CURVNET_RESTORE
107.\"
108.Fo VNET_ITERATOR_DECL
109.Fa "struct vnet *"
110.Fc
111.\"
112.Fo VNET_FOREACH
113.Fa "struct vnet *"
114.Fc
115.\" ------------------------------------------------------------
116.Ss "Locking"
117.\"
118.Fn VNET_LIST_RLOCK
119.Fn VNET_LIST_RUNLOCK
120.Fn VNET_LIST_RLOCK_NOSLEEP
121.Fn VNET_LIST_RUNLOCK_NOSLEEP
122.\" ------------------------------------------------------------
123.Ss "Startup and Teardown Functions"
124.\"
125.Ft "struct vnet *"
126.Fo vnet_alloc
127.Fa void
128.Fc
129.\"
130.Ft void
131.Fo vnet_destroy
132.Fa "struct vnet *"
133.Fc
134.\"
135.Fo VNET_SYSINIT
136.Fa ident
137.Fa "enum sysinit_sub_id subsystem"
138.Fa "enum sysinit_elem_order order"
139.Fa "sysinit_cfunc_t func"
140.Fa "const void *arg"
141.Fc
142.\"
143.Fo VNET_SYSUNINIT
144.Fa ident
145.Fa "enum sysinit_sub_id subsystem"
146.Fa "enum sysinit_elem_order order"
147.Fa "sysinit_cfunc_t func"
148.Fa "const void *arg"
149.Fc
150.\" ------------------------------------------------------------
151.Ss "Eventhandlers"
152.\"
153.Fo VNET_GLOBAL_EVENTHANDLER_REGISTER
154.Fa "const char *name"
155.Fa "void *func"
156.Fa "void *arg"
157.Fa "int priority"
158.Fc
159.\"
160.Fo VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG
161.Fa "eventhandler_tag tag"
162.Fa "const char *name"
163.Fa "void *func"
164.Fa "void *arg"
165.Fa "int priority"
166.Fc
167.\" ------------------------------------------------------------
168.Ss "Sysctl Handling"
169.Fo SYSCTL_VNET_INT
170.Fa parent nbr name access ptr val descr
171.Fc
172.Fo SYSCTL_VNET_PROC
173.Fa parent nbr name access ptr arg handler fmt descr
174.Fc
175.Fo SYSCTL_VNET_STRING
176.Fa parent nbr name access arg len descr
177.Fc
178.Fo SYSCTL_VNET_STRUCT
179.Fa parent nbr name access ptr type descr
180.Fc
181.Fo SYSCTL_VNET_UINT
182.Fa parent nbr name access ptr val descr
183.Fc
184.Fo VNET_SYSCTL_ARG
185.Fa req arg1
186.Fc
187.\" ------------------------------------------------------------
188.Sh DESCRIPTION
189.Nm
190is the name of a technique to virtualize the network stack.
191The basic idea is to change global resources, most notably variables, into
192per-network-stack resources and have functions, sysctls, eventhandlers,
193etc. access and handle them in the context of the correct instance.
194Each (virtual) network stack is attached to a
195.Em prison ,
196with
197.Vt vnet0
198being the unrestricted default network stack of the base system.
199.Pp
200The global defines for
201.Dv VNET_SETNAME
202and
203.Dv VNET_SYMPREFIX
204are shared with
205.Xr kvm 3
206to access internals for debugging reasons.
207.\" ------------------------------------------------------------
208.Ss "Variable Declaration"
209.\"
210Variables are virtualized by using the
211.Fn VNET_DEFINE
212macro rather than writing them out as
213.Em type name .
214One can still use static initialization, e.g.,
215.Pp
216.Dl Li VNET_DEFINE(int, foo) = 1;
217.Pp
218Variables declared with the static keyword can use the
219.Fn VNET_DEFINE_STATIC
220macro, e.g.,
221.Pp
222.Dl Li VNET_DEFINE_STATIC(SLIST_HEAD(, bar), bars);
223.Pp
224Static initialization is not possible when the virtualized variable
225would need to be referenced, e.g., with
226.Dq TAILQ_HEAD_INITIALIZER() .
227In that case a
228.Fn VNET_SYSINIT
229based initialization function must be used.
230.Pp
231External variables have to be declared using the
232.Fn VNET_DECLARE
233macro.
234In either case the convention is to define another macro
235that is then used throughout the implementation to access that variable.
236The variable name is usually prefixed by
237.Em V_
238to express that it is virtualized.
239The
240.Fn VNET
241macro will then translate accesses to that variable to the copy of the
242currently selected instance (see the
243.Sx "Virtual Instance Selection"
244section):
245.Pp
246.Dl Li #define	V_name	VNET(name)
247.Pp
248.Em NOTE:
249Do not confuse this with the convention used by
250.Xr VFS 9 .
251.Pp
252The
253.Fn VNET_NAME
254macro returns the offset within the memory region of the virtual network
255stack instance.
256It is usually only used with
257.Fn SYSCTL_VNET_*
258macros.
259.\" ------------------------------------------------------------
260.Ss "Virtual Instance Selection"
261.\"
262There are three different places where the current virtual
263network stack pointer is stored and can be taken from:
264.Bl -enum -offset indent
265.It
266a
267.Em prison :
268.Dl "(struct prison *)->pr_vnet"
269.Pp
270For convenience the following macros are provided:
271.Bd -literal -compact -offset indent
272.Fn CRED_TO_VNET "struct ucred *"
273.Fn TD_TO_VNET "struct thread *"
274.Fn P_TO_VNET "struct proc *"
275.Ed
276.It
277a
278.Em socket :
279.Dl "(struct socket *)->so_vnet"
280.It
281an
282.Em interface :
283.Dl "(struct ifnet *)->if_vnet"
284.El
285.Pp
286.\"
287In addition the currently active instance is cached in
288.Dq "curthread->td_vnet"
289which is usually only accessed through the
290.Dv curvnet
291macro.
292.Pp
293.\"
294To set the correct context of the current virtual network instance, use the
295.Fn CURVNET_SET
296or
297.Fn CURVNET_SET_QUIET
298macros.
299The
300.Fn CURVNET_SET_QUIET
301version will not record vnet recursions in case the kernel was compiled
302with
303.Cd "options VNET_DEBUG"
304and should thus only be used in well-known cases where recursion is
305unavoidable.
306Both macros will save the previous state on the stack and it must be restored
307with the
308.Fn CURVNET_RESTORE
309macro.
310.Pp
311.Em NOTE:
312As the previous state is saved on the stack, you cannot have multiple
313.Fn CURVNET_SET
314calls in the same block.
315.Pp
316.Em NOTE:
317As the previous state is saved on the stack, a
318.Fn CURVNET_RESTORE
319call has to be in the same block as the
320.Fn CURVNET_SET
321call or in a subblock with the same idea of the saved instances as the
322outer block.
323.Pp
324.Em NOTE:
325As each macro is a set of operations and, as previously explained, cannot
326be put into its own block when defined, one cannot conditionally set
327the current vnet context.
328The following will
329.Em not
330work:
331.Bd -literal -offset indent
332if (condition)
333	CURVNET_SET(vnet);
334.Ed
335.Pp
336nor would this work:
337.Bd -literal -offset indent
338if (condition) {
339	CURVNET_SET(vnet);
340}
341CURVNET_RESTORE();
342.Ed
343.Pp
344.\"
345Sometimes one needs to loop over all virtual instances, for example to update
346virtual from global state, to run a function from a
347.Xr callout 9
348for each instance, etc.
349For those cases the
350.Fn VNET_ITERATOR_DECL
351and
352.Fn VNET_FOREACH
353macros are provided.
354The former macro defines the variable that iterates over the loop,
355and the latter loops over all of the virtual network stack instances.
356See
357.Sx "Locking"
358for how to safely traverse the list of all virtual instances.
359.Pp
360.\"
361The
362.Fn IS_DEFAULT_VNET
363macro provides a safe way to check whether the currently active instance is the
364unrestricted default network stack of the base system
365.Pq Vt vnet0 .
366.Pp
367.\"
368The
369.Fn VNET_ASSERT
370macro provides a way to conditionally add assertions that are only active with
371.Cd "options VIMAGE"
372compiled in and either
373.Cd "options VNET_DEBUG"
374or
375.Cd "options INVARIANTS"
376enabled as well.
377It uses the same semantics as
378.Xr KASSERT 9 .
379.\" ------------------------------------------------------------
380.Ss "Locking"
381.\"
382For public access to the list of virtual network stack instances,
383e.g., by the
384.Fn VNET_FOREACH
385macro, read locks are provided.
386Macros are used to abstract from the actual type of the locks.
387If a caller may sleep while traversing the list, it must use the
388.Fn VNET_LIST_RLOCK
389and
390.Fn VNET_LIST_RUNLOCK
391macros.
392Otherwise, the caller can use
393.Fn VNET_LIST_RLOCK_NOSLEEP
394and
395.Fn VNET_LIST_RUNLOCK_NOSLEEP .
396.\" ------------------------------------------------------------
397.Ss "Startup and Teardown Functions"
398.\"
399To start or tear down a virtual network stack instance the internal
400functions
401.Fn vnet_alloc
402and
403.Fn vnet_destroy
404are provided and called from the jail framework.
405They run the publicly provided methods to handle network stack
406startup and teardown.
407.Pp
408For public control, the system startup interface has been enhanced
409to not only handle a system boot but to also handle a virtual
410network stack startup and teardown.
411To the base system the
412.Fn VNET_SYSINIT
413and
414.Fn VNET_SYSUNINIT
415macros look exactly as if there were no virtual network stack.
416In fact, if
417.Cd "options VIMAGE"
418is not compiled in they are compiled to the standard
419.Fn SYSINIT
420macros.
421In addition to that they are run for each virtual network stack
422when starting or, in reverse order, when shutting down.
423.\" ------------------------------------------------------------
424.Ss "Eventhandlers"
425.\"
426Eventhandlers can be handled in two ways:
427.Pp
428.Bl -enum -offset indent -compact
429.It
430save the
431.Em tags
432returned in each virtual instance and properly free the eventhandlers
433on teardown using those, or
434.It
435use one eventhandler that will iterate over all virtual network
436stack instances.
437.El
438.Pp
439For the first case one can just use the normal
440.Xr EVENTHANDLER 9
441functions, while for the second case the
442.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER
443and
444.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG
445macros are provided.
446These differ in that
447.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG
448takes an extra first argument that will carry the
449.Fa "tag"
450upon return.
451Eventhandlers registered with either of these will not run
452.Fa func
453directly but
454.Fa func
455will be called from an internal iterator function for each vnet.
456Both macros can only be used for eventhandlers that do not take
457additional arguments, as the variadic arguments from an
458.Xr EVENTHANDLER_INVOKE 9
459call will be ignored.
460.\" ------------------------------------------------------------
461.Ss "Sysctl Handling"
462.\"
463A
464.Xr sysctl 9
465can be virtualized by using one of the
466.Fn SYSCTL_VNET_*
467macros.
468.Pp
469They take the same arguments as the standard
470.Xr sysctl 9
471functions, with the only difference being that the
472.Fa ptr
473argument has to be passed as
474.Ql &VNET_NAME(foo)
475instead of
476.Ql &foo
477so that the variable can be selected from the correct memory
478region of the virtual network stack instance of the caller.
479.Pp
480For the very rare case where a sysctl handler function needs to
481handle
482.Fa arg1
483itself, the
484.Fn VNET_SYSCTL_ARG req arg1
485macro is provided, which translates the
486.Fa arg1
487argument to the correct memory address in the virtual network stack
488context of the caller.
489.\" ------------------------------------------------------------
490.Sh SEE ALSO
491.Xr jail 2 ,
492.Xr kvm 3 ,
493.Xr EVENTHANDLER 9 ,
494.\" .Xr pcpu 9 ,
495.Xr KASSERT 9 ,
496.Xr sysctl 9
497.\" .Xr SYSINIT 9
498.Pp
499Marko Zec, Implementing a Clonable Network Stack in the FreeBSD Kernel,
500USENIX ATC'03, June 2003, Boston
501.Sh HISTORY
502The virtual network stack implementation first appeared in
503.Fx 8.0 .
504.Sh AUTHORS
505.An -nosplit
506The
507.Nm
508framework was designed and implemented at the University of Zagreb by
509.An Marko Zec
510under sponsorship of the FreeBSD Foundation and NLnet Foundation,
511and later extended and refined by
512.An Bjoern A. Zeeb
513(also under FreeBSD Foundation sponsorship), and
514.An Robert Watson .
515.Pp
516This manual page was written by
517.An Bjoern A. Zeeb, CK Software GmbH,
518under sponsorship from the FreeBSD Foundation.
519