xref: /freebsd/share/man/man9/vnet.9 (revision e08e9e999091f86081377b7cedc3fd2fe2ab70fc)
1.\"-
2.\" Copyright (c) 2010 The FreeBSD Foundation
3.\" All rights reserved.
4.\"
5.\" This documentation was written by CK Software GmbH under sponsorship from
6.\" the FreeBSD Foundation.
7.\"
8.\" Redistribution and use in source and binary forms, with or without
9.\" modification, are permitted provided that the following conditions
10.\" are met:
11.\" 1. Redistributions of source code must retain the above copyright
12.\"    notice, this list of conditions and the following disclaimer.
13.\" 2. Redistributions in binary form must reproduce the above copyright
14.\"    notice, this list of conditions and the following disclaimer in the
15.\"    documentation and/or other materials provided with the distribution.
16.\"
17.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27.\" SUCH DAMAGE.
28.\"
29.\" $FreeBSD$
30.\"
31.Dd May 16, 2018
32.Dt VNET 9
33.Os
34.Sh NAME
35.Nm VNET
36.Nd "network subsystem virtualization infrastructure"
37.Sh SYNOPSIS
38.Cd "options VIMAGE"
39.Cd "options VNET_DEBUG"
40.Pp
41.In sys/vnet.h
42.Pp
43.\"------------------------------------------------------------
44.Ss "Constants and Global Variables"
45.\"
46.Dv VNET_SETNAME
47.\"	"set_vnet"
48.Dv VNET_SYMPREFIX
49.\"	"vnet_entry_"
50.Vt extern struct vnet *vnet0;
51.\"------------------------------------------------------------
52.Ss "Variable Declaration"
53.Fo VNET
54.Fa "name"
55.Fc
56.\"
57.Fo VNET_NAME
58.Fa "name"
59.Fc
60.\"
61.Fo VNET_DECLARE
62.Fa "type" "name"
63.Fc
64.\"
65.Fo VNET_DEFINE
66.Fa "type" "name"
67.Fc
68.\"
69.Bd -literal
70#define	V_name	VNET(name)
71.Ed
72.\" ------------------------------------------------------------
73.Ss "Virtual Instance Selection"
74.\"
75.Fo CRED_TO_VNET
76.Fa "struct ucred *"
77.Fc
78.\"
79.Fo TD_TO_VNET
80.Fa "struct thread *"
81.Fc
82.\"
83.Fo P_TO_VNET
84.Fa "struct proc *"
85.Fc
86.\"
87.Fo IS_DEFAULT_VNET
88.Fa "struct vnet *"
89.Fc
90.\"
91.Fo VNET_ASSERT
92.Fa exp msg
93.Fc
94.\"
95.Fo CURVNET_SET
96.Fa "struct vnet *"
97.Fc
98.\"
99.Fo CURVNET_SET_QUIET
100.Fa "struct vnet *"
101.Fc
102.\"
103.Fn CURVNET_RESTORE
104.\"
105.Fo VNET_ITERATOR_DECL
106.Fa "struct vnet *"
107.Fc
108.\"
109.Fo VNET_FOREACH
110.Fa "struct vnet *"
111.Fc
112.\" ------------------------------------------------------------
113.Ss "Locking"
114.\"
115.Fn VNET_LIST_RLOCK
116.Fn VNET_LIST_RUNLOCK
117.Fn VNET_LIST_RLOCK_NOSLEEP
118.Fn VNET_LIST_RUNLOCK_NOSLEEP
119.\" ------------------------------------------------------------
120.Ss "Startup and Teardown Functions"
121.\"
122.Ft "struct vnet *"
123.Fo vnet_alloc
124.Fa void
125.Fc
126.\"
127.Ft void
128.Fo vnet_destroy
129.Fa "struct vnet *"
130.Fc
131.\"
132.Fo VNET_SYSINIT
133.Fa ident
134.Fa "enum sysinit_sub_id subsystem"
135.Fa "enum sysinit_elem_order order"
136.Fa "sysinit_cfunc_t func"
137.Fa "const void *arg"
138.Fc
139.\"
140.Fo VNET_SYSUNINIT
141.Fa ident
142.Fa "enum sysinit_sub_id subsystem"
143.Fa "enum sysinit_elem_order order"
144.Fa "sysinit_cfunc_t func"
145.Fa "const void *arg"
146.Fc
147.\" ------------------------------------------------------------
148.Ss "Eventhandlers"
149.\"
150.Fo VNET_GLOBAL_EVENTHANDLER_REGISTER
151.Fa "const char *name"
152.Fa "void *func"
153.Fa "void *arg"
154.Fa "int priority"
155.Fc
156.\"
157.Fo VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG
158.Fa "eventhandler_tag tag"
159.Fa "const char *name"
160.Fa "void *func"
161.Fa "void *arg"
162.Fa "int priority"
163.Fc
164.\" ------------------------------------------------------------
165.Ss "Sysctl Handling"
166.Fo SYSCTL_VNET_INT
167.Fa parent nbr name access ptr val descr
168.Fc
169.Fo SYSCTL_VNET_PROC
170.Fa parent nbr name access ptr arg handler fmt descr
171.Fc
172.Fo SYSCTL_VNET_STRING
173.Fa parent nbr name access arg len descr
174.Fc
175.Fo SYSCTL_VNET_STRUCT
176.Fa parent nbr name access ptr type descr
177.Fc
178.Fo SYSCTL_VNET_UINT
179.Fa parent nbr name access ptr val descr
180.Fc
181.Fo VNET_SYSCTL_ARG
182.Fa req arg1
183.Fc
184.\" ------------------------------------------------------------
185.Sh DESCRIPTION
186.Nm
187is the name of a technique to virtualize the network stack.
188The basic idea is to change global resources, most notably variables, into
189per network stack resources and have functions, sysctls, eventhandlers,
190etc. access and handle them in the context of the correct instance.
191Each (virtual) network stack is attached to a
192.Em prison ,
193with
194.Vt vnet0
195being the unrestricted default network stack of the base system.
196.Pp
197The global defines for
198.Dv VNET_SETNAME
199and
200.Dv VNET_SYMPREFIX
201are shared with
202.Xr kvm 3
203to access internals for debugging reasons.
204.\" ------------------------------------------------------------
205.Ss "Variable Declaration"
206.\"
207Variables are virtualized by using the
208.Fn VNET_DEFINE
209macro rather than writing them out as
210.Em type name .
211One can still use static initialization or storage class specifiers, e.g.,
212.Pp
213.Dl Li static VNET_DEFINE(int, foo) = 1;
214or
215.Dl Li static VNET_DEFINE(SLIST_HEAD(, bar), bars);
216.Pp
217Static initialization is not possible when the virtualized variable
218would need to be referenced, e.g., with
219.Dq TAILQ_HEAD_INITIALIZER() .
220In that case a
221.Fn VNET_SYSINIT
222based initialization function must be used.
223.Pp
224External variables have to be declared using the
225.Fn VNET_DECLARE
226macro.
227In either case the convention is to define another macro,
228that is then used throughout the implementation to access that variable.
229The variable name is usually prefixed by
230.Em V_
231to express that it is virtualized.
232The
233.Fn VNET
234macro will then translate accesses to that variable to the copy of the
235currently selected instance (see the
236.Sx "Virtual Instance Selection"
237section):
238.Pp
239.Dl Li #define	V_name	VNET(name)
240.Pp
241.Em NOTE:
242Do not confuse this with the convention used by
243.Xr VFS 9 .
244.Pp
245The
246.Fn VNET_NAME
247macro returns the offset within the memory region of the virtual network
248stack instance.
249It is usually only used with
250.Fn SYSCTL_VNET_*
251macros.
252.\" ------------------------------------------------------------
253.Ss "Virtual Instance Selection"
254.\"
255There are three different places where the current virtual
256network stack pointer is stored and can be taken from:
257.Bl -enum -offset indent
258.It
259a
260.Em prison :
261.Dl "(struct prison *)->pr_vnet"
262.Pp
263For convenience the following macros are provided:
264.Bd -literal -compact -offset indent
265.Fn CRED_TO_VNET "struct ucred *"
266.Fn TD_TO_VNET "struct thread *"
267.Fn P_TO_VNET "struct proc *"
268.Ed
269.It
270a
271.Em socket :
272.Dl "(struct socket *)->so_vnet"
273.It
274an
275.Em interface :
276.Dl "(struct ifnet *)->if_vnet"
277.El
278.Pp
279.\"
280In addition, the currently active instance is cached in
281.Dq "curthread->td_vnet"
282which is usually only accessed through the
283.Dv curvnet
284macro.
285.Pp
286.\"
287To set the correct context of the current virtual network instance, use the
288.Fn CURVNET_SET
289or
290.Fn CURVNET_SET_QUIET
291macros.
292The
293.Fn CURVNET_SET_QUIET
294version will not record vnet recursions in case the kernel was compiled
295with
296.Cd "options VNET_DEBUG"
297and should thus only be used in well-known cases where recursion is
298unavoidable.
299Both macros will save the previous state on the stack and it must be restored
300with the
301.Fn CURVNET_RESTORE
302macro.
303.Pp
304.Em NOTE:
305As the previous state is saved on the stack, you cannot have multiple
306.Fn CURVNET_SET
307calls in the same block.
308.Pp
309.Em NOTE:
310As the previous state is saved on the stack, a
311.Fn CURVNET_RESTORE
312call has to be in the same block as the
313.Fn CURVNET_SET
314call or in a subblock with the same idea of the saved instances as the
315outer block.
316.Pp
317.Em NOTE:
318As each macro is a set of operations and, as previously explained, cannot
319be put into its own block when defined, one cannot conditionally set
320the current vnet context.
321The following will
322.Em not
323work:
324.Bd -literal -offset indent
325if (condition)
326	CURVNET_SET(vnet);
327.Ed
328.Pp
329nor would this work:
330.Bd -literal -offset indent
331if (condition) {
332	CURVNET_SET(vnet);
333}
334CURVNET_RESTORE();
335.Ed
336.Pp
337.\"
338Sometimes one needs to loop over all virtual instances, for example to update
339virtual from global state, to run a function from a
340.Xr callout 9
341for each instance, etc.
342For those cases the
343.Fn VNET_ITERATOR_DECL
344and
345.Fn VNET_FOREACH
346macros are provided.
347The former macro defines the variable that iterates over the loop,
348and the latter loops over all of the virtual network stack instances.
349See
350.Sx "Locking"
351for how to safely traverse the list of all virtual instances.
352.Pp
353.\"
354The
355.Fn IS_DEFAULT_VNET
356macro provides a safe way to check whether the currently active instance is the
357unrestricted default network stack of the base system
358.Pq Vt vnet0 .
359.Pp
360.\"
361The
362.Fn VNET_ASSERT
363macro provides a way to conditionally add assertions that are only active with
364.Cd "options VIMAGE"
365compiled in and either
366.Cd "options VNET_DEBUG"
367or
368.Cd "options INVARIANTS"
369enabled as well.
370It uses the same semantics as
371.Xr KASSERT 9 .
372.\" ------------------------------------------------------------
373.Ss "Locking"
374.\"
375For public access to the list of virtual network stack instances,
376e.g., by the
377.Fn VNET_FOREACH
378macro, read locks are provided.
379Macros are used to abstract from the actual type of the locks.
380If a caller may sleep while traversing the list, it must use the
381.Fn VNET_LIST_RLOCK
382and
383.Fn VNET_LIST_RUNLOCK
384macros.
385Otherwise, the caller can use
386.Fn VNET_LIST_RLOCK_NOSLEEP
387and
388.Fn VNET_LIST_RUNLOCK_NOSLEEP .
389.\" ------------------------------------------------------------
390.Ss "Startup and Teardown Functions"
391.\"
392To start or tear down a virtual network stack instance the internal
393functions
394.Fn vnet_alloc
395and
396.Fn vnet_destroy
397are provided and called from the jail framework.
398They run the publicly provided methods to handle network stack
399startup and teardown.
400.Pp
401For public control, the system startup interface has been enhanced
402to not only handle a system boot but to also handle a virtual
403network stack startup and teardown.
404To the base system the
405.Fn VNET_SYSINIT
406and
407.Fn VNET_SYSUNINIT
408macros look exactly as if there were no virtual network stack.
409In fact, if
410.Cd "options VIMAGE"
411is not compiled in, they are compiled to the standard
412.Fn SYSINIT
413macros.
414In addition, they are run for each virtual network stack
415when starting or, in reverse order, when shutting down.
416.\" ------------------------------------------------------------
417.Ss "Eventhandlers"
418.\"
419Eventhandlers can be handled in two ways:
420.Pp
421.Bl -enum -offset indent -compact
422.It
423save the
424.Em tags
425returned in each virtual instance and properly free the eventhandlers
426on teardown using those, or
427.It
428use one eventhandler that will iterate over all virtual network
429stack instances.
430.El
431.Pp
432For the first case one can just use the normal
433.Xr EVENTHANDLER 9
434functions, while for the second case the
435.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER
436and
437.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG
438macros are provided.
439These differ in that
440.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG
441takes an extra first argument that will carry the
442.Fa "tag"
443upon return.
444Eventhandlers registered with either of these will not run
445.Fa func
446directly but
447.Fa func
448will be called from an internal iterator function for each vnet.
449Both macros can only be used for eventhandlers that do not take
450additional arguments, as the variadic arguments from an
451.Xr EVENTHANDLER_INVOKE 9
452call will be ignored.
453.\" ------------------------------------------------------------
454.Ss "Sysctl Handling"
455.\"
456A
457.Xr sysctl 9
458can be virtualized by using one of the
459.Fn SYSCTL_VNET_*
460macros.
461.Pp
462They take the same arguments as the standard
463.Xr sysctl 9
464functions, with the only difference that the
465.Fa ptr
466argument has to be passed as
467.Ql &VNET_NAME(foo)
468instead of
469.Ql &foo
470so that the variable can be selected from the correct memory
471region of the virtual network stack instance of the caller.
472.Pp
473For the very rare case where a sysctl handler function needs to
474handle
475.Fa arg1
476itself, the
477.Fn VNET_SYSCTL_ARG req arg1
478macro is provided, which will translate the
479.Fa arg1
480argument to the correct memory address in the virtual network stack
481context of the caller.
482.\" ------------------------------------------------------------
483.Sh SEE ALSO
484.Xr jail 2 ,
485.Xr kvm 3 ,
486.Xr EVENTHANDLER 9 ,
487.\" .Xr pcpu 9 ,
488.Xr KASSERT 9 ,
489.Xr sysctl 9
490.\" .Xr SYSINIT 9
491.Pp
492Marko Zec, Implementing a Clonable Network Stack in the FreeBSD Kernel,
493USENIX ATC'03, June 2003, Boston
494.Sh HISTORY
495The virtual network stack implementation first appeared in
496.Fx 8.0 .
497.Sh AUTHORS
498.An -nosplit
499The
500.Nm
501framework has been designed and implemented at the University of Zagreb by
502.An Marko Zec ,
503and later extended and refined by
504.An Bjoern A. Zeeb
505and
506.An Robert Watson ,
507under contract to the FreeBSD Foundation.
508.Pp
509This manual page was written by
510.An Bjoern A. Zeeb, CK Software GmbH,
511under sponsorship from the FreeBSD Foundation.
512