xref: /freebsd/share/man/man9/vnet.9 (revision dcf58f92e2c19a32fc171f763698e711c719badc)
1.\"-
2.\" Copyright (c) 2010 The FreeBSD Foundation
3.\" All rights reserved.
4.\"
5.\" This documentation was written by CK Software GmbH under sponsorship from
6.\" the FreeBSD Foundation.
7.\"
8.\" Redistribution and use in source and binary forms, with or without
9.\" modification, are permitted provided that the following conditions
10.\" are met:
11.\" 1. Redistributions of source code must retain the above copyright
12.\"    notice, this list of conditions and the following disclaimer.
13.\" 2. Redistributions in binary form must reproduce the above copyright
14.\"    notice, this list of conditions and the following disclaimer in the
15.\"    documentation and/or other materials provided with the distribution.
16.\"
17.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27.\" SUCH DAMAGE.
28.\"
29.\" $FreeBSD$
30.\"
31.Dd November 20, 2014
32.Dt VNET 9
33.Os
34.Sh NAME
35.Nm VNET
36.Nd "network subsystem virtualization infrastructure"
37.Sh SYNOPSIS
38.Cd "options VIMAGE"
39.Cd "options VNET_DEBUG"
40.Pp
41.In sys/vnet.h
42.Pp
43.\"------------------------------------------------------------
44.Ss "Constants and Global Variables"
45.\"
46.Dv VNET_SETNAME
47.\"	"set_vnet"
48.Dv VNET_SYMPREFIX
49.\"	"vnet_entry_"
50.Vt extern struct vnet *vnet0;
51.\"------------------------------------------------------------
52.Ss "Variable Declaration"
53.Fo VNET
54.Fa "name"
55.Fc
56.\"
57.Fo VNET_NAME
58.Fa "name"
59.Fc
60.\"
61.Fo VNET_DECLARE
62.Fa "type" "name"
63.Fc
64.\"
65.Fo VNET_DEFINE
66.Fa "type" "name"
67.Fc
68.\"
69.Bd -literal
70#define	V_name	VNET(name)
71.Ed
72.\" ------------------------------------------------------------
73.Ss "Virtual Instance Selection"
74.\"
75.Fo CRED_TO_VNET
76.Fa "struct ucred *"
77.Fc
78.\"
79.Fo TD_TO_VNET
80.Fa "struct thread *"
81.Fc
82.\"
83.Fo P_TO_VNET
84.Fa "struct proc *"
85.Fc
86.\"
87.Fo IS_DEFAULT_VNET
88.Fa "struct vnet *"
89.Fc
90.\"
91.Fo VNET_ASSERT
92.Fa exp msg
93.Fc
94.\"
95.Fo CURVNET_SET
96.Fa "struct vnet *"
97.Fc
98.\"
99.Fo CURVNET_SET_QUIET
100.Fa "struct vnet *"
101.Fc
102.\"
103.Fo CURVNET_RESTORE
104.Fc
105.\"
106.Fo VNET_ITERATOR_DECL
107.Fa "struct vnet *"
108.Fc
109.\"
110.Fo VNET_FOREACH
111.Fa "struct vnet *"
112.Fc
113.\" ------------------------------------------------------------
114.Ss "Locking"
115.\"
116.Fo VNET_LIST_RLOCK
117.Fc
118.Fo VNET_LIST_RUNLOCK
119.Fc
120.Fo VNET_LIST_RLOCK_NOSLEEP
121.Fc
122.Fo VNET_LIST_RUNLOCK_NOSLEEP
123.Fc
124.\" ------------------------------------------------------------
125.Ss "Startup and Teardown Functions"
126.\"
127.Ft "struct vnet *"
128.Fo vnet_alloc
129.Fa void
130.Fc
131.\"
132.Ft void
133.Fo vnet_destroy
134.Fa "struct vnet *"
135.Fc
136.\"
137.Fo VNET_SYSINIT
138.Fa ident
139.Fa "enum sysinit_sub_id subsystem"
140.Fa "enum sysinit_elem_order order"
141.Fa "sysinit_cfunc_t func"
142.Fa "const void *arg"
143.Fc
144.\"
145.Fo VNET_SYSUNINIT
146.Fa ident
147.Fa "enum sysinit_sub_id subsystem"
148.Fa "enum sysinit_elem_order order"
149.Fa "sysinit_cfunc_t func"
150.Fa "const void *arg"
151.Fc
152.\" ------------------------------------------------------------
153.Ss "Eventhandlers"
154.\"
155.Fo VNET_GLOBAL_EVENTHANDLER_REGISTER
156.Fa "const char *name"
157.Fa "void *func"
158.Fa "void *arg"
159.Fa "int priority"
160.Fc
161.\"
162.Fo VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG
163.Fa "eventhandler_tag tag"
164.Fa "const char *name"
165.Fa "void *func"
166.Fa "void *arg"
167.Fa "int priority"
168.Fc
169.\" ------------------------------------------------------------
170.Ss "Sysctl Handling"
171.Fo SYSCTL_VNET_INT
172.Fa parent nbr name access ptr val descr
173.Fc
174.Fo SYSCTL_VNET_PROC
175.Fa parent nbr name access ptr arg handler fmt descr
176.Fc
177.Fo SYSCTL_VNET_STRING
178.Fa parent nbr name access arg len descr
179.Fc
180.Fo SYSCTL_VNET_STRUCT
181.Fa parent nbr name access ptr type descr
182.Fc
183.Fo SYSCTL_VNET_UINT
184.Fa parent nbr name access ptr val descr
185.Fc
186.Fo VNET_SYSCTL_ARG
187.Fa req arg1
188.Fc
189.\" ------------------------------------------------------------
190.Sh DESCRIPTION
191.Nm
192is the name of a technique to virtualize the network stack.
193The basic idea is to change global resources, most notably variables, into
194per-network-stack resources and have functions, sysctls, eventhandlers,
195etc. access and handle them in the context of the correct instance.
196Each (virtual) network stack is attached to a
197.Em prison ,
198with
199.Vt vnet0
200being the unrestricted default network stack of the base system.
201.Pp
202The global defines for
203.Dv VNET_SETNAME
204and
205.Dv VNET_SYMPREFIX
206are shared with
207.Xr kvm 3
208to access internals for debugging reasons.
209.\" ------------------------------------------------------------
210.Ss "Variable Declaration"
211.\"
212Variables are virtualized by using the
213.Fn VNET_DEFINE
214macro rather than writing them out as
215.Em type name .
216One can still use static initialization or storage class specifiers, e.g.,
217.Pp
218.Dl Li static VNET_DEFINE(int, foo) = 1;
219or
220.Dl Li static VNET_DEFINE(SLIST_HEAD(, bar), bars);
221.Pp
222Static initialization is not possible when the virtualized variable
223would need to be referenced, e.g., with
224.Dq TAILQ_HEAD_INITIALIZER() .
225In that case a
226.Fn VNET_SYSINIT
227based initialization function must be used.
228.Pp
229External variables have to be declared using the
230.Fn VNET_DECLARE
231macro.
232In either case the convention is to define another macro,
233that is then used throughout the implementation to access that variable.
234The variable name is usually prefixed by
235.Em V_
236to express that it is virtualized.
237The
238.Fn VNET
239macro will then translate accesses to that variable to the copy of the
240currently selected instance (see the
241.Sx "Virtual Instance Selection"
242section):
243.Pp
244.Dl Li #define	V_name	VNET(name)
245.Pp
246.Em NOTE:
247Do not confuse this with the convention used by
248.Xr VFS 9 .
249.Pp
250The
251.Fn VNET_NAME
252macro returns the offset within the memory region of the virtual network
253stack instance.
254It is usually only used with
255.Fn SYSCTL_VNET_*
256macros.
257.\" ------------------------------------------------------------
258.Ss "Virtual Instance Selection"
259.\"
260There are three different places where the current virtual
261network stack pointer is stored and can be taken from:
262.Bl -enum -offset indent
263.It
264a
265.Em prison :
266.Dl "(struct prison *)->pr_vnet"
267.Pp
268For convenience the following macros are provided:
269.Bd -literal -compact -offset indent
270.Fn CRED_TO_VNET "struct ucred *"
271.Fn TD_TO_VNET "struct thread *"
272.Fn P_TO_VNET "struct proc *"
273.Ed
274.It
275a
276.Em socket :
277.Dl "(struct socket *)->so_vnet"
278.It
279an
280.Em interface :
281.Dl "(struct ifnet *)->if_vnet"
282.El
283.Pp
284.\"
285In addition the currently active instance is cached in
286.Dq "curthread->td_vnet"
287which is usually only accessed through the
288.Dv curvnet
289macro.
290.Pp
291.\"
292To set the correct context of the current virtual network instance, use the
293.Fn CURVNET_SET
294or
295.Fn CURVNET_SET_QUIET
296macros.
297The
298.Fn CURVNET_SET_QUIET
299version will not record vnet recursions in case the kernel was compiled
300with
301.Cd "options VNET_DEBUG"
302and should thus only be used in well-known cases, where recursion is
303unavoidable.
304Both macros will save the previous state on the stack and it must be restored
305with the
306.Fn CURVNET_RESTORE
307macro.
308.Pp
309.Em NOTE:
310As the previous state is saved on the stack, you cannot have multiple
311.Fn CURVNET_SET
312calls in the same block.
313.Pp
314.Em NOTE:
315As the previous state is saved on the stack, a
316.Fn CURVNET_RESTORE
317call has to be in the same block as the
318.Fn CURVNET_SET
319call or in a subblock with the same idea of the saved instances as the
320outer block.
321.Pp
322.Em NOTE:
323As each macro is a set of operations and, as previously explained, cannot
324be put into its own block when defined, one cannot conditionally set
325the current vnet context.
326The following will
327.Em not
328work:
329.Bd -literal -offset indent
330if (condition)
331	CURVNET_SET(vnet);
332.Ed
333.Pp
334nor would this work:
335.Bd -literal -offset indent
336if (condition) {
337	CURVNET_SET(vnet);
338}
339CURVNET_RESTORE();
340.Ed
341.Pp
342.\"
343Sometimes one needs to loop over all virtual instances, for example to update
344virtual from global state, to run a function from a
345.Xr callout 9
346for each instance, etc.
347For those cases the
348.Fn VNET_ITERATOR_DECL
349and
350.Fn VNET_FOREACH
351macros are provided.
352The former macro defines the variable that iterates over the loop,
353and the latter loops over all of the virtual network stack instances.
354See
355.Sx "Locking"
356for how to safely traverse the list of all virtual instances.
357.Pp
358.\"
359The
360.Fn IS_DEFAULT_VNET
361macro provides a safe way to check whether the currently active instance is the
362unrestricted default network stack of the base system
363.Pq Vt vnet0 .
364.Pp
365.\"
366The
367.Fn VNET_ASSERT
368macro provides a way to conditionally add assertions that are only active with
369.Cd "options VIMAGE"
370compiled in and either
371.Cd "options VNET_DEBUG"
372or
373.Cd "options INVARIANTS"
374enabled as well.
375It uses the same semantics as
376.Xr KASSERT 9 .
377.\" ------------------------------------------------------------
378.Ss "Locking"
379.\"
380For public access to the list of virtual network stack instances,
381e.g., by the
382.Fn VNET_FOREACH
383macro, read locks are provided.
384Macros are used to abstract from the actual type of the locks.
385If a caller may sleep while traversing the list, it must use the
386.Fn VNET_LIST_RLOCK
387and
388.Fn VNET_LIST_RUNLOCK
389macros.
390Otherwise, the caller can use
391.Fn VNET_LIST_RLOCK_NOSLEEP
392and
393.Fn VNET_LIST_RUNLOCK_NOSLEEP .
394.\" ------------------------------------------------------------
395.Ss "Startup and Teardown Functions"
396.\"
397To start or tear down a virtual network stack instance the internal
398functions
399.Fn vnet_alloc
400and
401.Fn vnet_destroy
402are provided and called from the jail framework.
403They run the publicly provided methods to handle network stack
404startup and teardown.
405.Pp
406For public control, the system startup interface has been enhanced
407to not only handle a system boot but to also handle a virtual
408network stack startup and teardown.
409To the base system the
410.Fn VNET_SYSINIT
411and
412.Fn VNET_SYSUNINIT
413macros look exactly as if there were no virtual network stack.
414In fact, if
415.Cd "options VIMAGE"
416is not compiled in they are compiled to the standard
417.Fn SYSINIT
418macros.
419In addition to that they are run for each virtual network stack
420when starting or, in reverse order, when shutting down.
421.\" ------------------------------------------------------------
422.Ss "Eventhandlers"
423.\"
424Eventhandlers can be handled in two ways:
425.Pp
426.Bl -enum -offset indent -compact
427.It
428save the
429.Em tags
430returned in each virtual instance and properly free the eventhandlers
431on teardown using those, or
432.It
433use one eventhandler that will iterate over all virtual network
434stack instances.
435.El
436.Pp
437For the first case one can just use the normal
438.Xr EVENTHANDLER 9
439functions, while for the second case the
440.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER
441and
442.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG
443macros are provided.
444These differ in that
445.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG
446takes an extra first argument that will carry the
447.Fa "tag"
448upon return.
449Eventhandlers registered with either of these will not run
450.Fa func
451directly but
452.Fa func
453will be called from an internal iterator function for each vnet.
454Both macros can only be used for eventhandlers that do not take
455additional arguments, as the variadic arguments from an
456.Xr EVENTHANDLER_INVOKE 9
457call will be ignored.
458.\" ------------------------------------------------------------
459.Ss "Sysctl Handling"
460.\"
461A
462.Xr sysctl 9
463can be virtualized by using one of the
464.Fn SYSCTL_VNET_*
465macros.
466.Pp
467They take the same arguments as the standard
468.Xr sysctl 9
469functions, with the only difference that the
470.Fa ptr
471argument has to be passed as
472.Ql &VNET_NAME(foo)
473instead of
474.Ql &foo
475so that the variable can be selected from the correct memory
476region of the virtual network stack instance of the caller.
477.Pp
478For the very rare case where a sysctl handler function would want to
479handle
480.Fa arg1
481itself, the
482.Fn VNET_SYSCTL_ARG req arg1
483macro is provided that will translate the
484.Fa arg1
485argument to the correct memory address in the virtual network stack
486context of the caller.
487.\" ------------------------------------------------------------
488.Sh SEE ALSO
489.Xr jail 2 ,
490.Xr kvm 3 ,
491.Xr EVENTHANDLER 9 ,
492.\" .Xr pcpu 9 ,
493.Xr KASSERT 9 ,
494.Xr sysctl 9
495.\" .Xr SYSINIT 9
496.Sh HISTORY
497The virtual network stack implementation first appeared in
498.Fx 8.0 .
499.Sh AUTHORS
500This manual page was written by
501.An Bjoern A. Zeeb, CK Software GmbH,
502under sponsorship from the FreeBSD Foundation.
503