xref: /freebsd/share/man/man9/vnet.9 (revision 1a498d2e689f9e8220e2ad64b018eb1f0d11127e)
1.\"-
2.\" Copyright (c) 2010 The FreeBSD Foundation
3.\" All rights reserved.
4.\"
5.\" This documentation was written by CK Software GmbH under sponsorship from
6.\" the FreeBSD Foundation.
7.\"
8.\" Redistribution and use in source and binary forms, with or without
9.\" modification, are permitted provided that the following conditions
10.\" are met:
11.\" 1. Redistributions of source code must retain the above copyright
12.\"    notice, this list of conditions and the following disclaimer.
13.\" 2. Redistributions in binary form must reproduce the above copyright
14.\"    notice, this list of conditions and the following disclaimer in the
15.\"    documentation and/or other materials provided with the distribution.
16.\"
17.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27.\" SUCH DAMAGE.
28.\"
29.\" $FreeBSD$
30.\"
31.Dd July 24, 2018
32.Dt VNET 9
33.Os
34.Sh NAME
35.Nm VNET
36.Nd "network subsystem virtualization infrastructure"
37.Sh SYNOPSIS
38.Cd "options VIMAGE"
39.Cd "options VNET_DEBUG"
40.Pp
41.In sys/vnet.h
42.Pp
43.\"------------------------------------------------------------
44.Ss "Constants and Global Variables"
45.\"
46.Dv VNET_SETNAME
47.\"	"set_vnet"
48.Dv VNET_SYMPREFIX
49.\"	"vnet_entry_"
50.Vt extern struct vnet *vnet0;
51.\"------------------------------------------------------------
52.Ss "Variable Declaration"
53.Fo VNET
54.Fa "name"
55.Fc
56.\"
57.Fo VNET_NAME
58.Fa "name"
59.Fc
60.\"
61.Fo VNET_DECLARE
62.Fa "type" "name"
63.Fc
64.\"
65.Fo VNET_DEFINE
66.Fa "type" "name"
67.Fc
68.\"
69.Fo VNET_DEFINE_STATIC
70.Fa "type" "name"
71.Fc
72.\"
73.Bd -literal
74#define	V_name	VNET(name)
75.Ed
76.\" ------------------------------------------------------------
77.Ss "Virtual Instance Selection"
78.\"
79.Fo CRED_TO_VNET
80.Fa "struct ucred *"
81.Fc
82.\"
83.Fo TD_TO_VNET
84.Fa "struct thread *"
85.Fc
86.\"
87.Fo P_TO_VNET
88.Fa "struct proc *"
89.Fc
90.\"
91.Fo IS_DEFAULT_VNET
92.Fa "struct vnet *"
93.Fc
94.\"
95.Fo VNET_ASSERT
96.Fa exp msg
97.Fc
98.\"
99.Fo CURVNET_SET
100.Fa "struct vnet *"
101.Fc
102.\"
103.Fo CURVNET_SET_QUIET
104.Fa "struct vnet *"
105.Fc
106.\"
107.Fn CURVNET_RESTORE
108.\"
109.Fo VNET_ITERATOR_DECL
110.Fa "struct vnet *"
111.Fc
112.\"
113.Fo VNET_FOREACH
114.Fa "struct vnet *"
115.Fc
116.\" ------------------------------------------------------------
117.Ss "Locking"
118.\"
119.Fn VNET_LIST_RLOCK
120.Fn VNET_LIST_RUNLOCK
121.Fn VNET_LIST_RLOCK_NOSLEEP
122.Fn VNET_LIST_RUNLOCK_NOSLEEP
123.\" ------------------------------------------------------------
124.Ss "Startup and Teardown Functions"
125.\"
126.Ft "struct vnet *"
127.Fo vnet_alloc
128.Fa void
129.Fc
130.\"
131.Ft void
132.Fo vnet_destroy
133.Fa "struct vnet *"
134.Fc
135.\"
136.Fo VNET_SYSINIT
137.Fa ident
138.Fa "enum sysinit_sub_id subsystem"
139.Fa "enum sysinit_elem_order order"
140.Fa "sysinit_cfunc_t func"
141.Fa "const void *arg"
142.Fc
143.\"
144.Fo VNET_SYSUNINIT
145.Fa ident
146.Fa "enum sysinit_sub_id subsystem"
147.Fa "enum sysinit_elem_order order"
148.Fa "sysinit_cfunc_t func"
149.Fa "const void *arg"
150.Fc
151.\" ------------------------------------------------------------
152.Ss "Eventhandlers"
153.\"
154.Fo VNET_GLOBAL_EVENTHANDLER_REGISTER
155.Fa "const char *name"
156.Fa "void *func"
157.Fa "void *arg"
158.Fa "int priority"
159.Fc
160.\"
161.Fo VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG
162.Fa "eventhandler_tag tag"
163.Fa "const char *name"
164.Fa "void *func"
165.Fa "void *arg"
166.Fa "int priority"
167.Fc
168.\" ------------------------------------------------------------
169.Ss "Sysctl Handling"
170.Fo SYSCTL_VNET_INT
171.Fa parent nbr name access ptr val descr
172.Fc
173.Fo SYSCTL_VNET_PROC
174.Fa parent nbr name access ptr arg handler fmt descr
175.Fc
176.Fo SYSCTL_VNET_STRING
177.Fa parent nbr name access arg len descr
178.Fc
179.Fo SYSCTL_VNET_STRUCT
180.Fa parent nbr name access ptr type descr
181.Fc
182.Fo SYSCTL_VNET_UINT
183.Fa parent nbr name access ptr val descr
184.Fc
185.Fo VNET_SYSCTL_ARG
186.Fa req arg1
187.Fc
188.\" ------------------------------------------------------------
189.Sh DESCRIPTION
190.Nm
191is the name of a technique to virtualize the network stack.
192The basic idea is to change global resources, most notably variables, into
193per-network-stack resources and have functions, sysctls, eventhandlers,
194etc. access and handle them in the context of the correct instance.
195Each (virtual) network stack is attached to a
196.Em prison ,
197with
198.Vt vnet0
199being the unrestricted default network stack of the base system.
200.Pp
201The global defines for
202.Dv VNET_SETNAME
203and
204.Dv VNET_SYMPREFIX
205are shared with
206.Xr kvm 3
207to access internals for debugging reasons.
208.\" ------------------------------------------------------------
209.Ss "Variable Declaration"
210.\"
211Variables are virtualized by using the
212.Fn VNET_DEFINE
213macro rather than writing them out as
214.Em type name .
215One can still use static initialization, e.g.,
216.Pp
217.Dl Li VNET_DEFINE(int, foo) = 1;
218.Pp
219Variables declared with the static keyword can use the
220.Fn VNET_DEFINE_STATIC
221macro, e.g.,
222.Pp
223.Dl Li VNET_DEFINE_STATIC(SLIST_HEAD(, bar), bars);
224.Pp
225Static initialization is not possible when the virtualized variable
226would need to be referenced, e.g., with
227.Dq TAILQ_HEAD_INITIALIZER() .
228In that case a
229.Fn VNET_SYSINIT
230based initialization function must be used.
231.Pp
232External variables have to be declared using the
233.Fn VNET_DECLARE
234macro.
235In either case, the convention is to define another macro
236that is then used throughout the implementation to access that variable.
237The variable name is usually prefixed by
238.Em V_
239to express that it is virtualized.
240The
241.Fn VNET
242macro will then translate accesses to that variable to the copy of the
243currently selected instance (see the
244.Sx "Virtual Instance Selection"
245section):
246.Pp
247.Dl Li #define	V_name	VNET(name)
248.Pp
249.Em NOTE:
250Do not confuse this with the convention used by
251.Xr VFS 9 .
252.Pp
253The
254.Fn VNET_NAME
255macro returns the offset within the memory region of the virtual network
256stack instance.
257It is usually only used with
258.Fn SYSCTL_VNET_*
259macros.
260.\" ------------------------------------------------------------
261.Ss "Virtual Instance Selection"
262.\"
263There are three different places where the current virtual
264network stack pointer is stored and can be taken from:
265.Bl -enum -offset indent
266.It
267a
268.Em prison :
269.Dl "(struct prison *)->pr_vnet"
270.Pp
271For convenience the following macros are provided:
272.Bd -literal -compact -offset indent
273.Fn CRED_TO_VNET "struct ucred *"
274.Fn TD_TO_VNET "struct thread *"
275.Fn P_TO_VNET "struct proc *"
276.Ed
277.It
278a
279.Em socket :
280.Dl "(struct socket *)->so_vnet"
281.It
282an
283.Em interface :
284.Dl "(struct ifnet *)->if_vnet"
285.El
286.Pp
287.\"
288In addition the currently active instance is cached in
289.Dq "curthread->td_vnet"
290which is usually only accessed through the
291.Dv curvnet
292macro.
293.Pp
294.\"
295To set the correct context of the current virtual network instance, use the
296.Fn CURVNET_SET
297or
298.Fn CURVNET_SET_QUIET
299macros.
300The
301.Fn CURVNET_SET_QUIET
302version will not record vnet recursions in case the kernel was compiled
303with
304.Cd "options VNET_DEBUG"
305and should thus only be used in well known cases, where recursion is
306unavoidable.
307Both macros will save the previous state on the stack and it must be restored
308with the
309.Fn CURVNET_RESTORE
310macro.
311.Pp
312.Em NOTE:
313As the previous state is saved on the stack, you cannot have multiple
314.Fn CURVNET_SET
315calls in the same block.
316.Pp
317.Em NOTE:
318As the previous state is saved on the stack, a
319.Fn CURVNET_RESTORE
320call has to be in the same block as the
321.Fn CURVNET_SET
322call or in a subblock with the same idea of the saved instances as the
323outer block.
324.Pp
325.Em NOTE:
326As each macro is a set of operations and, as previously explained, cannot
327be put into its own block when defined, one cannot conditionally set
328the current vnet context.
329The following will
330.Em not
331work:
332.Bd -literal -offset indent
333if (condition)
334	CURVNET_SET(vnet);
335.Ed
336.Pp
337nor would this work:
338.Bd -literal -offset indent
339if (condition) {
340	CURVNET_SET(vnet);
341}
342CURVNET_RESTORE();
343.Ed
344.Pp
345.\"
346Sometimes one needs to loop over all virtual instances, for example to update
347virtual from global state, to run a function from a
348.Xr callout 9
349for each instance, etc.
350For those cases the
351.Fn VNET_ITERATOR_DECL
352and
353.Fn VNET_FOREACH
354macros are provided.
355The former macro defines the variable that iterates over the loop,
356and the latter loops over all of the virtual network stack instances.
357See
358.Sx "Locking"
359for how to safely traverse the list of all virtual instances.
360.Pp
361.\"
362The
363.Fn IS_DEFAULT_VNET
364macro provides a safe way to check whether the currently active instance is the
365unrestricted default network stack of the base system
366.Pq Vt vnet0 .
367.Pp
368.\"
369The
370.Fn VNET_ASSERT
371macro provides a way to conditionally add assertions that are only active with
372.Cd "options VIMAGE"
373compiled in and either
374.Cd "options VNET_DEBUG"
375or
376.Cd "options INVARIANTS"
377enabled as well.
378It uses the same semantics as
379.Xr KASSERT 9 .
380.\" ------------------------------------------------------------
381.Ss "Locking"
382.\"
383For public access to the list of virtual network stack instances,
384e.g., by the
385.Fn VNET_FOREACH
386macro, read locks are provided.
387Macros are used to abstract from the actual type of the locks.
388If a caller may sleep while traversing the list, it must use the
389.Fn VNET_LIST_RLOCK
390and
391.Fn VNET_LIST_RUNLOCK
392macros.
393Otherwise, the caller can use
394.Fn VNET_LIST_RLOCK_NOSLEEP
395and
396.Fn VNET_LIST_RUNLOCK_NOSLEEP .
397.\" ------------------------------------------------------------
398.Ss "Startup and Teardown Functions"
399.\"
400To start or tear down a virtual network stack instance the internal
401functions
402.Fn vnet_alloc
403and
404.Fn vnet_destroy
405are provided and called from the jail framework.
406They run the publicly provided methods to handle network stack
407startup and teardown.
408.Pp
409For public control, the system startup interface has been enhanced
410to not only handle a system boot but to also handle a virtual
411network stack startup and teardown.
412To the base system the
413.Fn VNET_SYSINIT
414and
415.Fn VNET_SYSUNINIT
416macros look exactly as if there were no virtual network stack.
417In fact, if
418.Cd "options VIMAGE"
419is not compiled in they are compiled to the standard
420.Fn SYSINIT
421macros.
422In addition to that they are run for each virtual network stack
423when starting or, in reverse order, when shutting down.
424.\" ------------------------------------------------------------
425.Ss "Eventhandlers"
426.\"
427Eventhandlers can be handled in two ways:
428.Pp
429.Bl -enum -offset indent -compact
430.It
431save the
432.Em tags
433returned in each virtual instance and properly free the eventhandlers
434on teardown using those, or
435.It
436use one eventhandler that will iterate over all virtual network
437stack instances.
438.El
439.Pp
440For the first case one can just use the normal
441.Xr EVENTHANDLER 9
442functions, while for the second case the
443.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER
444and
445.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG
446macros are provided.
447These differ in that
448.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG
449takes an extra first argument that will carry the
450.Fa "tag"
451upon return.
452Eventhandlers registered with either of these will not run
453.Fa func
454directly but
455.Fa func
456will be called from an internal iterator function for each vnet.
457Both macros can only be used for eventhandlers that do not take
458additional arguments, as the variadic arguments from an
459.Xr EVENTHANDLER_INVOKE 9
460call will be ignored.
461.\" ------------------------------------------------------------
462.Ss "Sysctl Handling"
463.\"
464A
465.Xr sysctl 9
466can be virtualized by using one of the
467.Fn SYSCTL_VNET_*
468macros.
469.Pp
470They take the same arguments as the standard
471.Xr sysctl 9
472functions, with the only difference being that the
473.Fa ptr
474argument has to be passed as
475.Ql &VNET_NAME(foo)
476instead of
477.Ql &foo
478so that the variable can be selected from the correct memory
479region of the virtual network stack instance of the caller.
480.Pp
481For the very rare case where a sysctl handler function needs to
482handle
483.Fa arg1
484itself, the
485.Fn VNET_SYSCTL_ARG req arg1
486macro is provided, which will translate the
487.Fa arg1
488argument to the correct memory address in the virtual network stack
489context of the caller.
490.\" ------------------------------------------------------------
491.Sh SEE ALSO
492.Xr jail 2 ,
493.Xr kvm 3 ,
494.Xr EVENTHANDLER 9 ,
495.\" .Xr pcpu 9 ,
496.Xr KASSERT 9 ,
497.Xr sysctl 9
498.\" .Xr SYSINIT 9
499.Pp
500Marko Zec, Implementing a Clonable Network Stack in the FreeBSD Kernel,
501USENIX ATC'03, June 2003, Boston
502.Sh HISTORY
503The virtual network stack implementation first appeared in
504.Fx 8.0 .
505.Sh AUTHORS
506.An -nosplit
507The
508.Nm
509framework has been designed and implemented at the University of Zagreb by
510.An Marko Zec ,
511and later extended and refined by
512.An Bjoern A. Zeeb
513and
514.An Robert Watson ,
515under contract to the FreeBSD Foundation.
516.Pp
517This manual page was written by
518.An Bjoern A. Zeeb, CK Software GmbH,
519under sponsorship from the FreeBSD Foundation.
520