xref: /freebsd/share/man/man9/VNET.9 (revision 22886d3af0657dae3b547b21e612642bb47e0321)
1.\"-
2.\" Copyright (c) 2010 The FreeBSD Foundation
3.\"
4.\" This documentation was written by CK Software GmbH under sponsorship from
5.\" the FreeBSD Foundation.
6.\"
7.\" Redistribution and use in source and binary forms, with or without
8.\" modification, are permitted provided that the following conditions
9.\" are met:
10.\" 1. Redistributions of source code must retain the above copyright
11.\"    notice, this list of conditions and the following disclaimer.
12.\" 2. Redistributions in binary form must reproduce the above copyright
13.\"    notice, this list of conditions and the following disclaimer in the
14.\"    documentation and/or other materials provided with the distribution.
15.\"
16.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26.\" SUCH DAMAGE.
27.\"
28.Dd September 19, 2025
29.Dt VNET 9
30.Os
31.Sh NAME
32.Nm VNET
33.Nd "network subsystem virtualization infrastructure"
34.Sh SYNOPSIS
35.Cd "options VIMAGE"
36.Cd "options VNET_DEBUG"
37.Pp
38.In net/vnet.h
39.\"------------------------------------------------------------
40.Ss "Constants and Global Variables"
41.\"
42.Dv VNET_SETNAME
43.\"	"set_vnet"
44.Dv VNET_SYMPREFIX
45.\"	"vnet_entry_"
46.Vt extern struct vnet *vnet0;
47.\"------------------------------------------------------------
48.Ss "Variable Declaration"
49.Fo VNET
50.Fa "name"
51.Fc
52.\"
53.Fo VNET_NAME
54.Fa "name"
55.Fc
56.\"
57.Fo VNET_DECLARE
58.Fa "type" "name"
59.Fc
60.\"
61.Fo VNET_DEFINE
62.Fa "type" "name"
63.Fc
64.\"
65.Fo VNET_DEFINE_STATIC
66.Fa "type" "name"
67.Fc
68.\"
69.Bd -literal
70#define	V_name	VNET(name)
71.Ed
72.\" ------------------------------------------------------------
73.Ss "Virtual Instance Selection"
74.\"
75.Fo CRED_TO_VNET
76.Fa "struct ucred *"
77.Fc
78.\"
79.Fo TD_TO_VNET
80.Fa "struct thread *"
81.Fc
82.\"
83.Fo P_TO_VNET
84.Fa "struct proc *"
85.Fc
86.\"
87.Fo IS_DEFAULT_VNET
88.Fa "struct vnet *"
89.Fc
90.\"
91.Fo VNET_ASSERT
92.Fa exp msg
93.Fc
94.\"
95.Fo CURVNET_SET
96.Fa "struct vnet *"
97.Fc
98.\"
99.Fo CURVNET_SET_QUIET
100.Fa "struct vnet *"
101.Fc
102.\"
103.Fn CURVNET_RESTORE
104.\"
105.Fo VNET_ITERATOR_DECL
106.Fa "struct vnet *"
107.Fc
108.\"
109.Fo VNET_FOREACH
110.Fa "struct vnet *"
111.Fc
112.\" ------------------------------------------------------------
113.Ss "Locking"
114.\"
115.Fn VNET_LIST_RLOCK
116.Fn VNET_LIST_RUNLOCK
117.Fn VNET_LIST_RLOCK_NOSLEEP
118.Fn VNET_LIST_RUNLOCK_NOSLEEP
119.\" ------------------------------------------------------------
120.Ss "Startup and Teardown Functions"
121.\"
122.Ft "struct vnet *"
123.Fo vnet_alloc
124.Fa void
125.Fc
126.\"
127.Ft void
128.Fo vnet_destroy
129.Fa "struct vnet *"
130.Fc
131.\"
132.Fo VNET_SYSINIT
133.Fa ident
134.Fa "enum sysinit_sub_id subsystem"
135.Fa "enum sysinit_elem_order order"
136.Fa "sysinit_cfunc_t func"
137.Fa "const void *arg"
138.Fc
139.\"
140.Fo VNET_SYSUNINIT
141.Fa ident
142.Fa "enum sysinit_sub_id subsystem"
143.Fa "enum sysinit_elem_order order"
144.Fa "sysinit_cfunc_t func"
145.Fa "const void *arg"
146.Fc
147.\" ------------------------------------------------------------
148.Ss "Eventhandlers"
149.\"
150.Fo VNET_GLOBAL_EVENTHANDLER_REGISTER
151.Fa "const char *name"
152.Fa "void *func"
153.Fa "void *arg"
154.Fa "int priority"
155.Fc
156.\"
157.Fo VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG
158.Fa "eventhandler_tag tag"
159.Fa "const char *name"
160.Fa "void *func"
161.Fa "void *arg"
162.Fa "int priority"
163.Fc
164.\" ------------------------------------------------------------
165.Sh DESCRIPTION
166.Nm
167is the name of a technique to virtualize the network stack.
168The basic idea is to change global resources, most notably variables, into
169per-network-stack resources and have functions, sysctls, eventhandlers,
170etc. access and handle them in the context of the correct instance.
171Each (virtual) network stack is attached to a
172.Em prison ,
173with
174.Vt vnet0
175being the unrestricted default network stack of the base system.
176.Pp
177The global defines for
178.Dv VNET_SETNAME
179and
180.Dv VNET_SYMPREFIX
181are shared with
182.Xr kvm 3
183to access internals for debugging reasons.
184.\" ------------------------------------------------------------
185.Ss "Variable Declaration"
186.\"
187Variables are virtualized by using the
188.Fn VNET_DEFINE
189macro rather than writing them out as
190.Em type name .
191One can still use static initialization, e.g.,
192.Pp
193.Dl Li VNET_DEFINE(int, foo) = 1;
194.Pp
195Variables declared with the static keyword can use the
196.Fn VNET_DEFINE_STATIC
197macro, e.g.,
198.Pp
199.Dl Li VNET_DEFINE_STATIC(SLIST_HEAD(, bar), bars);
200.Pp
201Static initialization is not possible when the virtualized variable
202would need to be referenced, e.g., with
203.Dq TAILQ_HEAD_INITIALIZER() .
204In that case a
205.Fn VNET_SYSINIT
206based initialization function must be used.
207.Pp
208External variables have to be declared using the
209.Fn VNET_DECLARE
210macro.
211In either case the convention is to define another macro,
212that is then used throughout the implementation to access that variable.
213The variable name is usually prefixed by
214.Em V_
215to express that it is virtualized.
216The
217.Fn VNET
218macro will then translate accesses to that variable to the copy of the
219currently selected instance (see the
220.Sx "Virtual Instance Selection"
221section):
222.Pp
223.Dl Li #define	V_name	VNET(name)
224.Pp
225.Em NOTE:
226Do not confuse this with the convention used by
227.Xr VFS 9 .
228.Pp
229The
230.Fn VNET_NAME
231macro returns the offset within the memory region of the virtual network
232stack instance.
233.\" ------------------------------------------------------------
234.Ss "Virtual Instance Selection"
235.\"
236There are three different places where the current virtual
237network stack pointer is stored and can be taken from:
238.Bl -enum -offset indent
239.It
240a
241.Em prison :
242.Dl "(struct prison *)->pr_vnet"
243.Pp
244For convenience the following macros are provided:
245.Bd -literal -compact -offset indent
246.Fn CRED_TO_VNET "struct ucred *"
247.Fn TD_TO_VNET "struct thread *"
248.Fn P_TO_VNET "struct proc *"
249.Ed
250.It
251a
252.Em socket :
253.Dl "(struct socket *)->so_vnet"
254.It
255an
256.Em interface :
257.Dl "(struct ifnet *)->if_vnet"
258.El
259.Pp
260.\"
261In addition, the currently active instance is cached in
262.Dq "curthread->td_vnet"
263which is usually only accessed through the
264.Dv curvnet
265macro.
266.Pp
267.\"
268To set the correct context of the current virtual network instance, use the
269.Fn CURVNET_SET
270or
271.Fn CURVNET_SET_QUIET
272macros.
273The
274.Fn CURVNET_SET_QUIET
275version will not record vnet recursions in case the kernel was compiled
276with
277.Cd "options VNET_DEBUG"
278and should thus only be used in well-known cases, where recursion is
279unavoidable.
280Both macros will save the previous state on the stack and it must be restored
281with the
282.Fn CURVNET_RESTORE
283macro.
284.Pp
285.Em NOTE:
286As the previous state is saved on the stack, you cannot have multiple
287.Fn CURVNET_SET
288calls in the same block.
289.Pp
290.Em NOTE:
291As the previous state is saved on the stack, a
292.Fn CURVNET_RESTORE
293call has to be in the same block as the
294.Fn CURVNET_SET
295call or in a subblock with the same idea of the saved instances as the
296outer block.
297.Pp
298.Em NOTE:
299As each macro is a set of operations and, as previously explained, cannot
300be put into its own block when defined, one cannot conditionally set
301the current vnet context.
302The following will
303.Em not
304work:
305.Bd -literal -offset indent
306if (condition)
307	CURVNET_SET(vnet);
308.Ed
309.Pp
310nor would this work:
311.Bd -literal -offset indent
312if (condition) {
313	CURVNET_SET(vnet);
314}
315CURVNET_RESTORE();
316.Ed
317.Pp
318.\"
319Sometimes one needs to loop over all virtual instances, for example to update
320virtual from global state, to run a function from a
321.Xr callout 9
322for each instance, etc.
323For those cases the
324.Fn VNET_ITERATOR_DECL
325and
326.Fn VNET_FOREACH
327macros are provided.
328The former macro defines the variable that iterates over the loop,
329and the latter loops over all of the virtual network stack instances.
330See
331.Sx "Locking"
332for how to safely traverse the list of all virtual instances.
333.Pp
334.\"
335The
336.Fn IS_DEFAULT_VNET
337macro provides a safe way to check whether the currently active instance is the
338unrestricted default network stack of the base system
339.Pq Vt vnet0 .
340.Pp
341.\"
342The
343.Fn VNET_ASSERT
344macro provides a way to conditionally add assertions that are only active with
345.Cd "options VIMAGE"
346compiled in and either
347.Cd "options VNET_DEBUG"
348or
349.Cd "options INVARIANTS"
350enabled as well.
351It uses the same semantics as
352.Xr KASSERT 9 .
353.\" ------------------------------------------------------------
354.Ss "Locking"
355.\"
356For public access to the list of virtual network stack instances,
357e.g., by the
358.Fn VNET_FOREACH
359macro, read locks are provided.
360Macros are used to abstract from the actual type of the locks.
361If a caller may sleep while traversing the list, it must use the
362.Fn VNET_LIST_RLOCK
363and
364.Fn VNET_LIST_RUNLOCK
365macros.
366Otherwise, the caller can use
367.Fn VNET_LIST_RLOCK_NOSLEEP
368and
369.Fn VNET_LIST_RUNLOCK_NOSLEEP .
370.\" ------------------------------------------------------------
371.Ss "Startup and Teardown Functions"
372.\"
373To start or tear down a virtual network stack instance the internal
374functions
375.Fn vnet_alloc
376and
377.Fn vnet_destroy
378are provided and called from the jail framework.
379They run the publicly provided methods to handle network stack
380startup and teardown.
381.Pp
382For public control, the system startup interface has been enhanced
383to not only handle a system boot but to also handle a virtual
384network stack startup and teardown.
385To the base system the
386.Fn VNET_SYSINIT
387and
388.Fn VNET_SYSUNINIT
389macros look exactly as if there were no virtual network stack.
390In fact, if
391.Cd "options VIMAGE"
392is not compiled in, they are compiled to the standard
393.Fn SYSINIT
394macros.
395In addition to that they are run for each virtual network stack
396when starting or, in reverse order, when shutting down.
397.\" ------------------------------------------------------------
398.Ss "Eventhandlers"
399.\"
400Eventhandlers can be handled in two ways:
401.Pp
402.Bl -enum -offset indent -compact
403.It
404save the
405.Em tags
406returned in each virtual instance and properly free the eventhandlers
407on teardown using those, or
408.It
409use one eventhandler that will iterate over all virtual network
410stack instances.
411.El
412.Pp
413For the first case one can just use the normal
414.Xr EVENTHANDLER 9
415functions, while for the second case the
416.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER
417and
418.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG
419macros are provided.
420These differ in that
421.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG
422takes an extra first argument that will carry the
423.Fa "tag"
424upon return.
425Eventhandlers registered with either of these will not run
426.Fa func
427directly but
428.Fa func
429will be called from an internal iterator function for each vnet.
430Both macros can only be used for eventhandlers that do not take
431additional arguments, as the variadic arguments from an
432.Xr EVENTHANDLER_INVOKE 9
433call will be ignored.
434.\" ------------------------------------------------------------
435.Ss "Sysctl Handling"
436.\"
437A
438.Xr sysctl 9
439can be virtualized by adding the
440.Dv CTLFLAG_VNET
441control flag to the ctlflags bitmask of the macros.
442.\" ------------------------------------------------------------
443.Sh SEE ALSO
444.Xr jail 2 ,
445.Xr kvm 3 ,
446.Xr EVENTHANDLER 9 ,
447.\" .Xr pcpu 9 ,
448.Xr KASSERT 9 ,
449.Xr sysctl 9
450.\" .Xr SYSINIT 9
451.Pp
452Marko Zec, Implementing a Clonable Network Stack in the FreeBSD Kernel,
453USENIX ATC'03, June 2003, Boston
454.Sh HISTORY
455The virtual network stack implementation first appeared in
456.Fx 8.0 .
457.Sh AUTHORS
458.An -nosplit
459The
460.Nm
461framework was designed and implemented at the University of Zagreb by
462.An Marko Zec
463under sponsorship of the FreeBSD Foundation and NLnet Foundation,
464and later extended and refined by
465.An Bjoern A. Zeeb
466(also under FreeBSD Foundation sponsorship), and
467.An Robert Watson .
468.Pp
469This manual page was written by
470.An Bjoern A. Zeeb, CK Software GmbH,
471under sponsorship from the FreeBSD Foundation.
472