1.\"- 2.\" Copyright (c) 2010 The FreeBSD Foundation 3.\" 4.\" This documentation was written by CK Software GmbH under sponsorship from 5.\" the FreeBSD Foundation. 6.\" 7.\" Redistribution and use in source and binary forms, with or without 8.\" modification, are permitted provided that the following conditions 9.\" are met: 10.\" 1. Redistributions of source code must retain the above copyright 11.\" notice, this list of conditions and the following disclaimer. 12.\" 2. Redistributions in binary form must reproduce the above copyright 13.\" notice, this list of conditions and the following disclaimer in the 14.\" documentation and/or other materials provided with the distribution. 15.\" 16.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26.\" SUCH DAMAGE. 
27.\" 28.Dd December 10, 2020 29.Dt VNET 9 30.Os 31.Sh NAME 32.Nm VNET 33.Nd "network subsystem virtualization infrastructure" 34.Sh SYNOPSIS 35.Cd "options VIMAGE" 36.Cd "options VNET_DEBUG" 37.Pp 38.In net/vnet.h 39.\"------------------------------------------------------------ 40.Ss "Constants and Global Variables" 41.\" 42.Dv VNET_SETNAME 43.\" "set_vnet" 44.Dv VNET_SYMPREFIX 45.\" "vnet_entry_" 46.Vt extern struct vnet *vnet0; 47.\"------------------------------------------------------------ 48.Ss "Variable Declaration" 49.Fo VNET 50.Fa "name" 51.Fc 52.\" 53.Fo VNET_NAME 54.Fa "name" 55.Fc 56.\" 57.Fo VNET_DECLARE 58.Fa "type" "name" 59.Fc 60.\" 61.Fo VNET_DEFINE 62.Fa "type" "name" 63.Fc 64.\" 65.Fo VNET_DEFINE_STATIC 66.Fa "type" "name" 67.Fc 68.\" 69.Bd -literal 70#define V_name VNET(name) 71.Ed 72.\" ------------------------------------------------------------ 73.Ss "Virtual Instance Selection" 74.\" 75.Fo CRED_TO_VNET 76.Fa "struct ucred *" 77.Fc 78.\" 79.Fo TD_TO_VNET 80.Fa "struct thread *" 81.Fc 82.\" 83.Fo P_TO_VNET 84.Fa "struct proc *" 85.Fc 86.\" 87.Fo IS_DEFAULT_VNET 88.Fa "struct vnet *" 89.Fc 90.\" 91.Fo VNET_ASSERT 92.Fa exp msg 93.Fc 94.\" 95.Fo CURVNET_SET 96.Fa "struct vnet *" 97.Fc 98.\" 99.Fo CURVNET_SET_QUIET 100.Fa "struct vnet *" 101.Fc 102.\" 103.Fn CURVNET_RESTORE 104.\" 105.Fo VNET_ITERATOR_DECL 106.Fa "struct vnet *" 107.Fc 108.\" 109.Fo VNET_FOREACH 110.Fa "struct vnet *" 111.Fc 112.\" ------------------------------------------------------------ 113.Ss "Locking" 114.\" 115.Fn VNET_LIST_RLOCK 116.Fn VNET_LIST_RUNLOCK 117.Fn VNET_LIST_RLOCK_NOSLEEP 118.Fn VNET_LIST_RUNLOCK_NOSLEEP 119.\" ------------------------------------------------------------ 120.Ss "Startup and Teardown Functions" 121.\" 122.Ft "struct vnet *" 123.Fo vnet_alloc 124.Fa void 125.Fc 126.\" 127.Ft void 128.Fo vnet_destroy 129.Fa "struct vnet *" 130.Fc 131.\" 132.Fo VNET_SYSINIT 133.Fa ident 134.Fa "enum sysinit_sub_id subsystem" 135.Fa "enum sysinit_elem_order 
order" 136.Fa "sysinit_cfunc_t func" 137.Fa "const void *arg" 138.Fc 139.\" 140.Fo VNET_SYSUNINIT 141.Fa ident 142.Fa "enum sysinit_sub_id subsystem" 143.Fa "enum sysinit_elem_order order" 144.Fa "sysinit_cfunc_t func" 145.Fa "const void *arg" 146.Fc 147.\" ------------------------------------------------------------ 148.Ss "Eventhandlers" 149.\" 150.Fo VNET_GLOBAL_EVENTHANDLER_REGISTER 151.Fa "const char *name" 152.Fa "void *func" 153.Fa "void *arg" 154.Fa "int priority" 155.Fc 156.\" 157.Fo VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG 158.Fa "eventhandler_tag tag" 159.Fa "const char *name" 160.Fa "void *func" 161.Fa "void *arg" 162.Fa "int priority" 163.Fc 164.\" ------------------------------------------------------------ 165.Ss "Sysctl Handling" 166.Fo SYSCTL_VNET_INT 167.Fa parent nbr name access ptr val descr 168.Fc 169.Fo SYSCTL_VNET_PROC 170.Fa parent nbr name access ptr arg handler fmt descr 171.Fc 172.Fo SYSCTL_VNET_STRING 173.Fa parent nbr name access arg len descr 174.Fc 175.Fo SYSCTL_VNET_STRUCT 176.Fa parent nbr name access ptr type descr 177.Fc 178.Fo SYSCTL_VNET_UINT 179.Fa parent nbr name access ptr val descr 180.Fc 181.Fo VNET_SYSCTL_ARG 182.Fa req arg1 183.Fc 184.\" ------------------------------------------------------------ 185.Sh DESCRIPTION 186.Nm 187is the name of a technique to virtualize the network stack. 188The basic idea is to change global resources, most notably variables, into 189per-network-stack resources and have functions, sysctls, eventhandlers, 190etc. access and handle them in the context of the correct instance. 191Each (virtual) network stack is attached to a 192.Em prison , 193with 194.Vt vnet0 195being the unrestricted default network stack of the base system. 196.Pp 197The global defines for 198.Dv VNET_SETNAME 199and 200.Dv VNET_SYMPREFIX 201are shared with 202.Xr kvm 3 203to access internals for debugging purposes. 
204.\" ------------------------------------------------------------ 205.Ss "Variable Declaration" 206.\" 207Variables are virtualized by using the 208.Fn VNET_DEFINE 209macro rather than writing them out as 210.Em type name . 211One can still use static initialization, e.g., 212.Pp 213.Dl Li VNET_DEFINE(int, foo) = 1; 214.Pp 215Variables declared with the static keyword can use the 216.Fn VNET_DEFINE_STATIC 217macro, e.g., 218.Pp 219.Dl Li VNET_DEFINE_STATIC(SLIST_HEAD(, bar), bars); 220.Pp 221Static initialization is not possible when the virtualized variable 222would need to be referenced, e.g., with 223.Dq TAILQ_HEAD_INITIALIZER() . 224In that case a 225.Fn VNET_SYSINIT 226based initialization function must be used. 227.Pp 228External variables have to be declared using the 229.Fn VNET_DECLARE 230macro. 231In either case the convention is to define another macro, 232that is then used throughout the implementation to access that variable. 233The variable name is usually prefixed by 234.Em V_ 235to express that it is virtualized. 236The 237.Fn VNET 238macro will then translate accesses to that variable to the copy of the 239currently selected instance (see the 240.Sx "Virtual Instance Selection" 241section): 242.Pp 243.Dl Li #define V_name VNET(name) 244.Pp 245.Em NOTE: 246Do not confuse this with the convention used by 247.Xr VFS 9 . 248.Pp 249The 250.Fn VNET_NAME 251macro returns the offset within the memory region of the virtual network 252stack instance. 253It is usually only used with 254.Fn SYSCTL_VNET_* 255macros. 
256.\" ------------------------------------------------------------ 257.Ss "Virtual Instance Selection" 258.\" 259There are three different places where the current virtual 260network stack pointer is stored and can be taken from: 261.Bl -enum -offset indent 262.It 263a 264.Em prison : 265.Dl "(struct prison *)->pr_vnet" 266.Pp 267For convenience the following macros are provided: 268.Bd -literal -compact -offset indent 269.Fn CRED_TO_VNET "struct ucred *" 270.Fn TD_TO_VNET "struct thread *" 271.Fn P_TO_VNET "struct proc *" 272.Ed 273.It 274a 275.Em socket : 276.Dl "(struct socket *)->so_vnet" 277.It 278an 279.Em interface : 280.Dl "(struct ifnet *)->if_vnet" 281.El 282.Pp 283.\" 284In addition the currently active instance is cached in 285.Dq "curthread->td_vnet" 286which is usually only accessed through the 287.Dv curvnet 288macro. 289.Pp 290.\" 291To set the correct context of the current virtual network instance, use the 292.Fn CURVNET_SET 293or 294.Fn CURVNET_SET_QUIET 295macros. 296The 297.Fn CURVNET_SET_QUIET 298version will not record vnet recursions in case the kernel was compiled 299with 300.Cd "options VNET_DEBUG" 301and should thus only be used in well known cases, where recursion is 302unavoidable. 303Both macros will save the previous state on the stack and it must be restored 304with the 305.Fn CURVNET_RESTORE 306macro. 307.Pp 308.Em NOTE: 309As the previous state is saved on the stack, you cannot have multiple 310.Fn CURVNET_SET 311calls in the same block. 312.Pp 313.Em NOTE: 314As the previous state is saved on the stack, a 315.Fn CURVNET_RESTORE 316call has to be in the same block as the 317.Fn CURVNET_SET 318call or in a subblock with the same idea of the saved instances as the 319outer block. 320.Pp 321.Em NOTE: 322As each macro is a set of operations and, as previously explained, cannot 323be put into its own block when defined, one cannot conditionally set 324the current vnet context. 
325The following will 326.Em not 327work: 328.Bd -literal -offset indent 329if (condition) 330 CURVNET_SET(vnet); 331.Ed 332.Pp 333nor would this work: 334.Bd -literal -offset indent 335if (condition) { 336 CURVNET_SET(vnet); 337} 338CURVNET_RESTORE(); 339.Ed 340.Pp 341.\" 342Sometimes one needs to loop over all virtual instances, for example to update 343virtual from global state, to run a function from a 344.Xr callout 9 345for each instance, etc. 346For those cases the 347.Fn VNET_ITERATOR_DECL 348and 349.Fn VNET_FOREACH 350macros are provided. 351The former macro defines the variable that iterates over the loop, 352and the latter loops over all of the virtual network stack instances. 353See 354.Sx "Locking" 355for how to safely traverse the list of all virtual instances. 356.Pp 357.\" 358The 359.Fn IS_DEFAULT_VNET 360macro provides a safe way to check whether the currently active instance is the 361unrestricted default network stack of the base system 362.Pq Vt vnet0 . 363.Pp 364.\" 365The 366.Fn VNET_ASSERT 367macro provides a way to conditionally add assertions that are only active with 368.Cd "options VIMAGE" 369compiled in and either 370.Cd "options VNET_DEBUG" 371or 372.Cd "options INVARIANTS" 373enabled as well. 374It uses the same semantics as 375.Xr KASSERT 9 . 376.\" ------------------------------------------------------------ 377.Ss "Locking" 378.\" 379For public access to the list of virtual network stack instances, 380e.g., by the 381.Fn VNET_FOREACH 382macro, read locks are provided. 383Macros are used to abstract from the actual type of the locks. 384If a caller may sleep while traversing the list, it must use the 385.Fn VNET_LIST_RLOCK 386and 387.Fn VNET_LIST_RUNLOCK 388macros. 389Otherwise, the caller can use 390.Fn VNET_LIST_RLOCK_NOSLEEP 391and 392.Fn VNET_LIST_RUNLOCK_NOSLEEP . 
393.\" ------------------------------------------------------------ 394.Ss "Startup and Teardown Functions" 395.\" 396To start or tear down a virtual network stack instance the internal 397functions 398.Fn vnet_alloc 399and 400.Fn vnet_destroy 401are provided and called from the jail framework. 402They run the publicly provided methods to handle network stack 403startup and teardown. 404.Pp 405For public control, the system startup interface has been enhanced 406to not only handle a system boot but to also handle a virtual 407network stack startup and teardown. 408To the base system the 409.Fn VNET_SYSINIT 410and 411.Fn VNET_SYSUNINIT 412macros look exactly as if there were no virtual network stack. 413In fact, if 414.Cd "options VIMAGE" 415is not compiled in they are compiled to the standard 416.Fn SYSINIT 417macros. 418In addition to that they are run for each virtual network stack 419when starting or, in reverse order, when shutting down. 420.\" ------------------------------------------------------------ 421.Ss "Eventhandlers" 422.\" 423Eventhandlers can be handled in two ways: 424.Pp 425.Bl -enum -offset indent -compact 426.It 427save the 428.Em tags 429returned in each virtual instance and properly free the eventhandlers 430on teardown using those, or 431.It 432use one eventhandler that will iterate over all virtual network 433stack instances. 434.El 435.Pp 436For the first case one can just use the normal 437.Xr EVENTHANDLER 9 438functions, while for the second case the 439.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER 440and 441.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG 442macros are provided. 443These differ in that 444.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG 445takes an extra first argument that will carry the 446.Fa "tag" 447upon return. 448Eventhandlers registered with either of these will not run 449.Fa func 450directly but 451.Fa func 452will be called from an internal iterator function for each vnet. 
453Both macros can only be used for eventhandlers that do not take 454additional arguments, as the variadic arguments from an 455.Xr EVENTHANDLER_INVOKE 9 456call will be ignored. 457.\" ------------------------------------------------------------ 458.Ss "Sysctl Handling" 459.\" 460A 461.Xr sysctl 9 462can be virtualized by using one of the 463.Fn SYSCTL_VNET_* 464macros. 465.Pp 466They take the same arguments as the standard 467.Xr sysctl 9 468functions, with the only difference being that the 469.Fa ptr 470argument has to be passed as 471.Ql &VNET_NAME(foo) 472instead of 473.Ql &foo 474so that the variable can be selected from the correct memory 475region of the virtual network stack instance of the caller. 476.Pp 477For the very rare case a sysctl handler function would want to 478handle 479.Fa arg1 480itself, the 481.Fn VNET_SYSCTL_ARG req arg1 482macro is provided that will translate the 483.Fa arg1 484argument to the correct memory address in the virtual network stack 485context of the caller. 486.\" ------------------------------------------------------------ 487.Sh SEE ALSO 488.Xr jail 2 , 489.Xr kvm 3 , 490.Xr EVENTHANDLER 9 , 491.\" .Xr pcpu 9 , 492.Xr KASSERT 9 , 493.Xr sysctl 9 494.\" .Xr SYSINIT 9 495.Pp 496Marko Zec, Implementing a Clonable Network Stack in the FreeBSD Kernel, 497USENIX ATC'03, June 2003, Boston 498.Sh HISTORY 499The virtual network stack implementation first appeared in 500.Fx 8.0 . 501.Sh AUTHORS 502.An -nosplit 503The 504.Nm 505framework was designed and implemented at the University of Zagreb by 506.An Marko Zec 507under sponsorship of the FreeBSD Foundation and NLnet Foundation, 508and later extended and refined by 509.An Bjoern A. Zeeb 510(also under FreeBSD Foundation sponsorship), and 511.An Robert Watson . 512.Pp 513This manual page was written by 514.An Bjoern A. Zeeb, CK Software GmbH, 515under sponsorship from the FreeBSD Foundation. 516