1.\"- 2.\" Copyright (c) 2010 The FreeBSD Foundation 3.\" All rights reserved. 4.\" 5.\" This documentation was written by CK Software GmbH under sponsorship from 6.\" the FreeBSD Foundation. 7.\" 8.\" Redistribution and use in source and binary forms, with or without 9.\" modification, are permitted provided that the following conditions 10.\" are met: 11.\" 1. Redistributions of source code must retain the above copyright 12.\" notice, this list of conditions and the following disclaimer. 13.\" 2. Redistributions in binary form must reproduce the above copyright 14.\" notice, this list of conditions and the following disclaimer in the 15.\" documentation and/or other materials provided with the distribution. 16.\" 17.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27.\" SUCH DAMAGE. 
28.\" 29.\" $FreeBSD$ 30.\" 31.Dd July 24, 2018 32.Dt VNET 9 33.Os 34.Sh NAME 35.Nm VNET 36.Nd "network subsystem virtualization infrastructure" 37.Sh SYNOPSIS 38.Cd "options VIMAGE" 39.Cd "options VNET_DEBUG" 40.Pp 41.In sys/vnet.h 42.\"------------------------------------------------------------ 43.Ss "Constants and Global Variables" 44.\" 45.Dv VNET_SETNAME 46.\" "set_vnet" 47.Dv VNET_SYMPREFIX 48.\" "vnet_entry_" 49.Vt extern struct vnet *vnet0; 50.\"------------------------------------------------------------ 51.Ss "Variable Declaration" 52.Fo VNET 53.Fa "name" 54.Fc 55.\" 56.Fo VNET_NAME 57.Fa "name" 58.Fc 59.\" 60.Fo VNET_DECLARE 61.Fa "type" "name" 62.Fc 63.\" 64.Fo VNET_DEFINE 65.Fa "type" "name" 66.Fc 67.\" 68.Fo VNET_DEFINE_STATIC 69.Fa "type" "name" 70.Fc 71.\" 72.Bd -literal 73#define V_name VNET(name) 74.Ed 75.\" ------------------------------------------------------------ 76.Ss "Virtual Instance Selection" 77.\" 78.Fo CRED_TO_VNET 79.Fa "struct ucred *" 80.Fc 81.\" 82.Fo TD_TO_VNET 83.Fa "struct thread *" 84.Fc 85.\" 86.Fo P_TO_VNET 87.Fa "struct proc *" 88.Fc 89.\" 90.Fo IS_DEFAULT_VNET 91.Fa "struct vnet *" 92.Fc 93.\" 94.Fo VNET_ASSERT 95.Fa exp msg 96.Fc 97.\" 98.Fo CURVNET_SET 99.Fa "struct vnet *" 100.Fc 101.\" 102.Fo CURVNET_SET_QUIET 103.Fa "struct vnet *" 104.Fc 105.\" 106.Fn CURVNET_RESTORE 107.\" 108.Fo VNET_ITERATOR_DECL 109.Fa "struct vnet *" 110.Fc 111.\" 112.Fo VNET_FOREACH 113.Fa "struct vnet *" 114.Fc 115.\" ------------------------------------------------------------ 116.Ss "Locking" 117.\" 118.Fn VNET_LIST_RLOCK 119.Fn VNET_LIST_RUNLOCK 120.Fn VNET_LIST_RLOCK_NOSLEEP 121.Fn VNET_LIST_RUNLOCK_NOSLEEP 122.\" ------------------------------------------------------------ 123.Ss "Startup and Teardown Functions" 124.\" 125.Ft "struct vnet *" 126.Fo vnet_alloc 127.Fa void 128.Fc 129.\" 130.Ft void 131.Fo vnet_destroy 132.Fa "struct vnet *" 133.Fc 134.\" 135.Fo VNET_SYSINIT 136.Fa ident 137.Fa "enum sysinit_sub_id subsystem" 138.Fa "enum 
sysinit_elem_order order" 139.Fa "sysinit_cfunc_t func" 140.Fa "const void *arg" 141.Fc 142.\" 143.Fo VNET_SYSUNINIT 144.Fa ident 145.Fa "enum sysinit_sub_id subsystem" 146.Fa "enum sysinit_elem_order order" 147.Fa "sysinit_cfunc_t func" 148.Fa "const void *arg" 149.Fc 150.\" ------------------------------------------------------------ 151.Ss "Eventhandlers" 152.\" 153.Fo VNET_GLOBAL_EVENTHANDLER_REGISTER 154.Fa "const char *name" 155.Fa "void *func" 156.Fa "void *arg" 157.Fa "int priority" 158.Fc 159.\" 160.Fo VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG 161.Fa "eventhandler_tag tag" 162.Fa "const char *name" 163.Fa "void *func" 164.Fa "void *arg" 165.Fa "int priority" 166.Fc 167.\" ------------------------------------------------------------ 168.Ss "Sysctl Handling" 169.Fo SYSCTL_VNET_INT 170.Fa parent nbr name access ptr val descr 171.Fc 172.Fo SYSCTL_VNET_PROC 173.Fa parent nbr name access ptr arg handler fmt descr 174.Fc 175.Fo SYSCTL_VNET_STRING 176.Fa parent nbr name access arg len descr 177.Fc 178.Fo SYSCTL_VNET_STRUCT 179.Fa parent nbr name access ptr type descr 180.Fc 181.Fo SYSCTL_VNET_UINT 182.Fa parent nbr name access ptr val descr 183.Fc 184.Fo VNET_SYSCTL_ARG 185.Fa req arg1 186.Fc 187.\" ------------------------------------------------------------ 188.Sh DESCRIPTION 189.Nm 190is the name of a technique to virtualize the network stack. 191The basic idea is to change global resources, most notably variables, into 192per network stack resources and have functions, sysctls, eventhandlers, 193etc. access and handle them in the context of the correct instance. 194Each (virtual) network stack is attached to a 195.Em prison , 196with 197.Vt vnet0 198being the unrestricted default network stack of the base system. 199.Pp 200The global defines for 201.Dv VNET_SETNAME 202and 203.Dv VNET_SYMPREFIX 204are shared with 205.Xr kvm 3 206to access internals for debugging reasons. 
207.\" ------------------------------------------------------------ 208.Ss "Variable Declaration" 209.\" 210Variables are virtualized by using the 211.Fn VNET_DEFINE 212macro rather than writing them out as 213.Em type name . 214One can still use static initialization, e.g., 215.Pp 216.Dl Li VNET_DEFINE(int, foo) = 1; 217.Pp 218Variables declared with the static keyword can use the 219.Fn VNET_DEFINE_STATIC 220macro, e.g., 221.Pp 222.Dl Li VNET_DEFINE_STATIC(SLIST_HEAD(, bar), bars); 223.Pp 224Static initialization is not possible when the virtualized variable 225would need to be referenced, e.g., with 226.Dq TAILQ_HEAD_INITIALIZER() . 227In that case a 228.Fn VNET_SYSINIT 229based initialization function must be used. 230.Pp 231External variables have to be declared using the 232.Fn VNET_DECLARE 233macro. 234In either case the convention is to define another macro, 235that is then used throughout the implementation to access that variable. 236The variable name is usually prefixed by 237.Em V_ 238to express that it is virtualized. 239The 240.Fn VNET 241macro will then translate accesses to that variable to the copy of the 242currently selected instance (see the 243.Sx "Virtual Instance Selection" 244section): 245.Pp 246.Dl Li #define V_name VNET(name) 247.Pp 248.Em NOTE: 249Do not confuse this with the convention used by 250.Xr VFS 9 . 251.Pp 252The 253.Fn VNET_NAME 254macro returns the offset within the memory region of the virtual network 255stack instance. 256It is usually only used with 257.Fn SYSCTL_VNET_* 258macros. 
259.\" ------------------------------------------------------------ 260.Ss "Virtual Instance Selection" 261.\" 262There are three different places where the current virtual 263network stack pointer is stored and can be taken from: 264.Bl -enum -offset indent 265.It 266a 267.Em prison : 268.Dl "(struct prison *)->pr_vnet" 269.Pp 270For convenience the following macros are provided: 271.Bd -literal -compact -offset indent 272.Fn CRED_TO_VNET "struct ucred *" 273.Fn TD_TO_VNET "struct thread *" 274.Fn P_TO_VNET "struct proc *" 275.Ed 276.It 277a 278.Em socket : 279.Dl "(struct socket *)->so_vnet" 280.It 281an 282.Em interface : 283.Dl "(struct ifnet *)->if_vnet" 284.El 285.Pp 286.\" 287In addition the currently active instance is cached in 288.Dq "curthread->td_vnet" 289which is usually only accessed through the 290.Dv curvnet 291macro. 292.Pp 293.\" 294To set the correct context of the current virtual network instance, use the 295.Fn CURVNET_SET 296or 297.Fn CURVNET_SET_QUIET 298macros. 299The 300.Fn CURVNET_SET_QUIET 301version will not record vnet recursions in case the kernel was compiled 302with 303.Cd "options VNET_DEBUG" 304and should thus only be used in well known cases, where recursion is 305unavoidable. 306Both macros will save the previous state on the stack and it must be restored 307with the 308.Fn CURVNET_RESTORE 309macro. 310.Pp 311.Em NOTE: 312As the previous state is saved on the stack, you cannot have multiple 313.Fn CURVNET_SET 314calls in the same block. 315.Pp 316.Em NOTE: 317As the previous state is saved on the stack, a 318.Fn CURVNET_RESTORE 319call has to be in the same block as the 320.Fn CURVNET_SET 321call or in a subblock with the same idea of the saved instances as the 322outer block. 323.Pp 324.Em NOTE: 325As each macro is a set of operations and, as previously explained, cannot 326be put into its own block when defined, one cannot conditionally set 327the current vnet context. 
328The following will 329.Em not 330work: 331.Bd -literal -offset indent 332if (condition) 333 CURVNET_SET(vnet); 334.Ed 335.Pp 336nor would this work: 337.Bd -literal -offset indent 338if (condition) { 339 CURVNET_SET(vnet); 340} 341CURVNET_RESTORE(); 342.Ed 343.Pp 344.\" 345Sometimes one needs to loop over all virtual instances, for example to update 346virtual from global state, to run a function from a 347.Xr callout 9 348for each instance, etc. 349For those cases the 350.Fn VNET_ITERATOR_DECL 351and 352.Fn VNET_FOREACH 353macros are provided. 354The former macro defines the variable that iterates over the loop, 355and the latter loops over all of the virtual network stack instances. 356See 357.Sx "Locking" 358for how to safely traverse the list of all virtual instances. 359.Pp 360.\" 361The 362.Fn IS_DEFAULT_VNET 363macro provides a safe way to check whether the currently active instance is the 364unrestricted default network stack of the base system 365.Pq Vt vnet0 . 366.Pp 367.\" 368The 369.Fn VNET_ASSERT 370macro provides a way to conditionally add assertions that are only active with 371.Cd "options VIMAGE" 372compiled in and either 373.Cd "options VNET_DEBUG" 374or 375.Cd "options INVARIANTS" 376enabled as well. 377It uses the same semantics as 378.Xr KASSERT 9 . 379.\" ------------------------------------------------------------ 380.Ss "Locking" 381.\" 382For public access to the list of virtual network stack instances 383e.g., by the 384.Fn VNET_FOREACH 385macro, read locks are provided. 386Macros are used to abstract from the actual type of the locks. 387If a caller may sleep while traversing the list, it must use the 388.Fn VNET_LIST_RLOCK 389and 390.Fn VNET_LIST_RUNLOCK 391macros. 392Otherwise, the caller can use 393.Fn VNET_LIST_RLOCK_NOSLEEP 394and 395.Fn VNET_LIST_RUNLOCK_NOSLEEP . 
396.\" ------------------------------------------------------------ 397.Ss "Startup and Teardown Functions" 398.\" 399To start or tear down a virtual network stack instance the internal 400functions 401.Fn vnet_alloc 402and 403.Fn vnet_destroy 404are provided and called from the jail framework. 405They run the publicly provided methods to handle network stack 406startup and teardown. 407.Pp 408For public control, the system startup interface has been enhanced 409to not only handle a system boot but to also handle a virtual 410network stack startup and teardown. 411To the base system the 412.Fn VNET_SYSINIT 413and 414.Fn VNET_SYSUNINIT 415macros look exactly as if there were no virtual network stack. 416In fact, if 417.Cd "options VIMAGE" 418is not compiled in they are compiled to the standard 419.Fn SYSINIT 420macros. 421In addition to that they are run for each virtual network stack 422when starting or, in reverse order, when shutting down. 423.\" ------------------------------------------------------------ 424.Ss "Eventhandlers" 425.\" 426Eventhandlers can be handled in two ways: 427.Pp 428.Bl -enum -offset indent -compact 429.It 430save the 431.Em tags 432returned in each virtual instance and properly free the eventhandlers 433on teardown using those, or 434.It 435use one eventhandler that will iterate over all virtual network 436stack instances. 437.El 438.Pp 439For the first case one can just use the normal 440.Xr EVENTHANDLER 9 441functions, while for the second case the 442.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER 443and 444.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG 445macros are provided. 446These differ in that 447.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG 448takes an extra first argument that will carry the 449.Fa "tag" 450upon return. 451Eventhandlers registered with either of these will not run 452.Fa func 453directly but 454.Fa func 455will be called from an internal iterator function for each vnet. 
456Both macros can only be used for eventhandlers that do not take 457additional arguments, as the variadic arguments from an 458.Xr EVENTHANDLER_INVOKE 9 459call will be ignored. 460.\" ------------------------------------------------------------ 461.Ss "Sysctl Handling" 462.\" 463A 464.Xr sysctl 9 465can be virtualized by using one of the 466.Fn SYSCTL_VNET_* 467macros. 468.Pp 469They take the same arguments as the standard 470.Xr sysctl 9 471functions, with the only difference being that the 472.Fa ptr 473argument has to be passed as 474.Ql &VNET_NAME(foo) 475instead of 476.Ql &foo 477so that the variable can be selected from the correct memory 478region of the virtual network stack instance of the caller. 479.Pp 480For the very rare case where a sysctl handler function would want to 481handle 482.Fa arg1 483itself, the 484.Fn VNET_SYSCTL_ARG req arg1 485macro is provided that will translate the 486.Fa arg1 487argument to the correct memory address in the virtual network stack 488context of the caller. 489.\" ------------------------------------------------------------ 490.Sh SEE ALSO 491.Xr jail 2 , 492.Xr kvm 3 , 493.Xr EVENTHANDLER 9 , 494.\" .Xr pcpu 9 , 495.Xr KASSERT 9 , 496.Xr sysctl 9 497.\" .Xr SYSINIT 9 498.Pp 499Marko Zec, Implementing a Clonable Network Stack in the FreeBSD Kernel, 500USENIX ATC'03, June 2003, Boston 501.Sh HISTORY 502The virtual network stack implementation first appeared in 503.Fx 8.0 . 504.Sh AUTHORS 505.An -nosplit 506The 507.Nm 508framework was designed and implemented at the University of Zagreb by 509.An Marko Zec 510under sponsorship of the FreeBSD Foundation and NLnet Foundation, 511and later extended and refined by 512.An Bjoern A. Zeeb 513(also under FreeBSD Foundation sponsorship), and 514.An Robert Watson . 515.Pp 516This manual page was written by 517.An Bjoern A. Zeeb, CK Software GmbH, 518under sponsorship from the FreeBSD Foundation. 519