1.\"- 2.\" Copyright (c) 2010 The FreeBSD Foundation 3.\" All rights reserved. 4.\" 5.\" This documentation was written by CK Software GmbH under sponsorship from 6.\" the FreeBSD Foundation. 7.\" 8.\" Redistribution and use in source and binary forms, with or without 9.\" modification, are permitted provided that the following conditions 10.\" are met: 11.\" 1. Redistributions of source code must retain the above copyright 12.\" notice, this list of conditions and the following disclaimer. 13.\" 2. Redistributions in binary form must reproduce the above copyright 14.\" notice, this list of conditions and the following disclaimer in the 15.\" documentation and/or other materials provided with the distribution. 16.\" 17.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27.\" SUCH DAMAGE. 
28.\" 29.\" $FreeBSD$ 30.\" 31.Dd May 16, 2018 32.Dt VNET 9 33.Os 34.Sh NAME 35.Nm VNET 36.Nd "network subsystem virtualization infrastructure" 37.Sh SYNOPSIS 38.Cd "options VIMAGE" 39.Cd "options VNET_DEBUG" 40.Pp 41.In sys/vnet.h 42.Pp 43.\"------------------------------------------------------------ 44.Ss "Constants and Global Variables" 45.\" 46.Dv VNET_SETNAME 47.\" "set_vnet" 48.Dv VNET_SYMPREFIX 49.\" "vnet_entry_" 50.Vt extern struct vnet *vnet0; 51.\"------------------------------------------------------------ 52.Ss "Variable Declaration" 53.Fo VNET 54.Fa "name" 55.Fc 56.\" 57.Fo VNET_NAME 58.Fa "name" 59.Fc 60.\" 61.Fo VNET_DECLARE 62.Fa "type" "name" 63.Fc 64.\" 65.Fo VNET_DEFINE 66.Fa "type" "name" 67.Fc 68.\" 69.Bd -literal 70#define V_name VNET(name) 71.Ed 72.\" ------------------------------------------------------------ 73.Ss "Virtual Instance Selection" 74.\" 75.Fo CRED_TO_VNET 76.Fa "struct ucred *" 77.Fc 78.\" 79.Fo TD_TO_VNET 80.Fa "struct thread *" 81.Fc 82.\" 83.Fo P_TO_VNET 84.Fa "struct proc *" 85.Fc 86.\" 87.Fo IS_DEFAULT_VNET 88.Fa "struct vnet *" 89.Fc 90.\" 91.Fo VNET_ASSERT 92.Fa exp msg 93.Fc 94.\" 95.Fo CURVNET_SET 96.Fa "struct vnet *" 97.Fc 98.\" 99.Fo CURVNET_SET_QUIET 100.Fa "struct vnet *" 101.Fc 102.\" 103.Fn CURVNET_RESTORE 104.\" 105.Fo VNET_ITERATOR_DECL 106.Fa "struct vnet *" 107.Fc 108.\" 109.Fo VNET_FOREACH 110.Fa "struct vnet *" 111.Fc 112.\" ------------------------------------------------------------ 113.Ss "Locking" 114.\" 115.Fn VNET_LIST_RLOCK 116.Fn VNET_LIST_RUNLOCK 117.Fn VNET_LIST_RLOCK_NOSLEEP 118.Fn VNET_LIST_RUNLOCK_NOSLEEP 119.\" ------------------------------------------------------------ 120.Ss "Startup and Teardown Functions" 121.\" 122.Ft "struct vnet *" 123.Fo vnet_alloc 124.Fa void 125.Fc 126.\" 127.Ft void 128.Fo vnet_destroy 129.Fa "struct vnet *" 130.Fc 131.\" 132.Fo VNET_SYSINIT 133.Fa ident 134.Fa "enum sysinit_sub_id subsystem" 135.Fa "enum sysinit_elem_order order" 136.Fa "sysinit_cfunc_t func" 
137.Fa "const void *arg" 138.Fc 139.\" 140.Fo VNET_SYSUNINIT 141.Fa ident 142.Fa "enum sysinit_sub_id subsystem" 143.Fa "enum sysinit_elem_order order" 144.Fa "sysinit_cfunc_t func" 145.Fa "const void *arg" 146.Fc 147.\" ------------------------------------------------------------ 148.Ss "Eventhandlers" 149.\" 150.Fo VNET_GLOBAL_EVENTHANDLER_REGISTER 151.Fa "const char *name" 152.Fa "void *func" 153.Fa "void *arg" 154.Fa "int priority" 155.Fc 156.\" 157.Fo VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG 158.Fa "eventhandler_tag tag" 159.Fa "const char *name" 160.Fa "void *func" 161.Fa "void *arg" 162.Fa "int priority" 163.Fc 164.\" ------------------------------------------------------------ 165.Ss "Sysctl Handling" 166.Fo SYSCTL_VNET_INT 167.Fa parent nbr name access ptr val descr 168.Fc 169.Fo SYSCTL_VNET_PROC 170.Fa parent nbr name access ptr arg handler fmt descr 171.Fc 172.Fo SYSCTL_VNET_STRING 173.Fa parent nbr name access arg len descr 174.Fc 175.Fo SYSCTL_VNET_STRUCT 176.Fa parent nbr name access ptr type descr 177.Fc 178.Fo SYSCTL_VNET_UINT 179.Fa parent nbr name access ptr val descr 180.Fc 181.Fo VNET_SYSCTL_ARG 182.Fa req arg1 183.Fc 184.\" ------------------------------------------------------------ 185.Sh DESCRIPTION 186.Nm 187is the name of a technique to virtualize the network stack. 188The basic idea is to change global resources, most notably variables, into 189per network stack resources and have functions, sysctls, eventhandlers, 190etc. access and handle them in the context of the correct instance. 191Each (virtual) network stack is attached to a 192.Em prison , 193with 194.Vt vnet0 195being the unrestricted default network stack of the base system. 196.Pp 197The global defines for 198.Dv VNET_SETNAME 199and 200.Dv VNET_SYMPREFIX 201are shared with 202.Xr kvm 3 203to access internals for debugging reasons. 
204.\" ------------------------------------------------------------ 205.Ss "Variable Declaration" 206.\" 207Variables are virtualized by using the 208.Fn VNET_DEFINE 209macro rather than writing them out as 210.Em type name . 211One can still use static initialization or storage class specifiers, e.g., 212.Pp 213.Dl Li static VNET_DEFINE(int, foo) = 1; 214or 215.Dl Li static VNET_DEFINE(SLIST_HEAD(, bar), bars); 216.Pp 217Static initialization is not possible when the virtualized variable 218would need to be referenced, e.g., with 219.Dq TAILQ_HEAD_INITIALIZER() . 220In that case a 221.Fn VNET_SYSINIT 222based initialization function must be used. 223.Pp 224External variables have to be declared using the 225.Fn VNET_DECLARE 226macro. 227In either case the convention is to define another macro, 228that is then used throughout the implementation to access that variable. 229The variable name is usually prefixed by 230.Em V_ 231to express that it is virtualized. 232The 233.Fn VNET 234macro will then translate accesses to that variable to the copy of the 235currently selected instance (see the 236.Sx "Virtual Instance Selection" 237section): 238.Pp 239.Dl Li #define V_name VNET(name) 240.Pp 241.Em NOTE: 242Do not confuse this with the convention used by 243.Xr VFS 9 . 244.Pp 245The 246.Fn VNET_NAME 247macro returns the offset within the memory region of the virtual network 248stack instance. 249It is usually only used with 250.Fn SYSCTL_VNET_* 251macros. 
252.\" ------------------------------------------------------------ 253.Ss "Virtual Instance Selection" 254.\" 255There are three different places where the current virtual 256network stack pointer is stored and can be taken from: 257.Bl -enum -offset indent 258.It 259a 260.Em prison : 261.Dl "(struct prison *)->pr_vnet" 262.Pp 263For convenience the following macros are provided: 264.Bd -literal -compact -offset indent 265.Fn CRED_TO_VNET "struct ucred *" 266.Fn TD_TO_VNET "struct thread *" 267.Fn P_TO_VNET "struct proc *" 268.Ed 269.It 270a 271.Em socket : 272.Dl "(struct socket *)->so_vnet" 273.It 274an 275.Em interface : 276.Dl "(struct ifnet *)->if_vnet" 277.El 278.Pp 279.\" 280In addition the currently active instance is cached in 281.Dq "curthread->td_vnet" 282which is usually only accessed through the 283.Dv curvnet 284macro. 285.Pp 286.\" 287To set the correct context of the current virtual network instance, use the 288.Fn CURVNET_SET 289or 290.Fn CURVNET_SET_QUIET 291macros. 292The 293.Fn CURVNET_SET_QUIET 294version will not record vnet recursions in case the kernel was compiled 295with 296.Cd "options VNET_DEBUG" 297and should thus only be used in well known cases, where recursion is 298unavoidable. 299Both macros will save the previous state on the stack and it must be restored 300with the 301.Fn CURVNET_RESTORE 302macro. 303.Pp 304.Em NOTE: 305As the previous state is saved on the stack, you cannot have multiple 306.Fn CURVNET_SET 307calls in the same block. 308.Pp 309.Em NOTE: 310As the previous state is saved on the stack, a 311.Fn CURVNET_RESTORE 312call has to be in the same block as the 313.Fn CURVNET_SET 314call or in a subblock with the same idea of the saved instances as the 315outer block. 316.Pp 317.Em NOTE: 318As each macro is a set of operations and, as previously explained, cannot 319be put into its own block when defined, one cannot conditionally set 320the current vnet context. 
321The following will 322.Em not 323work: 324.Bd -literal -offset indent 325if (condition) 326 CURVNET_SET(vnet); 327.Ed 328.Pp 329nor would this work: 330.Bd -literal -offset indent 331if (condition) { 332 CURVNET_SET(vnet); 333} 334CURVNET_RESTORE(); 335.Ed 336.Pp 337.\" 338Sometimes one needs to loop over all virtual instances, for example to update 339virtual from global state, to run a function from a 340.Xr callout 9 341for each instance, etc. 342For those cases the 343.Fn VNET_ITERATOR_DECL 344and 345.Fn VNET_FOREACH 346macros are provided. 347The former macro defines the variable that iterates over the loop, 348and the latter loops over all of the virtual network stack instances. 349See 350.Sx "Locking" 351for how to safely traverse the list of all virtual instances. 352.Pp 353.\" 354The 355.Fn IS_DEFAULT_VNET 356macro provides a safe way to check whether the currently active instance is the 357unrestricted default network stack of the base system 358.Pq Vt vnet0 . 359.Pp 360.\" 361The 362.Fn VNET_ASSERT 363macro provides a way to conditionally add assertions that are only active with 364.Cd "options VIMAGE" 365compiled in and either 366.Cd "options VNET_DEBUG" 367or 368.Cd "options INVARIANTS" 369enabled as well. 370It uses the same semantics as 371.Xr KASSERT 9 . 372.\" ------------------------------------------------------------ 373.Ss "Locking" 374.\" 375For public access to the list of virtual network stack instances, 376e.g., by the 377.Fn VNET_FOREACH 378macro, read locks are provided. 379Macros are used to abstract from the actual type of the locks. 380If a caller may sleep while traversing the list, it must use the 381.Fn VNET_LIST_RLOCK 382and 383.Fn VNET_LIST_RUNLOCK 384macros. 385Otherwise, the caller can use 386.Fn VNET_LIST_RLOCK_NOSLEEP 387and 388.Fn VNET_LIST_RUNLOCK_NOSLEEP . 
389.\" ------------------------------------------------------------ 390.Ss "Startup and Teardown Functions" 391.\" 392To start or tear down a virtual network stack instance the internal 393functions 394.Fn vnet_alloc 395and 396.Fn vnet_destroy 397are provided and called from the jail framework. 398They run the publicly provided methods to handle network stack 399startup and teardown. 400.Pp 401For public control, the system startup interface has been enhanced 402to not only handle a system boot but to also handle a virtual 403network stack startup and teardown. 404To the base system the 405.Fn VNET_SYSINIT 406and 407.Fn VNET_SYSUNINIT 408macros look exactly as if there were no virtual network stack. 409In fact, if 410.Cd "options VIMAGE" 411is not compiled in they are compiled to the standard 412.Fn SYSINIT 413macros. 414In addition to that they are run for each virtual network stack 415when starting or, in reverse order, when shutting down. 416.\" ------------------------------------------------------------ 417.Ss "Eventhandlers" 418.\" 419Eventhandlers can be handled in two ways: 420.Pp 421.Bl -enum -offset indent -compact 422.It 423save the 424.Em tags 425returned in each virtual instance and properly free the eventhandlers 426on teardown using those, or 427.It 428use one eventhandler that will iterate over all virtual network 429stack instances. 430.El 431.Pp 432For the first case one can just use the normal 433.Xr EVENTHANDLER 9 434functions, while for the second case the 435.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER 436and 437.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG 438macros are provided. 439These differ in that 440.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG 441takes an extra first argument that will carry the 442.Fa "tag" 443upon return. 444Eventhandlers registered with either of these will not run 445.Fa func 446directly but 447.Fa func 448will be called from an internal iterator function for each vnet. 
449Both macros can only be used for eventhandlers that do not take 450additional arguments, as the variadic arguments from an 451.Xr EVENTHANDLER_INVOKE 9 452call will be ignored. 453.\" ------------------------------------------------------------ 454.Ss "Sysctl Handling" 455.\" 456A 457.Xr sysctl 9 458can be virtualized by using one of the 459.Fn SYSCTL_VNET_* 460macros. 461.Pp 462They take the same arguments as the standard 463.Xr sysctl 9 464functions, with the only difference that the 465.Fa ptr 466argument has to be passed as 467.Ql &VNET_NAME(foo) 468instead of 469.Ql &foo 470so that the variable can be selected from the correct memory 471region of the virtual network stack instance of the caller. 472.Pp 473For the very rare case where a sysctl handler function would want to 474handle 475.Fa arg1 476itself, the 477.Fn VNET_SYSCTL_ARG req arg1 478macro is provided that will translate the 479.Fa arg1 480argument to the correct memory address in the virtual network stack 481context of the caller. 482.\" ------------------------------------------------------------ 483.Sh SEE ALSO 484.Xr jail 2 , 485.Xr kvm 3 , 486.Xr EVENTHANDLER 9 , 487.\" .Xr pcpu 9 , 488.Xr KASSERT 9 , 489.Xr sysctl 9 490.\" .Xr SYSINIT 9 491.Pp 492Marko Zec, Implementing a Clonable Network Stack in the FreeBSD Kernel, 493USENIX ATC'03, June 2003, Boston 494.Sh HISTORY 495The virtual network stack implementation first appeared in 496.Fx 8.0 . 497.Sh AUTHORS 498.An -nosplit 499The 500.Nm 501framework has been designed and implemented at the University of Zagreb by 502.An Marko Zec , 503and later extended and refined by 504.An Bjoern A. Zeeb 505and 506.An Robert Watson , 507under contract to the FreeBSD Foundation. 508.Pp 509This manual page was written by 510.An Bjoern A. Zeeb, CK Software GmbH, 511under sponsorship from the FreeBSD Foundation. 512