1.\"- 2.\" Copyright (c) 2010 The FreeBSD Foundation 3.\" All rights reserved. 4.\" 5.\" This documentation was written by CK Software GmbH under sponsorship from 6.\" the FreeBSD Foundation. 7.\" 8.\" Redistribution and use in source and binary forms, with or without 9.\" modification, are permitted provided that the following conditions 10.\" are met: 11.\" 1. Redistributions of source code must retain the above copyright 12.\" notice, this list of conditions and the following disclaimer. 13.\" 2. Redistributions in binary form must reproduce the above copyright 14.\" notice, this list of conditions and the following disclaimer in the 15.\" documentation and/or other materials provided with the distribution. 16.\" 17.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27.\" SUCH DAMAGE. 
28.\" 29.\" $FreeBSD$ 30.\" 31.Dd November 20, 2014 32.Dt VNET 9 33.Os 34.Sh NAME 35.Nm VNET 36.Nd "network subsystem virtualization infrastructure" 37.Sh SYNOPSIS 38.Cd "options VIMAGE" 39.Cd "options VNET_DEBUG" 40.Pp 41.In sys/vnet.h 42.Pp 43.\"------------------------------------------------------------ 44.Ss "Constants and Global Variables" 45.\" 46.Dv VNET_SETNAME 47.\" "set_vnet" 48.Dv VNET_SYMPREFIX 49.\" "vnet_entry_" 50.Vt extern struct vnet *vnet0; 51.\"------------------------------------------------------------ 52.Ss "Variable Declaration" 53.Fo VNET 54.Fa "name" 55.Fc 56.\" 57.Fo VNET_NAME 58.Fa "name" 59.Fc 60.\" 61.Fo VNET_DECLARE 62.Fa "type" "name" 63.Fc 64.\" 65.Fo VNET_DEFINE 66.Fa "type" "name" 67.Fc 68.\" 69.Bd -literal 70#define V_name VNET(name) 71.Ed 72.\" ------------------------------------------------------------ 73.Ss "Virtual Instance Selection" 74.\" 75.Fo CRED_TO_VNET 76.Fa "struct ucred *" 77.Fc 78.\" 79.Fo TD_TO_VNET 80.Fa "struct thread *" 81.Fc 82.\" 83.Fo P_TO_VNET 84.Fa "struct proc *" 85.Fc 86.\" 87.Fo IS_DEFAULT_VNET 88.Fa "struct vnet *" 89.Fc 90.\" 91.Fo VNET_ASSERT 92.Fa exp msg 93.Fc 94.\" 95.Fo CURVNET_SET 96.Fa "struct vnet *" 97.Fc 98.\" 99.Fo CURVNET_SET_QUIET 100.Fa "struct vnet *" 101.Fc 102.\" 103.Fo CURVNET_RESTORE 104.Fc 105.\" 106.Fo VNET_ITERATOR_DECL 107.Fa "struct vnet *" 108.Fc 109.\" 110.Fo VNET_FOREACH 111.Fa "struct vnet *" 112.Fc 113.\" ------------------------------------------------------------ 114.Ss "Locking" 115.\" 116.Fo VNET_LIST_RLOCK 117.Fc 118.Fo VNET_LIST_RUNLOCK 119.Fc 120.Fo VNET_LIST_RLOCK_NOSLEEP 121.Fc 122.Fo VNET_LIST_RUNLOCK_NOSLEEP 123.Fc 124.\" ------------------------------------------------------------ 125.Ss "Startup and Teardown Functions" 126.\" 127.Ft "struct vnet *" 128.Fo vnet_alloc 129.Fa void 130.Fc 131.\" 132.Ft void 133.Fo vnet_destroy 134.Fa "struct vnet *" 135.Fc 136.\" 137.Fo VNET_SYSINIT 138.Fa ident 139.Fa "enum sysinit_sub_id subsystem" 140.Fa "enum 
sysinit_elem_order order" 141.Fa "sysinit_cfunc_t func" 142.Fa "const void *arg" 143.Fc 144.\" 145.Fo VNET_SYSUNINIT 146.Fa ident 147.Fa "enum sysinit_sub_id subsystem" 148.Fa "enum sysinit_elem_order order" 149.Fa "sysinit_cfunc_t func" 150.Fa "const void *arg" 151.Fc 152.\" ------------------------------------------------------------ 153.Ss "Eventhandlers" 154.\" 155.Fo VNET_GLOBAL_EVENTHANDLER_REGISTER 156.Fa "const char *name" 157.Fa "void *func" 158.Fa "void *arg" 159.Fa "int priority" 160.Fc 161.\" 162.Fo VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG 163.Fa "eventhandler_tag tag" 164.Fa "const char *name" 165.Fa "void *func" 166.Fa "void *arg" 167.Fa "int priority" 168.Fc 169.\" ------------------------------------------------------------ 170.Ss "Sysctl Handling" 171.Fo SYSCTL_VNET_INT 172.Fa parent nbr name access ptr val descr 173.Fc 174.Fo SYSCTL_VNET_PROC 175.Fa parent nbr name access ptr arg handler fmt descr 176.Fc 177.Fo SYSCTL_VNET_STRING 178.Fa parent nbr name access arg len descr 179.Fc 180.Fo SYSCTL_VNET_STRUCT 181.Fa parent nbr name access ptr type descr 182.Fc 183.Fo SYSCTL_VNET_UINT 184.Fa parent nbr name access ptr val descr 185.Fc 186.Fo VNET_SYSCTL_ARG 187.Fa req arg1 188.Fc 189.\" ------------------------------------------------------------ 190.Sh DESCRIPTION 191.Nm 192is the name of a technique to virtualize the network stack. 193The basic idea is to change global resources, most notably variables, into 194per network stack resources and have functions, sysctls, eventhandlers, 195etc. access and handle them in the context of the correct instance. 196Each (virtual) network stack is attached to a 197.Em prison , 198with 199.Vt vnet0 200being the unrestricted default network stack of the base system. 201.Pp 202The global defines for 203.Dv VNET_SETNAME 204and 205.Dv VNET_SYMPREFIX 206are shared with 207.Xr kvm 3 208to access internals for debugging reasons. 
209.\" ------------------------------------------------------------ 210.Ss "Variable Declaration" 211.\" 212Variables are virtualized by using the 213.Fn VNET_DEFINE 214macro rather than writing them out as 215.Em type name . 216One can still use static initialization or storage class specifiers, e.g., 217.Pp 218.Dl Li static VNET_DEFINE(int, foo) = 1; 219or 220.Dl Li static VNET_DEFINE(SLIST_HEAD(, bar), bars); 221.Pp 222Static initialization is not possible when the virtualized variable 223would need to be referenced, e.g., with 224.Dq TAILQ_HEAD_INITIALIZER() . 225In that case a 226.Fn VNET_SYSINIT 227based initialization function must be used. 228.Pp 229External variables have to be declared using the 230.Fn VNET_DECLARE 231macro. 232In either case the convention is to define another macro 233that is then used throughout the implementation to access that variable. 234The variable name is usually prefixed by 235.Em V_ 236to express that it is virtualized. 237The 238.Fn VNET 239macro will then translate accesses to that variable to the copy of the 240currently selected instance (see the 241.Sx "Virtual Instance Selection" 242section): 243.Pp 244.Dl Li #define V_name VNET(name) 245.Pp 246.Em NOTE: 247Do not confuse this with the convention used by 248.Xr VFS 9 . 249.Pp 250The 251.Fn VNET_NAME 252macro returns the offset within the memory region of the virtual network 253stack instance. 254It is usually only used with 255.Fn SYSCTL_VNET_* 256macros. 
257.\" ------------------------------------------------------------ 258.Ss "Virtual Instance Selection" 259.\" 260There are three different places where the current virtual 261network stack pointer is stored and can be taken from: 262.Bl -enum -offset indent 263.It 264a 265.Em prison : 266.Dl "(struct prison *)->pr_vnet" 267.Pp 268For convenience the following macros are provided: 269.Bd -literal -compact -offset indent 270.Fn CRED_TO_VNET "struct ucred *" 271.Fn TD_TO_VNET "struct thread *" 272.Fn P_TO_VNET "struct proc *" 273.Ed 274.It 275a 276.Em socket : 277.Dl "(struct socket *)->so_vnet" 278.It 279an 280.Em interface : 281.Dl "(struct ifnet *)->if_vnet" 282.El 283.Pp 284.\" 285In addition the currently active instance is cached in 286.Dq "curthread->td_vnet" 287which is usually only accessed through the 288.Dv curvnet 289macro. 290.Pp 291.\" 292To set the correct context of the current virtual network instance, use the 293.Fn CURVNET_SET 294or 295.Fn CURVNET_SET_QUIET 296macros. 297The 298.Fn CURVNET_SET_QUIET 299version will not record vnet recursions in case the kernel was compiled 300with 301.Cd "options VNET_DEBUG" 302and should thus only be used in well known cases, where recursion is 303unavoidable. 304Both macros will save the previous state on the stack and it must be restored 305with the 306.Fn CURVNET_RESTORE 307macro. 308.Pp 309.Em NOTE: 310As the previous state is saved on the stack, you cannot have multiple 311.Fn CURVNET_SET 312calls in the same block. 313.Pp 314.Em NOTE: 315As the previous state is saved on the stack, a 316.Fn CURVNET_RESTORE 317call has to be in the same block as the 318.Fn CURVNET_SET 319call or in a subblock with the same idea of the saved instances as the 320outer block. 321.Pp 322.Em NOTE: 323As each macro is a set of operations and, as previously explained, cannot 324be put into its own block when defined, one cannot conditionally set 325the current vnet context. 
326The following will 327.Em not 328work: 329.Bd -literal -offset indent 330if (condition) 331 CURVNET_SET(vnet); 332.Ed 333.Pp 334nor would this work: 335.Bd -literal -offset indent 336if (condition) { 337 CURVNET_SET(vnet); 338} 339CURVNET_RESTORE(); 340.Ed 341.Pp 342.\" 343Sometimes one needs to loop over all virtual instances, for example to update 344virtual from global state, to run a function from a 345.Xr callout 9 346for each instance, etc. 347For those cases the 348.Fn VNET_ITERATOR_DECL 349and 350.Fn VNET_FOREACH 351macros are provided. 352The former macro defines the variable that iterates over the loop, 353and the latter loops over all of the virtual network stack instances. 354See 355.Sx "Locking" 356for how to safely traverse the list of all virtual instances. 357.Pp 358.\" 359The 360.Fn IS_DEFAULT_VNET 361macro provides a safe way to check whether the currently active instance is the 362unrestricted default network stack of the base system 363.Pq Vt vnet0 . 364.Pp 365.\" 366The 367.Fn VNET_ASSERT 368macro provides a way to conditionally add assertions that are only active with 369.Cd "options VIMAGE" 370compiled in and either 371.Cd "options VNET_DEBUG" 372or 373.Cd "options INVARIANTS" 374enabled as well. 375It uses the same semantics as 376.Xr KASSERT 9 . 377.\" ------------------------------------------------------------ 378.Ss "Locking" 379.\" 380For public access to the list of virtual network stack instances, 381e.g., by the 382.Fn VNET_FOREACH 383macro, read locks are provided. 384Macros are used to abstract from the actual type of the locks. 385If a caller may sleep while traversing the list, it must use the 386.Fn VNET_LIST_RLOCK 387and 388.Fn VNET_LIST_RUNLOCK 389macros. 390Otherwise, the caller can use 391.Fn VNET_LIST_RLOCK_NOSLEEP 392and 393.Fn VNET_LIST_RUNLOCK_NOSLEEP . 
394.\" ------------------------------------------------------------ 395.Ss "Startup and Teardown Functions" 396.\" 397To start or tear down a virtual network stack instance the internal 398functions 399.Fn vnet_alloc 400and 401.Fn vnet_destroy 402are provided and called from the jail framework. 403They run the publicly provided methods to handle network stack 404startup and teardown. 405.Pp 406For public control, the system startup interface has been enhanced 407to not only handle a system boot but to also handle a virtual 408network stack startup and teardown. 409To the base system the 410.Fn VNET_SYSINIT 411and 412.Fn VNET_SYSUNINIT 413macros look exactly as if there were no virtual network stack. 414In fact, if 415.Cd "options VIMAGE" 416is not compiled in they are compiled to the standard 417.Fn SYSINIT 418macros. 419In addition to that they are run for each virtual network stack 420when starting or, in reverse order, when shutting down. 421.\" ------------------------------------------------------------ 422.Ss "Eventhandlers" 423.\" 424Eventhandlers can be handled in two ways: 425.Pp 426.Bl -enum -offset indent -compact 427.It 428save the 429.Em tags 430returned in each virtual instance and properly free the eventhandlers 431on teardown using those, or 432.It 433use one eventhandler that will iterate over all virtual network 434stack instances. 435.El 436.Pp 437For the first case one can just use the normal 438.Xr EVENTHANDLER 9 439functions, while for the second case the 440.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER 441and 442.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG 443macros are provided. 444These differ in that 445.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG 446takes an extra first argument that will carry the 447.Fa "tag" 448upon return. 449Eventhandlers registered with either of these will not run 450.Fa func 451directly but 452.Fa func 453will be called from an internal iterator function for each vnet. 
454Both macros can only be used for eventhandlers that do not take 455additional arguments, as the variadic arguments from an 456.Xr EVENTHANDLER_INVOKE 9 457call will be ignored. 458.\" ------------------------------------------------------------ 459.Ss "Sysctl Handling" 460.\" 461A 462.Xr sysctl 9 463can be virtualized by using one of the 464.Fn SYSCTL_VNET_* 465macros. 466.Pp 467They take the same arguments as the standard 468.Xr sysctl 9 469functions, with the only difference being that the 470.Fa ptr 471argument has to be passed as 472.Ql &VNET_NAME(foo) 473instead of 474.Ql &foo 475so that the variable can be selected from the correct memory 476region of the virtual network stack instance of the caller. 477.Pp 478For the very rare case where a sysctl handler function would want to 479handle 480.Fa arg1 481itself, the 482.Fn VNET_SYSCTL_ARG req arg1 483macro is provided that will translate the 484.Fa arg1 485argument to the correct memory address in the virtual network stack 486context of the caller. 487.\" ------------------------------------------------------------ 488.Sh SEE ALSO 489.Xr jail 2 , 490.Xr kvm 3 , 491.Xr EVENTHANDLER 9 , 492.\" .Xr pcpu 9 , 493.Xr KASSERT 9 , 494.Xr sysctl 9 495.\" .Xr SYSINIT 9 496.Sh HISTORY 497The virtual network stack implementation first appeared in 498.Fx 8.0 . 499.Sh AUTHORS 500This manual page was written by 501.An Bjoern A. Zeeb, CK Software GmbH, 502under sponsorship from the FreeBSD Foundation. 503