1.\"- 2.\" Copyright (c) 2010 The FreeBSD Foundation 3.\" 4.\" This documentation was written by CK Software GmbH under sponsorship from 5.\" the FreeBSD Foundation. 6.\" 7.\" Redistribution and use in source and binary forms, with or without 8.\" modification, are permitted provided that the following conditions 9.\" are met: 10.\" 1. Redistributions of source code must retain the above copyright 11.\" notice, this list of conditions and the following disclaimer. 12.\" 2. Redistributions in binary form must reproduce the above copyright 13.\" notice, this list of conditions and the following disclaimer in the 14.\" documentation and/or other materials provided with the distribution. 15.\" 16.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26.\" SUCH DAMAGE. 
27.\" 28.\" $FreeBSD$ 29.\" 30.Dd December 10, 2020 31.Dt VNET 9 32.Os 33.Sh NAME 34.Nm VNET 35.Nd "network subsystem virtualization infrastructure" 36.Sh SYNOPSIS 37.Cd "options VIMAGE" 38.Cd "options VNET_DEBUG" 39.Pp 40.In net/vnet.h 41.\"------------------------------------------------------------ 42.Ss "Constants and Global Variables" 43.\" 44.Dv VNET_SETNAME 45.\" "set_vnet" 46.Dv VNET_SYMPREFIX 47.\" "vnet_entry_" 48.Vt extern struct vnet *vnet0; 49.\"------------------------------------------------------------ 50.Ss "Variable Declaration" 51.Fo VNET 52.Fa "name" 53.Fc 54.\" 55.Fo VNET_NAME 56.Fa "name" 57.Fc 58.\" 59.Fo VNET_DECLARE 60.Fa "type" "name" 61.Fc 62.\" 63.Fo VNET_DEFINE 64.Fa "type" "name" 65.Fc 66.\" 67.Fo VNET_DEFINE_STATIC 68.Fa "type" "name" 69.Fc 70.\" 71.Bd -literal 72#define V_name VNET(name) 73.Ed 74.\" ------------------------------------------------------------ 75.Ss "Virtual Instance Selection" 76.\" 77.Fo CRED_TO_VNET 78.Fa "struct ucred *" 79.Fc 80.\" 81.Fo TD_TO_VNET 82.Fa "struct thread *" 83.Fc 84.\" 85.Fo P_TO_VNET 86.Fa "struct proc *" 87.Fc 88.\" 89.Fo IS_DEFAULT_VNET 90.Fa "struct vnet *" 91.Fc 92.\" 93.Fo VNET_ASSERT 94.Fa exp msg 95.Fc 96.\" 97.Fo CURVNET_SET 98.Fa "struct vnet *" 99.Fc 100.\" 101.Fo CURVNET_SET_QUIET 102.Fa "struct vnet *" 103.Fc 104.\" 105.Fn CURVNET_RESTORE 106.\" 107.Fo VNET_ITERATOR_DECL 108.Fa "struct vnet *" 109.Fc 110.\" 111.Fo VNET_FOREACH 112.Fa "struct vnet *" 113.Fc 114.\" ------------------------------------------------------------ 115.Ss "Locking" 116.\" 117.Fn VNET_LIST_RLOCK 118.Fn VNET_LIST_RUNLOCK 119.Fn VNET_LIST_RLOCK_NOSLEEP 120.Fn VNET_LIST_RUNLOCK_NOSLEEP 121.\" ------------------------------------------------------------ 122.Ss "Startup and Teardown Functions" 123.\" 124.Ft "struct vnet *" 125.Fo vnet_alloc 126.Fa void 127.Fc 128.\" 129.Ft void 130.Fo vnet_destroy 131.Fa "struct vnet *" 132.Fc 133.\" 134.Fo VNET_SYSINIT 135.Fa ident 136.Fa "enum sysinit_sub_id subsystem" 137.Fa "enum 
sysinit_elem_order order" 138.Fa "sysinit_cfunc_t func" 139.Fa "const void *arg" 140.Fc 141.\" 142.Fo VNET_SYSUNINIT 143.Fa ident 144.Fa "enum sysinit_sub_id subsystem" 145.Fa "enum sysinit_elem_order order" 146.Fa "sysinit_cfunc_t func" 147.Fa "const void *arg" 148.Fc 149.\" ------------------------------------------------------------ 150.Ss "Eventhandlers" 151.\" 152.Fo VNET_GLOBAL_EVENTHANDLER_REGISTER 153.Fa "const char *name" 154.Fa "void *func" 155.Fa "void *arg" 156.Fa "int priority" 157.Fc 158.\" 159.Fo VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG 160.Fa "eventhandler_tag tag" 161.Fa "const char *name" 162.Fa "void *func" 163.Fa "void *arg" 164.Fa "int priority" 165.Fc 166.\" ------------------------------------------------------------ 167.Ss "Sysctl Handling" 168.Fo SYSCTL_VNET_INT 169.Fa parent nbr name access ptr val descr 170.Fc 171.Fo SYSCTL_VNET_PROC 172.Fa parent nbr name access ptr arg handler fmt descr 173.Fc 174.Fo SYSCTL_VNET_STRING 175.Fa parent nbr name access arg len descr 176.Fc 177.Fo SYSCTL_VNET_STRUCT 178.Fa parent nbr name access ptr type descr 179.Fc 180.Fo SYSCTL_VNET_UINT 181.Fa parent nbr name access ptr val descr 182.Fc 183.Fo VNET_SYSCTL_ARG 184.Fa req arg1 185.Fc 186.\" ------------------------------------------------------------ 187.Sh DESCRIPTION 188.Nm 189is the name of a technique to virtualize the network stack. 190The basic idea is to change global resources, most notably variables, into 191per-network-stack resources and have functions, sysctls, eventhandlers, 192etc. access and handle them in the context of the correct instance. 193Each (virtual) network stack is attached to a 194.Em prison , 195with 196.Vt vnet0 197being the unrestricted default network stack of the base system. 198.Pp 199The global defines for 200.Dv VNET_SETNAME 201and 202.Dv VNET_SYMPREFIX 203are shared with 204.Xr kvm 3 205to access internals for debugging reasons. 
206.\" ------------------------------------------------------------ 207.Ss "Variable Declaration" 208.\" 209Variables are virtualized by using the 210.Fn VNET_DEFINE 211macro rather than writing them out as 212.Em type name . 213One can still use static initialization, e.g., 214.Pp 215.Dl Li VNET_DEFINE(int, foo) = 1; 216.Pp 217Variables declared with the static keyword can use the 218.Fn VNET_DEFINE_STATIC 219macro, e.g., 220.Pp 221.Dl Li VNET_DEFINE_STATIC(SLIST_HEAD(, bar), bars); 222.Pp 223Static initialization is not possible when the virtualized variable 224would need to be referenced, e.g., with 225.Dq TAILQ_HEAD_INITIALIZER() . 226In that case a 227.Fn VNET_SYSINIT 228based initialization function must be used. 229.Pp 230External variables have to be declared using the 231.Fn VNET_DECLARE 232macro. 233In either case the convention is to define another macro 234that is then used throughout the implementation to access that variable. 235The variable name is usually prefixed by 236.Em V_ 237to express that it is virtualized. 238The 239.Fn VNET 240macro will then translate accesses to that variable to the copy of the 241currently selected instance (see the 242.Sx "Virtual Instance Selection" 243section): 244.Pp 245.Dl Li #define V_name VNET(name) 246.Pp 247.Em NOTE: 248Do not confuse this with the convention used by 249.Xr VFS 9 . 250.Pp 251The 252.Fn VNET_NAME 253macro returns the offset within the memory region of the virtual network 254stack instance. 255It is usually only used with 256.Fn SYSCTL_VNET_* 257macros. 
258.\" ------------------------------------------------------------ 259.Ss "Virtual Instance Selection" 260.\" 261There are three different places where the current virtual 262network stack pointer is stored and can be taken from: 263.Bl -enum -offset indent 264.It 265a 266.Em prison : 267.Dl "(struct prison *)->pr_vnet" 268.Pp 269For convenience the following macros are provided: 270.Bd -literal -compact -offset indent 271.Fn CRED_TO_VNET "struct ucred *" 272.Fn TD_TO_VNET "struct thread *" 273.Fn P_TO_VNET "struct proc *" 274.Ed 275.It 276a 277.Em socket : 278.Dl "(struct socket *)->so_vnet" 279.It 280an 281.Em interface : 282.Dl "(struct ifnet *)->if_vnet" 283.El 284.Pp 285.\" 286In addition the currently active instance is cached in 287.Dq "curthread->td_vnet" 288which is usually only accessed through the 289.Dv curvnet 290macro. 291.Pp 292.\" 293To set the correct context of the current virtual network instance, use the 294.Fn CURVNET_SET 295or 296.Fn CURVNET_SET_QUIET 297macros. 298The 299.Fn CURVNET_SET_QUIET 300version will not record vnet recursions in case the kernel was compiled 301with 302.Cd "options VNET_DEBUG" 303and should thus only be used in well known cases, where recursion is 304unavoidable. 305Both macros will save the previous state on the stack and it must be restored 306with the 307.Fn CURVNET_RESTORE 308macro. 309.Pp 310.Em NOTE: 311As the previous state is saved on the stack, you cannot have multiple 312.Fn CURVNET_SET 313calls in the same block. 314.Pp 315.Em NOTE: 316As the previous state is saved on the stack, a 317.Fn CURVNET_RESTORE 318call has to be in the same block as the 319.Fn CURVNET_SET 320call or in a subblock with the same idea of the saved instances as the 321outer block. 322.Pp 323.Em NOTE: 324As each macro is a set of operations and, as previously explained, cannot 325be put into its own block when defined, one cannot conditionally set 326the current vnet context. 
327The following will 328.Em not 329work: 330.Bd -literal -offset indent 331if (condition) 332 CURVNET_SET(vnet); 333.Ed 334.Pp 335nor would this work: 336.Bd -literal -offset indent 337if (condition) { 338 CURVNET_SET(vnet); 339} 340CURVNET_RESTORE(); 341.Ed 342.Pp 343.\" 344Sometimes one needs to loop over all virtual instances, for example to update 345virtual from global state, to run a function from a 346.Xr callout 9 347for each instance, etc. 348For those cases the 349.Fn VNET_ITERATOR_DECL 350and 351.Fn VNET_FOREACH 352macros are provided. 353The former macro defines the variable that iterates over the loop, 354and the latter loops over all of the virtual network stack instances. 355See 356.Sx "Locking" 357for how to safely traverse the list of all virtual instances. 358.Pp 359.\" 360The 361.Fn IS_DEFAULT_VNET 362macro provides a safe way to check whether the currently active instance is the 363unrestricted default network stack of the base system 364.Pq Vt vnet0 . 365.Pp 366.\" 367The 368.Fn VNET_ASSERT 369macro provides a way to conditionally add assertions that are only active with 370.Cd "options VIMAGE" 371compiled in and either 372.Cd "options VNET_DEBUG" 373or 374.Cd "options INVARIANTS" 375enabled as well. 376It uses the same semantics as 377.Xr KASSERT 9 . 378.\" ------------------------------------------------------------ 379.Ss "Locking" 380.\" 381For public access to the list of virtual network stack instances, 382e.g., by the 383.Fn VNET_FOREACH 384macro, read locks are provided. 385Macros are used to abstract from the actual type of the locks. 386If a caller may sleep while traversing the list, it must use the 387.Fn VNET_LIST_RLOCK 388and 389.Fn VNET_LIST_RUNLOCK 390macros. 391Otherwise, the caller can use 392.Fn VNET_LIST_RLOCK_NOSLEEP 393and 394.Fn VNET_LIST_RUNLOCK_NOSLEEP . 
395.\" ------------------------------------------------------------ 396.Ss "Startup and Teardown Functions" 397.\" 398To start or tear down a virtual network stack instance the internal 399functions 400.Fn vnet_alloc 401and 402.Fn vnet_destroy 403are provided and called from the jail framework. 404They run the publicly provided methods to handle network stack 405startup and teardown. 406.Pp 407For public control, the system startup interface has been enhanced 408to not only handle a system boot but to also handle a virtual 409network stack startup and teardown. 410To the base system the 411.Fn VNET_SYSINIT 412and 413.Fn VNET_SYSUNINIT 414macros look exactly as if there were no virtual network stack. 415In fact, if 416.Cd "options VIMAGE" 417is not compiled in they are compiled to the standard 418.Fn SYSINIT 419macros. 420In addition to that they are run for each virtual network stack 421when starting or, in reverse order, when shutting down. 422.\" ------------------------------------------------------------ 423.Ss "Eventhandlers" 424.\" 425Eventhandlers can be handled in two ways: 426.Pp 427.Bl -enum -offset indent -compact 428.It 429save the 430.Em tags 431returned in each virtual instance and properly free the eventhandlers 432on teardown using those, or 433.It 434use one eventhandler that will iterate over all virtual network 435stack instances. 436.El 437.Pp 438For the first case one can just use the normal 439.Xr EVENTHANDLER 9 440functions, while for the second case the 441.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER 442and 443.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG 444macros are provided. 445These differ in that 446.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG 447takes an extra first argument that will carry the 448.Fa "tag" 449upon return. 450Eventhandlers registered with either of these will not run 451.Fa func 452directly but 453.Fa func 454will be called from an internal iterator function for each vnet. 
455Both macros can only be used for eventhandlers that do not take 456additional arguments, as the variadic arguments from an 457.Xr EVENTHANDLER_INVOKE 9 458call will be ignored. 459.\" ------------------------------------------------------------ 460.Ss "Sysctl Handling" 461.\" 462A 463.Xr sysctl 9 464can be virtualized by using one of the 465.Fn SYSCTL_VNET_* 466macros. 467.Pp 468They take the same arguments as the standard 469.Xr sysctl 9 470functions, with the only difference being that the 471.Fa ptr 472argument has to be passed as 473.Ql &VNET_NAME(foo) 474instead of 475.Ql &foo 476so that the variable can be selected from the correct memory 477region of the virtual network stack instance of the caller. 478.Pp 479For the very rare case a sysctl handler function would want to 480handle 481.Fa arg1 482itself, the 483.Fn VNET_SYSCTL_ARG req arg1 484macro is provided that will translate the 485.Fa arg1 486argument to the correct memory address in the virtual network stack 487context of the caller. 488.\" ------------------------------------------------------------ 489.Sh SEE ALSO 490.Xr jail 2 , 491.Xr kvm 3 , 492.Xr EVENTHANDLER 9 , 493.\" .Xr pcpu 9 , 494.Xr KASSERT 9 , 495.Xr sysctl 9 496.\" .Xr SYSINIT 9 497.Pp 498Marko Zec, Implementing a Clonable Network Stack in the FreeBSD Kernel, 499USENIX ATC'03, June 2003, Boston 500.Sh HISTORY 501The virtual network stack implementation first appeared in 502.Fx 8.0 . 503.Sh AUTHORS 504.An -nosplit 505The 506.Nm 507framework was designed and implemented at the University of Zagreb by 508.An Marko Zec 509under sponsorship of the FreeBSD Foundation and NLnet Foundation, 510and later extended and refined by 511.An Bjoern A. Zeeb 512(also under FreeBSD Foundation sponsorship), and 513.An Robert Watson . 514.Pp 515This manual page was written by 516.An Bjoern A. Zeeb, CK Software GmbH, 517under sponsorship from the FreeBSD Foundation. 518