1.\"- 2.\" Copyright (c) 2010 The FreeBSD Foundation 3.\" All rights reserved. 4.\" 5.\" This documentation was written by CK Software GmbH under sponsorship from 6.\" the FreeBSD Foundation. 7.\" 8.\" Redistribution and use in source and binary forms, with or without 9.\" modification, are permitted provided that the following conditions 10.\" are met: 11.\" 1. Redistributions of source code must retain the above copyright 12.\" notice, this list of conditions and the following disclaimer. 13.\" 2. Redistributions in binary form must reproduce the above copyright 14.\" notice, this list of conditions and the following disclaimer in the 15.\" documentation and/or other materials provided with the distribution. 16.\" 17.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27.\" SUCH DAMAGE. 
28.\" 29.\" $FreeBSD$ 30.\" 31.Dd July 24, 2018 32.Dt VNET 9 33.Os 34.Sh NAME 35.Nm VNET 36.Nd "network subsystem virtualization infrastructure" 37.Sh SYNOPSIS 38.Cd "options VIMAGE" 39.Cd "options VNET_DEBUG" 40.Pp 41.In sys/vnet.h 42.Pp 43.\"------------------------------------------------------------ 44.Ss "Constants and Global Variables" 45.\" 46.Dv VNET_SETNAME 47.\" "set_vnet" 48.Dv VNET_SYMPREFIX 49.\" "vnet_entry_" 50.Vt extern struct vnet *vnet0; 51.\"------------------------------------------------------------ 52.Ss "Variable Declaration" 53.Fo VNET 54.Fa "name" 55.Fc 56.\" 57.Fo VNET_NAME 58.Fa "name" 59.Fc 60.\" 61.Fo VNET_DECLARE 62.Fa "type" "name" 63.Fc 64.\" 65.Fo VNET_DEFINE 66.Fa "type" "name" 67.Fc 68.\" 69.Fo VNET_DEFINE_STATIC 70.Fa "type" "name" 71.Fc 72.\" 73.Bd -literal 74#define V_name VNET(name) 75.Ed 76.\" ------------------------------------------------------------ 77.Ss "Virtual Instance Selection" 78.\" 79.Fo CRED_TO_VNET 80.Fa "struct ucred *" 81.Fc 82.\" 83.Fo TD_TO_VNET 84.Fa "struct thread *" 85.Fc 86.\" 87.Fo P_TO_VNET 88.Fa "struct proc *" 89.Fc 90.\" 91.Fo IS_DEFAULT_VNET 92.Fa "struct vnet *" 93.Fc 94.\" 95.Fo VNET_ASSERT 96.Fa exp msg 97.Fc 98.\" 99.Fo CURVNET_SET 100.Fa "struct vnet *" 101.Fc 102.\" 103.Fo CURVNET_SET_QUIET 104.Fa "struct vnet *" 105.Fc 106.\" 107.Fn CURVNET_RESTORE 108.\" 109.Fo VNET_ITERATOR_DECL 110.Fa "struct vnet *" 111.Fc 112.\" 113.Fo VNET_FOREACH 114.Fa "struct vnet *" 115.Fc 116.\" ------------------------------------------------------------ 117.Ss "Locking" 118.\" 119.Fn VNET_LIST_RLOCK 120.Fn VNET_LIST_RUNLOCK 121.Fn VNET_LIST_RLOCK_NOSLEEP 122.Fn VNET_LIST_RUNLOCK_NOSLEEP 123.\" ------------------------------------------------------------ 124.Ss "Startup and Teardown Functions" 125.\" 126.Ft "struct vnet *" 127.Fo vnet_alloc 128.Fa void 129.Fc 130.\" 131.Ft void 132.Fo vnet_destroy 133.Fa "struct vnet *" 134.Fc 135.\" 136.Fo VNET_SYSINIT 137.Fa ident 138.Fa "enum sysinit_sub_id subsystem" 139.Fa 
"enum sysinit_elem_order order" 140.Fa "sysinit_cfunc_t func" 141.Fa "const void *arg" 142.Fc 143.\" 144.Fo VNET_SYSUNINIT 145.Fa ident 146.Fa "enum sysinit_sub_id subsystem" 147.Fa "enum sysinit_elem_order order" 148.Fa "sysinit_cfunc_t func" 149.Fa "const void *arg" 150.Fc 151.\" ------------------------------------------------------------ 152.Ss "Eventhandlers" 153.\" 154.Fo VNET_GLOBAL_EVENTHANDLER_REGISTER 155.Fa "const char *name" 156.Fa "void *func" 157.Fa "void *arg" 158.Fa "int priority" 159.Fc 160.\" 161.Fo VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG 162.Fa "eventhandler_tag tag" 163.Fa "const char *name" 164.Fa "void *func" 165.Fa "void *arg" 166.Fa "int priority" 167.Fc 168.\" ------------------------------------------------------------ 169.Ss "Sysctl Handling" 170.Fo SYSCTL_VNET_INT 171.Fa parent nbr name access ptr val descr 172.Fc 173.Fo SYSCTL_VNET_PROC 174.Fa parent nbr name access ptr arg handler fmt descr 175.Fc 176.Fo SYSCTL_VNET_STRING 177.Fa parent nbr name access arg len descr 178.Fc 179.Fo SYSCTL_VNET_STRUCT 180.Fa parent nbr name access ptr type descr 181.Fc 182.Fo SYSCTL_VNET_UINT 183.Fa parent nbr name access ptr val descr 184.Fc 185.Fo VNET_SYSCTL_ARG 186.Fa req arg1 187.Fc 188.\" ------------------------------------------------------------ 189.Sh DESCRIPTION 190.Nm 191is the name of a technique to virtualize the network stack. 192The basic idea is to change global resources most notably variables into 193per network stack resources and have functions, sysctls, eventhandlers, 194etc. access and handle them in the context of the correct instance. 195Each (virtual) network stack is attached to a 196.Em prison , 197with 198.Vt vnet0 199being the unrestricted default network stack of the base system. 200.Pp 201The global defines for 202.Dv VNET_SETNAME 203and 204.Dv VNET_SYMPREFIX 205are shared with 206.Xr kvm 3 207to access internals for debugging reasons. 
208.\" ------------------------------------------------------------ 209.Ss "Variable Declaration" 210.\" 211Variables are virtualized by using the 212.Fn VNET_DEFINE 213macro rather than writing them out as 214.Em type name . 215One can still use static initialization, e.g., 216.Pp 217.Dl Li VNET_DEFINE(int, foo) = 1; 218.Pp 219Variables declared with the static keyword can use the 220.Fn VNET_DEFINE_STATIC 221macro, e.g., 222.Pp 223.Dl Li VNET_DEFINE_STATIC(SLIST_HEAD(, bar), bars); 224.Pp 225Static initialization is not possible when the virtualized variable 226would need to be referenced, e.g., with 227.Dq TAILQ_HEAD_INITIALIZER() . 228In that case a 229.Fn VNET_SYSINIT 230based initialization function must be used. 231.Pp 232External variables have to be declared using the 233.Fn VNET_DECLARE 234macro. 235In either case the convention is to define another macro, 236that is then used throughout the implementation to access that variable. 237The variable name is usually prefixed by 238.Em V_ 239to express that it is virtualized. 240The 241.Fn VNET 242macro will then translate accesses to that variable to the copy of the 243currently selected instance (see the 244.Sx "Virtual Instance Selection" 245section): 246.Pp 247.Dl Li #define V_name VNET(name) 248.Pp 249.Em NOTE: 250Do not confuse this with the convention used by 251.Xr VFS 9 . 252.Pp 253The 254.Fn VNET_NAME 255macro returns the offset within the memory region of the virtual network 256stack instance. 257It is usually only used with 258.Fn SYSCTL_VNET_* 259macros. 
260.\" ------------------------------------------------------------ 261.Ss "Virtual Instance Selection" 262.\" 263There are three different places where the current virtual 264network stack pointer is stored and can be taken from: 265.Bl -enum -offset indent 266.It 267a 268.Em prison : 269.Dl "(struct prison *)->pr_vnet" 270.Pp 271For convenience the following macros are provided: 272.Bd -literal -compact -offset indent 273.Fn CRED_TO_VNET "struct ucred *" 274.Fn TD_TO_VNET "struct thread *" 275.Fn P_TO_VNET "struct proc *" 276.Ed 277.It 278a 279.Em socket : 280.Dl "(struct socket *)->so_vnet" 281.It 282an 283.Em interface : 284.Dl "(struct ifnet *)->if_vnet" 285.El 286.Pp 287.\" 288In addition the currently active instance is cached in 289.Dq "curthread->td_vnet" 290which is usually only accessed through the 291.Dv curvnet 292macro. 293.Pp 294.\" 295To set the correct context of the current virtual network instance, use the 296.Fn CURVNET_SET 297or 298.Fn CURVNET_SET_QUIET 299macros. 300The 301.Fn CURVNET_SET_QUIET 302version will not record vnet recursions in case the kernel was compiled 303with 304.Cd "options VNET_DEBUG" 305and should thus only be used in well known cases, where recursion is 306unavoidable. 307Both macros will save the previous state on the stack and it must be restored 308with the 309.Fn CURVNET_RESTORE 310macro. 311.Pp 312.Em NOTE: 313As the previous state is saved on the stack, you cannot have multiple 314.Fn CURVNET_SET 315calls in the same block. 316.Pp 317.Em NOTE: 318As the previous state is saved on the stack, a 319.Fn CURVNET_RESTORE 320call has to be in the same block as the 321.Fn CURVNET_SET 322call or in a subblock with the same idea of the saved instances as the 323outer block. 324.Pp 325.Em NOTE: 326As each macro is a set of operations and, as previously explained, cannot 327be put into its own block when defined, one cannot conditionally set 328the current vnet context. 
329The following 330.Em not 331work: 332.Bd -literal -offset indent 333if (condition) 334 CURVNET_SET(vnet); 335.Ed 336.Pp 337nor would this work: 338.Bd -literal -offset indent 339if (condition) { 340 CURVNET_SET(vnet); 341} 342CURVNET_RESTORE(); 343.Ed 344.Pp 345.\" 346Sometimes one needs to loop over all virtual instances, for example to update 347virtual from global state, to run a function from a 348.Xr callout 9 349for each instance, etc. 350For those cases the 351.Fn VNET_ITERATOR_DECL 352and 353.Fn VNET_FOREACH 354macros are provided. 355The former macro defines the variable that iterates over the loop, 356and the latter loops over all of the virtual network stack instances. 357See 358.Sx "Locking" 359for how to safely traverse the list of all virtual instances. 360.Pp 361.\" 362The 363.Fn IS_DEFAULT_VNET 364macro provides a safe way to check whether the currently active instance is the 365unrestricted default network stack of the base system 366.Pq Vt vnet0 . 367.Pp 368.\" 369The 370.Fn VNET_ASSERT 371macro provides a way to conditionally add assertions that are only active with 372.Cd "options VIMAGE" 373compiled in and either 374.Cd "options VNET_DEBUG" 375or 376.Cd "options INVARIANTS" 377enabled as well. 378It uses the same semantics as 379.Xr KASSERT 9 . 380.\" ------------------------------------------------------------ 381.Ss "Locking" 382.\" 383For public access to the list of virtual network stack instances, 384e.g., by the 385.Fn VNET_FOREACH 386macro, read locks are provided. 387Macros are used to abstract from the actual type of the locks. 388If a caller may sleep while traversing the list, it must use the 389.Fn VNET_LIST_RLOCK 390and 391.Fn VNET_LIST_RUNLOCK 392macros. 393Otherwise, the caller can use 394.Fn VNET_LIST_RLOCK_NOSLEEP 395and 396.Fn VNET_LIST_RUNLOCK_NOSLEEP . 
397.\" ------------------------------------------------------------ 398.Ss "Startup and Teardown Functions" 399.\" 400To start or tear down a virtual network stack instance the internal 401functions 402.Fn vnet_alloc 403and 404.Fn vnet_destroy 405are provided and called from the jail framework. 406They run the publicly provided methods to handle network stack 407startup and teardown. 408.Pp 409For public control, the system startup interface has been enhanced 410to not only handle a system boot but to also handle a virtual 411network stack startup and teardown. 412To the base system the 413.Fn VNET_SYSINIT 414and 415.Fn VNET_SYSUNINIT 416macros look exactly as if there were no virtual network stack. 417In fact, if 418.Cd "options VIMAGE" 419is not compiled in they are compiled to the standard 420.Fn SYSINIT 421macros. 422In addition to that they are run for each virtual network stack 423when starting or, in reverse order, when shutting down. 424.\" ------------------------------------------------------------ 425.Ss "Eventhandlers" 426.\" 427Eventhandlers can be handled in two ways: 428.Pp 429.Bl -enum -offset indent -compact 430.It 431save the 432.Em tags 433returned in each virtual instance and properly free the eventhandlers 434on teardown using those, or 435.It 436use one eventhandler that will iterate over all virtual network 437stack instances. 438.El 439.Pp 440For the first case one can just use the normal 441.Xr EVENTHANDLER 9 442functions, while for the second case the 443.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER 444and 445.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG 446macros are provided. 447These differ in that 448.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG 449takes an extra first argument that will carry the 450.Fa "tag" 451upon return. 452Eventhandlers registered with either of these will not run 453.Fa func 454directly but 455.Fa func 456will be called from an internal iterator function for each vnet. 
457Both macros can only be used for eventhandlers that do not take 458additional arguments, as the variadic arguments from an 459.Xr EVENTHANDLER_INVOKE 9 460call will be ignored. 461.\" ------------------------------------------------------------ 462.Ss "Sysctl Handling" 463.\" 464A 465.Xr sysctl 9 466can be virtualized by using one of the 467.Fn SYSCTL_VNET_* 468macros. 469.Pp 470They take the same arguments as the standard 471.Xr sysctl 9 472functions, with the only difference being that the 473.Fa ptr 474argument has to be passed as 475.Ql &VNET_NAME(foo) 476instead of 477.Ql &foo 478so that the variable can be selected from the correct memory 479region of the virtual network stack instance of the caller. 480.Pp 481For the very rare case a sysctl handler function would want to 482handle 483.Fa arg1 484itself, the 485.Fn VNET_SYSCTL_ARG req arg1 486macro is provided that will translate the 487.Fa arg1 488argument to the correct memory address in the virtual network stack 489context of the caller. 490.\" ------------------------------------------------------------ 491.Sh SEE ALSO 492.Xr jail 2 , 493.Xr kvm 3 , 494.Xr EVENTHANDLER 9 , 495.\" .Xr pcpu 9 , 496.Xr KASSERT 9 , 497.Xr sysctl 9 498.\" .Xr SYSINIT 9 499.Pp 500Marko Zec, Implementing a Clonable Network Stack in the FreeBSD Kernel, 501USENIX ATC'03, June 2003, Boston 502.Sh HISTORY 503The virtual network stack implementation first appeared in 504.Fx 8.0 . 505.Sh AUTHORS 506.An -nosplit 507The 508.Nm 509framework was designed and implemented at the University of Zagreb by 510.An Marko Zec 511under sponsorship of the FreeBSD Foundation and NLnet Foundation, 512and later extended and refined by 513.An Bjoern A. Zeeb 514(also under FreeBSD Foundation sponsorship), and 515.An Robert Watson . 516.Pp 517This manual page was written by 518.An Bjoern A. Zeeb, CK Software GmbH, 519under sponsorship from the FreeBSD Foundation. 520