1eddfbb76SRobert Watson /*- 2530c0060SRobert Watson * Copyright (c) 2004-2009 University of Zagreb 3530c0060SRobert Watson * Copyright (c) 2006-2009 FreeBSD Foundation 4530c0060SRobert Watson * All rights reserved. 5530c0060SRobert Watson * 6530c0060SRobert Watson * This software was developed by the University of Zagreb and the 7530c0060SRobert Watson * FreeBSD Foundation under sponsorship by the Stichting NLnet and the 8530c0060SRobert Watson * FreeBSD Foundation. 9530c0060SRobert Watson * 10eddfbb76SRobert Watson * Copyright (c) 2009 Jeffrey Roberson <jeff@freebsd.org> 11eddfbb76SRobert Watson * Copyright (c) 2009 Robert N. M. Watson 12eddfbb76SRobert Watson * All rights reserved. 13eddfbb76SRobert Watson * 14eddfbb76SRobert Watson * Redistribution and use in source and binary forms, with or without 15eddfbb76SRobert Watson * modification, are permitted provided that the following conditions 16eddfbb76SRobert Watson * are met: 17eddfbb76SRobert Watson * 1. Redistributions of source code must retain the above copyright 18eddfbb76SRobert Watson * notice, this list of conditions and the following disclaimer. 19eddfbb76SRobert Watson * 2. Redistributions in binary form must reproduce the above copyright 20eddfbb76SRobert Watson * notice, this list of conditions and the following disclaimer in the 21eddfbb76SRobert Watson * documentation and/or other materials provided with the distribution. 22eddfbb76SRobert Watson * 23eddfbb76SRobert Watson * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 24eddfbb76SRobert Watson * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25eddfbb76SRobert Watson * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26eddfbb76SRobert Watson * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 27eddfbb76SRobert Watson * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28eddfbb76SRobert Watson * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29eddfbb76SRobert Watson * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30eddfbb76SRobert Watson * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31eddfbb76SRobert Watson * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32eddfbb76SRobert Watson * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33eddfbb76SRobert Watson * SUCH DAMAGE. 34eddfbb76SRobert Watson */ 35eddfbb76SRobert Watson 36eddfbb76SRobert Watson #include <sys/cdefs.h> 37eddfbb76SRobert Watson __FBSDID("$FreeBSD$"); 38eddfbb76SRobert Watson 39530c0060SRobert Watson #include "opt_ddb.h" 40530c0060SRobert Watson 41eddfbb76SRobert Watson #include <sys/param.h> 42eddfbb76SRobert Watson #include <sys/kernel.h> 43530c0060SRobert Watson #include <sys/jail.h> 44eddfbb76SRobert Watson #include <sys/systm.h> 45eddfbb76SRobert Watson #include <sys/sysctl.h> 46eddfbb76SRobert Watson #include <sys/linker_set.h> 47eddfbb76SRobert Watson #include <sys/lock.h> 48eddfbb76SRobert Watson #include <sys/malloc.h> 49eddfbb76SRobert Watson #include <sys/proc.h> 50530c0060SRobert Watson #include <sys/socket.h> 51eddfbb76SRobert Watson #include <sys/sx.h> 52eddfbb76SRobert Watson #include <sys/sysctl.h> 53eddfbb76SRobert Watson 54530c0060SRobert Watson #ifdef DDB 55530c0060SRobert Watson #include <ddb/ddb.h> 56530c0060SRobert Watson #endif 57530c0060SRobert Watson 58530c0060SRobert Watson #include <net/if.h> 59530c0060SRobert Watson #include <net/if_var.h> 60eddfbb76SRobert Watson #include <net/vnet.h> 61eddfbb76SRobert Watson 62eddfbb76SRobert Watson /*- 63ed3db012SRobert Watson * This file implements core functions for virtual network stacks: 64ed3db012SRobert Watson * 
65530c0060SRobert Watson * - Core virtual network stack management functions. 66530c0060SRobert Watson * 67ed3db012SRobert Watson * - Virtual network stack memory allocator, which virtualized global 68ed3db012SRobert Watson * variables in the network stack 69ed3db012SRobert Watson * 70ed3db012SRobert Watson * - Virtualized SYSINIT's/SYSUNINIT's, which allow network stack subsystems 71ed3db012SRobert Watson * to register startup/shutdown events to be run for each virtual network 72ed3db012SRobert Watson * stack instance. 73530c0060SRobert Watson */ 74530c0060SRobert Watson 75530c0060SRobert Watson MALLOC_DEFINE(M_VNET, "vnet", "network stack control block"); 76530c0060SRobert Watson 77530c0060SRobert Watson /* 78530c0060SRobert Watson * The virtual network stack list has two read-write locks, one sleepable and 79530c0060SRobert Watson * the other not, so that the list can be stablized and walked in a variety 80530c0060SRobert Watson * of network stack contexts. Both must be acquired exclusively to modify 81530c0060SRobert Watson * the list. 82530c0060SRobert Watson */ 83530c0060SRobert Watson struct rwlock vnet_rwlock; 84530c0060SRobert Watson struct sx vnet_sxlock; 85530c0060SRobert Watson 86530c0060SRobert Watson #define VNET_LIST_WLOCK() do { \ 87530c0060SRobert Watson sx_xlock(&vnet_sxlock); \ 88530c0060SRobert Watson rw_wlock(&vnet_rwlock); \ 89530c0060SRobert Watson } while (0) 90530c0060SRobert Watson 91530c0060SRobert Watson #define VNET_LIST_WUNLOCK() do { \ 92530c0060SRobert Watson rw_wunlock(&vnet_rwlock); \ 93530c0060SRobert Watson sx_xunlock(&vnet_sxlock); \ 94530c0060SRobert Watson } while (0) 95530c0060SRobert Watson 96530c0060SRobert Watson struct vnet_list_head vnet_head; 97530c0060SRobert Watson struct vnet *vnet0; 98530c0060SRobert Watson 99530c0060SRobert Watson /* 100ed3db012SRobert Watson * The virtual network stack allocator provides storage for virtualized 101ed3db012SRobert Watson * global variables. 
 * These variables are defined/declared using the
 * VNET_DEFINE()/VNET_DECLARE() macros, which place them in the 'set_vnet'
 * linker set.  The details of the implementation are somewhat subtle, but
 * allow the majority of network subsystems to remain
 * virtualization-agnostic.
 *
 * The virtual network stack allocator handles variables in the base kernel
 * vs. modules in similar but different ways.  In both cases, virtualized
 * global variables are marked as such by being declared to be part of the
 * vnet linker set.  These "master" copies of global variables serve two
 * functions:
 *
 * (1) They contain static initialization or "default" values for global
 *     variables which will be propagated to each virtual network stack
 *     instance when created.  As with normal global variables, they default
 *     to zero-filled.
 *
 * (2) They act as unique global names by which the variable can be referred
 *     to, regardless of network stack instance.  The single global symbol
 *     will be used to calculate the location of a per-virtual instance
 *     variable at run-time.
 *
 * Each virtual network stack instance has a complete copy of each
 * virtualized global variable, stored in a malloc'd block of memory
 * referred to by vnet->vnet_data_mem.
 * Critical to the design is that each
 * per-instance memory block is laid out identically to the master block so
 * that the offset of each global variable is the same across all blocks.  To
 * optimize run-time access, a precalculated 'base' address,
 * vnet->vnet_data_base, is stored in each vnet, and is the amount that can
 * be added to the address of a 'master' instance of a variable to get to the
 * per-vnet instance.
 *
 * Virtualized global variables in modules are handled in a similar manner,
 * but as each module has its own 'set_vnet' linker set, and we want to keep
 * all virtualized globals together, we reserve space in the kernel's linker
 * set for potential module variables using a per-vnet character array,
 * 'modspace'.  The virtual network stack allocator maintains a free list to
 * track what space in the array is free (all, initially) and as modules are
 * linked, allocates portions of the space to specific globals.  The kernel
 * module linker queries the virtual network stack allocator and will
 * bind references of the global to the location during linking.  It also
 * calls into the virtual network stack allocator, once the memory is
 * initialized, in order to propagate the new static initializations to all
 * existing virtual network stack instances so that the soon-to-be executing
 * module will find every network stack instance with proper default values.
146eddfbb76SRobert Watson */ 147eddfbb76SRobert Watson 148eddfbb76SRobert Watson /* 149eddfbb76SRobert Watson * Location of the kernel's 'set_vnet' linker set. 150eddfbb76SRobert Watson */ 151eddfbb76SRobert Watson extern uintptr_t *__start_set_vnet; 152eddfbb76SRobert Watson extern uintptr_t *__stop_set_vnet; 153eddfbb76SRobert Watson 154eddfbb76SRobert Watson #define VNET_START (uintptr_t)&__start_set_vnet 155eddfbb76SRobert Watson #define VNET_STOP (uintptr_t)&__stop_set_vnet 156eddfbb76SRobert Watson 157eddfbb76SRobert Watson /* 158eddfbb76SRobert Watson * Number of bytes of data in the 'set_vnet' linker set, and hence the total 159eddfbb76SRobert Watson * size of all kernel virtualized global variables, and the malloc(9) type 160eddfbb76SRobert Watson * that will be used to allocate it. 161eddfbb76SRobert Watson */ 162eddfbb76SRobert Watson #define VNET_BYTES (VNET_STOP - VNET_START) 163eddfbb76SRobert Watson 164eddfbb76SRobert Watson MALLOC_DEFINE(M_VNET_DATA, "vnet_data", "VNET data"); 165eddfbb76SRobert Watson 166eddfbb76SRobert Watson /* 167eddfbb76SRobert Watson * VNET_MODMIN is the minimum number of bytes we will reserve for the sum of 168eddfbb76SRobert Watson * global variables across all loaded modules. As this actually sizes an 169eddfbb76SRobert Watson * array declared as a virtualized global variable in the kernel itself, and 170eddfbb76SRobert Watson * we want the virtualized global variable space to be page-sized, we may 171eddfbb76SRobert Watson * have more space than that in practice. 172eddfbb76SRobert Watson */ 173eddfbb76SRobert Watson #define VNET_MODMIN 8192 174eddfbb76SRobert Watson #define VNET_SIZE roundup2(VNET_BYTES, PAGE_SIZE) 175eddfbb76SRobert Watson #define VNET_MODSIZE (VNET_SIZE - (VNET_BYTES - VNET_MODMIN)) 176eddfbb76SRobert Watson 177eddfbb76SRobert Watson /* 178eddfbb76SRobert Watson * Space to store virtualized global variables from loadable kernel modules, 179eddfbb76SRobert Watson * and the free list to manage it. 
180eddfbb76SRobert Watson */ 181eddfbb76SRobert Watson static VNET_DEFINE(char, modspace[VNET_MODMIN]); 182eddfbb76SRobert Watson 183d0728d71SRobert Watson /* 184d0728d71SRobert Watson * Global lists of subsystem constructor and destructors for vnets. 185d0728d71SRobert Watson * They are registered via VNET_SYSINIT() and VNET_SYSUNINIT(). The 186d0728d71SRobert Watson * lists are protected by the vnet_sxlock global lock. 187d0728d71SRobert Watson */ 188d0728d71SRobert Watson static TAILQ_HEAD(vnet_sysinit_head, vnet_sysinit) vnet_constructors = 189d0728d71SRobert Watson TAILQ_HEAD_INITIALIZER(vnet_constructors); 190d0728d71SRobert Watson static TAILQ_HEAD(vnet_sysuninit_head, vnet_sysinit) vnet_destructors = 191d0728d71SRobert Watson TAILQ_HEAD_INITIALIZER(vnet_destructors); 192d0728d71SRobert Watson 193eddfbb76SRobert Watson struct vnet_data_free { 194eddfbb76SRobert Watson uintptr_t vnd_start; 195eddfbb76SRobert Watson int vnd_len; 196eddfbb76SRobert Watson TAILQ_ENTRY(vnet_data_free) vnd_link; 197eddfbb76SRobert Watson }; 198eddfbb76SRobert Watson 199eddfbb76SRobert Watson MALLOC_DEFINE(M_VNET_DATA_FREE, "vnet_data_free", "VNET resource accounting"); 200eddfbb76SRobert Watson static TAILQ_HEAD(, vnet_data_free) vnet_data_free_head = 201eddfbb76SRobert Watson TAILQ_HEAD_INITIALIZER(vnet_data_free_head); 202eddfbb76SRobert Watson static struct sx vnet_data_free_lock; 203eddfbb76SRobert Watson 204eddfbb76SRobert Watson /* 205530c0060SRobert Watson * Allocate a virtual network stack. 
206530c0060SRobert Watson */ 207530c0060SRobert Watson struct vnet * 208530c0060SRobert Watson vnet_alloc(void) 209530c0060SRobert Watson { 210530c0060SRobert Watson struct vnet *vnet; 211530c0060SRobert Watson 212530c0060SRobert Watson vnet = malloc(sizeof(struct vnet), M_VNET, M_WAITOK | M_ZERO); 213530c0060SRobert Watson vnet->vnet_magic_n = VNET_MAGIC_N; 2146bc2c7b7SRobert Watson 2156bc2c7b7SRobert Watson /* 2166bc2c7b7SRobert Watson * Allocate storage for virtualized global variables and copy in 2176bc2c7b7SRobert Watson * initial values form our 'master' copy. 2186bc2c7b7SRobert Watson */ 2196bc2c7b7SRobert Watson vnet->vnet_data_mem = malloc(VNET_SIZE, M_VNET_DATA, M_WAITOK); 2206bc2c7b7SRobert Watson memcpy(vnet->vnet_data_mem, (void *)VNET_START, VNET_BYTES); 2216bc2c7b7SRobert Watson 2226bc2c7b7SRobert Watson /* 2236bc2c7b7SRobert Watson * All use of vnet-specific data will immediately subtract VNET_START 2246bc2c7b7SRobert Watson * from the base memory pointer, so pre-calculate that now to avoid 2256bc2c7b7SRobert Watson * it on each use. 2266bc2c7b7SRobert Watson */ 2276bc2c7b7SRobert Watson vnet->vnet_data_base = (uintptr_t)vnet->vnet_data_mem - VNET_START; 228530c0060SRobert Watson 229530c0060SRobert Watson /* Initialize / attach vnet module instances. */ 230530c0060SRobert Watson CURVNET_SET_QUIET(vnet); 231530c0060SRobert Watson 232530c0060SRobert Watson sx_xlock(&vnet_sxlock); 233530c0060SRobert Watson vnet_sysinit(); 234530c0060SRobert Watson CURVNET_RESTORE(); 235530c0060SRobert Watson 236530c0060SRobert Watson rw_wlock(&vnet_rwlock); 237530c0060SRobert Watson LIST_INSERT_HEAD(&vnet_head, vnet, vnet_le); 238530c0060SRobert Watson VNET_LIST_WUNLOCK(); 239530c0060SRobert Watson 240530c0060SRobert Watson return (vnet); 241530c0060SRobert Watson } 242530c0060SRobert Watson 243530c0060SRobert Watson /* 244530c0060SRobert Watson * Destroy a virtual network stack. 
245530c0060SRobert Watson */ 246530c0060SRobert Watson void 247530c0060SRobert Watson vnet_destroy(struct vnet *vnet) 248530c0060SRobert Watson { 249530c0060SRobert Watson struct ifnet *ifp, *nifp; 250530c0060SRobert Watson 251530c0060SRobert Watson KASSERT(vnet->vnet_sockcnt == 0, 252530c0060SRobert Watson ("%s: vnet still has sockets", __func__)); 253530c0060SRobert Watson 254530c0060SRobert Watson VNET_LIST_WLOCK(); 255530c0060SRobert Watson LIST_REMOVE(vnet, vnet_le); 256530c0060SRobert Watson rw_wunlock(&vnet_rwlock); 257530c0060SRobert Watson 258530c0060SRobert Watson CURVNET_SET_QUIET(vnet); 259530c0060SRobert Watson 260530c0060SRobert Watson /* Return all inherited interfaces to their parent vnets. */ 261530c0060SRobert Watson TAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) { 262530c0060SRobert Watson if (ifp->if_home_vnet != ifp->if_vnet) 263530c0060SRobert Watson if_vmove(ifp, ifp->if_home_vnet); 264530c0060SRobert Watson } 265530c0060SRobert Watson 266530c0060SRobert Watson vnet_sysuninit(); 267530c0060SRobert Watson sx_xunlock(&vnet_sxlock); 268530c0060SRobert Watson 269530c0060SRobert Watson CURVNET_RESTORE(); 270530c0060SRobert Watson 2716bc2c7b7SRobert Watson /* 2726bc2c7b7SRobert Watson * Release storage for the virtual network stack instance. 2736bc2c7b7SRobert Watson */ 2746bc2c7b7SRobert Watson free(vnet->vnet_data_mem, M_VNET_DATA); 2756bc2c7b7SRobert Watson vnet->vnet_data_mem = NULL; 2766bc2c7b7SRobert Watson vnet->vnet_data_base = 0; 277530c0060SRobert Watson vnet->vnet_magic_n = 0xdeadbeef; 278530c0060SRobert Watson free(vnet, M_VNET); 279530c0060SRobert Watson } 280530c0060SRobert Watson 281530c0060SRobert Watson /* 282530c0060SRobert Watson * Boot time initialization and allocation of virtual network stacks. 
283530c0060SRobert Watson */ 284530c0060SRobert Watson static void 285530c0060SRobert Watson vnet_init_prelink(void *arg) 286530c0060SRobert Watson { 287530c0060SRobert Watson 288530c0060SRobert Watson rw_init(&vnet_rwlock, "vnet_rwlock"); 289530c0060SRobert Watson sx_init(&vnet_sxlock, "vnet_sxlock"); 290530c0060SRobert Watson LIST_INIT(&vnet_head); 291530c0060SRobert Watson } 292530c0060SRobert Watson SYSINIT(vnet_init_prelink, SI_SUB_VNET_PRELINK, SI_ORDER_FIRST, 293530c0060SRobert Watson vnet_init_prelink, NULL); 294530c0060SRobert Watson 295530c0060SRobert Watson static void 296530c0060SRobert Watson vnet0_init(void *arg) 297530c0060SRobert Watson { 298530c0060SRobert Watson 299530c0060SRobert Watson /* 300530c0060SRobert Watson * We MUST clear curvnet in vi_init_done() before going SMP, 301530c0060SRobert Watson * otherwise CURVNET_SET() macros would scream about unnecessary 302530c0060SRobert Watson * curvnet recursions. 303530c0060SRobert Watson */ 304530c0060SRobert Watson curvnet = prison0.pr_vnet = vnet0 = vnet_alloc(); 305530c0060SRobert Watson } 306530c0060SRobert Watson SYSINIT(vnet0_init, SI_SUB_VNET, SI_ORDER_FIRST, vnet0_init, NULL); 307530c0060SRobert Watson 308530c0060SRobert Watson static void 309530c0060SRobert Watson vnet_init_done(void *unused) 310530c0060SRobert Watson { 311530c0060SRobert Watson 312530c0060SRobert Watson curvnet = NULL; 313530c0060SRobert Watson } 314530c0060SRobert Watson 315530c0060SRobert Watson SYSINIT(vnet_init_done, SI_SUB_VNET_DONE, SI_ORDER_FIRST, vnet_init_done, 316530c0060SRobert Watson NULL); 317530c0060SRobert Watson 318530c0060SRobert Watson /* 319eddfbb76SRobert Watson * Once on boot, initialize the modspace freelist to entirely cover modspace. 
320eddfbb76SRobert Watson */ 321eddfbb76SRobert Watson static void 322eddfbb76SRobert Watson vnet_data_startup(void *dummy __unused) 323eddfbb76SRobert Watson { 324eddfbb76SRobert Watson struct vnet_data_free *df; 325eddfbb76SRobert Watson 326eddfbb76SRobert Watson df = malloc(sizeof(*df), M_VNET_DATA_FREE, M_WAITOK | M_ZERO); 327eddfbb76SRobert Watson df->vnd_start = (uintptr_t)&VNET_NAME(modspace); 328eddfbb76SRobert Watson df->vnd_len = VNET_MODSIZE; 329eddfbb76SRobert Watson TAILQ_INSERT_HEAD(&vnet_data_free_head, df, vnd_link); 330eddfbb76SRobert Watson sx_init(&vnet_data_free_lock, "vnet_data alloc lock"); 331eddfbb76SRobert Watson } 332eddfbb76SRobert Watson SYSINIT(vnet_data, SI_SUB_KLD, SI_ORDER_FIRST, vnet_data_startup, 0); 333eddfbb76SRobert Watson 334eddfbb76SRobert Watson /* 335eddfbb76SRobert Watson * When a module is loaded and requires storage for a virtualized global 336eddfbb76SRobert Watson * variable, allocate space from the modspace free list. This interface 337eddfbb76SRobert Watson * should be used only by the kernel linker. 
338eddfbb76SRobert Watson */ 339eddfbb76SRobert Watson void * 340eddfbb76SRobert Watson vnet_data_alloc(int size) 341eddfbb76SRobert Watson { 342eddfbb76SRobert Watson struct vnet_data_free *df; 343eddfbb76SRobert Watson void *s; 344eddfbb76SRobert Watson 345eddfbb76SRobert Watson s = NULL; 346eddfbb76SRobert Watson size = roundup2(size, sizeof(void *)); 347eddfbb76SRobert Watson sx_xlock(&vnet_data_free_lock); 348eddfbb76SRobert Watson TAILQ_FOREACH(df, &vnet_data_free_head, vnd_link) { 349eddfbb76SRobert Watson if (df->vnd_len < size) 350eddfbb76SRobert Watson continue; 351eddfbb76SRobert Watson if (df->vnd_len == size) { 352eddfbb76SRobert Watson s = (void *)df->vnd_start; 353eddfbb76SRobert Watson TAILQ_REMOVE(&vnet_data_free_head, df, vnd_link); 354eddfbb76SRobert Watson free(df, M_VNET_DATA_FREE); 355eddfbb76SRobert Watson break; 356eddfbb76SRobert Watson } 357eddfbb76SRobert Watson s = (void *)df->vnd_start; 358eddfbb76SRobert Watson df->vnd_len -= size; 359eddfbb76SRobert Watson df->vnd_start = df->vnd_start + size; 360eddfbb76SRobert Watson break; 361eddfbb76SRobert Watson } 362eddfbb76SRobert Watson sx_xunlock(&vnet_data_free_lock); 363eddfbb76SRobert Watson 364eddfbb76SRobert Watson return (s); 365eddfbb76SRobert Watson } 366eddfbb76SRobert Watson 367eddfbb76SRobert Watson /* 368eddfbb76SRobert Watson * Free space for a virtualized global variable on module unload. 
369eddfbb76SRobert Watson */ 370eddfbb76SRobert Watson void 371eddfbb76SRobert Watson vnet_data_free(void *start_arg, int size) 372eddfbb76SRobert Watson { 373eddfbb76SRobert Watson struct vnet_data_free *df; 374eddfbb76SRobert Watson struct vnet_data_free *dn; 375eddfbb76SRobert Watson uintptr_t start; 376eddfbb76SRobert Watson uintptr_t end; 377eddfbb76SRobert Watson 378eddfbb76SRobert Watson size = roundup2(size, sizeof(void *)); 379eddfbb76SRobert Watson start = (uintptr_t)start_arg; 380eddfbb76SRobert Watson end = start + size; 381eddfbb76SRobert Watson /* 382eddfbb76SRobert Watson * Free a region of space and merge it with as many neighbors as 383eddfbb76SRobert Watson * possible. Keeping the list sorted simplifies this operation. 384eddfbb76SRobert Watson */ 385eddfbb76SRobert Watson sx_xlock(&vnet_data_free_lock); 386eddfbb76SRobert Watson TAILQ_FOREACH(df, &vnet_data_free_head, vnd_link) { 387eddfbb76SRobert Watson if (df->vnd_start > end) 388eddfbb76SRobert Watson break; 389eddfbb76SRobert Watson /* 390eddfbb76SRobert Watson * If we expand at the end of an entry we may have to 391eddfbb76SRobert Watson * merge it with the one following it as well. 
392eddfbb76SRobert Watson */ 393eddfbb76SRobert Watson if (df->vnd_start + df->vnd_len == start) { 394eddfbb76SRobert Watson df->vnd_len += size; 395eddfbb76SRobert Watson dn = TAILQ_NEXT(df, vnd_link); 396eddfbb76SRobert Watson if (df->vnd_start + df->vnd_len == dn->vnd_start) { 397eddfbb76SRobert Watson df->vnd_len += dn->vnd_len; 398eddfbb76SRobert Watson TAILQ_REMOVE(&vnet_data_free_head, dn, vnd_link); 399eddfbb76SRobert Watson free(dn, M_VNET_DATA_FREE); 400eddfbb76SRobert Watson } 401eddfbb76SRobert Watson sx_xunlock(&vnet_data_free_lock); 402eddfbb76SRobert Watson return; 403eddfbb76SRobert Watson } 404eddfbb76SRobert Watson if (df->vnd_start == end) { 405eddfbb76SRobert Watson df->vnd_start = start; 406eddfbb76SRobert Watson df->vnd_len += size; 407eddfbb76SRobert Watson sx_xunlock(&vnet_data_free_lock); 408eddfbb76SRobert Watson return; 409eddfbb76SRobert Watson } 410eddfbb76SRobert Watson } 411eddfbb76SRobert Watson dn = malloc(sizeof(*df), M_VNET_DATA_FREE, M_WAITOK | M_ZERO); 412eddfbb76SRobert Watson dn->vnd_start = start; 413eddfbb76SRobert Watson dn->vnd_len = size; 414eddfbb76SRobert Watson if (df) 415eddfbb76SRobert Watson TAILQ_INSERT_BEFORE(df, dn, vnd_link); 416eddfbb76SRobert Watson else 417eddfbb76SRobert Watson TAILQ_INSERT_TAIL(&vnet_data_free_head, dn, vnd_link); 418eddfbb76SRobert Watson sx_xunlock(&vnet_data_free_lock); 419eddfbb76SRobert Watson } 420eddfbb76SRobert Watson 421eddfbb76SRobert Watson /* 422eddfbb76SRobert Watson * When a new virtualized global variable has been allocated, propagate its 423eddfbb76SRobert Watson * initial value to each already-allocated virtual network stack instance. 
424eddfbb76SRobert Watson */ 425eddfbb76SRobert Watson void 426eddfbb76SRobert Watson vnet_data_copy(void *start, int size) 427eddfbb76SRobert Watson { 4287429a3f3SRobert Watson struct vnet *vnet; 429eddfbb76SRobert Watson 4307429a3f3SRobert Watson VNET_LIST_RLOCK(); 4317429a3f3SRobert Watson LIST_FOREACH(vnet, &vnet_head, vnet_le) 4327429a3f3SRobert Watson memcpy((void *)((uintptr_t)vnet->vnet_data_base + 4337429a3f3SRobert Watson (uintptr_t)start), start, size); 4347429a3f3SRobert Watson VNET_LIST_RUNLOCK(); 435eddfbb76SRobert Watson } 436eddfbb76SRobert Watson 437eddfbb76SRobert Watson /* 438eddfbb76SRobert Watson * Variants on sysctl_handle_foo that know how to handle virtualized global 439eddfbb76SRobert Watson * variables: if 'arg1' is a pointer, then we transform it to the local vnet 440eddfbb76SRobert Watson * offset. 441eddfbb76SRobert Watson */ 442eddfbb76SRobert Watson int 443eddfbb76SRobert Watson vnet_sysctl_handle_int(SYSCTL_HANDLER_ARGS) 444eddfbb76SRobert Watson { 445eddfbb76SRobert Watson 446eddfbb76SRobert Watson if (arg1 != NULL) 447eddfbb76SRobert Watson arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1); 448eddfbb76SRobert Watson return (sysctl_handle_int(oidp, arg1, arg2, req)); 449eddfbb76SRobert Watson } 450eddfbb76SRobert Watson 451eddfbb76SRobert Watson int 452eddfbb76SRobert Watson vnet_sysctl_handle_opaque(SYSCTL_HANDLER_ARGS) 453eddfbb76SRobert Watson { 454eddfbb76SRobert Watson 455eddfbb76SRobert Watson if (arg1 != NULL) 456eddfbb76SRobert Watson arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1); 457eddfbb76SRobert Watson return (sysctl_handle_opaque(oidp, arg1, arg2, req)); 458eddfbb76SRobert Watson } 459eddfbb76SRobert Watson 460eddfbb76SRobert Watson int 461eddfbb76SRobert Watson vnet_sysctl_handle_string(SYSCTL_HANDLER_ARGS) 462eddfbb76SRobert Watson { 463eddfbb76SRobert Watson 464eddfbb76SRobert Watson if (arg1 != NULL) 465eddfbb76SRobert Watson arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1); 
466eddfbb76SRobert Watson return (sysctl_handle_string(oidp, arg1, arg2, req)); 467eddfbb76SRobert Watson } 468eddfbb76SRobert Watson 469eddfbb76SRobert Watson int 470eddfbb76SRobert Watson vnet_sysctl_handle_uint(SYSCTL_HANDLER_ARGS) 471eddfbb76SRobert Watson { 472eddfbb76SRobert Watson 473eddfbb76SRobert Watson if (arg1 != NULL) 474eddfbb76SRobert Watson arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1); 475eddfbb76SRobert Watson return (sysctl_handle_int(oidp, arg1, arg2, req)); 476eddfbb76SRobert Watson } 477d0728d71SRobert Watson 478d0728d71SRobert Watson /* 479d0728d71SRobert Watson * Support for special SYSINIT handlers registered via VNET_SYSINIT() 480d0728d71SRobert Watson * and VNET_SYSUNINIT(). 481d0728d71SRobert Watson */ 482d0728d71SRobert Watson void 483d0728d71SRobert Watson vnet_register_sysinit(void *arg) 484d0728d71SRobert Watson { 485d0728d71SRobert Watson struct vnet_sysinit *vs, *vs2; 486d0728d71SRobert Watson struct vnet *vnet; 487d0728d71SRobert Watson 488d0728d71SRobert Watson vs = arg; 489d0728d71SRobert Watson KASSERT(vs->subsystem > SI_SUB_VNET, ("vnet sysinit too early")); 490d0728d71SRobert Watson 491d0728d71SRobert Watson /* Add the constructor to the global list of vnet constructors. 
*/ 492d0728d71SRobert Watson sx_xlock(&vnet_sxlock); 493d0728d71SRobert Watson TAILQ_FOREACH(vs2, &vnet_constructors, link) { 494d0728d71SRobert Watson if (vs2->subsystem > vs->subsystem) 495d0728d71SRobert Watson break; 496d0728d71SRobert Watson if (vs2->subsystem == vs->subsystem && vs2->order > vs->order) 497d0728d71SRobert Watson break; 498d0728d71SRobert Watson } 499d0728d71SRobert Watson if (vs2 != NULL) 500d0728d71SRobert Watson TAILQ_INSERT_BEFORE(vs2, vs, link); 501d0728d71SRobert Watson else 502d0728d71SRobert Watson TAILQ_INSERT_TAIL(&vnet_constructors, vs, link); 503d0728d71SRobert Watson 504d0728d71SRobert Watson /* 505d0728d71SRobert Watson * Invoke the constructor on all the existing vnets when it is 506d0728d71SRobert Watson * registered. 507d0728d71SRobert Watson */ 508d0728d71SRobert Watson VNET_FOREACH(vnet) { 509d0728d71SRobert Watson CURVNET_SET_QUIET(vnet); 510d0728d71SRobert Watson vs->func(vs->arg); 511d0728d71SRobert Watson CURVNET_RESTORE(); 512d0728d71SRobert Watson } 513d0728d71SRobert Watson sx_xunlock(&vnet_sxlock); 514d0728d71SRobert Watson } 515d0728d71SRobert Watson 516d0728d71SRobert Watson void 517d0728d71SRobert Watson vnet_deregister_sysinit(void *arg) 518d0728d71SRobert Watson { 519d0728d71SRobert Watson struct vnet_sysinit *vs; 520d0728d71SRobert Watson 521d0728d71SRobert Watson vs = arg; 522d0728d71SRobert Watson 523d0728d71SRobert Watson /* Remove the constructor from the global list of vnet constructors. 
*/ 524d0728d71SRobert Watson sx_xlock(&vnet_sxlock); 525d0728d71SRobert Watson TAILQ_REMOVE(&vnet_constructors, vs, link); 526d0728d71SRobert Watson sx_xunlock(&vnet_sxlock); 527d0728d71SRobert Watson } 528d0728d71SRobert Watson 529d0728d71SRobert Watson void 530d0728d71SRobert Watson vnet_register_sysuninit(void *arg) 531d0728d71SRobert Watson { 532d0728d71SRobert Watson struct vnet_sysinit *vs, *vs2; 533d0728d71SRobert Watson 534d0728d71SRobert Watson vs = arg; 535d0728d71SRobert Watson 536d0728d71SRobert Watson /* Add the destructor to the global list of vnet destructors. */ 537d0728d71SRobert Watson sx_xlock(&vnet_sxlock); 538d0728d71SRobert Watson TAILQ_FOREACH(vs2, &vnet_destructors, link) { 539d0728d71SRobert Watson if (vs2->subsystem > vs->subsystem) 540d0728d71SRobert Watson break; 541d0728d71SRobert Watson if (vs2->subsystem == vs->subsystem && vs2->order > vs->order) 542d0728d71SRobert Watson break; 543d0728d71SRobert Watson } 544d0728d71SRobert Watson if (vs2 != NULL) 545d0728d71SRobert Watson TAILQ_INSERT_BEFORE(vs2, vs, link); 546d0728d71SRobert Watson else 547d0728d71SRobert Watson TAILQ_INSERT_TAIL(&vnet_destructors, vs, link); 548d0728d71SRobert Watson sx_xunlock(&vnet_sxlock); 549d0728d71SRobert Watson } 550d0728d71SRobert Watson 551d0728d71SRobert Watson void 552d0728d71SRobert Watson vnet_deregister_sysuninit(void *arg) 553d0728d71SRobert Watson { 554d0728d71SRobert Watson struct vnet_sysinit *vs; 555d0728d71SRobert Watson struct vnet *vnet; 556d0728d71SRobert Watson 557d0728d71SRobert Watson vs = arg; 558d0728d71SRobert Watson 559d0728d71SRobert Watson /* 560d0728d71SRobert Watson * Invoke the destructor on all the existing vnets when it is 561d0728d71SRobert Watson * deregistered. 
562d0728d71SRobert Watson */ 563d0728d71SRobert Watson sx_xlock(&vnet_sxlock); 564d0728d71SRobert Watson VNET_FOREACH(vnet) { 565d0728d71SRobert Watson CURVNET_SET_QUIET(vnet); 566d0728d71SRobert Watson vs->func(vs->arg); 567d0728d71SRobert Watson CURVNET_RESTORE(); 568d0728d71SRobert Watson } 569d0728d71SRobert Watson 570d0728d71SRobert Watson /* Remove the destructor from the global list of vnet destructors. */ 571d0728d71SRobert Watson TAILQ_REMOVE(&vnet_destructors, vs, link); 572d0728d71SRobert Watson sx_xunlock(&vnet_sxlock); 573d0728d71SRobert Watson } 574d0728d71SRobert Watson 575d0728d71SRobert Watson /* 576d0728d71SRobert Watson * Invoke all registered vnet constructors on the current vnet. Used 577d0728d71SRobert Watson * during vnet construction. The caller is responsible for ensuring 578d0728d71SRobert Watson * the new vnet is the current vnet and that the vnet_sxlock lock is 579d0728d71SRobert Watson * locked. 580d0728d71SRobert Watson */ 581d0728d71SRobert Watson void 582d0728d71SRobert Watson vnet_sysinit(void) 583d0728d71SRobert Watson { 584d0728d71SRobert Watson struct vnet_sysinit *vs; 585d0728d71SRobert Watson 586d0728d71SRobert Watson sx_assert(&vnet_sxlock, SA_LOCKED); 587d0728d71SRobert Watson TAILQ_FOREACH(vs, &vnet_constructors, link) { 588d0728d71SRobert Watson vs->func(vs->arg); 589d0728d71SRobert Watson } 590d0728d71SRobert Watson } 591d0728d71SRobert Watson 592d0728d71SRobert Watson /* 593d0728d71SRobert Watson * Invoke all registered vnet destructors on the current vnet. Used 594d0728d71SRobert Watson * during vnet destruction. The caller is responsible for ensuring 595d0728d71SRobert Watson * the dying vnet is the current vnet and that the vnet_sxlock lock is 596d0728d71SRobert Watson * locked. 
597d0728d71SRobert Watson */ 598d0728d71SRobert Watson void 599d0728d71SRobert Watson vnet_sysuninit(void) 600d0728d71SRobert Watson { 601d0728d71SRobert Watson struct vnet_sysinit *vs; 602d0728d71SRobert Watson 603d0728d71SRobert Watson sx_assert(&vnet_sxlock, SA_LOCKED); 604d0728d71SRobert Watson TAILQ_FOREACH_REVERSE(vs, &vnet_destructors, vnet_sysuninit_head, 605d0728d71SRobert Watson link) { 606d0728d71SRobert Watson vs->func(vs->arg); 607d0728d71SRobert Watson } 608d0728d71SRobert Watson } 609530c0060SRobert Watson 610530c0060SRobert Watson #ifdef DDB 611530c0060SRobert Watson DB_SHOW_COMMAND(vnets, db_show_vnets) 612530c0060SRobert Watson { 613530c0060SRobert Watson VNET_ITERATOR_DECL(vnet_iter); 614530c0060SRobert Watson 615530c0060SRobert Watson #if SIZE_MAX == UINT32_MAX /* 32-bit arch */ 616530c0060SRobert Watson db_printf(" vnet ifs socks\n"); 617530c0060SRobert Watson #else /* 64-bit arch, most probaly... */ 618530c0060SRobert Watson db_printf(" vnet ifs socks\n"); 619530c0060SRobert Watson #endif 620530c0060SRobert Watson VNET_FOREACH(vnet_iter) { 621530c0060SRobert Watson db_printf("%p %3d %5d\n", vnet_iter, vnet_iter->vnet_ifcnt, 622530c0060SRobert Watson vnet_iter->vnet_sockcnt); 623530c0060SRobert Watson } 624530c0060SRobert Watson } 625530c0060SRobert Watson #endif 626