1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 /* This file is dual-licensed; see usr/src/contrib/bhyve/LICENSE */ 12 13 /* 14 * Copyright (c) 2018, Joyent, Inc. 15 * Copyright 2022 Oxide Computer Company 16 */ 17 18 #include <sys/cpuvar.h> 19 #include <sys/debug.h> 20 #include <sys/kmem.h> 21 #include <sys/ksynch.h> 22 #include <sys/list.h> 23 #include <sys/types.h> 24 #include <sys/vmm.h> 25 #include <sys/vmm_kernel.h> 26 #include <sys/vmm_impl.h> 27 #include <sys/zone.h> 28 29 /* 30 * zone specific data 31 * 32 * Zone specific data is used to keep an association between zones and the vmm 33 * instances that may be running in them. This is used to ensure that vmm 34 * instances do not outlive their parent zone. 35 * 36 * Locking strategy 37 * 38 * The global vmm_zsd_lock is held while modifying vmm_zsd_list. 39 * 40 * The per zone vz_lock in vmm_zsd_t is held while reading or writing anything 41 * within in vmm_zsd_t instance. This is important to ensure that there's not 42 * an accidental VM creating as a zone is going down. 43 */ 44 45 /* 46 * One of these per zone. 47 */ 48 struct vmm_zsd { 49 list_t vz_vmms; /* vmm instances in the zone */ 50 list_node_t vz_linkage; /* link to other zones */ 51 boolean_t vz_active; /* B_FALSE early in shutdown callback */ 52 zoneid_t vz_zoneid; 53 kmutex_t vz_lock; 54 }; 55 56 static kmutex_t vmm_zsd_lock; /* Protects vmm_zsd_list */ 57 static list_t vmm_zsd_list; /* Linkage between all zsd instances */ 58 59 static zone_key_t vmm_zsd_key; 60 61 int 62 vmm_zsd_add_vm(vmm_softc_t *sc) 63 { 64 vmm_zsd_t *zsd; 65 66 ASSERT(sc->vmm_zone != NULL); 67 68 mutex_enter(&vmm_zsd_lock); 69 70 for (zsd = list_head(&vmm_zsd_list); zsd != NULL; 71 zsd = list_next(&vmm_zsd_list, zsd)) { 72 if (zsd->vz_zoneid == sc->vmm_zone->zone_id) { 73 break; 74 } 75 } 76 77 VERIFY(zsd != NULL); 78 mutex_exit(&vmm_zsd_lock); 79 80 mutex_enter(&zsd->vz_lock); 81 if (!zsd->vz_active) { 82 mutex_exit(&zsd->vz_lock); 83 return (ENOSYS); 84 } 85 86 sc->vmm_zsd = zsd; 87 list_insert_tail(&zsd->vz_vmms, sc); 88 89 mutex_exit(&zsd->vz_lock); 90 91 return (0); 92 } 93 94 void 95 vmm_zsd_rem_vm(vmm_softc_t *sc) 96 { 97 vmm_zsd_t *zsd = sc->vmm_zsd; 98 99 mutex_enter(&zsd->vz_lock); 100 101 list_remove(&zsd->vz_vmms, sc); 102 sc->vmm_zsd = NULL; 103 104 mutex_exit(&zsd->vz_lock); 105 } 106 107 static void * 108 vmm_zsd_create(zoneid_t zid) 109 { 110 vmm_zsd_t *zsd; 111 zone_t *zone; 112 113 zsd = kmem_zalloc(sizeof (*zsd), KM_SLEEP); 114 115 list_create(&zsd->vz_vmms, sizeof (vmm_softc_t), 116 offsetof(vmm_softc_t, vmm_zsd_linkage)); 117 118 zsd->vz_zoneid = zid; 119 120 mutex_init(&zsd->vz_lock, NULL, MUTEX_DEFAULT, NULL); 121 122 /* 123 * If the vmm module is loaded while this zone is in the midst of 124 * shutting down, vmm_zsd_destroy() may be called without 125 * vmm_zsd_shutdown() ever being called. If it is shutting down, there 126 * is no sense in letting any in-flight VM creation succeed so set 127 * vz_active accordingly. 128 * 129 * zone_find_by_id_nolock() is used rather than zone_find_by_id() 130 * so that the zone is returned regardless of state. 131 */ 132 zone = zone_find_by_id_nolock(zid); 133 VERIFY(zone != NULL); 134 zsd->vz_active = zone_status_get(zone) < ZONE_IS_SHUTTING_DOWN; 135 136 mutex_enter(&vmm_zsd_lock); 137 list_insert_tail(&vmm_zsd_list, zsd); 138 mutex_exit(&vmm_zsd_lock); 139 140 return (zsd); 141 } 142 143 /* 144 * Tells all runing VMs in the zone to poweroff. This does not reclaim guest 145 * resources (memory, etc.). 146 */ 147 static void 148 vmm_zsd_shutdown(zoneid_t zid, void *data) 149 { 150 vmm_zsd_t *zsd = data; 151 vmm_softc_t *sc; 152 153 mutex_enter(&zsd->vz_lock); 154 155 /* 156 * This may already be B_FALSE. See comment in vmm_zsd_create(). If it 157 * is already B_FALSE we will take a quick trip through the empty list. 158 */ 159 zsd->vz_active = B_FALSE; 160 161 for (sc = list_head(&zsd->vz_vmms); sc != NULL; 162 sc = list_next(&zsd->vz_vmms, sc)) { 163 /* Send a poweroff to the VM, whether running or not. */ 164 (void) vm_suspend(sc->vmm_vm, VM_SUSPEND_POWEROFF); 165 } 166 mutex_exit(&zsd->vz_lock); 167 } 168 169 /* 170 * Reap all VMs that remain and free up guest resources. 171 */ 172 static void 173 vmm_zsd_destroy(zoneid_t zid, void *data) 174 { 175 vmm_zsd_t *zsd = data; 176 vmm_softc_t *sc; 177 178 mutex_enter(&vmm_zsd_lock); 179 list_remove(&vmm_zsd_list, zsd); 180 mutex_exit(&vmm_zsd_lock); 181 182 mutex_enter(&zsd->vz_lock); 183 ASSERT(!zsd->vz_active); 184 185 while ((sc = list_remove_head(&zsd->vz_vmms)) != NULL) { 186 int err; 187 188 /* 189 * This frees all resources associated with the vm, including 190 * sc. 191 */ 192 err = vmm_zone_vm_destroy(sc); 193 ASSERT3S(err, ==, 0); 194 } 195 196 mutex_exit(&zsd->vz_lock); 197 mutex_destroy(&zsd->vz_lock); 198 199 kmem_free(zsd, sizeof (*zsd)); 200 } 201 202 void 203 vmm_zsd_init(void) 204 { 205 mutex_init(&vmm_zsd_lock, NULL, MUTEX_DEFAULT, NULL); 206 list_create(&vmm_zsd_list, sizeof (vmm_zsd_t), 207 offsetof(vmm_zsd_t, vz_linkage)); 208 zone_key_create(&vmm_zsd_key, vmm_zsd_create, vmm_zsd_shutdown, 209 vmm_zsd_destroy); 210 } 211 212 void 213 vmm_zsd_fini(void) 214 { 215 /* Calls vmm_zsd_destroy() on all zones. */ 216 VERIFY0(zone_key_delete(vmm_zsd_key)); 217 218 ASSERT(list_is_empty(&vmm_zsd_list)); 219 list_destroy(&vmm_zsd_list); 220 mutex_destroy(&vmm_zsd_lock); 221 } 222