xref: /illumos-gate/usr/src/uts/intel/io/vmm/vmm_zsd.c (revision dd72704bd9e794056c558153663c739e2012d721)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 /* This file is dual-licensed; see usr/src/contrib/bhyve/LICENSE */
12 
13 /*
14  * Copyright (c) 2018, Joyent, Inc.
15  * Copyright 2022 Oxide Computer Company
16  */
17 
18 #include <sys/cpuvar.h>
19 #include <sys/debug.h>
20 #include <sys/kmem.h>
21 #include <sys/ksynch.h>
22 #include <sys/list.h>
23 #include <sys/types.h>
24 #include <sys/vmm.h>
25 #include <sys/vmm_kernel.h>
26 #include <sys/vmm_impl.h>
27 #include <sys/zone.h>
28 
29 /*
30  * zone specific data
31  *
32  * Zone specific data is used to keep an association between zones and the vmm
33  * instances that may be running in them.  This is used to ensure that vmm
34  * instances do not outlive their parent zone.
35  *
36  * Locking strategy
37  *
38  * The global vmm_zsd_lock is held while modifying vmm_zsd_list.
39  *
40  * The per zone vz_lock in vmm_zsd_t is held while reading or writing anything
41  * within a vmm_zsd_t instance.  This is important to ensure that a VM is not
42  * accidentally created as a zone is going down.
43  */
44 
45 /*
46  * One of these per zone.
47  */
48 struct vmm_zsd {
49 	list_t		vz_vmms;	/* vmm instances in the zone */
50 	list_node_t	vz_linkage;	/* link to other zones */
51 	boolean_t	vz_active;	/* B_FALSE early in shutdown callback */
52 	zoneid_t	vz_zoneid;
53 	kmutex_t	vz_lock;
54 };
55 
56 static kmutex_t vmm_zsd_lock;		/* Protects vmm_zsd_list */
57 static list_t vmm_zsd_list;		/* Linkage between all zsd instances */
58 
59 static zone_key_t vmm_zsd_key;
60 
61 int
62 vmm_zsd_add_vm(vmm_softc_t *sc)
63 {
64 	vmm_zsd_t *zsd;
65 
66 	ASSERT(sc->vmm_zone != NULL);
67 
68 	mutex_enter(&vmm_zsd_lock);
69 
70 	for (zsd = list_head(&vmm_zsd_list); zsd != NULL;
71 	    zsd = list_next(&vmm_zsd_list, zsd)) {
72 		if (zsd->vz_zoneid == sc->vmm_zone->zone_id) {
73 			break;
74 		}
75 	}
76 
77 	VERIFY(zsd != NULL);
78 	mutex_exit(&vmm_zsd_lock);
79 
80 	mutex_enter(&zsd->vz_lock);
81 	if (!zsd->vz_active) {
82 		mutex_exit(&zsd->vz_lock);
83 		return (ENOSYS);
84 	}
85 
86 	sc->vmm_zsd = zsd;
87 	list_insert_tail(&zsd->vz_vmms, sc);
88 
89 	mutex_exit(&zsd->vz_lock);
90 
91 	return (0);
92 }
93 
94 void
95 vmm_zsd_rem_vm(vmm_softc_t *sc)
96 {
97 	vmm_zsd_t *zsd = sc->vmm_zsd;
98 
99 	mutex_enter(&zsd->vz_lock);
100 
101 	list_remove(&zsd->vz_vmms, sc);
102 	sc->vmm_zsd = NULL;
103 
104 	mutex_exit(&zsd->vz_lock);
105 }
106 
107 static void *
108 vmm_zsd_create(zoneid_t zid)
109 {
110 	vmm_zsd_t *zsd;
111 	zone_t *zone;
112 
113 	zsd = kmem_zalloc(sizeof (*zsd), KM_SLEEP);
114 
115 	list_create(&zsd->vz_vmms, sizeof (vmm_softc_t),
116 	    offsetof(vmm_softc_t, vmm_zsd_linkage));
117 
118 	zsd->vz_zoneid = zid;
119 
120 	mutex_init(&zsd->vz_lock, NULL, MUTEX_DEFAULT, NULL);
121 
122 	/*
123 	 * If the vmm module is loaded while this zone is in the midst of
124 	 * shutting down, vmm_zsd_destroy() may be called without
125 	 * vmm_zsd_shutdown() ever being called. If it is shutting down, there
126 	 * is no sense in letting any in-flight VM creation succeed so set
127 	 * vz_active accordingly.
128 	 *
129 	 * zone_find_by_id_nolock() is used rather than zone_find_by_id()
130 	 * so that the zone is returned regardless of state.
131 	 */
132 	zone = zone_find_by_id_nolock(zid);
133 	VERIFY(zone != NULL);
134 	zsd->vz_active = zone_status_get(zone) < ZONE_IS_SHUTTING_DOWN;
135 
136 	mutex_enter(&vmm_zsd_lock);
137 	list_insert_tail(&vmm_zsd_list, zsd);
138 	mutex_exit(&vmm_zsd_lock);
139 
140 	return (zsd);
141 }
142 
143 /*
144  * Tells all running VMs in the zone to poweroff.  This does not reclaim guest
145  * resources (memory, etc.).
146  */
147 static void
148 vmm_zsd_shutdown(zoneid_t zid, void *data)
149 {
150 	vmm_zsd_t *zsd = data;
151 	vmm_softc_t *sc;
152 
153 	mutex_enter(&zsd->vz_lock);
154 
155 	/*
156 	 * This may already be B_FALSE. See comment in vmm_zsd_create(). If it
157 	 * is already B_FALSE we will take a quick trip through the empty list.
158 	 */
159 	zsd->vz_active = B_FALSE;
160 
161 	for (sc = list_head(&zsd->vz_vmms); sc != NULL;
162 	    sc = list_next(&zsd->vz_vmms, sc)) {
163 		/* Send a poweroff to the VM, whether running or not. */
164 		(void) vm_suspend(sc->vmm_vm, VM_SUSPEND_POWEROFF);
165 	}
166 	mutex_exit(&zsd->vz_lock);
167 }
168 
169 /*
170  * Reap all VMs that remain and free up guest resources.
171  */
172 static void
173 vmm_zsd_destroy(zoneid_t zid, void *data)
174 {
175 	vmm_zsd_t *zsd = data;
176 	vmm_softc_t *sc;
177 
178 	mutex_enter(&vmm_zsd_lock);
179 	list_remove(&vmm_zsd_list, zsd);
180 	mutex_exit(&vmm_zsd_lock);
181 
182 	mutex_enter(&zsd->vz_lock);
183 	ASSERT(!zsd->vz_active);
184 
185 	while ((sc = list_remove_head(&zsd->vz_vmms)) != NULL) {
186 		vmm_zone_vm_destroy(sc);
187 	}
188 
189 	mutex_exit(&zsd->vz_lock);
190 	mutex_destroy(&zsd->vz_lock);
191 
192 	kmem_free(zsd, sizeof (*zsd));
193 }
194 
195 void
196 vmm_zsd_init(void)
197 {
198 	mutex_init(&vmm_zsd_lock, NULL, MUTEX_DEFAULT, NULL);
199 	list_create(&vmm_zsd_list, sizeof (vmm_zsd_t),
200 	    offsetof(vmm_zsd_t, vz_linkage));
201 	zone_key_create(&vmm_zsd_key, vmm_zsd_create, vmm_zsd_shutdown,
202 	    vmm_zsd_destroy);
203 }
204 
205 void
206 vmm_zsd_fini(void)
207 {
208 	/* Calls vmm_zsd_destroy() on all zones. */
209 	VERIFY0(zone_key_delete(vmm_zsd_key));
210 
211 	ASSERT(list_is_empty(&vmm_zsd_list));
212 	list_destroy(&vmm_zsd_list);
213 	mutex_destroy(&vmm_zsd_lock);
214 }
215