xref: /illumos-gate/usr/src/uts/intel/io/vmm/vmm_zsd.c (revision badf94ff3599fab15963f6c532929e9bc411757a)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 /* This file is dual-licensed; see usr/src/contrib/bhyve/LICENSE */
12 
13 /*
14  * Copyright (c) 2018, Joyent, Inc.
15  */
16 
17 #include <sys/cpuvar.h>
18 #include <sys/debug.h>
19 #include <sys/kmem.h>
20 #include <sys/ksynch.h>
21 #include <sys/list.h>
22 #include <sys/types.h>
23 #include <sys/vmm.h>
24 #include <sys/vmm_kernel.h>
25 #include <sys/vmm_impl.h>
26 #include <sys/zone.h>
27 
/*
 * zone specific data
 *
 * Zone specific data is used to keep an association between zones and the vmm
 * instances that may be running in them.  This is used to ensure that vmm
 * instances do not outlive their parent zone.
 *
 * Locking strategy
 *
 * The global vmm_zsd_lock is held while traversing or modifying vmm_zsd_list.
 *
 * The per zone vz_lock in vmm_zsd_t is held while reading or writing anything
 * within a vmm_zsd_t instance.  This is important to ensure that a VM is not
 * accidentally created as a zone is going down.
 */
43 
44 /*
45  * One of these per zone.
46  */
47 struct vmm_zsd {
48 	list_t		vz_vmms;	/* vmm instances in the zone */
49 	list_node_t	vz_linkage;	/* link to other zones */
50 	boolean_t	vz_active;	/* B_FALSE early in shutdown callback */
51 	zoneid_t	vz_zoneid;
52 	kmutex_t	vz_lock;
53 };
54 
55 static kmutex_t vmm_zsd_lock;		/* Protects vmm_zsd_list */
56 static list_t vmm_zsd_list;		/* Linkage between all zsd instances */
57 
58 static zone_key_t vmm_zsd_key;
59 
60 int
61 vmm_zsd_add_vm(vmm_softc_t *sc)
62 {
63 	vmm_zsd_t *zsd;
64 
65 	ASSERT(sc->vmm_zone != NULL);
66 
67 	mutex_enter(&vmm_zsd_lock);
68 
69 	for (zsd = list_head(&vmm_zsd_list); zsd != NULL;
70 	    zsd = list_next(&vmm_zsd_list, zsd)) {
71 		if (zsd->vz_zoneid == sc->vmm_zone->zone_id) {
72 			break;
73 		}
74 	}
75 
76 	VERIFY(zsd != NULL);
77 	mutex_exit(&vmm_zsd_lock);
78 
79 	mutex_enter(&zsd->vz_lock);
80 	if (!zsd->vz_active) {
81 		mutex_exit(&zsd->vz_lock);
82 		return (ENOSYS);
83 	}
84 
85 	sc->vmm_zsd = zsd;
86 	list_insert_tail(&zsd->vz_vmms, sc);
87 
88 	mutex_exit(&zsd->vz_lock);
89 
90 	return (0);
91 }
92 
93 void
94 vmm_zsd_rem_vm(vmm_softc_t *sc)
95 {
96 	vmm_zsd_t *zsd = sc->vmm_zsd;
97 
98 	mutex_enter(&zsd->vz_lock);
99 
100 	list_remove(&zsd->vz_vmms, sc);
101 	sc->vmm_zsd = NULL;
102 
103 	mutex_exit(&zsd->vz_lock);
104 }
105 
106 static void *
107 vmm_zsd_create(zoneid_t zid)
108 {
109 	vmm_zsd_t *zsd;
110 	zone_t *zone;
111 
112 	zsd = kmem_zalloc(sizeof (*zsd), KM_SLEEP);
113 
114 	list_create(&zsd->vz_vmms, sizeof (vmm_softc_t),
115 	    offsetof(vmm_softc_t, vmm_zsd_linkage));
116 
117 	zsd->vz_zoneid = zid;
118 
119 	mutex_init(&zsd->vz_lock, NULL, MUTEX_DEFAULT, NULL);
120 
121 	/*
122 	 * If the vmm module is loaded while this zone is in the midst of
123 	 * shutting down, vmm_zsd_destroy() may be called without
124 	 * vmm_zsd_shutdown() ever being called. If it is shutting down, there
125 	 * is no sense in letting any in-flight VM creation succeed so set
126 	 * vz_active accordingly.
127 	 *
128 	 * zone_find_by_id_nolock() is used rather than zone_find_by_id()
129 	 * so that the zone is returned regardless of state.
130 	 */
131 	zone = zone_find_by_id_nolock(zid);
132 	VERIFY(zone != NULL);
133 	zsd->vz_active = zone_status_get(zone) < ZONE_IS_SHUTTING_DOWN;
134 
135 	mutex_enter(&vmm_zsd_lock);
136 	list_insert_tail(&vmm_zsd_list, zsd);
137 	mutex_exit(&vmm_zsd_lock);
138 
139 	return (zsd);
140 }
141 
142 /*
143  * Tells all runing VMs in the zone to poweroff.  This does not reclaim guest
144  * resources (memory, etc.).
145  */
146 static void
147 vmm_zsd_shutdown(zoneid_t zid, void *data)
148 {
149 	vmm_zsd_t *zsd = data;
150 	vmm_softc_t *sc;
151 
152 	mutex_enter(&zsd->vz_lock);
153 
154 	/*
155 	 * This may already be B_FALSE. See comment in vmm_zsd_create(). If it
156 	 * is already B_FALSE we will take a quick trip through the empty list.
157 	 */
158 	zsd->vz_active = B_FALSE;
159 
160 	for (sc = list_head(&zsd->vz_vmms); sc != NULL;
161 	    sc = list_next(&zsd->vz_vmms, sc)) {
162 		/* Send a poweroff to the VM, whether running or not. */
163 		(void) vm_suspend(sc->vmm_vm, VM_SUSPEND_POWEROFF);
164 	}
165 	mutex_exit(&zsd->vz_lock);
166 }
167 
168 /*
169  * Reap all VMs that remain and free up guest resources.
170  */
171 static void
172 vmm_zsd_destroy(zoneid_t zid, void *data)
173 {
174 	vmm_zsd_t *zsd = data;
175 	vmm_softc_t *sc;
176 
177 	mutex_enter(&vmm_zsd_lock);
178 	list_remove(&vmm_zsd_list, zsd);
179 	mutex_exit(&vmm_zsd_lock);
180 
181 	mutex_enter(&zsd->vz_lock);
182 	ASSERT(!zsd->vz_active);
183 
184 	while ((sc = list_remove_head(&zsd->vz_vmms)) != NULL) {
185 		int err;
186 
187 		/*
188 		 * This frees all resources associated with the vm, including
189 		 * sc.
190 		 */
191 		err = vmm_do_vm_destroy(sc, B_FALSE);
192 		ASSERT3S(err, ==, 0);
193 	}
194 
195 	mutex_exit(&zsd->vz_lock);
196 	mutex_destroy(&zsd->vz_lock);
197 
198 	kmem_free(zsd, sizeof (*zsd));
199 }
200 
201 void
202 vmm_zsd_init(void)
203 {
204 	mutex_init(&vmm_zsd_lock, NULL, MUTEX_DEFAULT, NULL);
205 	list_create(&vmm_zsd_list, sizeof (vmm_zsd_t),
206 	    offsetof(vmm_zsd_t, vz_linkage));
207 	zone_key_create(&vmm_zsd_key, vmm_zsd_create, vmm_zsd_shutdown,
208 	    vmm_zsd_destroy);
209 }
210 
211 void
212 vmm_zsd_fini(void)
213 {
214 	/* Calls vmm_zsd_destroy() on all zones. */
215 	VERIFY0(zone_key_delete(vmm_zsd_key));
216 
217 	ASSERT(list_is_empty(&vmm_zsd_list));
218 	list_destroy(&vmm_zsd_list);
219 	mutex_destroy(&vmm_zsd_lock);
220 }
221