/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sx.h>
#include <sys/sysctl.h>

#include <machine/smp.h>

#include <dev/vmm/vmm_vm.h>

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, NULL);

int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
    "IPI vector used for vcpu notifications");

/*
 * Invoke the rendezvous function on the specified vcpu if applicable.  Return
 * true if the rendezvous is finished, false otherwise.
 */
static bool
vm_rendezvous(struct vcpu *vcpu)
{
	struct vm *vm = vcpu->vm;
	int vcpuid;

	mtx_assert(&vcpu->vm->rendezvous_mtx, MA_OWNED);
	KASSERT(vcpu->vm->rendezvous_func != NULL,
	    ("vm_rendezvous: no rendezvous pending"));

	/* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */
	CPU_AND(&vm->rendezvous_req_cpus, &vm->rendezvous_req_cpus,
	    &vm->active_cpus);

	vcpuid = vcpu->vcpuid;
	if (CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) &&
	    !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) {
		(*vm->rendezvous_func)(vcpu, vm->rendezvous_arg);
		CPU_SET(vcpuid, &vm->rendezvous_done_cpus);
	}
	if (CPU_CMP(&vm->rendezvous_req_cpus, &vm->rendezvous_done_cpus) == 0) {
		CPU_ZERO(&vm->rendezvous_req_cpus);
		vm->rendezvous_func = NULL;
		wakeup(&vm->rendezvous_func);
		return (true);
	}
	return (false);
}

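/*
 * Execute a pending rendezvous on behalf of 'vcpu' and wait for the
 * rendezvous to complete on all requested vcpus.  Returns 0 once the
 * rendezvous has finished, or an error if the calling thread is being
 * suspended.
 */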
int
vm_handle_rendezvous(struct vcpu *vcpu)
{
	struct vm *vm;
	struct thread *td;

	td = curthread;
	vm = vcpu->vm;

	mtx_lock(&vm->rendezvous_mtx);
	while (vm->rendezvous_func != NULL) {
		if (vm_rendezvous(vcpu))
			break;

		mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0,
		    "vmrndv", hz);
		if (td_ast_pending(td, TDA_SUSPEND)) {
			int error;

			mtx_unlock(&vm->rendezvous_mtx);
			error = thread_check_susp(td, true);
			if (error != 0)
				return (error);
			mtx_lock(&vm->rendezvous_mtx);
		}
	}
	mtx_unlock(&vm->rendezvous_mtx);
	return (0);
}

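/*
 * Ask a non-idle vcpu to return to the idle state and briefly sleep waiting
 * for that transition.  The vcpu lock must be held.
 */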
static void
vcpu_wait_idle(struct vcpu *vcpu)
{
	KASSERT(vcpu->state != VCPU_IDLE, ("vcpu already idle"));

	vcpu->reqidle = 1;
	vcpu_notify_event_locked(vcpu);
	msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
}

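/*
 * Transition 'vcpu' to 'newstate'.  If 'from_idle' is true, wait until the
 * vcpu is idle before starting the transition.  Returns EBUSY if the
 * transition is not permitted.  The vcpu lock must be held.
 */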
int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
    bool from_idle)
{
	int error;

	vcpu_assert_locked(vcpu);

	/*
	 * State transitions from the vmmdev_ioctl() must always begin from
	 * the VCPU_IDLE state. This guarantees that there is only a single
	 * ioctl() operating on a vcpu at any point.
	 */
	if (from_idle) {
		while (vcpu->state != VCPU_IDLE)
			vcpu_wait_idle(vcpu);
	} else {
		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
		    "vcpu idle state"));
	}

	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
	} else {
		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
		    "vcpu that is not running", vcpu->hostcpu));
	}

	/*
	 * The following state transitions are allowed:
	 * IDLE -> FROZEN -> IDLE
	 * FROZEN -> RUNNING -> FROZEN
	 * FROZEN -> SLEEPING -> FROZEN
	 */
	switch (vcpu->state) {
	case VCPU_IDLE:
	case VCPU_RUNNING:
	case VCPU_SLEEPING:
		error = (newstate != VCPU_FROZEN);
		break;
	case VCPU_FROZEN:
		error = (newstate == VCPU_FROZEN);
		break;
	default:
		error = 1;
		break;
	}

	if (error)
		return (EBUSY);

	vcpu->state = newstate;
	if (newstate == VCPU_RUNNING)
		vcpu->hostcpu = curcpu;
	else
		vcpu->hostcpu = NOCPU;

	if (newstate == VCPU_IDLE)
		wakeup(&vcpu->state);

	return (0);
}

/*
 * Try to lock all of the vCPUs in the VM while taking care to avoid deadlocks
 * with vm_smp_rendezvous().
 *
 * The complexity here suggests that the rendezvous mechanism needs a rethink.
 */
int
vcpu_set_state_all(struct vm *vm, enum vcpu_state newstate)
{
	cpuset_t locked;
	struct vcpu *vcpu;
	int error, i;
	uint16_t maxcpus;

	KASSERT(newstate != VCPU_IDLE,
	    ("vcpu_set_state_all: invalid target state %d", newstate));

	error = 0;
	CPU_ZERO(&locked);
	maxcpus = vm->maxcpus;

	mtx_lock(&vm->rendezvous_mtx);
restart:
	if (vm->rendezvous_func != NULL) {
		/*
		 * If we have a pending rendezvous, then the initiator may be
		 * blocked waiting for other vCPUs to execute the callback.  The
		 * current thread may be a vCPU thread so we must not block
		 * waiting for the initiator, otherwise we get a deadlock.
		 * Thus, execute the callback on behalf of any idle vCPUs.
		 */
		for (i = 0; i < maxcpus; i++) {
			vcpu = vm_vcpu(vm, i);
			if (vcpu == NULL)
				continue;
			vcpu_lock(vcpu);
			if (vcpu->state == VCPU_IDLE) {
				(void)vcpu_set_state_locked(vcpu, VCPU_FROZEN,
				    true);
				CPU_SET(i, &locked);
			}
			if (CPU_ISSET(i, &locked)) {
				/*
				 * We can safely execute the callback on this
				 * vCPU's behalf.
				 */
				vcpu_unlock(vcpu);
				(void)vm_rendezvous(vcpu);
				vcpu_lock(vcpu);
			}
			vcpu_unlock(vcpu);
		}
	}

	/*
	 * Now wait for remaining vCPUs to become idle.  This may include the
	 * initiator of a rendezvous that is currently blocked on the rendezvous
	 * mutex.
	 */
	CPU_FOREACH_ISCLR(i, &locked) {
		if (i >= maxcpus)
			break;
		vcpu = vm_vcpu(vm, i);
		if (vcpu == NULL)
			continue;
		vcpu_lock(vcpu);
		while (vcpu->state != VCPU_IDLE) {
			mtx_unlock(&vm->rendezvous_mtx);
			vcpu_wait_idle(vcpu);
			vcpu_unlock(vcpu);
			mtx_lock(&vm->rendezvous_mtx);
			if (vm->rendezvous_func != NULL)
				goto restart;
			vcpu_lock(vcpu);
		}
		error = vcpu_set_state_locked(vcpu, newstate, true);
		vcpu_unlock(vcpu);
		if (error != 0) {
			/* Roll back state changes. */
			CPU_FOREACH_ISSET(i, &locked)
				(void)vcpu_set_state(vm_vcpu(vm, i), VCPU_IDLE,
				    false);
			break;
		}
		CPU_SET(i, &locked);
	}
	mtx_unlock(&vm->rendezvous_mtx);
	return (error);
}

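/*
 * Lock the vcpu and transition it to 'newstate'.
 */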
int
vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle)
{
	int error;

	vcpu_lock(vcpu);
	error = vcpu_set_state_locked(vcpu, newstate, from_idle);
	vcpu_unlock(vcpu);

	return (error);
}

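/*
 * Return the current state of 'vcpu' and, if 'hostcpu' is not NULL, the host
 * cpu it is running on (NOCPU if it is not running).
 */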
enum vcpu_state
vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
{
	enum vcpu_state state;

	vcpu_lock(vcpu);
	state = vcpu->state;
	if (hostcpu != NULL)
		*hostcpu = vcpu->hostcpu;
	vcpu_unlock(vcpu);

	return (state);
}

/*
 * This function is called to ensure that a vcpu "sees" a pending event
 * as soon as possible:
 * - If the vcpu thread is sleeping then it is woken up.
 * - If the vcpu is running on a different host_cpu then an IPI will be directed
 *   to the host_cpu to cause the vcpu to trap into the hypervisor.
 */
void
vcpu_notify_event_locked(struct vcpu *vcpu)
{
	int hostcpu;

	hostcpu = vcpu->hostcpu;
	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
		if (hostcpu != curcpu) {
			ipi_cpu(hostcpu, vmm_ipinum);
		} else {
			/*
			 * If the 'vcpu' is running on 'curcpu' then it must
			 * be sending a notification to itself (e.g. SELF_IPI).
			 * The pending event will be picked up when the vcpu
			 * transitions back to guest context.
			 */
		}
	} else {
		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
		    "with hostcpu %d", vcpu->state, hostcpu));
		if (vcpu->state == VCPU_SLEEPING)
			wakeup_one(vcpu);
	}
}

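/*
 * Lock the vcpu and deliver a pending-event notification to it.
 */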
void
vcpu_notify_event(struct vcpu *vcpu)
{
	vcpu_lock(vcpu);
	vcpu_notify_event_locked(vcpu);
	vcpu_unlock(vcpu);
}

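/*
 * Return non-zero if 'vcpu' is a member of the VM's debug cpu set.
 */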
int
vcpu_debugged(struct vcpu *vcpu)
{
	return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
}

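/*
 * Acquire the vcpu initialization lock exclusively; vm_unlock_vcpus()
 * releases it.
 */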
void
vm_lock_vcpus(struct vm *vm)
{
	sx_xlock(&vm->vcpus_init_lock);
}

void
vm_unlock_vcpus(struct vm *vm)
{
	sx_unlock(&vm->vcpus_init_lock);
}

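/*
 * Mark the VM as dying so that no further vcpus are created.
 */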
void
vm_disable_vcpu_creation(struct vm *vm)
{
	sx_xlock(&vm->vcpus_init_lock);
	vm->dying = true;
	sx_xunlock(&vm->vcpus_init_lock);
}

uint16_t
vm_get_maxcpus(struct vm *vm)
{
	return (vm->maxcpus);
}

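/*
 * Report the guest CPU topology (sockets, cores, threads) and the vcpu limit
 * for this VM.
 */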
void
vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
    uint16_t *threads, uint16_t *maxcpus)
{
	*sockets = vm->sockets;
	*cores = vm->cores;
	*threads = vm->threads;
	*maxcpus = vm->maxcpus;
}

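/*
 * Set the guest CPU topology.  The product of sockets, cores and threads may
 * not exceed the VM's vcpu limit; the maxcpus argument is ignored.
 */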
int
vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
    uint16_t threads, uint16_t maxcpus __unused)
{
	/* Ignore maxcpus. */
	if (sockets * cores * threads > vm->maxcpus)
		return (EINVAL);
	vm->sockets = sockets;
	vm->cores = cores;
	vm->threads = threads;
	return (0);
}

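/*
 * Request suspension of the VM for the reason given by 'how' and notify all
 * active vcpus of the pending suspend.  Returns EALREADY if a suspend has
 * already been requested.
 */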
int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
	int i;

	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
		return (EINVAL);

	if (atomic_cmpset_int(&vm->suspend, 0, how) == 0)
		return (EALREADY);

	/*
	 * Notify all active vcpus that they are now suspended.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->active_cpus))
			vcpu_notify_event(vm_vcpu(vm, i));
	}

	return (0);
}

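/*
 * Reset the VM.  This is permitted only once all active vcpus have suspended
 * themselves; otherwise EBUSY is returned.
 */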
int
vm_reinit(struct vm *vm)
{
	int error;

	/*
	 * A virtual machine can be reset only if all vcpus are suspended.
	 */
	if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
		vm_reset(vm);
		error = 0;
	} else {
		error = EBUSY;
	}

	return (error);
}

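/*
 * Add 'vcpu' to the set of active vcpus.  Returns EBUSY if it was already
 * activated.
 */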
int
vm_activate_cpu(struct vcpu *vcpu)
{
	struct vm *vm = vcpu->vm;

	if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
		return (EBUSY);

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus);
	return (0);
}

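/*
 * Place a single vcpu, or every active vcpu if 'vcpu' is NULL, into the debug
 * cpu set and notify it of the pending state change.
 */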
int
vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
{
	if (vcpu == NULL) {
		vm->debug_cpus = vm->active_cpus;
		for (int i = 0; i < vm->maxcpus; i++) {
			if (CPU_ISSET(i, &vm->active_cpus))
				vcpu_notify_event(vm_vcpu(vm, i));
		}
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
			return (EINVAL);

		CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
		vcpu_notify_event(vcpu);
	}
	return (0);
}

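/*
 * Remove a single vcpu, or every vcpu if 'vcpu' is NULL, from the debug cpu
 * set.  Returns EINVAL if the given vcpu was not being debugged.
 */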
int
vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
{
	if (vcpu == NULL) {
		CPU_ZERO(&vm->debug_cpus);
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
			return (EINVAL);

		CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
	}
	return (0);
}

cpuset_t
vm_active_cpus(struct vm *vm)
{
	return (vm->active_cpus);
}

cpuset_t
vm_debug_cpus(struct vm *vm)
{
	return (vm->debug_cpus);
}

cpuset_t
vm_suspended_cpus(struct vm *vm)
{
	return (vm->suspended_cpus);
}
477