/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sx.h>
#include <sys/sysctl.h>

#include <machine/smp.h>

#include <dev/vmm/vmm_vm.h>

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, NULL);

int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
    "IPI vector used for vcpu notifications");

/*
 * Invoke the rendezvous function on the specified vcpu if applicable.  Return
 * true if the rendezvous is finished, false otherwise.
 */
static bool
vm_rendezvous(struct vcpu *vcpu)
{
	struct vm *vm = vcpu->vm;
	int vcpuid;

	mtx_assert(&vm->rendezvous_mtx, MA_OWNED);
	KASSERT(vm->rendezvous_func != NULL,
	    ("vm_rendezvous: no rendezvous pending"));

	/* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */
	CPU_AND(&vm->rendezvous_req_cpus, &vm->rendezvous_req_cpus,
	    &vm->active_cpus);

	vcpuid = vcpu->vcpuid;
	if (CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) &&
	    !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) {
		(*vm->rendezvous_func)(vcpu, vm->rendezvous_arg);
		CPU_SET(vcpuid, &vm->rendezvous_done_cpus);
	}
	if (CPU_CMP(&vm->rendezvous_req_cpus, &vm->rendezvous_done_cpus) == 0) {
		CPU_ZERO(&vm->rendezvous_req_cpus);
		vm->rendezvous_func = NULL;
		wakeup(&vm->rendezvous_func);
		return (true);
	}
	return (false);
}

int
vm_handle_rendezvous(struct vcpu *vcpu)
{
	struct vm *vm;
	struct thread *td;

	td = curthread;
	vm = vcpu->vm;

	mtx_lock(&vm->rendezvous_mtx);
	while (vm->rendezvous_func != NULL) {
		if (vm_rendezvous(vcpu))
			break;

		mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0,
		    "vmrndv", hz);
		if (td_ast_pending(td, TDA_SUSPEND)) {
			int error;

			mtx_unlock(&vm->rendezvous_mtx);
			error = thread_check_susp(td, true);
			if (error != 0)
				return (error);
			mtx_lock(&vm->rendezvous_mtx);
		}
	}
	mtx_unlock(&vm->rendezvous_mtx);
	return (0);
}

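/*
 * Ask a vcpu that is not idle to return to the idle state, and wait (with a
 * timeout) for it to do so.  The vcpu lock must be held.
 */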
static void
vcpu_wait_idle(struct vcpu *vcpu)
{
	KASSERT(vcpu->state != VCPU_IDLE, ("vcpu already idle"));

	vcpu->reqidle = 1;
	vcpu_notify_event_locked(vcpu);
	msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
}

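/*
 * Transition a vcpu to 'newstate', enforcing the allowed transitions
 * documented below.  The vcpu lock must be held.  Returns EBUSY if the
 * transition is not permitted.
 */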
int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
    bool from_idle)
{
	int error;

	vcpu_assert_locked(vcpu);

	/*
	 * State transitions from the vmmdev_ioctl() must always begin from
	 * the VCPU_IDLE state.  This guarantees that there is only a single
	 * ioctl() operating on a vcpu at any point.
	 */
	if (from_idle) {
		while (vcpu->state != VCPU_IDLE)
			vcpu_wait_idle(vcpu);
	} else {
		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
		    "vcpu idle state"));
	}

	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
	} else {
		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
		    "vcpu that is not running", vcpu->hostcpu));
	}

	/*
	 * The following state transitions are allowed:
	 * IDLE -> FROZEN -> IDLE
	 * FROZEN -> RUNNING -> FROZEN
	 * FROZEN -> SLEEPING -> FROZEN
	 */
	switch (vcpu->state) {
	case VCPU_IDLE:
	case VCPU_RUNNING:
	case VCPU_SLEEPING:
		error = (newstate != VCPU_FROZEN);
		break;
	case VCPU_FROZEN:
		error = (newstate == VCPU_FROZEN);
		break;
	default:
		error = 1;
		break;
	}

	if (error)
		return (EBUSY);

	vcpu->state = newstate;
	if (newstate == VCPU_RUNNING)
		vcpu->hostcpu = curcpu;
	else
		vcpu->hostcpu = NOCPU;

	if (newstate == VCPU_IDLE)
		wakeup(&vcpu->state);

	return (0);
}

/*
 * Try to lock all of the vCPUs in the VM while taking care to avoid deadlocks
 * with vm_smp_rendezvous().
 *
 * The complexity here suggests that the rendezvous mechanism needs a rethink.
 */
int
vcpu_set_state_all(struct vm *vm, enum vcpu_state newstate)
{
	cpuset_t locked;
	struct vcpu *vcpu;
	int error, i;
	uint16_t maxcpus;

	KASSERT(newstate != VCPU_IDLE,
	    ("vcpu_set_state_all: invalid target state %d", newstate));

	error = 0;
	CPU_ZERO(&locked);
	maxcpus = vm->maxcpus;

	mtx_lock(&vm->rendezvous_mtx);
restart:
	if (vm->rendezvous_func != NULL) {
		/*
		 * If we have a pending rendezvous, then the initiator may be
		 * blocked waiting for other vCPUs to execute the callback.
		 * The current thread may be a vCPU thread so we must not
		 * block waiting for the initiator, otherwise we get a
		 * deadlock.  Thus, execute the callback on behalf of any idle
		 * vCPUs.
		 */
		for (i = 0; i < maxcpus; i++) {
			vcpu = vm_vcpu(vm, i);
			if (vcpu == NULL)
				continue;
			vcpu_lock(vcpu);
			if (vcpu->state == VCPU_IDLE) {
				(void)vcpu_set_state_locked(vcpu, VCPU_FROZEN,
				    true);
				CPU_SET(i, &locked);
			}
			if (CPU_ISSET(i, &locked)) {
				/*
				 * We can safely execute the callback on this
				 * vCPU's behalf.
				 */
				vcpu_unlock(vcpu);
				(void)vm_rendezvous(vcpu);
				vcpu_lock(vcpu);
			}
			vcpu_unlock(vcpu);
		}
	}

	/*
	 * Now wait for remaining vCPUs to become idle.  This may include the
	 * initiator of a rendezvous that is currently blocked on the
	 * rendezvous mutex.
	 */
	CPU_FOREACH_ISCLR(i, &locked) {
		if (i >= maxcpus)
			break;
		vcpu = vm_vcpu(vm, i);
		if (vcpu == NULL)
			continue;
		vcpu_lock(vcpu);
		while (vcpu->state != VCPU_IDLE) {
			mtx_unlock(&vm->rendezvous_mtx);
			vcpu_wait_idle(vcpu);
			vcpu_unlock(vcpu);
			mtx_lock(&vm->rendezvous_mtx);
			if (vm->rendezvous_func != NULL)
				goto restart;
			vcpu_lock(vcpu);
		}
		error = vcpu_set_state_locked(vcpu, newstate, true);
		vcpu_unlock(vcpu);
		if (error != 0) {
			/* Roll back state changes. */
			CPU_FOREACH_ISSET(i, &locked)
				(void)vcpu_set_state(vm_vcpu(vm, i),
				    VCPU_IDLE, false);
			break;
		}
		CPU_SET(i, &locked);
	}
	mtx_unlock(&vm->rendezvous_mtx);
	return (error);
}

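/*
 * Convenience wrapper around vcpu_set_state_locked() which acquires and
 * releases the vcpu lock around the state transition.
 */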
int
vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle)
{
	int error;

	vcpu_lock(vcpu);
	error = vcpu_set_state_locked(vcpu, newstate, from_idle);
	vcpu_unlock(vcpu);

	return (error);
}

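/*
 * Return the current state of the vcpu and, optionally, the host cpu it is
 * running on.
 */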
enum vcpu_state
vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
{
	enum vcpu_state state;

	vcpu_lock(vcpu);
	state = vcpu->state;
	if (hostcpu != NULL)
		*hostcpu = vcpu->hostcpu;
	vcpu_unlock(vcpu);

	return (state);
}

/*
 * This function is called to ensure that a vcpu "sees" a pending event
 * as soon as possible:
 * - If the vcpu thread is sleeping then it is woken up.
 * - If the vcpu is running on a different host_cpu then an IPI will be
 *   directed to the host_cpu to cause the vcpu to trap into the hypervisor.
 */
void
vcpu_notify_event_locked(struct vcpu *vcpu)
{
	int hostcpu;

	hostcpu = vcpu->hostcpu;
	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
		if (hostcpu != curcpu) {
			ipi_cpu(hostcpu, vmm_ipinum);
		} else {
			/*
			 * If the 'vcpu' is running on 'curcpu' then it must
			 * be sending a notification to itself (e.g. SELF_IPI).
			 * The pending event will be picked up when the vcpu
			 * transitions back to guest context.
			 */
		}
	} else {
		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
		    "with hostcpu %d", vcpu->state, hostcpu));
		if (vcpu->state == VCPU_SLEEPING)
			wakeup_one(vcpu);
	}
}

void
vcpu_notify_event(struct vcpu *vcpu)
{
	vcpu_lock(vcpu);
	vcpu_notify_event_locked(vcpu);
	vcpu_unlock(vcpu);
}

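/*
 * Return non-zero if the vcpu has been suspended for debugging, i.e., it is
 * in the set populated by vm_suspend_cpu().
 */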
int
vcpu_debugged(struct vcpu *vcpu)
{
	return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
}

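/*
 * Acquire/release the sx lock which serializes vcpu creation.
 */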
void
vm_lock_vcpus(struct vm *vm)
{
	sx_xlock(&vm->vcpus_init_lock);
}

void
vm_unlock_vcpus(struct vm *vm)
{
	sx_unlock(&vm->vcpus_init_lock);
}

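/*
 * Mark the VM as dying so that no further vcpus can be created.
 */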
void
vm_disable_vcpu_creation(struct vm *vm)
{
	sx_xlock(&vm->vcpus_init_lock);
	vm->dying = true;
	sx_xunlock(&vm->vcpus_init_lock);
}

uint16_t
vm_get_maxcpus(struct vm *vm)
{
	return (vm->maxcpus);
}

void
vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
    uint16_t *threads, uint16_t *maxcpus)
{
	*sockets = vm->sockets;
	*cores = vm->cores;
	*threads = vm->threads;
	*maxcpus = vm->maxcpus;
}

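/*
 * Set the guest CPU topology.  The total number of CPUs implied by the
 * topology may not exceed the VM's maxcpus limit.
 */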
int
vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
    uint16_t threads, uint16_t maxcpus __unused)
{
	/* Ignore maxcpus. */
	if (sockets * cores * threads > vm->maxcpus)
		return (EINVAL);
	vm->sockets = sockets;
	vm->cores = cores;
	vm->threads = threads;
	return (0);
}

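/*
 * Begin suspending the VM for the given reason and notify all active vcpus
 * so that they exit to the hypervisor.  Returns EALREADY if a suspend is
 * already in progress.
 */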
int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
	int i;

	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
		return (EINVAL);

	if (atomic_cmpset_int(&vm->suspend, 0, how) == 0)
		return (EALREADY);

	/*
	 * Notify all active vcpus that they are now suspended.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->active_cpus))
			vcpu_notify_event(vm_vcpu(vm, i));
	}

	return (0);
}

int
vm_reinit(struct vm *vm)
{
	int error;

	/*
	 * A virtual machine can be reset only if all vcpus are suspended.
	 */
	if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
		vm_reset(vm);
		error = 0;
	} else {
		error = EBUSY;
	}

	return (error);
}

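/*
 * Mark a vcpu as active.  Returns EBUSY if it is already active.
 */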
int
vm_activate_cpu(struct vcpu *vcpu)
{
	struct vm *vm = vcpu->vm;

	if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
		return (EBUSY);

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus);
	return (0);
}

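/*
 * Suspend the given vcpu for debugging, or all active vcpus if 'vcpu' is
 * NULL, and notify the affected vcpus so that they stop running guest code.
 */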
int
vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
{
	if (vcpu == NULL) {
		vm->debug_cpus = vm->active_cpus;
		for (int i = 0; i < vm->maxcpus; i++) {
			if (CPU_ISSET(i, &vm->active_cpus))
				vcpu_notify_event(vm_vcpu(vm, i));
		}
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
			return (EINVAL);

		CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
		vcpu_notify_event(vcpu);
	}
	return (0);
}

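/*
 * Resume a vcpu that was previously suspended for debugging, or all such
 * vcpus if 'vcpu' is NULL.
 */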
int
vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
{
	if (vcpu == NULL) {
		CPU_ZERO(&vm->debug_cpus);
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
			return (EINVAL);

		CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
	}
	return (0);
}

cpuset_t
vm_active_cpus(struct vm *vm)
{
	return (vm->active_cpus);
}

cpuset_t
vm_debug_cpus(struct vm *vm)
{
	return (vm->debug_cpus);
}

cpuset_t
vm_suspended_cpus(struct vm *vm)
{
	return (vm->suspended_cpus);
}