/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sx.h>
#include <sys/sysctl.h>

#include <machine/smp.h>

#include <dev/vmm/vmm_vm.h>

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, NULL);

int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
    "IPI vector used for vcpu notifications");

/*
 * Invoke the rendezvous function on the specified vcpu if applicable.  Return
 * true if the rendezvous is finished, false otherwise.
 */
static bool
vm_rendezvous(struct vcpu *vcpu)
{
	struct vm *vm = vcpu->vm;
	int vcpuid;

	mtx_assert(&vm->rendezvous_mtx, MA_OWNED);
	KASSERT(vm->rendezvous_func != NULL,
	    ("vm_rendezvous: no rendezvous pending"));

	/* 'rendezvous_req_cpus' must be a subset of 'active_cpus'. */
	CPU_AND(&vm->rendezvous_req_cpus, &vm->rendezvous_req_cpus,
	    &vm->active_cpus);

	vcpuid = vcpu->vcpuid;
	if (CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) &&
	    !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) {
		(*vm->rendezvous_func)(vcpu, vm->rendezvous_arg);
		CPU_SET(vcpuid, &vm->rendezvous_done_cpus);
	}
	if (CPU_CMP(&vm->rendezvous_req_cpus, &vm->rendezvous_done_cpus) == 0) {
		CPU_ZERO(&vm->rendezvous_req_cpus);
		vm->rendezvous_func = NULL;
		wakeup(&vm->rendezvous_func);
		return (true);
	}
	return (false);
}

int
vm_handle_rendezvous(struct vcpu *vcpu)
{
	struct vm *vm;
	struct thread *td;

	td = curthread;
	vm = vcpu->vm;

	mtx_lock(&vm->rendezvous_mtx);
	while (vm->rendezvous_func != NULL) {
		if (vm_rendezvous(vcpu))
			break;

		mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0,
		    "vmrndv", hz);
		if (td_ast_pending(td, TDA_SUSPEND)) {
			int error;

			mtx_unlock(&vm->rendezvous_mtx);
			error = thread_check_susp(td, true);
			if (error != 0)
				return (error);
			mtx_lock(&vm->rendezvous_mtx);
		}
	}
	mtx_unlock(&vm->rendezvous_mtx);
	return (0);
}

static void
vcpu_wait_idle(struct vcpu *vcpu)
{
	KASSERT(vcpu->state != VCPU_IDLE, ("vcpu already idle"));

	vcpu->reqidle = 1;
	vcpu_notify_event_locked(vcpu);
	msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
}
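
/*
 * Usage sketch (illustrative only, not part of this file): a vCPU run loop
 * is expected to drain a pending rendezvous before re-entering the guest.
 * Assuming a hypothetical vm_run_vcpu() exit path, the loop might look like:
 *
 *	while ((error = vm_run_vcpu(vcpu)) == 0) {
 *		if (vcpu->vm->rendezvous_func != NULL &&
 *		    (error = vm_handle_rendezvous(vcpu)) != 0)
 *			break;
 *	}
 *
 * vm_handle_rendezvous() returns non-zero only when the calling thread is
 * being suspended, in which case the run loop should return to userspace.
 */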

int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
    bool from_idle)
{
	int error;

	vcpu_assert_locked(vcpu);

	/*
	 * State transitions from the vmmdev_ioctl() must always begin from
	 * the VCPU_IDLE state.  This guarantees that there is only a single
	 * ioctl() operating on a vcpu at any point.
	 */
	if (from_idle) {
		while (vcpu->state != VCPU_IDLE)
			vcpu_wait_idle(vcpu);
	} else {
		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
		    "vcpu idle state"));
	}

	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
	} else {
		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
		    "vcpu that is not running", vcpu->hostcpu));
	}

	/*
	 * The following state transitions are allowed:
	 * IDLE -> FROZEN -> IDLE
	 * FROZEN -> RUNNING -> FROZEN
	 * FROZEN -> SLEEPING -> FROZEN
	 */
	switch (vcpu->state) {
	case VCPU_IDLE:
	case VCPU_RUNNING:
	case VCPU_SLEEPING:
		error = (newstate != VCPU_FROZEN);
		break;
	case VCPU_FROZEN:
		error = (newstate == VCPU_FROZEN);
		break;
	default:
		error = 1;
		break;
	}

	if (error)
		return (EBUSY);

	vcpu->state = newstate;
	if (newstate == VCPU_RUNNING)
		vcpu->hostcpu = curcpu;
	else
		vcpu->hostcpu = NOCPU;

	if (newstate == VCPU_IDLE)
		wakeup(&vcpu->state);

	return (0);
}
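
/*
 * Usage sketch (illustrative only): an ioctl handler serializes on a vCPU by
 * freezing it from the idle state and returning it to idle when done:
 *
 *	error = vcpu_set_state(vcpu, VCPU_FROZEN, true);
 *	if (error != 0)
 *		return (error);
 *	... operate on the frozen vCPU ...
 *	(void)vcpu_set_state(vcpu, VCPU_IDLE, false);
 *
 * Passing 'true' for from_idle makes the caller wait until the vCPU is idle,
 * which is what limits each vCPU to a single ioctl at a time.
 */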

/*
 * Try to lock all of the vCPUs in the VM while taking care to avoid deadlocks
 * with vm_smp_rendezvous().
 *
 * The complexity here suggests that the rendezvous mechanism needs a rethink.
 */
int
vcpu_set_state_all(struct vm *vm, enum vcpu_state newstate)
{
	cpuset_t locked;
	struct vcpu *vcpu;
	int error, i;
	uint16_t maxcpus;

	KASSERT(newstate != VCPU_IDLE,
	    ("vcpu_set_state_all: invalid target state %d", newstate));

	error = 0;
	CPU_ZERO(&locked);
	maxcpus = vm->maxcpus;

	mtx_lock(&vm->rendezvous_mtx);
restart:
	if (vm->rendezvous_func != NULL) {
		/*
		 * If we have a pending rendezvous, then the initiator may be
		 * blocked waiting for other vCPUs to execute the callback.
		 * The current thread may be a vCPU thread, so we must not
		 * block waiting for the initiator; otherwise we get a
		 * deadlock.  Thus, execute the callback on behalf of any idle
		 * vCPUs.
		 */
		for (i = 0; i < maxcpus; i++) {
			vcpu = vm_vcpu(vm, i);
			if (vcpu == NULL)
				continue;
			vcpu_lock(vcpu);
			if (vcpu->state == VCPU_IDLE) {
				(void)vcpu_set_state_locked(vcpu, VCPU_FROZEN,
				    true);
				CPU_SET(i, &locked);
			}
			if (CPU_ISSET(i, &locked)) {
				/*
				 * We can safely execute the callback on this
				 * vCPU's behalf.
				 */
				vcpu_unlock(vcpu);
				(void)vm_rendezvous(vcpu);
				vcpu_lock(vcpu);
			}
			vcpu_unlock(vcpu);
		}
	}

	/*
	 * Now wait for remaining vCPUs to become idle.  This may include the
	 * initiator of a rendezvous that is currently blocked on the
	 * rendezvous mutex.
	 */
	CPU_FOREACH_ISCLR(i, &locked) {
		if (i >= maxcpus)
			break;
		vcpu = vm_vcpu(vm, i);
		if (vcpu == NULL)
			continue;
		vcpu_lock(vcpu);
		while (vcpu->state != VCPU_IDLE) {
			mtx_unlock(&vm->rendezvous_mtx);
			vcpu_wait_idle(vcpu);
			vcpu_unlock(vcpu);
			mtx_lock(&vm->rendezvous_mtx);
			if (vm->rendezvous_func != NULL)
				goto restart;
			vcpu_lock(vcpu);
		}
		error = vcpu_set_state_locked(vcpu, newstate, true);
		vcpu_unlock(vcpu);
		if (error != 0) {
			/*
			 * Roll back the state change on every vCPU frozen
			 * above, not just the one that failed.
			 */
			CPU_FOREACH_ISSET(i, &locked)
				(void)vcpu_set_state(vm_vcpu(vm, i),
				    VCPU_IDLE, false);
			break;
		}
		CPU_SET(i, &locked);
	}
	mtx_unlock(&vm->rendezvous_mtx);
	return (error);
}

int
vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle)
{
	int error;

	vcpu_lock(vcpu);
	error = vcpu_set_state_locked(vcpu, newstate, from_idle);
	vcpu_unlock(vcpu);

	return (error);
}

enum vcpu_state
vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
{
	enum vcpu_state state;

	vcpu_lock(vcpu);
	state = vcpu->state;
	if (hostcpu != NULL)
		*hostcpu = vcpu->hostcpu;
	vcpu_unlock(vcpu);

	return (state);
}

/*
 * This function is called to ensure that a vcpu "sees" a pending event
 * as soon as possible:
 * - If the vcpu thread is sleeping then it is woken up.
 * - If the vcpu is running on a different host cpu then an IPI will be
 *   directed to the host cpu to cause the vcpu to trap into the hypervisor.
 */
void
vcpu_notify_event_locked(struct vcpu *vcpu)
{
	int hostcpu;

	hostcpu = vcpu->hostcpu;
	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
		if (hostcpu != curcpu) {
			ipi_cpu(hostcpu, vmm_ipinum);
		} else {
			/*
			 * If the 'vcpu' is running on 'curcpu' then it must
			 * be sending a notification to itself (e.g. SELF_IPI).
			 * The pending event will be picked up when the vcpu
			 * transitions back to guest context.
			 */
		}
	} else {
		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
		    "with hostcpu %d", vcpu->state, hostcpu));
		if (vcpu->state == VCPU_SLEEPING)
			wakeup_one(vcpu);
	}
}

void
vcpu_notify_event(struct vcpu *vcpu)
{
	vcpu_lock(vcpu);
	vcpu_notify_event_locked(vcpu);
	vcpu_unlock(vcpu);
}

int
vcpu_debugged(struct vcpu *vcpu)
{
	return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
}

void
vm_lock_vcpus(struct vm *vm)
{
	sx_xlock(&vm->vcpus_init_lock);
}

void
vm_unlock_vcpus(struct vm *vm)
{
	sx_xunlock(&vm->vcpus_init_lock);
}

void
vm_disable_vcpu_creation(struct vm *vm)
{
	sx_xlock(&vm->vcpus_init_lock);
	vm->dying = true;
	sx_xunlock(&vm->vcpus_init_lock);
}

uint16_t
vm_get_maxcpus(struct vm *vm)
{
	return (vm->maxcpus);
}

void
vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
    uint16_t *threads, uint16_t *maxcpus)
{
	*sockets = vm->sockets;
	*cores = vm->cores;
	*threads = vm->threads;
	*maxcpus = vm->maxcpus;
}

int
vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
    uint16_t threads, uint16_t maxcpus __unused)
{
	/* Ignore maxcpus. */
	if (sockets * cores * threads > vm->maxcpus)
		return (EINVAL);
	vm->sockets = sockets;
	vm->cores = cores;
	vm->threads = threads;
	return (0);
}
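
/*
 * For example, a guest with two sockets, each containing four cores with two
 * threads per core, is described by vm_set_topology(vm, 2, 4, 2, 0); this
 * succeeds only if vm->maxcpus is at least 2 * 4 * 2 = 16.  The maxcpus
 * argument itself is ignored.
 */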
388 */ 389 for (i = 0; i < vm->maxcpus; i++) { 390 if (CPU_ISSET(i, &vm->active_cpus)) 391 vcpu_notify_event(vm_vcpu(vm, i)); 392 } 393 394 return (0); 395 } 396 397 int 398 vm_reinit(struct vm *vm) 399 { 400 int error; 401 402 /* 403 * A virtual machine can be reset only if all vcpus are suspended. 404 */ 405 if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { 406 vm_reset(vm); 407 error = 0; 408 } else { 409 error = EBUSY; 410 } 411 412 return (error); 413 } 414 415 int 416 vm_activate_cpu(struct vcpu *vcpu) 417 { 418 struct vm *vm = vcpu->vm; 419 420 if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) 421 return (EBUSY); 422 423 CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus); 424 return (0); 425 } 426 427 int 428 vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu) 429 { 430 if (vcpu == NULL) { 431 vm->debug_cpus = vm->active_cpus; 432 for (int i = 0; i < vm->maxcpus; i++) { 433 if (CPU_ISSET(i, &vm->active_cpus)) 434 vcpu_notify_event(vm_vcpu(vm, i)); 435 } 436 } else { 437 if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) 438 return (EINVAL); 439 440 CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); 441 vcpu_notify_event(vcpu); 442 } 443 return (0); 444 } 445 446 int 447 vm_resume_cpu(struct vm *vm, struct vcpu *vcpu) 448 { 449 if (vcpu == NULL) { 450 CPU_ZERO(&vm->debug_cpus); 451 } else { 452 if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus)) 453 return (EINVAL); 454 455 CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); 456 } 457 return (0); 458 } 459 460 cpuset_t 461 vm_active_cpus(struct vm *vm) 462 { 463 return (vm->active_cpus); 464 } 465 466 cpuset_t 467 vm_debug_cpus(struct vm *vm) 468 { 469 return (vm->debug_cpus); 470 } 471 472 cpuset_t 473 vm_suspended_cpus(struct vm *vm) 474 { 475 return (vm->suspended_cpus); 476 } 477