1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * svm_vmcall_test 4 * 5 * Copyright © 2021 Amazon.com, Inc. or its affiliates. 6 * 7 * Xen shared_info / pvclock testing 8 */ 9 10 #include "test_util.h" 11 #include "kvm_util.h" 12 #include "processor.h" 13 14 #include <stdint.h> 15 #include <time.h> 16 #include <sched.h> 17 #include <signal.h> 18 #include <pthread.h> 19 20 #include <sys/eventfd.h> 21 22 #define SHINFO_REGION_GVA 0xc0000000ULL 23 #define SHINFO_REGION_GPA 0xc0000000ULL 24 #define SHINFO_REGION_SLOT 10 25 26 #define DUMMY_REGION_GPA (SHINFO_REGION_GPA + (3 * PAGE_SIZE)) 27 #define DUMMY_REGION_SLOT 11 28 29 #define SHINFO_ADDR (SHINFO_REGION_GPA) 30 #define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40) 31 #define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE) 32 #define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - 15) 33 34 #define SHINFO_VADDR (SHINFO_REGION_GVA) 35 #define VCPU_INFO_VADDR (SHINFO_REGION_GVA + 0x40) 36 #define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + PAGE_SIZE - 15) 37 38 #define EVTCHN_VECTOR 0x10 39 40 #define EVTCHN_TEST1 15 41 #define EVTCHN_TEST2 66 42 #define EVTCHN_TIMER 13 43 44 #define XEN_HYPERCALL_MSR 0x40000000 45 46 #define MIN_STEAL_TIME 50000 47 48 #define SHINFO_RACE_TIMEOUT 2 /* seconds */ 49 50 #define __HYPERVISOR_set_timer_op 15 51 #define __HYPERVISOR_sched_op 29 52 #define __HYPERVISOR_event_channel_op 32 53 54 #define SCHEDOP_poll 3 55 56 #define EVTCHNOP_send 4 57 58 #define EVTCHNSTAT_interdomain 2 59 60 struct evtchn_send { 61 u32 port; 62 }; 63 64 struct sched_poll { 65 u32 *ports; 66 unsigned int nr_ports; 67 u64 timeout; 68 }; 69 70 struct pvclock_vcpu_time_info { 71 u32 version; 72 u32 pad0; 73 u64 tsc_timestamp; 74 u64 system_time; 75 u32 tsc_to_system_mul; 76 s8 tsc_shift; 77 u8 flags; 78 u8 pad[2]; 79 } __attribute__((__packed__)); /* 32 bytes */ 80 81 struct pvclock_wall_clock { 82 u32 version; 83 u32 sec; 84 u32 nsec; 85 } __attribute__((__packed__)); 86 87 struct vcpu_runstate_info { 88 uint32_t state; 89 uint64_t state_entry_time; 90 uint64_t time[5]; /* Extra field for overrun check */ 91 }; 92 93 struct compat_vcpu_runstate_info { 94 uint32_t state; 95 uint64_t state_entry_time; 96 uint64_t time[5]; 97 } __attribute__((__packed__));; 98 99 struct arch_vcpu_info { 100 unsigned long cr2; 101 unsigned long pad; /* sizeof(vcpu_info_t) == 64 */ 102 }; 103 104 struct vcpu_info { 105 uint8_t evtchn_upcall_pending; 106 uint8_t evtchn_upcall_mask; 107 unsigned long evtchn_pending_sel; 108 struct arch_vcpu_info arch; 109 struct pvclock_vcpu_time_info time; 110 }; /* 64 bytes (x86) */ 111 112 struct shared_info { 113 struct vcpu_info vcpu_info[32]; 114 unsigned long evtchn_pending[64]; 115 unsigned long evtchn_mask[64]; 116 struct pvclock_wall_clock wc; 117 uint32_t wc_sec_hi; 118 /* arch_shared_info here */ 119 }; 120 121 #define RUNSTATE_running 0 122 #define RUNSTATE_runnable 1 123 #define RUNSTATE_blocked 2 124 #define RUNSTATE_offline 3 125 126 static const char *runstate_names[] = { 127 "running", 128 "runnable", 129 "blocked", 130 "offline" 131 }; 132 133 struct { 134 struct kvm_irq_routing info; 135 struct kvm_irq_routing_entry entries[2]; 136 } irq_routes; 137 138 static volatile bool guest_saw_irq; 139 140 static void evtchn_handler(struct ex_regs *regs) 141 { 142 struct vcpu_info *vi = (void *)VCPU_INFO_VADDR; 143 vi->evtchn_upcall_pending = 0; 144 vi->evtchn_pending_sel = 0; 145 guest_saw_irq = true; 146 147 GUEST_SYNC(0x20); 148 } 149 150 static void guest_wait_for_irq(void) 151 { 152 while (!guest_saw_irq) 153 __asm__ __volatile__ ("rep nop" : : : "memory"); 154 guest_saw_irq = false; 155 } 156 157 static void guest_code(void) 158 { 159 struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR; 160 int i; 161 162 __asm__ __volatile__( 163 "sti\n" 164 "nop\n" 165 ); 166 167 /* Trigger an interrupt injection */ 168 GUEST_SYNC(0); 169 170 guest_wait_for_irq(); 171 172 /* Test having the host set runstates manually */ 173 GUEST_SYNC(RUNSTATE_runnable); 174 GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0); 175 GUEST_ASSERT(rs->state == 0); 176 177 GUEST_SYNC(RUNSTATE_blocked); 178 GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0); 179 GUEST_ASSERT(rs->state == 0); 180 181 GUEST_SYNC(RUNSTATE_offline); 182 GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0); 183 GUEST_ASSERT(rs->state == 0); 184 185 /* Test runstate time adjust */ 186 GUEST_SYNC(4); 187 GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a); 188 GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b); 189 190 /* Test runstate time set */ 191 GUEST_SYNC(5); 192 GUEST_ASSERT(rs->state_entry_time >= 0x8000); 193 GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0); 194 GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b); 195 GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a); 196 197 /* sched_yield() should result in some 'runnable' time */ 198 GUEST_SYNC(6); 199 GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME); 200 201 /* Attempt to deliver a *masked* interrupt */ 202 GUEST_SYNC(7); 203 204 /* Wait until we see the bit set */ 205 struct shared_info *si = (void *)SHINFO_VADDR; 206 while (!si->evtchn_pending[0]) 207 __asm__ __volatile__ ("rep nop" : : : "memory"); 208 209 /* Now deliver an *unmasked* interrupt */ 210 GUEST_SYNC(8); 211 212 guest_wait_for_irq(); 213 214 /* Change memslots and deliver an interrupt */ 215 GUEST_SYNC(9); 216 217 guest_wait_for_irq(); 218 219 /* Deliver event channel with KVM_XEN_HVM_EVTCHN_SEND */ 220 GUEST_SYNC(10); 221 222 guest_wait_for_irq(); 223 224 GUEST_SYNC(11); 225 226 /* Our turn. Deliver event channel (to ourselves) with 227 * EVTCHNOP_send hypercall. */ 228 unsigned long rax; 229 struct evtchn_send s = { .port = 127 }; 230 __asm__ __volatile__ ("vmcall" : 231 "=a" (rax) : 232 "a" (__HYPERVISOR_event_channel_op), 233 "D" (EVTCHNOP_send), 234 "S" (&s)); 235 236 GUEST_ASSERT(rax == 0); 237 238 guest_wait_for_irq(); 239 240 GUEST_SYNC(12); 241 242 /* Deliver "outbound" event channel to an eventfd which 243 * happens to be one of our own irqfds. */ 244 s.port = 197; 245 __asm__ __volatile__ ("vmcall" : 246 "=a" (rax) : 247 "a" (__HYPERVISOR_event_channel_op), 248 "D" (EVTCHNOP_send), 249 "S" (&s)); 250 251 GUEST_ASSERT(rax == 0); 252 253 guest_wait_for_irq(); 254 255 GUEST_SYNC(13); 256 257 /* Set a timer 100ms in the future. */ 258 __asm__ __volatile__ ("vmcall" : 259 "=a" (rax) : 260 "a" (__HYPERVISOR_set_timer_op), 261 "D" (rs->state_entry_time + 100000000)); 262 GUEST_ASSERT(rax == 0); 263 264 GUEST_SYNC(14); 265 266 /* Now wait for the timer */ 267 guest_wait_for_irq(); 268 269 GUEST_SYNC(15); 270 271 /* The host has 'restored' the timer. Just wait for it. */ 272 guest_wait_for_irq(); 273 274 GUEST_SYNC(16); 275 276 /* Poll for an event channel port which is already set */ 277 u32 ports[1] = { EVTCHN_TIMER }; 278 struct sched_poll p = { 279 .ports = ports, 280 .nr_ports = 1, 281 .timeout = 0, 282 }; 283 284 __asm__ __volatile__ ("vmcall" : 285 "=a" (rax) : 286 "a" (__HYPERVISOR_sched_op), 287 "D" (SCHEDOP_poll), 288 "S" (&p)); 289 290 GUEST_ASSERT(rax == 0); 291 292 GUEST_SYNC(17); 293 294 /* Poll for an unset port and wait for the timeout. */ 295 p.timeout = 100000000; 296 __asm__ __volatile__ ("vmcall" : 297 "=a" (rax) : 298 "a" (__HYPERVISOR_sched_op), 299 "D" (SCHEDOP_poll), 300 "S" (&p)); 301 302 GUEST_ASSERT(rax == 0); 303 304 GUEST_SYNC(18); 305 306 /* A timer will wake the masked port we're waiting on, while we poll */ 307 p.timeout = 0; 308 __asm__ __volatile__ ("vmcall" : 309 "=a" (rax) : 310 "a" (__HYPERVISOR_sched_op), 311 "D" (SCHEDOP_poll), 312 "S" (&p)); 313 314 GUEST_ASSERT(rax == 0); 315 316 GUEST_SYNC(19); 317 318 /* A timer wake an *unmasked* port which should wake us with an 319 * actual interrupt, while we're polling on a different port. */ 320 ports[0]++; 321 p.timeout = 0; 322 __asm__ __volatile__ ("vmcall" : 323 "=a" (rax) : 324 "a" (__HYPERVISOR_sched_op), 325 "D" (SCHEDOP_poll), 326 "S" (&p)); 327 328 GUEST_ASSERT(rax == 0); 329 330 guest_wait_for_irq(); 331 332 GUEST_SYNC(20); 333 334 /* Timer should have fired already */ 335 guest_wait_for_irq(); 336 337 GUEST_SYNC(21); 338 /* Racing host ioctls */ 339 340 guest_wait_for_irq(); 341 342 GUEST_SYNC(22); 343 /* Racing vmcall against host ioctl */ 344 345 ports[0] = 0; 346 347 p = (struct sched_poll) { 348 .ports = ports, 349 .nr_ports = 1, 350 .timeout = 0 351 }; 352 353 wait_for_timer: 354 /* 355 * Poll for a timer wake event while the worker thread is mucking with 356 * the shared info. KVM XEN drops timer IRQs if the shared info is 357 * invalid when the timer expires. Arbitrarily poll 100 times before 358 * giving up and asking the VMM to re-arm the timer. 100 polls should 359 * consume enough time to beat on KVM without taking too long if the 360 * timer IRQ is dropped due to an invalid event channel. 361 */ 362 for (i = 0; i < 100 && !guest_saw_irq; i++) 363 asm volatile("vmcall" 364 : "=a" (rax) 365 : "a" (__HYPERVISOR_sched_op), 366 "D" (SCHEDOP_poll), 367 "S" (&p) 368 : "memory"); 369 370 /* 371 * Re-send the timer IRQ if it was (likely) dropped due to the timer 372 * expiring while the event channel was invalid. 373 */ 374 if (!guest_saw_irq) { 375 GUEST_SYNC(23); 376 goto wait_for_timer; 377 } 378 guest_saw_irq = false; 379 380 GUEST_SYNC(24); 381 } 382 383 static int cmp_timespec(struct timespec *a, struct timespec *b) 384 { 385 if (a->tv_sec > b->tv_sec) 386 return 1; 387 else if (a->tv_sec < b->tv_sec) 388 return -1; 389 else if (a->tv_nsec > b->tv_nsec) 390 return 1; 391 else if (a->tv_nsec < b->tv_nsec) 392 return -1; 393 else 394 return 0; 395 } 396 397 static struct vcpu_info *vinfo; 398 static struct kvm_vcpu *vcpu; 399 400 static void handle_alrm(int sig) 401 { 402 if (vinfo) 403 printf("evtchn_upcall_pending 0x%x\n", vinfo->evtchn_upcall_pending); 404 vcpu_dump(stdout, vcpu, 0); 405 TEST_FAIL("IRQ delivery timed out"); 406 } 407 408 static void *juggle_shinfo_state(void *arg) 409 { 410 struct kvm_vm *vm = (struct kvm_vm *)arg; 411 412 struct kvm_xen_hvm_attr cache_activate = { 413 .type = KVM_XEN_ATTR_TYPE_SHARED_INFO, 414 .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE 415 }; 416 417 struct kvm_xen_hvm_attr cache_deactivate = { 418 .type = KVM_XEN_ATTR_TYPE_SHARED_INFO, 419 .u.shared_info.gfn = KVM_XEN_INVALID_GFN 420 }; 421 422 for (;;) { 423 __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate); 424 __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate); 425 pthread_testcancel(); 426 } 427 428 return NULL; 429 } 430 431 int main(int argc, char *argv[]) 432 { 433 struct timespec min_ts, max_ts, vm_ts; 434 struct kvm_xen_hvm_attr evt_reset; 435 struct kvm_vm *vm; 436 pthread_t thread; 437 bool verbose; 438 int ret; 439 440 verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) || 441 !strncmp(argv[1], "--verbose", 10)); 442 443 int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM); 444 TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO); 445 446 bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE); 447 bool do_runstate_flag = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG); 448 bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL); 449 bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND); 450 451 clock_gettime(CLOCK_REALTIME, &min_ts); 452 453 vm = vm_create_with_one_vcpu(&vcpu, guest_code); 454 455 /* Map a region for the shared_info page */ 456 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 457 SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 3, 0); 458 virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 3); 459 460 struct shared_info *shinfo = addr_gpa2hva(vm, SHINFO_VADDR); 461 462 int zero_fd = open("/dev/zero", O_RDONLY); 463 TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero"); 464 465 struct kvm_xen_hvm_config hvmc = { 466 .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL, 467 .msr = XEN_HYPERCALL_MSR, 468 }; 469 470 /* Let the kernel know that we *will* use it for sending all 471 * event channels, which lets it intercept SCHEDOP_poll */ 472 if (do_evtchn_tests) 473 hvmc.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND; 474 475 vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc); 476 477 struct kvm_xen_hvm_attr lm = { 478 .type = KVM_XEN_ATTR_TYPE_LONG_MODE, 479 .u.long_mode = 1, 480 }; 481 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm); 482 483 if (do_runstate_flag) { 484 struct kvm_xen_hvm_attr ruf = { 485 .type = KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG, 486 .u.runstate_update_flag = 1, 487 }; 488 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ruf); 489 490 ruf.u.runstate_update_flag = 0; 491 vm_ioctl(vm, KVM_XEN_HVM_GET_ATTR, &ruf); 492 TEST_ASSERT(ruf.u.runstate_update_flag == 1, 493 "Failed to read back RUNSTATE_UPDATE_FLAG attr"); 494 } 495 496 struct kvm_xen_hvm_attr ha = { 497 .type = KVM_XEN_ATTR_TYPE_SHARED_INFO, 498 .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE, 499 }; 500 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha); 501 502 /* 503 * Test what happens when the HVA of the shinfo page is remapped after 504 * the kernel has a reference to it. But make sure we copy the clock 505 * info over since that's only set at setup time, and we test it later. 506 */ 507 struct pvclock_wall_clock wc_copy = shinfo->wc; 508 void *m = mmap(shinfo, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, zero_fd, 0); 509 TEST_ASSERT(m == shinfo, "Failed to map /dev/zero over shared info"); 510 shinfo->wc = wc_copy; 511 512 struct kvm_xen_vcpu_attr vi = { 513 .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, 514 .u.gpa = VCPU_INFO_ADDR, 515 }; 516 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vi); 517 518 struct kvm_xen_vcpu_attr pvclock = { 519 .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO, 520 .u.gpa = PVTIME_ADDR, 521 }; 522 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &pvclock); 523 524 struct kvm_xen_hvm_attr vec = { 525 .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR, 526 .u.vector = EVTCHN_VECTOR, 527 }; 528 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec); 529 530 vm_init_descriptor_tables(vm); 531 vcpu_init_descriptor_tables(vcpu); 532 vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler); 533 534 if (do_runstate_tests) { 535 struct kvm_xen_vcpu_attr st = { 536 .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR, 537 .u.gpa = RUNSTATE_ADDR, 538 }; 539 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st); 540 } 541 542 int irq_fd[2] = { -1, -1 }; 543 544 if (do_eventfd_tests) { 545 irq_fd[0] = eventfd(0, 0); 546 irq_fd[1] = eventfd(0, 0); 547 548 /* Unexpected, but not a KVM failure */ 549 if (irq_fd[0] == -1 || irq_fd[1] == -1) 550 do_evtchn_tests = do_eventfd_tests = false; 551 } 552 553 if (do_eventfd_tests) { 554 irq_routes.info.nr = 2; 555 556 irq_routes.entries[0].gsi = 32; 557 irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN; 558 irq_routes.entries[0].u.xen_evtchn.port = EVTCHN_TEST1; 559 irq_routes.entries[0].u.xen_evtchn.vcpu = vcpu->id; 560 irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; 561 562 irq_routes.entries[1].gsi = 33; 563 irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN; 564 irq_routes.entries[1].u.xen_evtchn.port = EVTCHN_TEST2; 565 irq_routes.entries[1].u.xen_evtchn.vcpu = vcpu->id; 566 irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; 567 568 vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info); 569 570 struct kvm_irqfd ifd = { }; 571 572 ifd.fd = irq_fd[0]; 573 ifd.gsi = 32; 574 vm_ioctl(vm, KVM_IRQFD, &ifd); 575 576 ifd.fd = irq_fd[1]; 577 ifd.gsi = 33; 578 vm_ioctl(vm, KVM_IRQFD, &ifd); 579 580 struct sigaction sa = { }; 581 sa.sa_handler = handle_alrm; 582 sigaction(SIGALRM, &sa, NULL); 583 } 584 585 struct kvm_xen_vcpu_attr tmr = { 586 .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER, 587 .u.timer.port = EVTCHN_TIMER, 588 .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL, 589 .u.timer.expires_ns = 0 590 }; 591 592 if (do_evtchn_tests) { 593 struct kvm_xen_hvm_attr inj = { 594 .type = KVM_XEN_ATTR_TYPE_EVTCHN, 595 .u.evtchn.send_port = 127, 596 .u.evtchn.type = EVTCHNSTAT_interdomain, 597 .u.evtchn.flags = 0, 598 .u.evtchn.deliver.port.port = EVTCHN_TEST1, 599 .u.evtchn.deliver.port.vcpu = vcpu->id + 1, 600 .u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL, 601 }; 602 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj); 603 604 /* Test migration to a different vCPU */ 605 inj.u.evtchn.flags = KVM_XEN_EVTCHN_UPDATE; 606 inj.u.evtchn.deliver.port.vcpu = vcpu->id; 607 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj); 608 609 inj.u.evtchn.send_port = 197; 610 inj.u.evtchn.deliver.eventfd.port = 0; 611 inj.u.evtchn.deliver.eventfd.fd = irq_fd[1]; 612 inj.u.evtchn.flags = 0; 613 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj); 614 615 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); 616 } 617 vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR); 618 vinfo->evtchn_upcall_pending = 0; 619 620 struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR); 621 rs->state = 0x5a; 622 623 bool evtchn_irq_expected = false; 624 625 for (;;) { 626 volatile struct kvm_run *run = vcpu->run; 627 struct ucall uc; 628 629 vcpu_run(vcpu); 630 631 TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, 632 "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n", 633 run->exit_reason, 634 exit_reason_str(run->exit_reason)); 635 636 switch (get_ucall(vcpu, &uc)) { 637 case UCALL_ABORT: 638 REPORT_GUEST_ASSERT(uc); 639 /* NOT REACHED */ 640 case UCALL_SYNC: { 641 struct kvm_xen_vcpu_attr rst; 642 long rundelay; 643 644 if (do_runstate_tests) 645 TEST_ASSERT(rs->state_entry_time == rs->time[0] + 646 rs->time[1] + rs->time[2] + rs->time[3], 647 "runstate times don't add up"); 648 649 switch (uc.args[1]) { 650 case 0: 651 if (verbose) 652 printf("Delivering evtchn upcall\n"); 653 evtchn_irq_expected = true; 654 vinfo->evtchn_upcall_pending = 1; 655 break; 656 657 case RUNSTATE_runnable...RUNSTATE_offline: 658 TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen"); 659 if (!do_runstate_tests) 660 goto done; 661 if (verbose) 662 printf("Testing runstate %s\n", runstate_names[uc.args[1]]); 663 rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT; 664 rst.u.runstate.state = uc.args[1]; 665 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst); 666 break; 667 668 case 4: 669 if (verbose) 670 printf("Testing RUNSTATE_ADJUST\n"); 671 rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST; 672 memset(&rst.u, 0, sizeof(rst.u)); 673 rst.u.runstate.state = (uint64_t)-1; 674 rst.u.runstate.time_blocked = 675 0x5a - rs->time[RUNSTATE_blocked]; 676 rst.u.runstate.time_offline = 677 0x6b6b - rs->time[RUNSTATE_offline]; 678 rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked - 679 rst.u.runstate.time_offline; 680 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst); 681 break; 682 683 case 5: 684 if (verbose) 685 printf("Testing RUNSTATE_DATA\n"); 686 rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA; 687 memset(&rst.u, 0, sizeof(rst.u)); 688 rst.u.runstate.state = RUNSTATE_running; 689 rst.u.runstate.state_entry_time = 0x6b6b + 0x5a; 690 rst.u.runstate.time_blocked = 0x6b6b; 691 rst.u.runstate.time_offline = 0x5a; 692 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst); 693 break; 694 695 case 6: 696 if (verbose) 697 printf("Testing steal time\n"); 698 /* Yield until scheduler delay exceeds target */ 699 rundelay = get_run_delay() + MIN_STEAL_TIME; 700 do { 701 sched_yield(); 702 } while (get_run_delay() < rundelay); 703 break; 704 705 case 7: 706 if (!do_eventfd_tests) 707 goto done; 708 if (verbose) 709 printf("Testing masked event channel\n"); 710 shinfo->evtchn_mask[0] = 1UL << EVTCHN_TEST1; 711 eventfd_write(irq_fd[0], 1UL); 712 alarm(1); 713 break; 714 715 case 8: 716 if (verbose) 717 printf("Testing unmasked event channel\n"); 718 /* Unmask that, but deliver the other one */ 719 shinfo->evtchn_pending[0] = 0; 720 shinfo->evtchn_mask[0] = 0; 721 eventfd_write(irq_fd[1], 1UL); 722 evtchn_irq_expected = true; 723 alarm(1); 724 break; 725 726 case 9: 727 TEST_ASSERT(!evtchn_irq_expected, 728 "Expected event channel IRQ but it didn't happen"); 729 shinfo->evtchn_pending[1] = 0; 730 if (verbose) 731 printf("Testing event channel after memslot change\n"); 732 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 733 DUMMY_REGION_GPA, DUMMY_REGION_SLOT, 1, 0); 734 eventfd_write(irq_fd[0], 1UL); 735 evtchn_irq_expected = true; 736 alarm(1); 737 break; 738 739 case 10: 740 TEST_ASSERT(!evtchn_irq_expected, 741 "Expected event channel IRQ but it didn't happen"); 742 if (!do_evtchn_tests) 743 goto done; 744 745 shinfo->evtchn_pending[0] = 0; 746 if (verbose) 747 printf("Testing injection with KVM_XEN_HVM_EVTCHN_SEND\n"); 748 749 struct kvm_irq_routing_xen_evtchn e; 750 e.port = EVTCHN_TEST2; 751 e.vcpu = vcpu->id; 752 e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; 753 754 vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &e); 755 evtchn_irq_expected = true; 756 alarm(1); 757 break; 758 759 case 11: 760 TEST_ASSERT(!evtchn_irq_expected, 761 "Expected event channel IRQ but it didn't happen"); 762 shinfo->evtchn_pending[1] = 0; 763 764 if (verbose) 765 printf("Testing guest EVTCHNOP_send direct to evtchn\n"); 766 evtchn_irq_expected = true; 767 alarm(1); 768 break; 769 770 case 12: 771 TEST_ASSERT(!evtchn_irq_expected, 772 "Expected event channel IRQ but it didn't happen"); 773 shinfo->evtchn_pending[0] = 0; 774 775 if (verbose) 776 printf("Testing guest EVTCHNOP_send to eventfd\n"); 777 evtchn_irq_expected = true; 778 alarm(1); 779 break; 780 781 case 13: 782 TEST_ASSERT(!evtchn_irq_expected, 783 "Expected event channel IRQ but it didn't happen"); 784 shinfo->evtchn_pending[1] = 0; 785 786 if (verbose) 787 printf("Testing guest oneshot timer\n"); 788 break; 789 790 case 14: 791 memset(&tmr, 0, sizeof(tmr)); 792 tmr.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER; 793 vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr); 794 TEST_ASSERT(tmr.u.timer.port == EVTCHN_TIMER, 795 "Timer port not returned"); 796 TEST_ASSERT(tmr.u.timer.priority == KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL, 797 "Timer priority not returned"); 798 TEST_ASSERT(tmr.u.timer.expires_ns > rs->state_entry_time, 799 "Timer expiry not returned"); 800 evtchn_irq_expected = true; 801 alarm(1); 802 break; 803 804 case 15: 805 TEST_ASSERT(!evtchn_irq_expected, 806 "Expected event channel IRQ but it didn't happen"); 807 shinfo->evtchn_pending[0] = 0; 808 809 if (verbose) 810 printf("Testing restored oneshot timer\n"); 811 812 tmr.u.timer.expires_ns = rs->state_entry_time + 100000000; 813 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); 814 evtchn_irq_expected = true; 815 alarm(1); 816 break; 817 818 case 16: 819 TEST_ASSERT(!evtchn_irq_expected, 820 "Expected event channel IRQ but it didn't happen"); 821 822 if (verbose) 823 printf("Testing SCHEDOP_poll with already pending event\n"); 824 shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 1UL << EVTCHN_TIMER; 825 alarm(1); 826 break; 827 828 case 17: 829 if (verbose) 830 printf("Testing SCHEDOP_poll timeout\n"); 831 shinfo->evtchn_pending[0] = 0; 832 alarm(1); 833 break; 834 835 case 18: 836 if (verbose) 837 printf("Testing SCHEDOP_poll wake on masked event\n"); 838 839 tmr.u.timer.expires_ns = rs->state_entry_time + 100000000; 840 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); 841 alarm(1); 842 break; 843 844 case 19: 845 shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 0; 846 if (verbose) 847 printf("Testing SCHEDOP_poll wake on unmasked event\n"); 848 849 evtchn_irq_expected = true; 850 tmr.u.timer.expires_ns = rs->state_entry_time + 100000000; 851 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); 852 853 /* Read it back and check the pending time is reported correctly */ 854 tmr.u.timer.expires_ns = 0; 855 vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr); 856 TEST_ASSERT(tmr.u.timer.expires_ns == rs->state_entry_time + 100000000, 857 "Timer not reported pending"); 858 alarm(1); 859 break; 860 861 case 20: 862 TEST_ASSERT(!evtchn_irq_expected, 863 "Expected event channel IRQ but it didn't happen"); 864 /* Read timer and check it is no longer pending */ 865 vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr); 866 TEST_ASSERT(!tmr.u.timer.expires_ns, "Timer still reported pending"); 867 868 shinfo->evtchn_pending[0] = 0; 869 if (verbose) 870 printf("Testing timer in the past\n"); 871 872 evtchn_irq_expected = true; 873 tmr.u.timer.expires_ns = rs->state_entry_time - 100000000ULL; 874 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); 875 alarm(1); 876 break; 877 878 case 21: 879 TEST_ASSERT(!evtchn_irq_expected, 880 "Expected event channel IRQ but it didn't happen"); 881 alarm(0); 882 883 if (verbose) 884 printf("Testing shinfo lock corruption (KVM_XEN_HVM_EVTCHN_SEND)\n"); 885 886 ret = pthread_create(&thread, NULL, &juggle_shinfo_state, (void *)vm); 887 TEST_ASSERT(ret == 0, "pthread_create() failed: %s", strerror(ret)); 888 889 struct kvm_irq_routing_xen_evtchn uxe = { 890 .port = 1, 891 .vcpu = vcpu->id, 892 .priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL 893 }; 894 895 evtchn_irq_expected = true; 896 for (time_t t = time(NULL) + SHINFO_RACE_TIMEOUT; time(NULL) < t;) 897 __vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &uxe); 898 break; 899 900 case 22: 901 TEST_ASSERT(!evtchn_irq_expected, 902 "Expected event channel IRQ but it didn't happen"); 903 904 if (verbose) 905 printf("Testing shinfo lock corruption (SCHEDOP_poll)\n"); 906 907 shinfo->evtchn_pending[0] = 1; 908 909 evtchn_irq_expected = true; 910 tmr.u.timer.expires_ns = rs->state_entry_time + 911 SHINFO_RACE_TIMEOUT * 1000000000ULL; 912 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); 913 break; 914 915 case 23: 916 /* 917 * Optional and possibly repeated sync point. 918 * Injecting the timer IRQ may fail if the 919 * shinfo is invalid when the timer expires. 920 * If the timer has expired but the IRQ hasn't 921 * been delivered, rearm the timer and retry. 922 */ 923 vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr); 924 925 /* Resume the guest if the timer is still pending. */ 926 if (tmr.u.timer.expires_ns) 927 break; 928 929 /* All done if the IRQ was delivered. */ 930 if (!evtchn_irq_expected) 931 break; 932 933 tmr.u.timer.expires_ns = rs->state_entry_time + 934 SHINFO_RACE_TIMEOUT * 1000000000ULL; 935 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); 936 break; 937 case 24: 938 TEST_ASSERT(!evtchn_irq_expected, 939 "Expected event channel IRQ but it didn't happen"); 940 941 ret = pthread_cancel(thread); 942 TEST_ASSERT(ret == 0, "pthread_cancel() failed: %s", strerror(ret)); 943 944 ret = pthread_join(thread, 0); 945 TEST_ASSERT(ret == 0, "pthread_join() failed: %s", strerror(ret)); 946 goto done; 947 948 case 0x20: 949 TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ"); 950 evtchn_irq_expected = false; 951 break; 952 } 953 break; 954 } 955 case UCALL_DONE: 956 goto done; 957 default: 958 TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); 959 } 960 } 961 962 done: 963 evt_reset.type = KVM_XEN_ATTR_TYPE_EVTCHN; 964 evt_reset.u.evtchn.flags = KVM_XEN_EVTCHN_RESET; 965 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &evt_reset); 966 967 alarm(0); 968 clock_gettime(CLOCK_REALTIME, &max_ts); 969 970 /* 971 * Just a *really* basic check that things are being put in the 972 * right place. The actual calculations are much the same for 973 * Xen as they are for the KVM variants, so no need to check. 974 */ 975 struct pvclock_wall_clock *wc; 976 struct pvclock_vcpu_time_info *ti, *ti2; 977 978 wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00); 979 ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20); 980 ti2 = addr_gpa2hva(vm, PVTIME_ADDR); 981 982 if (verbose) { 983 printf("Wall clock (v %d) %d.%09d\n", wc->version, wc->sec, wc->nsec); 984 printf("Time info 1: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n", 985 ti->version, ti->tsc_timestamp, ti->system_time, ti->tsc_to_system_mul, 986 ti->tsc_shift, ti->flags); 987 printf("Time info 2: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n", 988 ti2->version, ti2->tsc_timestamp, ti2->system_time, ti2->tsc_to_system_mul, 989 ti2->tsc_shift, ti2->flags); 990 } 991 992 vm_ts.tv_sec = wc->sec; 993 vm_ts.tv_nsec = wc->nsec; 994 TEST_ASSERT(wc->version && !(wc->version & 1), 995 "Bad wallclock version %x", wc->version); 996 TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old"); 997 TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new"); 998 999 TEST_ASSERT(ti->version && !(ti->version & 1), 1000 "Bad time_info version %x", ti->version); 1001 TEST_ASSERT(ti2->version && !(ti2->version & 1), 1002 "Bad time_info version %x", ti->version); 1003 1004 if (do_runstate_tests) { 1005 /* 1006 * Fetch runstate and check sanity. Strictly speaking in the 1007 * general case we might not expect the numbers to be identical 1008 * but in this case we know we aren't running the vCPU any more. 1009 */ 1010 struct kvm_xen_vcpu_attr rst = { 1011 .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA, 1012 }; 1013 vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &rst); 1014 1015 if (verbose) { 1016 printf("Runstate: %s(%d), entry %" PRIu64 " ns\n", 1017 rs->state <= RUNSTATE_offline ? runstate_names[rs->state] : "unknown", 1018 rs->state, rs->state_entry_time); 1019 for (int i = RUNSTATE_running; i <= RUNSTATE_offline; i++) { 1020 printf("State %s: %" PRIu64 " ns\n", 1021 runstate_names[i], rs->time[i]); 1022 } 1023 } 1024 1025 /* 1026 * Exercise runstate info at all points across the page boundary, in 1027 * 32-bit and 64-bit mode. In particular, test the case where it is 1028 * configured in 32-bit mode and then switched to 64-bit mode while 1029 * active, which takes it onto the second page. 1030 */ 1031 unsigned long runstate_addr; 1032 struct compat_vcpu_runstate_info *crs; 1033 for (runstate_addr = SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - sizeof(*rs) - 4; 1034 runstate_addr < SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE + 4; runstate_addr++) { 1035 1036 rs = addr_gpa2hva(vm, runstate_addr); 1037 crs = (void *)rs; 1038 1039 memset(rs, 0xa5, sizeof(*rs)); 1040 1041 /* Set to compatibility mode */ 1042 lm.u.long_mode = 0; 1043 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm); 1044 1045 /* Set runstate to new address (kernel will write it) */ 1046 struct kvm_xen_vcpu_attr st = { 1047 .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR, 1048 .u.gpa = runstate_addr, 1049 }; 1050 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st); 1051 1052 if (verbose) 1053 printf("Compatibility runstate at %08lx\n", runstate_addr); 1054 1055 TEST_ASSERT(crs->state == rst.u.runstate.state, "Runstate mismatch"); 1056 TEST_ASSERT(crs->state_entry_time == rst.u.runstate.state_entry_time, 1057 "State entry time mismatch"); 1058 TEST_ASSERT(crs->time[RUNSTATE_running] == rst.u.runstate.time_running, 1059 "Running time mismatch"); 1060 TEST_ASSERT(crs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable, 1061 "Runnable time mismatch"); 1062 TEST_ASSERT(crs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked, 1063 "Blocked time mismatch"); 1064 TEST_ASSERT(crs->time[RUNSTATE_offline] == rst.u.runstate.time_offline, 1065 "Offline time mismatch"); 1066 TEST_ASSERT(crs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL, 1067 "Structure overrun"); 1068 TEST_ASSERT(crs->state_entry_time == crs->time[0] + 1069 crs->time[1] + crs->time[2] + crs->time[3], 1070 "runstate times don't add up"); 1071 1072 1073 /* Now switch to 64-bit mode */ 1074 lm.u.long_mode = 1; 1075 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm); 1076 1077 memset(rs, 0xa5, sizeof(*rs)); 1078 1079 /* Don't change the address, just trigger a write */ 1080 struct kvm_xen_vcpu_attr adj = { 1081 .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST, 1082 .u.runstate.state = (uint64_t)-1 1083 }; 1084 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &adj); 1085 1086 if (verbose) 1087 printf("64-bit runstate at %08lx\n", runstate_addr); 1088 1089 TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch"); 1090 TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time, 1091 "State entry time mismatch"); 1092 TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running, 1093 "Running time mismatch"); 1094 TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable, 1095 "Runnable time mismatch"); 1096 TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked, 1097 "Blocked time mismatch"); 1098 TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline, 1099 "Offline time mismatch"); 1100 TEST_ASSERT(rs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL, 1101 "Structure overrun"); 1102 1103 TEST_ASSERT(rs->state_entry_time == rs->time[0] + 1104 rs->time[1] + rs->time[2] + rs->time[3], 1105 "runstate times don't add up"); 1106 } 1107 } 1108 1109 kvm_vm_free(vm); 1110 return 0; 1111 } 1112