// SPDX-License-Identifier: GPL-2.0
/*
 * xapic_ipi_test
 *
 * Copyright (C) 2020, Google LLC.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * Test that when the APIC is in xAPIC mode, a vCPU can send an IPI to wake
 * another vCPU that is halted when KVM's backing page for the APIC access
 * address has been moved by mm.
 *
 * The test starts two vCPUs: one that sends IPIs and one that continually
 * executes HLT. The sender checks that the halter has woken from the HLT and
 * has reentered HLT before sending the next IPI. While the vCPUs are running,
 * the host continually calls migrate_pages to move all of the process' pages
 * amongst the available numa nodes on the machine.
 *
 * Migration is a command line option. When used on non-numa machines, the
 * test will exit with an error. The test is still useful on non-numa machines
 * for testing IPIs.
 */
#include <getopt.h>
#include <pthread.h>
#include <inttypes.h>
#include <string.h>
#include <time.h>

#include "kvm_util.h"
#include "numaif.h"
#include "processor.h"
#include "test_util.h"
#include "vmx.h"

/* Default running time for the test */
#define DEFAULT_RUN_SECS 3

/* Default delay between migrate_pages calls (microseconds) */
#define DEFAULT_DELAY_USECS 500000

/*
 * Vector for IPI from sender vCPU to halting vCPU.
 * Value is arbitrary and was chosen for the alternating bit pattern. Any
 * value should work.
 */
#define IPI_VECTOR 0xa5

/*
 * Incremented in the IPI handler.
Provides evidence to the sender that the IPI
 * arrived at the destination.
 */
static volatile uint64_t ipis_rcvd;

/*
 * Data struct shared between host main thread and vCPUs. Lives in a guest
 * page whose host mapping the main thread reads/writes while the guests run;
 * counters the sender polls are marked volatile.
 */
struct test_data_page {
	uint32_t halter_apic_id;
	volatile uint64_t hlt_count;
	volatile uint64_t wake_count;
	uint64_t ipis_sent;
	uint64_t migrations_attempted;
	uint64_t migrations_completed;
	uint32_t icr;
	uint32_t icr2;
	uint32_t halter_tpr;
	uint32_t halter_ppr;

	/*
	 * Record local version register as a cross-check that APIC access
	 * worked. Value should match what KVM reports (APIC_VERSION in
	 * arch/x86/kvm/lapic.c). If test is failing, check that values match
	 * to determine whether APIC access exits are working.
	 */
	uint32_t halter_lvr;
};

/* Per-vCPU-thread arguments passed to vcpu_thread(). */
struct thread_params {
	struct test_data_page *data;
	struct kvm_vcpu *vcpu;
	uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */
};

/*
 * Guest-side sanity check: the APIC base MSR must point at the default GPA,
 * since the test identity-maps and relies on APIC_DEFAULT_GPA.
 */
void verify_apic_base_addr(void)
{
	uint64_t msr = rdmsr(MSR_IA32_APICBASE);
	uint64_t base = GET_APIC_BASE(msr);

	GUEST_ASSERT(base == APIC_DEFAULT_GPA);
}

/* Guest code for the vCPU that repeatedly halts and is woken by IPIs. */
static void halter_guest_code(struct test_data_page *data)
{
	verify_apic_base_addr();
	xapic_enable();

	/* Publish APIC ID so the sender can target its IPIs. */
	data->halter_apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID));
	data->halter_lvr = xapic_read_reg(APIC_LVR);

	/*
	 * Loop forever HLTing and recording halts & wakes. Disable interrupts
	 * each time around to minimize window between signaling the pending
	 * halt to the sender vCPU and executing the halt. No need to disable on
	 * first run as this vCPU executes first and the host waits for it to
	 * signal going into first halt before starting the sender vCPU. Record
	 * TPR and PPR for diagnostic purposes in case the test fails.
	 */
	for (;;) {
		data->halter_tpr = xapic_read_reg(APIC_TASKPRI);
		data->halter_ppr = xapic_read_reg(APIC_PROCPRI);
		data->hlt_count++;
		safe_halt();
		cli();
		data->wake_count++;
	}
}

/*
 * Runs on halter vCPU when IPI arrives. Write an arbitrary non-zero value to
 * enable diagnosing errant writes to the APIC access address backing page in
 * case of test failure.
 */
static void guest_ipi_handler(struct ex_regs *regs)
{
	ipis_rcvd++;
	xapic_write_reg(APIC_EOI, 77);
}

/* Guest code for the vCPU that sends IPIs to the halter and verifies wakeups. */
static void sender_guest_code(struct test_data_page *data)
{
	uint64_t last_wake_count;
	uint64_t last_hlt_count;
	uint64_t last_ipis_rcvd_count;
	uint32_t icr_val;
	uint32_t icr2_val;
	uint64_t tsc_start;

	verify_apic_base_addr();
	xapic_enable();

	/*
	 * Init interrupt command register for sending IPIs
	 *
	 * Delivery mode=fixed, per SDM:
	 * "Delivers the interrupt specified in the vector field to the target
	 * processor."
	 *
	 * Destination mode=physical i.e. specify target by its local APIC
	 * ID. This vCPU assumes that the halter vCPU has already started and
	 * set data->halter_apic_id.
	 */
	icr_val = (APIC_DEST_PHYSICAL | APIC_DM_FIXED | IPI_VECTOR);
	icr2_val = SET_APIC_DEST_FIELD(data->halter_apic_id);
	/* Record ICR values for post-mortem diagnostics. */
	data->icr = icr_val;
	data->icr2 = icr2_val;

	last_wake_count = data->wake_count;
	last_hlt_count = data->hlt_count;
	last_ipis_rcvd_count = ipis_rcvd;
	for (;;) {
		/*
		 * Send IPI to halter vCPU.
		 * First IPI can be sent unconditionally because halter vCPU
		 * starts earlier.
		 */
		xapic_write_reg(APIC_ICR2, icr2_val);
		xapic_write_reg(APIC_ICR, icr_val);
		data->ipis_sent++;

		/*
		 * Wait up to ~1 sec for halter to indicate that it has:
		 * 1. Received the IPI
		 * 2. Woken up from the halt
		 * 3.
Gone back into halt.
		 * Current CPUs typically run at 2.x GHz which is ~2
		 * billion ticks per second.
		 */
		tsc_start = rdtsc();
		while (rdtsc() - tsc_start < 2000000000) {
			if ((ipis_rcvd != last_ipis_rcvd_count) &&
			    (data->wake_count != last_wake_count) &&
			    (data->hlt_count != last_hlt_count))
				break;
		}

		GUEST_ASSERT((ipis_rcvd != last_ipis_rcvd_count) &&
			     (data->wake_count != last_wake_count) &&
			     (data->hlt_count != last_hlt_count));

		last_wake_count = data->wake_count;
		last_hlt_count = data->hlt_count;
		last_ipis_rcvd_count = ipis_rcvd;
	}
}

/*
 * Host thread that runs one vCPU. Enables asynchronous cancellation so the
 * main thread can stop the (deliberately infinite) guest loops, then runs the
 * vCPU and dumps full diagnostics if the guest aborted.
 */
static void *vcpu_thread(void *arg)
{
	struct thread_params *params = (struct thread_params *)arg;
	struct kvm_vcpu *vcpu = params->vcpu;
	struct ucall uc;
	int old;
	int r;

	r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
	TEST_ASSERT(r == 0,
		    "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
		    vcpu->id, r);

	fprintf(stderr, "vCPU thread running vCPU %u\n", vcpu->id);
	vcpu_run(vcpu);

	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);

	if (get_ucall(vcpu, &uc) == UCALL_ABORT) {
		TEST_ASSERT(false,
			    "vCPU %u exited with error: %s.\n"
			    "Sending vCPU sent %lu IPIs to halting vCPU\n"
			    "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
			    "Halter TPR=%#x PPR=%#x LVR=%#x\n"
			    "Migrations attempted: %lu\n"
			    "Migrations completed: %lu",
			    vcpu->id, (const char *)uc.args[0],
			    params->data->ipis_sent, params->data->hlt_count,
			    params->data->wake_count,
			    *params->pipis_rcvd, params->data->halter_tpr,
			    params->data->halter_ppr, params->data->halter_lvr,
			    params->data->migrations_attempted,
			    params->data->migrations_completed);
	}

	return NULL;
}

/* Cancel a vCPU thread and reap it, verifying it actually was cancelled. */
static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
{
	void *retval;
	int r;

	r = pthread_cancel(thread);
	TEST_ASSERT(r == 0,
		    "pthread_cancel on vcpu_id=%d failed with errno=%d",
		    vcpu->id, r);

	r = pthread_join(thread, &retval);
	TEST_ASSERT(r == 0,
		    "pthread_join on vcpu_id=%d failed with errno=%d",
		    vcpu->id, r);
	TEST_ASSERT(retval == PTHREAD_CANCELED,
		    "expected retval=%p, got %p", PTHREAD_CANCELED,
		    retval);
}

/*
 * Repeatedly migrate all of this process' pages between numa nodes for
 * run_secs seconds, sleeping delay_usecs between calls, while printing
 * progress and asserting that the vCPUs keep making forward progress.
 */
void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
		   uint64_t *pipis_rcvd)
{
	long pages_not_moved;
	unsigned long nodemask = 0;
	unsigned long nodemasks[sizeof(nodemask) * 8];
	int nodes = 0;
	time_t start_time, last_update, now;
	time_t interval_secs = 1;
	int i, r;
	int from, to;
	unsigned long bit;
	uint64_t hlt_count;
	uint64_t wake_count;
	uint64_t ipis_sent;

	fprintf(stderr, "Calling migrate_pages every %d microseconds\n",
		delay_usecs);

	/* Get set of first 64 numa nodes available */
	r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8,
			  0, MPOL_F_MEMS_ALLOWED);
	TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno);

	fprintf(stderr, "Numa nodes found amongst first %lu possible nodes "
		"(each 1-bit indicates node is present): %#lx\n",
		sizeof(nodemask) * 8, nodemask);

	/* Init array of masks containing a single-bit in each, one for each
	 * available node. migrate_pages called below requires specifying nodes
	 * as bit masks.
	 */
	for (i = 0, bit = 1; i < sizeof(nodemask) * 8; i++, bit <<= 1) {
		if (nodemask & bit) {
			nodemasks[nodes] = nodemask & bit;
			nodes++;
		}
	}

	TEST_ASSERT(nodes > 1,
		    "Did not find at least 2 numa nodes. Can't do migration");

	fprintf(stderr, "Migrating amongst %d nodes found\n", nodes);

	from = 0;
	to = 1;
	start_time = time(NULL);
	last_update = start_time;

	/* Snapshot the counters so progress can be checked every interval. */
	ipis_sent = data->ipis_sent;
	hlt_count = data->hlt_count;
	wake_count = data->wake_count;

	while ((int)(time(NULL) - start_time) < run_secs) {
		data->migrations_attempted++;

		/*
		 * migrate_pages with PID=0 will migrate all pages of this
		 * process between the nodes specified as bitmasks. The page
		 * backing the APIC access address belongs to this process
		 * because it is allocated by KVM in the context of the
		 * KVM_CREATE_VCPU ioctl. If that assumption ever changes this
		 * test may break or give a false positive signal.
		 */
		pages_not_moved = migrate_pages(0, sizeof(nodemasks[from]),
						&nodemasks[from],
						&nodemasks[to]);
		if (pages_not_moved < 0)
			fprintf(stderr,
				"migrate_pages failed, errno=%d\n", errno);
		else if (pages_not_moved > 0)
			fprintf(stderr,
				"migrate_pages could not move %ld pages\n",
				pages_not_moved);
		else
			data->migrations_completed++;

		/* Round-robin through the discovered nodes. */
		from = to;
		to++;
		if (to == nodes)
			to = 0;

		now = time(NULL);
		if (((now - start_time) % interval_secs == 0) &&
		    (now != last_update)) {
			last_update = now;
			fprintf(stderr,
				"%lu seconds: Migrations attempted=%lu completed=%lu, "
				"IPIs sent=%lu received=%lu, HLTs=%lu wakes=%lu\n",
				now - start_time, data->migrations_attempted,
				data->migrations_completed,
				data->ipis_sent, *pipis_rcvd,
				data->hlt_count, data->wake_count);

			TEST_ASSERT(ipis_sent != data->ipis_sent &&
				    hlt_count != data->hlt_count &&
				    wake_count != data->wake_count,
				    "IPI, HLT and wake count have not increased "
				    "in the last %lu seconds. "
				    "HLTer is likely hung.", interval_secs);

			ipis_sent = data->ipis_sent;
			hlt_count = data->hlt_count;
			wake_count = data->wake_count;
		}
		usleep(delay_usecs);
	}
}

/*
 * Parse -s <runtime secs>, -d <delay usecs> and -m (enable migration).
 * Unrecognized options abort with a usage message.
 */
void get_cmdline_args(int argc, char *argv[], int *run_secs,
		      bool *migrate, int *delay_usecs)
{
	for (;;) {
		int opt = getopt(argc, argv, "s:d:m");

		if (opt == -1)
			break;
		switch (opt) {
		case 's':
			*run_secs = parse_size(optarg);
			break;
		case 'm':
			*migrate = true;
			break;
		case 'd':
			*delay_usecs = parse_size(optarg);
			break;
		default:
			TEST_ASSERT(false,
				    "Usage: -s <runtime seconds>. Default is %d seconds.\n"
				    "-m adds calls to migrate_pages while vCPUs are running."
				    " Default is no migrations.\n"
				    "-d <delay microseconds> - delay between migrate_pages() calls."
				    " Default is %d microseconds.",
				    DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS);
		}
	}
}

int main(int argc, char *argv[])
{
	int r;
	int wait_secs;
	const int max_halter_wait = 10;
	int run_secs = 0;
	int delay_usecs = 0;
	struct test_data_page *data;
	vm_vaddr_t test_data_page_vaddr;
	bool migrate = false;
	pthread_t threads[2];
	struct thread_params params[2];
	struct kvm_vm *vm;
	uint64_t *pipis_rcvd;

	get_cmdline_args(argc, argv, &run_secs, &migrate, &delay_usecs);
	if (run_secs <= 0)
		run_secs = DEFAULT_RUN_SECS;
	if (delay_usecs <= 0)
		delay_usecs = DEFAULT_DELAY_USECS;

	vm = vm_create_with_one_vcpu(&params[0].vcpu, halter_guest_code);

	vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);

	/* Identity-map the APIC page so the guest can access it directly. */
	virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);

	params[1].vcpu = vm_vcpu_add(vm, 1, sender_guest_code);

	/* Shared page both guests and the host use for counters/diagnostics. */
	test_data_page_vaddr = vm_vaddr_alloc_page(vm);
	data = addr_gva2hva(vm, test_data_page_vaddr);
	memset(data, 0, sizeof(*data));
	params[0].data = data;
	params[1].data = data;
vcpu_args_set(params[0].vcpu, 1, test_data_page_vaddr); 425 vcpu_args_set(params[1].vcpu, 1, test_data_page_vaddr); 426 427 pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd); 428 params[0].pipis_rcvd = pipis_rcvd; 429 params[1].pipis_rcvd = pipis_rcvd; 430 431 /* Start halter vCPU thread and wait for it to execute first HLT. */ 432 r = pthread_create(&threads[0], NULL, vcpu_thread, ¶ms[0]); 433 TEST_ASSERT(r == 0, 434 "pthread_create halter failed errno=%d", errno); 435 fprintf(stderr, "Halter vCPU thread started\n"); 436 437 wait_secs = 0; 438 while ((wait_secs < max_halter_wait) && !data->hlt_count) { 439 sleep(1); 440 wait_secs++; 441 } 442 443 TEST_ASSERT(data->hlt_count, 444 "Halter vCPU did not execute first HLT within %d seconds", 445 max_halter_wait); 446 447 fprintf(stderr, 448 "Halter vCPU thread reported its APIC ID: %u after %d seconds.\n", 449 data->halter_apic_id, wait_secs); 450 451 r = pthread_create(&threads[1], NULL, vcpu_thread, ¶ms[1]); 452 TEST_ASSERT(r == 0, "pthread_create sender failed errno=%d", errno); 453 454 fprintf(stderr, 455 "IPI sender vCPU thread started. Letting vCPUs run for %d seconds.\n", 456 run_secs); 457 458 if (!migrate) 459 sleep(run_secs); 460 else 461 do_migrations(data, run_secs, delay_usecs, pipis_rcvd); 462 463 /* 464 * Cancel threads and wait for them to stop. 465 */ 466 cancel_join_vcpu_thread(threads[0], params[0].vcpu); 467 cancel_join_vcpu_thread(threads[1], params[1].vcpu); 468 469 /* 470 * If the host support Idle HLT, i.e. KVM *might* be using Idle HLT, 471 * then the number of HLT exits may be less than the number of HLTs 472 * that were executed, as Idle HLT elides the exit if the vCPU has an 473 * unmasked, pending IRQ (or NMI). 
474 */ 475 if (this_cpu_has(X86_FEATURE_IDLE_HLT)) 476 TEST_ASSERT(data->hlt_count >= vcpu_get_stat(params[0].vcpu, halt_exits), 477 "HLT insns = %lu, HLT exits = %lu", 478 data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits)); 479 else 480 TEST_ASSERT_EQ(data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits)); 481 482 fprintf(stderr, 483 "Test successful after running for %d seconds.\n" 484 "Sending vCPU sent %lu IPIs to halting vCPU\n" 485 "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n" 486 "Halter APIC ID=%#x\n" 487 "Sender ICR value=%#x ICR2 value=%#x\n" 488 "Halter TPR=%#x PPR=%#x LVR=%#x\n" 489 "Migrations attempted: %lu\n" 490 "Migrations completed: %lu\n", 491 run_secs, data->ipis_sent, 492 data->hlt_count, data->wake_count, *pipis_rcvd, 493 data->halter_apic_id, 494 data->icr, data->icr2, 495 data->halter_tpr, data->halter_ppr, data->halter_lvr, 496 data->migrations_attempted, data->migrations_completed); 497 498 kvm_vm_free(vm); 499 500 return 0; 501 } 502