1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * xapic_ipi_test 4 * 5 * Copyright (C) 2020, Google LLC. 6 * 7 * This work is licensed under the terms of the GNU GPL, version 2. 8 * 9 * Test that when the APIC is in xAPIC mode, a vCPU can send an IPI to wake 10 * another vCPU that is halted when KVM's backing page for the APIC access 11 * address has been moved by mm. 12 * 13 * The test starts two vCPUs: one that sends IPIs and one that continually 14 * executes HLT. The sender checks that the halter has woken from the HLT and 15 * has reentered HLT before sending the next IPI. While the vCPUs are running, 16 * the host continually calls migrate_pages to move all of the process' pages 17 * amongst the available numa nodes on the machine. 18 * 19 * Migration is a command line option. When used on non-numa machines will 20 * exit with error. Test is still usefull on non-numa for testing IPIs. 21 */ 22 #include <getopt.h> 23 #include <pthread.h> 24 #include <inttypes.h> 25 #include <string.h> 26 #include <time.h> 27 28 #include "kvm_util.h" 29 #include "numaif.h" 30 #include "processor.h" 31 #include "test_util.h" 32 #include "vmx.h" 33 34 /* Default running time for the test */ 35 #define DEFAULT_RUN_SECS 3 36 37 /* Default delay between migrate_pages calls (microseconds) */ 38 #define DEFAULT_DELAY_USECS 500000 39 40 /* 41 * Vector for IPI from sender vCPU to halting vCPU. 42 * Value is arbitrary and was chosen for the alternating bit pattern. Any 43 * value should work. 44 */ 45 #define IPI_VECTOR 0xa5 46 47 /* 48 * Incremented in the IPI handler. Provides evidence to the sender that the IPI 49 * arrived at the destination 50 */ 51 static volatile uint64_t ipis_rcvd; 52 53 /* Data struct shared between host main thread and vCPUs */ 54 struct test_data_page { 55 uint32_t halter_apic_id; 56 volatile uint64_t hlt_count; 57 volatile uint64_t wake_count; 58 uint64_t ipis_sent; 59 uint64_t migrations_attempted; 60 uint64_t migrations_completed; 61 uint32_t icr; 62 uint32_t icr2; 63 uint32_t halter_tpr; 64 uint32_t halter_ppr; 65 66 /* 67 * Record local version register as a cross-check that APIC access 68 * worked. Value should match what KVM reports (APIC_VERSION in 69 * arch/x86/kvm/lapic.c). If test is failing, check that values match 70 * to determine whether APIC access exits are working. 71 */ 72 uint32_t halter_lvr; 73 }; 74 75 struct thread_params { 76 struct test_data_page *data; 77 struct kvm_vcpu *vcpu; 78 uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */ 79 }; 80 81 void verify_apic_base_addr(void) 82 { 83 uint64_t msr = rdmsr(MSR_IA32_APICBASE); 84 uint64_t base = GET_APIC_BASE(msr); 85 86 GUEST_ASSERT(base == APIC_DEFAULT_GPA); 87 } 88 89 static void halter_guest_code(struct test_data_page *data) 90 { 91 verify_apic_base_addr(); 92 xapic_enable(); 93 94 data->halter_apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID)); 95 data->halter_lvr = xapic_read_reg(APIC_LVR); 96 97 /* 98 * Loop forever HLTing and recording halts & wakes. Disable interrupts 99 * each time around to minimize window between signaling the pending 100 * halt to the sender vCPU and executing the halt. No need to disable on 101 * first run as this vCPU executes first and the host waits for it to 102 * signal going into first halt before starting the sender vCPU. Record 103 * TPR and PPR for diagnostic purposes in case the test fails. 104 */ 105 for (;;) { 106 data->halter_tpr = xapic_read_reg(APIC_TASKPRI); 107 data->halter_ppr = xapic_read_reg(APIC_PROCPRI); 108 data->hlt_count++; 109 asm volatile("sti; hlt; cli"); 110 data->wake_count++; 111 } 112 } 113 114 /* 115 * Runs on halter vCPU when IPI arrives. Write an arbitrary non-zero value to 116 * enable diagnosing errant writes to the APIC access address backing page in 117 * case of test failure. 118 */ 119 static void guest_ipi_handler(struct ex_regs *regs) 120 { 121 ipis_rcvd++; 122 xapic_write_reg(APIC_EOI, 77); 123 } 124 125 static void sender_guest_code(struct test_data_page *data) 126 { 127 uint64_t last_wake_count; 128 uint64_t last_hlt_count; 129 uint64_t last_ipis_rcvd_count; 130 uint32_t icr_val; 131 uint32_t icr2_val; 132 uint64_t tsc_start; 133 134 verify_apic_base_addr(); 135 xapic_enable(); 136 137 /* 138 * Init interrupt command register for sending IPIs 139 * 140 * Delivery mode=fixed, per SDM: 141 * "Delivers the interrupt specified in the vector field to the target 142 * processor." 143 * 144 * Destination mode=physical i.e. specify target by its local APIC 145 * ID. This vCPU assumes that the halter vCPU has already started and 146 * set data->halter_apic_id. 147 */ 148 icr_val = (APIC_DEST_PHYSICAL | APIC_DM_FIXED | IPI_VECTOR); 149 icr2_val = SET_APIC_DEST_FIELD(data->halter_apic_id); 150 data->icr = icr_val; 151 data->icr2 = icr2_val; 152 153 last_wake_count = data->wake_count; 154 last_hlt_count = data->hlt_count; 155 last_ipis_rcvd_count = ipis_rcvd; 156 for (;;) { 157 /* 158 * Send IPI to halter vCPU. 159 * First IPI can be sent unconditionally because halter vCPU 160 * starts earlier. 161 */ 162 xapic_write_reg(APIC_ICR2, icr2_val); 163 xapic_write_reg(APIC_ICR, icr_val); 164 data->ipis_sent++; 165 166 /* 167 * Wait up to ~1 sec for halter to indicate that it has: 168 * 1. Received the IPI 169 * 2. Woken up from the halt 170 * 3. Gone back into halt 171 * Current CPUs typically run at 2.x Ghz which is ~2 172 * billion ticks per second. 173 */ 174 tsc_start = rdtsc(); 175 while (rdtsc() - tsc_start < 2000000000) { 176 if ((ipis_rcvd != last_ipis_rcvd_count) && 177 (data->wake_count != last_wake_count) && 178 (data->hlt_count != last_hlt_count)) 179 break; 180 } 181 182 GUEST_ASSERT((ipis_rcvd != last_ipis_rcvd_count) && 183 (data->wake_count != last_wake_count) && 184 (data->hlt_count != last_hlt_count)); 185 186 last_wake_count = data->wake_count; 187 last_hlt_count = data->hlt_count; 188 last_ipis_rcvd_count = ipis_rcvd; 189 } 190 } 191 192 static void *vcpu_thread(void *arg) 193 { 194 struct thread_params *params = (struct thread_params *)arg; 195 struct kvm_vcpu *vcpu = params->vcpu; 196 struct ucall uc; 197 int old; 198 int r; 199 200 r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old); 201 TEST_ASSERT(r == 0, 202 "pthread_setcanceltype failed on vcpu_id=%u with errno=%d", 203 vcpu->id, r); 204 205 fprintf(stderr, "vCPU thread running vCPU %u\n", vcpu->id); 206 vcpu_run(vcpu); 207 208 TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); 209 210 if (get_ucall(vcpu, &uc) == UCALL_ABORT) { 211 TEST_ASSERT(false, 212 "vCPU %u exited with error: %s.\n" 213 "Sending vCPU sent %lu IPIs to halting vCPU\n" 214 "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n" 215 "Halter TPR=%#x PPR=%#x LVR=%#x\n" 216 "Migrations attempted: %lu\n" 217 "Migrations completed: %lu", 218 vcpu->id, (const char *)uc.args[0], 219 params->data->ipis_sent, params->data->hlt_count, 220 params->data->wake_count, 221 *params->pipis_rcvd, params->data->halter_tpr, 222 params->data->halter_ppr, params->data->halter_lvr, 223 params->data->migrations_attempted, 224 params->data->migrations_completed); 225 } 226 227 return NULL; 228 } 229 230 static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu) 231 { 232 void *retval; 233 int r; 234 235 r = pthread_cancel(thread); 236 TEST_ASSERT(r == 0, 237 "pthread_cancel on vcpu_id=%d failed with errno=%d", 238 vcpu->id, r); 239 240 r = pthread_join(thread, &retval); 241 TEST_ASSERT(r == 0, 242 "pthread_join on vcpu_id=%d failed with errno=%d", 243 vcpu->id, r); 244 TEST_ASSERT(retval == PTHREAD_CANCELED, 245 "expected retval=%p, got %p", PTHREAD_CANCELED, 246 retval); 247 } 248 249 void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs, 250 uint64_t *pipis_rcvd) 251 { 252 long pages_not_moved; 253 unsigned long nodemask = 0; 254 unsigned long nodemasks[sizeof(nodemask) * 8]; 255 int nodes = 0; 256 time_t start_time, last_update, now; 257 time_t interval_secs = 1; 258 int i, r; 259 int from, to; 260 unsigned long bit; 261 uint64_t hlt_count; 262 uint64_t wake_count; 263 uint64_t ipis_sent; 264 265 fprintf(stderr, "Calling migrate_pages every %d microseconds\n", 266 delay_usecs); 267 268 /* Get set of first 64 numa nodes available */ 269 r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8, 270 0, MPOL_F_MEMS_ALLOWED); 271 TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno); 272 273 fprintf(stderr, "Numa nodes found amongst first %lu possible nodes " 274 "(each 1-bit indicates node is present): %#lx\n", 275 sizeof(nodemask) * 8, nodemask); 276 277 /* Init array of masks containing a single-bit in each, one for each 278 * available node. migrate_pages called below requires specifying nodes 279 * as bit masks. 280 */ 281 for (i = 0, bit = 1; i < sizeof(nodemask) * 8; i++, bit <<= 1) { 282 if (nodemask & bit) { 283 nodemasks[nodes] = nodemask & bit; 284 nodes++; 285 } 286 } 287 288 TEST_ASSERT(nodes > 1, 289 "Did not find at least 2 numa nodes. Can't do migration"); 290 291 fprintf(stderr, "Migrating amongst %d nodes found\n", nodes); 292 293 from = 0; 294 to = 1; 295 start_time = time(NULL); 296 last_update = start_time; 297 298 ipis_sent = data->ipis_sent; 299 hlt_count = data->hlt_count; 300 wake_count = data->wake_count; 301 302 while ((int)(time(NULL) - start_time) < run_secs) { 303 data->migrations_attempted++; 304 305 /* 306 * migrate_pages with PID=0 will migrate all pages of this 307 * process between the nodes specified as bitmasks. The page 308 * backing the APIC access address belongs to this process 309 * because it is allocated by KVM in the context of the 310 * KVM_CREATE_VCPU ioctl. If that assumption ever changes this 311 * test may break or give a false positive signal. 312 */ 313 pages_not_moved = migrate_pages(0, sizeof(nodemasks[from]), 314 &nodemasks[from], 315 &nodemasks[to]); 316 if (pages_not_moved < 0) 317 fprintf(stderr, 318 "migrate_pages failed, errno=%d\n", errno); 319 else if (pages_not_moved > 0) 320 fprintf(stderr, 321 "migrate_pages could not move %ld pages\n", 322 pages_not_moved); 323 else 324 data->migrations_completed++; 325 326 from = to; 327 to++; 328 if (to == nodes) 329 to = 0; 330 331 now = time(NULL); 332 if (((now - start_time) % interval_secs == 0) && 333 (now != last_update)) { 334 last_update = now; 335 fprintf(stderr, 336 "%lu seconds: Migrations attempted=%lu completed=%lu, " 337 "IPIs sent=%lu received=%lu, HLTs=%lu wakes=%lu\n", 338 now - start_time, data->migrations_attempted, 339 data->migrations_completed, 340 data->ipis_sent, *pipis_rcvd, 341 data->hlt_count, data->wake_count); 342 343 TEST_ASSERT(ipis_sent != data->ipis_sent && 344 hlt_count != data->hlt_count && 345 wake_count != data->wake_count, 346 "IPI, HLT and wake count have not increased " 347 "in the last %lu seconds. " 348 "HLTer is likely hung.", interval_secs); 349 350 ipis_sent = data->ipis_sent; 351 hlt_count = data->hlt_count; 352 wake_count = data->wake_count; 353 } 354 usleep(delay_usecs); 355 } 356 } 357 358 void get_cmdline_args(int argc, char *argv[], int *run_secs, 359 bool *migrate, int *delay_usecs) 360 { 361 for (;;) { 362 int opt = getopt(argc, argv, "s:d:m"); 363 364 if (opt == -1) 365 break; 366 switch (opt) { 367 case 's': 368 *run_secs = parse_size(optarg); 369 break; 370 case 'm': 371 *migrate = true; 372 break; 373 case 'd': 374 *delay_usecs = parse_size(optarg); 375 break; 376 default: 377 TEST_ASSERT(false, 378 "Usage: -s <runtime seconds>. Default is %d seconds.\n" 379 "-m adds calls to migrate_pages while vCPUs are running." 380 " Default is no migrations.\n" 381 "-d <delay microseconds> - delay between migrate_pages() calls." 382 " Default is %d microseconds.", 383 DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS); 384 } 385 } 386 } 387 388 int main(int argc, char *argv[]) 389 { 390 int r; 391 int wait_secs; 392 const int max_halter_wait = 10; 393 int run_secs = 0; 394 int delay_usecs = 0; 395 struct test_data_page *data; 396 vm_vaddr_t test_data_page_vaddr; 397 bool migrate = false; 398 pthread_t threads[2]; 399 struct thread_params params[2]; 400 struct kvm_vm *vm; 401 uint64_t *pipis_rcvd; 402 403 get_cmdline_args(argc, argv, &run_secs, &migrate, &delay_usecs); 404 if (run_secs <= 0) 405 run_secs = DEFAULT_RUN_SECS; 406 if (delay_usecs <= 0) 407 delay_usecs = DEFAULT_DELAY_USECS; 408 409 vm = vm_create_with_one_vcpu(¶ms[0].vcpu, halter_guest_code); 410 411 vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler); 412 413 virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); 414 415 params[1].vcpu = vm_vcpu_add(vm, 1, sender_guest_code); 416 417 test_data_page_vaddr = vm_vaddr_alloc_page(vm); 418 data = addr_gva2hva(vm, test_data_page_vaddr); 419 memset(data, 0, sizeof(*data)); 420 params[0].data = data; 421 params[1].data = data; 422 423 vcpu_args_set(params[0].vcpu, 1, test_data_page_vaddr); 424 vcpu_args_set(params[1].vcpu, 1, test_data_page_vaddr); 425 426 pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd); 427 params[0].pipis_rcvd = pipis_rcvd; 428 params[1].pipis_rcvd = pipis_rcvd; 429 430 /* Start halter vCPU thread and wait for it to execute first HLT. */ 431 r = pthread_create(&threads[0], NULL, vcpu_thread, ¶ms[0]); 432 TEST_ASSERT(r == 0, 433 "pthread_create halter failed errno=%d", errno); 434 fprintf(stderr, "Halter vCPU thread started\n"); 435 436 wait_secs = 0; 437 while ((wait_secs < max_halter_wait) && !data->hlt_count) { 438 sleep(1); 439 wait_secs++; 440 } 441 442 TEST_ASSERT(data->hlt_count, 443 "Halter vCPU did not execute first HLT within %d seconds", 444 max_halter_wait); 445 446 fprintf(stderr, 447 "Halter vCPU thread reported its APIC ID: %u after %d seconds.\n", 448 data->halter_apic_id, wait_secs); 449 450 r = pthread_create(&threads[1], NULL, vcpu_thread, ¶ms[1]); 451 TEST_ASSERT(r == 0, "pthread_create sender failed errno=%d", errno); 452 453 fprintf(stderr, 454 "IPI sender vCPU thread started. Letting vCPUs run for %d seconds.\n", 455 run_secs); 456 457 if (!migrate) 458 sleep(run_secs); 459 else 460 do_migrations(data, run_secs, delay_usecs, pipis_rcvd); 461 462 /* 463 * Cancel threads and wait for them to stop. 464 */ 465 cancel_join_vcpu_thread(threads[0], params[0].vcpu); 466 cancel_join_vcpu_thread(threads[1], params[1].vcpu); 467 468 fprintf(stderr, 469 "Test successful after running for %d seconds.\n" 470 "Sending vCPU sent %lu IPIs to halting vCPU\n" 471 "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n" 472 "Halter APIC ID=%#x\n" 473 "Sender ICR value=%#x ICR2 value=%#x\n" 474 "Halter TPR=%#x PPR=%#x LVR=%#x\n" 475 "Migrations attempted: %lu\n" 476 "Migrations completed: %lu\n", 477 run_secs, data->ipis_sent, 478 data->hlt_count, data->wake_count, *pipis_rcvd, 479 data->halter_apic_id, 480 data->icr, data->icr2, 481 data->halter_tpr, data->halter_ppr, data->halter_lvr, 482 data->migrations_attempted, data->migrations_completed); 483 484 kvm_vm_free(vm); 485 486 return 0; 487 } 488