1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * xapic_ipi_test
4 *
5 * Copyright (C) 2020, Google LLC.
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2.
8 *
9 * Test that when the APIC is in xAPIC mode, a vCPU can send an IPI to wake
10 * another vCPU that is halted when KVM's backing page for the APIC access
11 * address has been moved by mm.
12 *
13 * The test starts two vCPUs: one that sends IPIs and one that continually
14 * executes HLT. The sender checks that the halter has woken from the HLT and
15 * has reentered HLT before sending the next IPI. While the vCPUs are running,
16 * the host continually calls migrate_pages to move all of the process' pages
17 * amongst the available numa nodes on the machine.
18 *
 * Migration is a command line option. When it is requested on a non-NUMA
 * machine, the test exits with an error. The test is still useful on
 * non-NUMA machines for testing IPIs.
21 */
22 #include <getopt.h>
23 #include <pthread.h>
24 #include <inttypes.h>
25 #include <string.h>
26 #include <time.h>
27
28 #include "kvm_util.h"
29 #include "numaif.h"
30 #include "processor.h"
31 #include "test_util.h"
32 #include "vmx.h"
33
/* Default running time for the test, in seconds (overridden by -s) */
#define DEFAULT_RUN_SECS 3

/* Default delay between migrate_pages calls, in microseconds (overridden by -d) */
#define DEFAULT_DELAY_USECS 500000

/*
 * Vector for IPI from sender vCPU to halting vCPU.
 * Value is arbitrary and was chosen for the alternating bit pattern. Any
 * value should work.
 */
#define IPI_VECTOR 0xa5

/*
 * Incremented in the IPI handler. Provides evidence to the sender that the IPI
 * arrived at the destination. The host reads the same counter through the
 * host address that main() derives from this variable's guest address.
 */
static volatile uint64_t ipis_rcvd;
52
/*
 * Data struct shared between host main thread and vCPUs. main() allocates one
 * guest page for it, zeroes it, and hands its guest virtual address to both
 * vCPUs as their first argument.
 */
struct test_data_page {
	/* APIC ID of the halter vCPU, read by the halter from APIC_ID */
	uint32_t halter_apic_id;
	/* Incremented by the halter right before each halt */
	volatile uint64_t hlt_count;
	/* Incremented by the halter after each return from halt */
	volatile uint64_t wake_count;
	/* Incremented by the sender after each ICR write */
	uint64_t ipis_sent;
	/* Host-side counters maintained by do_migrations() */
	uint64_t migrations_attempted;
	uint64_t migrations_completed;
	/* ICR and ICR2 values used by the sender, recorded for the report */
	uint32_t icr;
	uint32_t icr2;
	/* Halter TPR and PPR, sampled before each halt for diagnostics */
	uint32_t halter_tpr;
	uint32_t halter_ppr;

	/*
	 * Record local version register as a cross-check that APIC access
	 * worked. Value should match what KVM reports (APIC_VERSION in
	 * arch/x86/kvm/lapic.c). If test is failing, check that values match
	 * to determine whether APIC access exits are working.
	 */
	uint32_t halter_lvr;
};
74
/* Arguments passed to vcpu_thread(); main() fills one instance per vCPU. */
struct thread_params {
	struct test_data_page *data; /* host address of the shared data page */
	struct kvm_vcpu *vcpu; /* vCPU that this thread runs */
	uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */
};
80
verify_apic_base_addr(void)81 void verify_apic_base_addr(void)
82 {
83 uint64_t msr = rdmsr(MSR_IA32_APICBASE);
84 uint64_t base = GET_APIC_BASE(msr);
85
86 GUEST_ASSERT(base == APIC_DEFAULT_GPA);
87 }
88
/*
 * Guest code for the halter vCPU.
 *
 * Verifies the APIC base, enables the xAPIC, publishes this vCPU's APIC ID
 * and local version register in the shared page, then halts forever, counting
 * every halt and every wake so the sender and the host can observe progress.
 *
 * @data: shared test data page (guest virtual address).
 */
static void halter_guest_code(struct test_data_page *data)
{
	verify_apic_base_addr();
	xapic_enable();

	data->halter_apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID));
	data->halter_lvr = xapic_read_reg(APIC_LVR);

	/*
	 * Loop forever HLTing and recording halts & wakes. Disable interrupts
	 * each time around to minimize window between signaling the pending
	 * halt to the sender vCPU and executing the halt. No need to disable on
	 * first run as this vCPU executes first and the host waits for it to
	 * signal going into first halt before starting the sender vCPU. Record
	 * TPR and PPR for diagnostic purposes in case the test fails.
	 */
	for (;;) {
		data->halter_tpr = xapic_read_reg(APIC_TASKPRI);
		data->halter_ppr = xapic_read_reg(APIC_PROCPRI);
		/* Signal the pending halt before actually halting. */
		data->hlt_count++;
		/* NOTE(review): presumably enables interrupts and executes HLT - confirm. */
		safe_halt();
		cli();
		data->wake_count++;
	}
}
114
115 /*
116 * Runs on halter vCPU when IPI arrives. Write an arbitrary non-zero value to
117 * enable diagnosing errant writes to the APIC access address backing page in
118 * case of test failure.
119 */
guest_ipi_handler(struct ex_regs * regs)120 static void guest_ipi_handler(struct ex_regs *regs)
121 {
122 ipis_rcvd++;
123 xapic_write_reg(APIC_EOI, 77);
124 }
125
sender_guest_code(struct test_data_page * data)126 static void sender_guest_code(struct test_data_page *data)
127 {
128 uint64_t last_wake_count;
129 uint64_t last_hlt_count;
130 uint64_t last_ipis_rcvd_count;
131 uint32_t icr_val;
132 uint32_t icr2_val;
133 uint64_t tsc_start;
134
135 verify_apic_base_addr();
136 xapic_enable();
137
138 /*
139 * Init interrupt command register for sending IPIs
140 *
141 * Delivery mode=fixed, per SDM:
142 * "Delivers the interrupt specified in the vector field to the target
143 * processor."
144 *
145 * Destination mode=physical i.e. specify target by its local APIC
146 * ID. This vCPU assumes that the halter vCPU has already started and
147 * set data->halter_apic_id.
148 */
149 icr_val = (APIC_DEST_PHYSICAL | APIC_DM_FIXED | IPI_VECTOR);
150 icr2_val = SET_APIC_DEST_FIELD(data->halter_apic_id);
151 data->icr = icr_val;
152 data->icr2 = icr2_val;
153
154 last_wake_count = data->wake_count;
155 last_hlt_count = data->hlt_count;
156 last_ipis_rcvd_count = ipis_rcvd;
157 for (;;) {
158 /*
159 * Send IPI to halter vCPU.
160 * First IPI can be sent unconditionally because halter vCPU
161 * starts earlier.
162 */
163 xapic_write_reg(APIC_ICR2, icr2_val);
164 xapic_write_reg(APIC_ICR, icr_val);
165 data->ipis_sent++;
166
167 /*
168 * Wait up to ~1 sec for halter to indicate that it has:
169 * 1. Received the IPI
170 * 2. Woken up from the halt
171 * 3. Gone back into halt
172 * Current CPUs typically run at 2.x Ghz which is ~2
173 * billion ticks per second.
174 */
175 tsc_start = rdtsc();
176 while (rdtsc() - tsc_start < 2000000000) {
177 if ((ipis_rcvd != last_ipis_rcvd_count) &&
178 (data->wake_count != last_wake_count) &&
179 (data->hlt_count != last_hlt_count))
180 break;
181 }
182
183 GUEST_ASSERT((ipis_rcvd != last_ipis_rcvd_count) &&
184 (data->wake_count != last_wake_count) &&
185 (data->hlt_count != last_hlt_count));
186
187 last_wake_count = data->wake_count;
188 last_hlt_count = data->hlt_count;
189 last_ipis_rcvd_count = ipis_rcvd;
190 }
191 }
192
vcpu_thread(void * arg)193 static void *vcpu_thread(void *arg)
194 {
195 struct thread_params *params = (struct thread_params *)arg;
196 struct kvm_vcpu *vcpu = params->vcpu;
197 struct ucall uc;
198 int old;
199 int r;
200
201 r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
202 TEST_ASSERT(r == 0,
203 "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
204 vcpu->id, r);
205
206 fprintf(stderr, "vCPU thread running vCPU %u\n", vcpu->id);
207 vcpu_run(vcpu);
208
209 TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
210
211 if (get_ucall(vcpu, &uc) == UCALL_ABORT) {
212 TEST_ASSERT(false,
213 "vCPU %u exited with error: %s.\n"
214 "Sending vCPU sent %lu IPIs to halting vCPU\n"
215 "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
216 "Halter TPR=%#x PPR=%#x LVR=%#x\n"
217 "Migrations attempted: %lu\n"
218 "Migrations completed: %lu",
219 vcpu->id, (const char *)uc.args[0],
220 params->data->ipis_sent, params->data->hlt_count,
221 params->data->wake_count,
222 *params->pipis_rcvd, params->data->halter_tpr,
223 params->data->halter_ppr, params->data->halter_lvr,
224 params->data->migrations_attempted,
225 params->data->migrations_completed);
226 }
227
228 return NULL;
229 }
230
cancel_join_vcpu_thread(pthread_t thread,struct kvm_vcpu * vcpu)231 static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
232 {
233 void *retval;
234 int r;
235
236 r = pthread_cancel(thread);
237 TEST_ASSERT(r == 0,
238 "pthread_cancel on vcpu_id=%d failed with errno=%d",
239 vcpu->id, r);
240
241 r = pthread_join(thread, &retval);
242 TEST_ASSERT(r == 0,
243 "pthread_join on vcpu_id=%d failed with errno=%d",
244 vcpu->id, r);
245 TEST_ASSERT(retval == PTHREAD_CANCELED,
246 "expected retval=%p, got %p", PTHREAD_CANCELED,
247 retval);
248 }
249
do_migrations(struct test_data_page * data,int run_secs,int delay_usecs,uint64_t * pipis_rcvd)250 void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
251 uint64_t *pipis_rcvd)
252 {
253 long pages_not_moved;
254 unsigned long nodemask = 0;
255 unsigned long nodemasks[sizeof(nodemask) * 8];
256 int nodes = 0;
257 time_t start_time, last_update, now;
258 time_t interval_secs = 1;
259 int i;
260 int from, to;
261 unsigned long bit;
262 uint64_t hlt_count;
263 uint64_t wake_count;
264 uint64_t ipis_sent;
265
266 fprintf(stderr, "Calling migrate_pages every %d microseconds\n",
267 delay_usecs);
268
269 /* Get set of first 64 numa nodes available */
270 kvm_get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8,
271 0, MPOL_F_MEMS_ALLOWED);
272
273 fprintf(stderr, "Numa nodes found amongst first %lu possible nodes "
274 "(each 1-bit indicates node is present): %#lx\n",
275 sizeof(nodemask) * 8, nodemask);
276
277 /* Init array of masks containing a single-bit in each, one for each
278 * available node. migrate_pages called below requires specifying nodes
279 * as bit masks.
280 */
281 for (i = 0, bit = 1; i < sizeof(nodemask) * 8; i++, bit <<= 1) {
282 if (nodemask & bit) {
283 nodemasks[nodes] = nodemask & bit;
284 nodes++;
285 }
286 }
287
288 TEST_ASSERT(nodes > 1,
289 "Did not find at least 2 numa nodes. Can't do migration");
290
291 fprintf(stderr, "Migrating amongst %d nodes found\n", nodes);
292
293 from = 0;
294 to = 1;
295 start_time = time(NULL);
296 last_update = start_time;
297
298 ipis_sent = data->ipis_sent;
299 hlt_count = data->hlt_count;
300 wake_count = data->wake_count;
301
302 while ((int)(time(NULL) - start_time) < run_secs) {
303 data->migrations_attempted++;
304
305 /*
306 * migrate_pages with PID=0 will migrate all pages of this
307 * process between the nodes specified as bitmasks. The page
308 * backing the APIC access address belongs to this process
309 * because it is allocated by KVM in the context of the
310 * KVM_CREATE_VCPU ioctl. If that assumption ever changes this
311 * test may break or give a false positive signal.
312 */
313 pages_not_moved = migrate_pages(0, sizeof(nodemasks[from]),
314 &nodemasks[from],
315 &nodemasks[to]);
316 if (pages_not_moved < 0)
317 fprintf(stderr,
318 "migrate_pages failed, errno=%d\n", errno);
319 else if (pages_not_moved > 0)
320 fprintf(stderr,
321 "migrate_pages could not move %ld pages\n",
322 pages_not_moved);
323 else
324 data->migrations_completed++;
325
326 from = to;
327 to++;
328 if (to == nodes)
329 to = 0;
330
331 now = time(NULL);
332 if (((now - start_time) % interval_secs == 0) &&
333 (now != last_update)) {
334 last_update = now;
335 fprintf(stderr,
336 "%lu seconds: Migrations attempted=%lu completed=%lu, "
337 "IPIs sent=%lu received=%lu, HLTs=%lu wakes=%lu\n",
338 now - start_time, data->migrations_attempted,
339 data->migrations_completed,
340 data->ipis_sent, *pipis_rcvd,
341 data->hlt_count, data->wake_count);
342
343 TEST_ASSERT(ipis_sent != data->ipis_sent &&
344 hlt_count != data->hlt_count &&
345 wake_count != data->wake_count,
346 "IPI, HLT and wake count have not increased "
347 "in the last %lu seconds. "
348 "HLTer is likely hung.", interval_secs);
349
350 ipis_sent = data->ipis_sent;
351 hlt_count = data->hlt_count;
352 wake_count = data->wake_count;
353 }
354 usleep(delay_usecs);
355 }
356 }
357
get_cmdline_args(int argc,char * argv[],int * run_secs,bool * migrate,int * delay_usecs)358 void get_cmdline_args(int argc, char *argv[], int *run_secs,
359 bool *migrate, int *delay_usecs)
360 {
361 for (;;) {
362 int opt = getopt(argc, argv, "s:d:m");
363
364 if (opt == -1)
365 break;
366 switch (opt) {
367 case 's':
368 *run_secs = parse_size(optarg);
369 break;
370 case 'm':
371 *migrate = true;
372 break;
373 case 'd':
374 *delay_usecs = parse_size(optarg);
375 break;
376 default:
377 TEST_ASSERT(false,
378 "Usage: -s <runtime seconds>. Default is %d seconds.\n"
379 "-m adds calls to migrate_pages while vCPUs are running."
380 " Default is no migrations.\n"
381 "-d <delay microseconds> - delay between migrate_pages() calls."
382 " Default is %d microseconds.",
383 DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS);
384 }
385 }
386 }
387
main(int argc,char * argv[])388 int main(int argc, char *argv[])
389 {
390 int r;
391 int wait_secs;
392 const int max_halter_wait = 10;
393 int run_secs = 0;
394 int delay_usecs = 0;
395 struct test_data_page *data;
396 vm_vaddr_t test_data_page_vaddr;
397 bool migrate = false;
398 pthread_t threads[2];
399 struct thread_params params[2];
400 struct kvm_vm *vm;
401 uint64_t *pipis_rcvd;
402
403 get_cmdline_args(argc, argv, &run_secs, &migrate, &delay_usecs);
404 if (run_secs <= 0)
405 run_secs = DEFAULT_RUN_SECS;
406 if (delay_usecs <= 0)
407 delay_usecs = DEFAULT_DELAY_USECS;
408
409 vm = vm_create_with_one_vcpu(¶ms[0].vcpu, halter_guest_code);
410
411 vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
412
413 virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
414
415 params[1].vcpu = vm_vcpu_add(vm, 1, sender_guest_code);
416
417 test_data_page_vaddr = vm_vaddr_alloc_page(vm);
418 data = addr_gva2hva(vm, test_data_page_vaddr);
419 memset(data, 0, sizeof(*data));
420 params[0].data = data;
421 params[1].data = data;
422
423 vcpu_args_set(params[0].vcpu, 1, test_data_page_vaddr);
424 vcpu_args_set(params[1].vcpu, 1, test_data_page_vaddr);
425
426 pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd);
427 params[0].pipis_rcvd = pipis_rcvd;
428 params[1].pipis_rcvd = pipis_rcvd;
429
430 /* Start halter vCPU thread and wait for it to execute first HLT. */
431 r = pthread_create(&threads[0], NULL, vcpu_thread, ¶ms[0]);
432 TEST_ASSERT(r == 0,
433 "pthread_create halter failed errno=%d", errno);
434 fprintf(stderr, "Halter vCPU thread started\n");
435
436 wait_secs = 0;
437 while ((wait_secs < max_halter_wait) && !data->hlt_count) {
438 sleep(1);
439 wait_secs++;
440 }
441
442 TEST_ASSERT(data->hlt_count,
443 "Halter vCPU did not execute first HLT within %d seconds",
444 max_halter_wait);
445
446 fprintf(stderr,
447 "Halter vCPU thread reported its APIC ID: %u after %d seconds.\n",
448 data->halter_apic_id, wait_secs);
449
450 r = pthread_create(&threads[1], NULL, vcpu_thread, ¶ms[1]);
451 TEST_ASSERT(r == 0, "pthread_create sender failed errno=%d", errno);
452
453 fprintf(stderr,
454 "IPI sender vCPU thread started. Letting vCPUs run for %d seconds.\n",
455 run_secs);
456
457 if (!migrate)
458 sleep(run_secs);
459 else
460 do_migrations(data, run_secs, delay_usecs, pipis_rcvd);
461
462 /*
463 * Cancel threads and wait for them to stop.
464 */
465 cancel_join_vcpu_thread(threads[0], params[0].vcpu);
466 cancel_join_vcpu_thread(threads[1], params[1].vcpu);
467
468 /*
469 * If the host support Idle HLT, i.e. KVM *might* be using Idle HLT,
470 * then the number of HLT exits may be less than the number of HLTs
471 * that were executed, as Idle HLT elides the exit if the vCPU has an
472 * unmasked, pending IRQ (or NMI).
473 */
474 if (this_cpu_has(X86_FEATURE_IDLE_HLT))
475 TEST_ASSERT(data->hlt_count >= vcpu_get_stat(params[0].vcpu, halt_exits),
476 "HLT insns = %lu, HLT exits = %lu",
477 data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits));
478 else
479 TEST_ASSERT_EQ(data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits));
480
481 fprintf(stderr,
482 "Test successful after running for %d seconds.\n"
483 "Sending vCPU sent %lu IPIs to halting vCPU\n"
484 "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
485 "Halter APIC ID=%#x\n"
486 "Sender ICR value=%#x ICR2 value=%#x\n"
487 "Halter TPR=%#x PPR=%#x LVR=%#x\n"
488 "Migrations attempted: %lu\n"
489 "Migrations completed: %lu\n",
490 run_secs, data->ipis_sent,
491 data->hlt_count, data->wake_count, *pipis_rcvd,
492 data->halter_apic_id,
493 data->icr, data->icr2,
494 data->halter_tpr, data->halter_ppr, data->halter_lvr,
495 data->migrations_attempted, data->migrations_completed);
496
497 kvm_vm_free(vm);
498
499 return 0;
500 }
501