1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * A memslot-related performance benchmark. 4 * 5 * Copyright (C) 2021 Oracle and/or its affiliates. 6 * 7 * Basic guest setup / host vCPU thread code lifted from set_memory_region_test. 8 */ 9 #include <pthread.h> 10 #include <sched.h> 11 #include <semaphore.h> 12 #include <stdatomic.h> 13 #include <stdbool.h> 14 #include <stdint.h> 15 #include <stdio.h> 16 #include <stdlib.h> 17 #include <string.h> 18 #include <time.h> 19 #include <unistd.h> 20 21 #include <linux/compiler.h> 22 #include <linux/sizes.h> 23 24 #include <test_util.h> 25 #include <kvm_syscalls.h> 26 #include <kvm_util.h> 27 #include <processor.h> 28 #include <ucall_common.h> 29 30 #define MEM_EXTRA_SIZE SZ_64K 31 32 #define MEM_SIZE (SZ_512M + MEM_EXTRA_SIZE) 33 #define MEM_GPA SZ_256M 34 #define MEM_AUX_GPA MEM_GPA 35 #define MEM_SYNC_GPA MEM_AUX_GPA 36 #define MEM_TEST_GPA (MEM_AUX_GPA + MEM_EXTRA_SIZE) 37 #define MEM_TEST_SIZE (MEM_SIZE - MEM_EXTRA_SIZE) 38 39 /* 40 * 32 MiB is max size that gets well over 100 iterations on 509 slots. 41 * Considering that each slot needs to have at least one page up to 42 * 8194 slots in use can then be tested (although with slightly 43 * limited resolution). 44 */ 45 #define MEM_SIZE_MAP (SZ_32M + MEM_EXTRA_SIZE) 46 #define MEM_TEST_MAP_SIZE (MEM_SIZE_MAP - MEM_EXTRA_SIZE) 47 48 /* 49 * 128 MiB is min size that fills 32k slots with at least one page in each 50 * while at the same time gets 100+ iterations in such test 51 * 52 * 2 MiB chunk size like a typical huge page 53 */ 54 #define MEM_TEST_UNMAP_SIZE SZ_128M 55 #define MEM_TEST_UNMAP_CHUNK_SIZE SZ_2M 56 57 /* 58 * For the move active test the middle of the test area is placed on 59 * a memslot boundary: half lies in the memslot being moved, half in 60 * other memslot(s). 61 * 62 * We have different number of memory slots, excluding the reserved 63 * memory slot 0, on various architectures and configurations. The 64 * memory size in this test is calculated by picking the maximal 65 * last memory slot's memory size, with alignment to the largest 66 * supported page size (64KB). In this way, the selected memory 67 * size for this test is compatible with test_memslot_move_prepare(). 68 * 69 * architecture slots memory-per-slot memory-on-last-slot 70 * -------------------------------------------------------------- 71 * x86-4KB 32763 16KB 160KB 72 * arm64-4KB 32766 16KB 112KB 73 * arm64-16KB 32766 16KB 112KB 74 * arm64-64KB 8192 64KB 128KB 75 */ 76 #define MEM_TEST_MOVE_SIZE (3 * SZ_64K) 77 #define MEM_TEST_MOVE_GPA_DEST (MEM_GPA + MEM_SIZE) 78 static_assert(MEM_TEST_MOVE_SIZE <= MEM_TEST_SIZE, 79 "invalid move test region size"); 80 81 #define MEM_TEST_VAL_1 0x1122334455667788 82 #define MEM_TEST_VAL_2 0x99AABBCCDDEEFF00 83 84 struct vm_data { 85 struct kvm_vm *vm; 86 struct kvm_vcpu *vcpu; 87 pthread_t vcpu_thread; 88 u32 nslots; 89 u64 npages; 90 u64 pages_per_slot; 91 void **hva_slots; 92 bool mmio_ok; 93 u64 mmio_gpa_min; 94 u64 mmio_gpa_max; 95 }; 96 97 struct sync_area { 98 u32 guest_page_size; 99 atomic_bool start_flag; 100 atomic_bool exit_flag; 101 atomic_bool sync_flag; 102 void *move_area_ptr; 103 }; 104 105 /* 106 * Technically, we need also for the atomic bool to be address-free, which 107 * is recommended, but not strictly required, by C11 for lockless 108 * implementations. 109 * However, in practice both GCC and Clang fulfill this requirement on 110 * all KVM-supported platforms. 111 */ 112 static_assert(ATOMIC_BOOL_LOCK_FREE == 2, "atomic bool is not lockless"); 113 114 static int wait_timeout = 10; 115 static sem_t vcpu_ready; 116 117 static bool map_unmap_verify; 118 #ifdef __x86_64__ 119 static bool disable_slot_zap_quirk; 120 #endif 121 122 static bool verbose; 123 #define pr_info_v(...) \ 124 do { \ 125 if (verbose) \ 126 pr_info(__VA_ARGS__); \ 127 } while (0) 128 129 static void check_mmio_access(struct vm_data *data, struct kvm_run *run) 130 { 131 TEST_ASSERT(data->mmio_ok, "Unexpected mmio exit"); 132 TEST_ASSERT(run->mmio.is_write, "Unexpected mmio read"); 133 TEST_ASSERT(run->mmio.len == 8, 134 "Unexpected exit mmio size = %u", run->mmio.len); 135 TEST_ASSERT(run->mmio.phys_addr >= data->mmio_gpa_min && 136 run->mmio.phys_addr <= data->mmio_gpa_max, 137 "Unexpected exit mmio address = 0x%llx", 138 run->mmio.phys_addr); 139 } 140 141 static void *vcpu_worker(void *__data) 142 { 143 struct vm_data *data = __data; 144 struct kvm_vcpu *vcpu = data->vcpu; 145 struct kvm_run *run = vcpu->run; 146 struct ucall uc; 147 148 while (1) { 149 vcpu_run(vcpu); 150 151 switch (get_ucall(vcpu, &uc)) { 152 case UCALL_SYNC: 153 TEST_ASSERT(uc.args[1] == 0, 154 "Unexpected sync ucall, got %lx", 155 (ulong)uc.args[1]); 156 sem_post(&vcpu_ready); 157 continue; 158 case UCALL_NONE: 159 if (run->exit_reason == KVM_EXIT_MMIO) 160 check_mmio_access(data, run); 161 else 162 goto done; 163 break; 164 case UCALL_ABORT: 165 REPORT_GUEST_ASSERT(uc); 166 break; 167 case UCALL_DONE: 168 goto done; 169 default: 170 TEST_FAIL("Unknown ucall %lu", uc.cmd); 171 } 172 } 173 174 done: 175 return NULL; 176 } 177 178 static void wait_for_vcpu(void) 179 { 180 struct timespec ts; 181 182 TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts), 183 "clock_gettime() failed: %d", errno); 184 185 ts.tv_sec += 2; 186 TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts), 187 "sem_timedwait() failed: %d", errno); 188 } 189 190 static void *vm_gpa2hva(struct vm_data *data, gpa_t gpa, u64 *rempages) 191 { 192 gpa_t gpage, pgoffs; 193 u32 slot, slotoffs; 194 void *base; 195 u32 guest_page_size = data->vm->page_size; 196 197 TEST_ASSERT(gpa >= MEM_GPA, "Too low gpa to translate"); 198 TEST_ASSERT(gpa < MEM_GPA + data->npages * guest_page_size, 199 "Too high gpa to translate"); 200 gpa -= MEM_GPA; 201 202 gpage = gpa / guest_page_size; 203 pgoffs = gpa % guest_page_size; 204 slot = min(gpage / data->pages_per_slot, (u64)data->nslots - 1); 205 slotoffs = gpage - (slot * data->pages_per_slot); 206 207 if (rempages) { 208 u64 slotpages; 209 210 if (slot == data->nslots - 1) 211 slotpages = data->npages - slot * data->pages_per_slot; 212 else 213 slotpages = data->pages_per_slot; 214 215 TEST_ASSERT(!pgoffs, 216 "Asking for remaining pages in slot but gpa not page aligned"); 217 *rempages = slotpages - slotoffs; 218 } 219 220 base = data->hva_slots[slot]; 221 return (u8 *)base + slotoffs * guest_page_size + pgoffs; 222 } 223 224 static u64 vm_slot2gpa(struct vm_data *data, u32 slot) 225 { 226 u32 guest_page_size = data->vm->page_size; 227 228 TEST_ASSERT(slot < data->nslots, "Too high slot number"); 229 230 return MEM_GPA + slot * data->pages_per_slot * guest_page_size; 231 } 232 233 static struct vm_data *alloc_vm(void) 234 { 235 struct vm_data *data; 236 237 data = malloc(sizeof(*data)); 238 TEST_ASSERT(data, "malloc(vmdata) failed"); 239 240 data->vm = NULL; 241 data->vcpu = NULL; 242 data->hva_slots = NULL; 243 244 return data; 245 } 246 247 static bool check_slot_pages(u32 host_page_size, u32 guest_page_size, 248 u64 pages_per_slot, u64 rempages) 249 { 250 if (!pages_per_slot) 251 return false; 252 253 if ((pages_per_slot * guest_page_size) % host_page_size) 254 return false; 255 256 if ((rempages * guest_page_size) % host_page_size) 257 return false; 258 259 return true; 260 } 261 262 263 static u64 get_max_slots(struct vm_data *data, u32 host_page_size) 264 { 265 u32 guest_page_size = data->vm->page_size; 266 u64 mempages, pages_per_slot, rempages; 267 u64 slots; 268 269 mempages = data->npages; 270 slots = data->nslots; 271 while (--slots > 1) { 272 pages_per_slot = mempages / slots; 273 if (!pages_per_slot) 274 continue; 275 276 rempages = mempages % pages_per_slot; 277 if (check_slot_pages(host_page_size, guest_page_size, 278 pages_per_slot, rempages)) 279 return slots + 1; /* slot 0 is reserved */ 280 } 281 282 return 0; 283 } 284 285 static bool prepare_vm(struct vm_data *data, int nslots, u64 *maxslots, 286 void *guest_code, u64 mem_size, 287 struct timespec *slot_runtime) 288 { 289 u64 mempages, rempages; 290 u64 guest_addr; 291 u32 slot, host_page_size, guest_page_size; 292 struct timespec tstart; 293 struct sync_area *sync; 294 295 host_page_size = getpagesize(); 296 guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size; 297 mempages = mem_size / guest_page_size; 298 299 data->vm = __vm_create_with_one_vcpu(&data->vcpu, mempages, guest_code); 300 TEST_ASSERT(data->vm->page_size == guest_page_size, "Invalid VM page size"); 301 302 data->npages = mempages; 303 TEST_ASSERT(data->npages > 1, "Can't test without any memory"); 304 data->nslots = nslots; 305 data->pages_per_slot = data->npages / data->nslots; 306 rempages = data->npages % data->nslots; 307 if (!check_slot_pages(host_page_size, guest_page_size, 308 data->pages_per_slot, rempages)) { 309 *maxslots = get_max_slots(data, host_page_size); 310 return false; 311 } 312 313 data->hva_slots = malloc(sizeof(*data->hva_slots) * data->nslots); 314 TEST_ASSERT(data->hva_slots, "malloc() fail"); 315 316 pr_info_v("Adding slots 1..%i, each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n", 317 data->nslots, data->pages_per_slot, rempages); 318 319 clock_gettime(CLOCK_MONOTONIC, &tstart); 320 for (slot = 1, guest_addr = MEM_GPA; slot <= data->nslots; slot++) { 321 u64 npages; 322 323 npages = data->pages_per_slot; 324 if (slot == data->nslots) 325 npages += rempages; 326 327 vm_userspace_mem_region_add(data->vm, VM_MEM_SRC_ANONYMOUS, 328 guest_addr, slot, npages, 329 0); 330 guest_addr += npages * guest_page_size; 331 } 332 *slot_runtime = timespec_elapsed(tstart); 333 334 for (slot = 1, guest_addr = MEM_GPA; slot <= data->nslots; slot++) { 335 u64 npages; 336 gpa_t gpa; 337 338 npages = data->pages_per_slot; 339 if (slot == data->nslots) 340 npages += rempages; 341 342 gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr, slot); 343 TEST_ASSERT(gpa == guest_addr, 344 "vm_phy_pages_alloc() failed"); 345 346 data->hva_slots[slot - 1] = addr_gpa2hva(data->vm, guest_addr); 347 memset(data->hva_slots[slot - 1], 0, npages * guest_page_size); 348 349 guest_addr += npages * guest_page_size; 350 } 351 352 virt_map(data->vm, MEM_GPA, MEM_GPA, data->npages); 353 354 sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL); 355 sync->guest_page_size = data->vm->page_size; 356 atomic_init(&sync->start_flag, false); 357 atomic_init(&sync->exit_flag, false); 358 atomic_init(&sync->sync_flag, false); 359 360 data->mmio_ok = false; 361 362 return true; 363 } 364 365 static void launch_vm(struct vm_data *data) 366 { 367 pr_info_v("Launching the test VM\n"); 368 369 pthread_create(&data->vcpu_thread, NULL, vcpu_worker, data); 370 371 /* Ensure the guest thread is spun up. */ 372 wait_for_vcpu(); 373 } 374 375 static void free_vm(struct vm_data *data) 376 { 377 kvm_vm_free(data->vm); 378 free(data->hva_slots); 379 free(data); 380 } 381 382 static void wait_guest_exit(struct vm_data *data) 383 { 384 pthread_join(data->vcpu_thread, NULL); 385 } 386 387 static void let_guest_run(struct sync_area *sync) 388 { 389 atomic_store_explicit(&sync->start_flag, true, memory_order_release); 390 } 391 392 static void guest_spin_until_start(void) 393 { 394 struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; 395 396 while (!atomic_load_explicit(&sync->start_flag, memory_order_acquire)) 397 ; 398 } 399 400 static void make_guest_exit(struct sync_area *sync) 401 { 402 atomic_store_explicit(&sync->exit_flag, true, memory_order_release); 403 } 404 405 static bool _guest_should_exit(void) 406 { 407 struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; 408 409 return atomic_load_explicit(&sync->exit_flag, memory_order_acquire); 410 } 411 412 #define guest_should_exit() unlikely(_guest_should_exit()) 413 414 /* 415 * noinline so we can easily see how much time the host spends waiting 416 * for the guest. 417 * For the same reason use alarm() instead of polling clock_gettime() 418 * to implement a wait timeout. 419 */ 420 static noinline void host_perform_sync(struct sync_area *sync) 421 { 422 alarm(wait_timeout); 423 424 atomic_store_explicit(&sync->sync_flag, true, memory_order_release); 425 while (atomic_load_explicit(&sync->sync_flag, memory_order_acquire)) 426 ; 427 428 alarm(0); 429 } 430 431 static bool guest_perform_sync(void) 432 { 433 struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; 434 bool expected; 435 436 do { 437 if (guest_should_exit()) 438 return false; 439 440 expected = true; 441 } while (!atomic_compare_exchange_weak_explicit(&sync->sync_flag, 442 &expected, false, 443 memory_order_acq_rel, 444 memory_order_relaxed)); 445 446 return true; 447 } 448 449 static void guest_code_test_memslot_move(void) 450 { 451 struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; 452 u32 page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size); 453 uintptr_t base = (typeof(base))READ_ONCE(sync->move_area_ptr); 454 455 GUEST_SYNC(0); 456 457 guest_spin_until_start(); 458 459 while (!guest_should_exit()) { 460 uintptr_t ptr; 461 462 for (ptr = base; ptr < base + MEM_TEST_MOVE_SIZE; 463 ptr += page_size) 464 *(u64 *)ptr = MEM_TEST_VAL_1; 465 466 /* 467 * No host sync here since the MMIO exits are so expensive 468 * that the host would spend most of its time waiting for 469 * the guest and so instead of measuring memslot move 470 * performance we would measure the performance and 471 * likelihood of MMIO exits 472 */ 473 } 474 475 GUEST_DONE(); 476 } 477 478 static void guest_code_test_memslot_map(void) 479 { 480 struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; 481 u32 page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size); 482 483 GUEST_SYNC(0); 484 485 guest_spin_until_start(); 486 487 while (1) { 488 uintptr_t ptr; 489 490 for (ptr = MEM_TEST_GPA; 491 ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2; 492 ptr += page_size) 493 *(u64 *)ptr = MEM_TEST_VAL_1; 494 495 if (!guest_perform_sync()) 496 break; 497 498 for (ptr = MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2; 499 ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE; 500 ptr += page_size) 501 *(u64 *)ptr = MEM_TEST_VAL_2; 502 503 if (!guest_perform_sync()) 504 break; 505 } 506 507 GUEST_DONE(); 508 } 509 510 static void guest_code_test_memslot_unmap(void) 511 { 512 struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; 513 514 GUEST_SYNC(0); 515 516 guest_spin_until_start(); 517 518 while (1) { 519 uintptr_t ptr = MEM_TEST_GPA; 520 521 /* 522 * We can afford to access (map) just a small number of pages 523 * per host sync as otherwise the host will spend 524 * a significant amount of its time waiting for the guest 525 * (instead of doing unmap operations), so this will 526 * effectively turn this test into a map performance test. 527 * 528 * Just access a single page to be on the safe side. 529 */ 530 *(u64 *)ptr = MEM_TEST_VAL_1; 531 532 if (!guest_perform_sync()) 533 break; 534 535 ptr += MEM_TEST_UNMAP_SIZE / 2; 536 *(u64 *)ptr = MEM_TEST_VAL_2; 537 538 if (!guest_perform_sync()) 539 break; 540 } 541 542 GUEST_DONE(); 543 } 544 545 static void guest_code_test_memslot_rw(void) 546 { 547 struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; 548 u32 page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size); 549 550 GUEST_SYNC(0); 551 552 guest_spin_until_start(); 553 554 while (1) { 555 uintptr_t ptr; 556 557 for (ptr = MEM_TEST_GPA; 558 ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size) 559 *(u64 *)ptr = MEM_TEST_VAL_1; 560 561 if (!guest_perform_sync()) 562 break; 563 564 for (ptr = MEM_TEST_GPA + page_size / 2; 565 ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size) { 566 u64 val = *(u64 *)ptr; 567 568 GUEST_ASSERT_EQ(val, MEM_TEST_VAL_2); 569 *(u64 *)ptr = 0; 570 } 571 572 if (!guest_perform_sync()) 573 break; 574 } 575 576 GUEST_DONE(); 577 } 578 579 static bool test_memslot_move_prepare(struct vm_data *data, 580 struct sync_area *sync, 581 u64 *maxslots, bool isactive) 582 { 583 u32 guest_page_size = data->vm->page_size; 584 u64 movesrcgpa, movetestgpa; 585 586 #ifdef __x86_64__ 587 if (disable_slot_zap_quirk) 588 vm_enable_cap(data->vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL); 589 #endif 590 591 movesrcgpa = vm_slot2gpa(data, data->nslots - 1); 592 593 if (isactive) { 594 u64 lastpages; 595 596 vm_gpa2hva(data, movesrcgpa, &lastpages); 597 if (lastpages * guest_page_size < MEM_TEST_MOVE_SIZE / 2) { 598 *maxslots = 0; 599 return false; 600 } 601 } 602 603 movetestgpa = movesrcgpa - (MEM_TEST_MOVE_SIZE / (isactive ? 2 : 1)); 604 sync->move_area_ptr = (void *)movetestgpa; 605 606 if (isactive) { 607 data->mmio_ok = true; 608 data->mmio_gpa_min = movesrcgpa; 609 data->mmio_gpa_max = movesrcgpa + MEM_TEST_MOVE_SIZE / 2 - 1; 610 } 611 612 return true; 613 } 614 615 static bool test_memslot_move_prepare_active(struct vm_data *data, 616 struct sync_area *sync, 617 u64 *maxslots) 618 { 619 return test_memslot_move_prepare(data, sync, maxslots, true); 620 } 621 622 static bool test_memslot_move_prepare_inactive(struct vm_data *data, 623 struct sync_area *sync, 624 u64 *maxslots) 625 { 626 return test_memslot_move_prepare(data, sync, maxslots, false); 627 } 628 629 static void test_memslot_move_loop(struct vm_data *data, struct sync_area *sync) 630 { 631 u64 movesrcgpa; 632 633 movesrcgpa = vm_slot2gpa(data, data->nslots - 1); 634 vm_mem_region_move(data->vm, data->nslots - 1 + 1, 635 MEM_TEST_MOVE_GPA_DEST); 636 vm_mem_region_move(data->vm, data->nslots - 1 + 1, movesrcgpa); 637 } 638 639 static void test_memslot_do_unmap(struct vm_data *data, 640 u64 offsp, u64 count) 641 { 642 gpa_t gpa, ctr; 643 u32 guest_page_size = data->vm->page_size; 644 645 for (gpa = MEM_TEST_GPA + offsp * guest_page_size, ctr = 0; ctr < count; ) { 646 u64 npages; 647 void *hva; 648 int ret; 649 650 hva = vm_gpa2hva(data, gpa, &npages); 651 TEST_ASSERT(npages, "Empty memory slot at gptr 0x%"PRIx64, gpa); 652 npages = min(npages, count - ctr); 653 ret = madvise(hva, npages * guest_page_size, MADV_DONTNEED); 654 TEST_ASSERT(!ret, 655 "madvise(%p, MADV_DONTNEED) on VM memory should not fail for gptr 0x%"PRIx64, 656 hva, gpa); 657 ctr += npages; 658 gpa += npages * guest_page_size; 659 } 660 TEST_ASSERT(ctr == count, 661 "madvise(MADV_DONTNEED) should exactly cover all of the requested area"); 662 } 663 664 static void test_memslot_map_unmap_check(struct vm_data *data, 665 u64 offsp, u64 valexp) 666 { 667 gpa_t gpa; 668 u64 *val; 669 u32 guest_page_size = data->vm->page_size; 670 671 if (!map_unmap_verify) 672 return; 673 674 gpa = MEM_TEST_GPA + offsp * guest_page_size; 675 val = (typeof(val))vm_gpa2hva(data, gpa, NULL); 676 TEST_ASSERT(*val == valexp, 677 "Guest written values should read back correctly before unmap (%"PRIu64" vs %"PRIu64" @ %"PRIx64")", 678 *val, valexp, gpa); 679 *val = 0; 680 } 681 682 static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync) 683 { 684 u32 guest_page_size = data->vm->page_size; 685 u64 guest_pages = MEM_TEST_MAP_SIZE / guest_page_size; 686 687 /* 688 * Unmap the second half of the test area while guest writes to (maps) 689 * the first half. 690 */ 691 test_memslot_do_unmap(data, guest_pages / 2, guest_pages / 2); 692 693 /* 694 * Wait for the guest to finish writing the first half of the test 695 * area, verify the written value on the first and the last page of 696 * this area and then unmap it. 697 * Meanwhile, the guest is writing to (mapping) the second half of 698 * the test area. 699 */ 700 host_perform_sync(sync); 701 test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1); 702 test_memslot_map_unmap_check(data, guest_pages / 2 - 1, MEM_TEST_VAL_1); 703 test_memslot_do_unmap(data, 0, guest_pages / 2); 704 705 706 /* 707 * Wait for the guest to finish writing the second half of the test 708 * area and verify the written value on the first and the last page 709 * of this area. 710 * The area will be unmapped at the beginning of the next loop 711 * iteration. 712 * Meanwhile, the guest is writing to (mapping) the first half of 713 * the test area. 714 */ 715 host_perform_sync(sync); 716 test_memslot_map_unmap_check(data, guest_pages / 2, MEM_TEST_VAL_2); 717 test_memslot_map_unmap_check(data, guest_pages - 1, MEM_TEST_VAL_2); 718 } 719 720 static void test_memslot_unmap_loop_common(struct vm_data *data, 721 struct sync_area *sync, 722 u64 chunk) 723 { 724 u32 guest_page_size = data->vm->page_size; 725 u64 guest_pages = MEM_TEST_UNMAP_SIZE / guest_page_size; 726 u64 ctr; 727 728 /* 729 * Wait for the guest to finish mapping page(s) in the first half 730 * of the test area, verify the written value and then perform unmap 731 * of this area. 732 * Meanwhile, the guest is writing to (mapping) page(s) in the second 733 * half of the test area. 734 */ 735 host_perform_sync(sync); 736 test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1); 737 for (ctr = 0; ctr < guest_pages / 2; ctr += chunk) 738 test_memslot_do_unmap(data, ctr, chunk); 739 740 /* Likewise, but for the opposite host / guest areas */ 741 host_perform_sync(sync); 742 test_memslot_map_unmap_check(data, guest_pages / 2, MEM_TEST_VAL_2); 743 for (ctr = guest_pages / 2; ctr < guest_pages; ctr += chunk) 744 test_memslot_do_unmap(data, ctr, chunk); 745 } 746 747 static void test_memslot_unmap_loop(struct vm_data *data, 748 struct sync_area *sync) 749 { 750 u32 host_page_size = getpagesize(); 751 u32 guest_page_size = data->vm->page_size; 752 u64 guest_chunk_pages = guest_page_size >= host_page_size ? 753 1 : host_page_size / guest_page_size; 754 755 test_memslot_unmap_loop_common(data, sync, guest_chunk_pages); 756 } 757 758 static void test_memslot_unmap_loop_chunked(struct vm_data *data, 759 struct sync_area *sync) 760 { 761 u32 guest_page_size = data->vm->page_size; 762 u64 guest_chunk_pages = MEM_TEST_UNMAP_CHUNK_SIZE / guest_page_size; 763 764 test_memslot_unmap_loop_common(data, sync, guest_chunk_pages); 765 } 766 767 static void test_memslot_rw_loop(struct vm_data *data, struct sync_area *sync) 768 { 769 u64 gptr; 770 u32 guest_page_size = data->vm->page_size; 771 772 for (gptr = MEM_TEST_GPA + guest_page_size / 2; 773 gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += guest_page_size) 774 *(u64 *)vm_gpa2hva(data, gptr, NULL) = MEM_TEST_VAL_2; 775 776 host_perform_sync(sync); 777 778 for (gptr = MEM_TEST_GPA; 779 gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += guest_page_size) { 780 u64 *vptr = (typeof(vptr))vm_gpa2hva(data, gptr, NULL); 781 u64 val = *vptr; 782 783 TEST_ASSERT(val == MEM_TEST_VAL_1, 784 "Guest written values should read back correctly (is %"PRIu64" @ %"PRIx64")", 785 val, gptr); 786 *vptr = 0; 787 } 788 789 host_perform_sync(sync); 790 } 791 792 struct test_data { 793 const char *name; 794 u64 mem_size; 795 void (*guest_code)(void); 796 bool (*prepare)(struct vm_data *data, struct sync_area *sync, 797 u64 *maxslots); 798 void (*loop)(struct vm_data *data, struct sync_area *sync); 799 }; 800 801 static bool test_execute(int nslots, u64 *maxslots, 802 unsigned int maxtime, 803 const struct test_data *tdata, 804 u64 *nloops, 805 struct timespec *slot_runtime, 806 struct timespec *guest_runtime) 807 { 808 u64 mem_size = tdata->mem_size ? : MEM_SIZE; 809 struct vm_data *data; 810 struct sync_area *sync; 811 struct timespec tstart; 812 bool ret = true; 813 814 data = alloc_vm(); 815 if (!prepare_vm(data, nslots, maxslots, tdata->guest_code, 816 mem_size, slot_runtime)) { 817 ret = false; 818 goto exit_free; 819 } 820 821 sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL); 822 if (tdata->prepare && 823 !tdata->prepare(data, sync, maxslots)) { 824 ret = false; 825 goto exit_free; 826 } 827 828 launch_vm(data); 829 830 clock_gettime(CLOCK_MONOTONIC, &tstart); 831 let_guest_run(sync); 832 833 while (1) { 834 *guest_runtime = timespec_elapsed(tstart); 835 if (guest_runtime->tv_sec >= maxtime) 836 break; 837 838 tdata->loop(data, sync); 839 840 (*nloops)++; 841 } 842 843 make_guest_exit(sync); 844 wait_guest_exit(data); 845 846 exit_free: 847 free_vm(data); 848 849 return ret; 850 } 851 852 static const struct test_data tests[] = { 853 { 854 .name = "map", 855 .mem_size = MEM_SIZE_MAP, 856 .guest_code = guest_code_test_memslot_map, 857 .loop = test_memslot_map_loop, 858 }, 859 { 860 .name = "unmap", 861 .mem_size = MEM_TEST_UNMAP_SIZE + MEM_EXTRA_SIZE, 862 .guest_code = guest_code_test_memslot_unmap, 863 .loop = test_memslot_unmap_loop, 864 }, 865 { 866 .name = "unmap chunked", 867 .mem_size = MEM_TEST_UNMAP_SIZE + MEM_EXTRA_SIZE, 868 .guest_code = guest_code_test_memslot_unmap, 869 .loop = test_memslot_unmap_loop_chunked, 870 }, 871 { 872 .name = "move active area", 873 .guest_code = guest_code_test_memslot_move, 874 .prepare = test_memslot_move_prepare_active, 875 .loop = test_memslot_move_loop, 876 }, 877 { 878 .name = "move inactive area", 879 .guest_code = guest_code_test_memslot_move, 880 .prepare = test_memslot_move_prepare_inactive, 881 .loop = test_memslot_move_loop, 882 }, 883 { 884 .name = "RW", 885 .guest_code = guest_code_test_memslot_rw, 886 .loop = test_memslot_rw_loop 887 }, 888 }; 889 890 #define NTESTS ARRAY_SIZE(tests) 891 892 struct test_args { 893 int tfirst; 894 int tlast; 895 int nslots; 896 int seconds; 897 int runs; 898 }; 899 900 static void help(char *name, struct test_args *targs) 901 { 902 int ctr; 903 904 pr_info("usage: %s [-h] [-v] [-d] [-s slots] [-f first_test] [-e last_test] [-l test_length] [-r run_count] [-t wait_timeout]\n", 905 name); 906 pr_info(" -h: print this help screen.\n"); 907 pr_info(" -v: enable verbose mode (not for benchmarking).\n"); 908 pr_info(" -d: enable extra debug checks.\n"); 909 pr_info(" -q: Disable memslot zap quirk during memslot move.\n"); 910 pr_info(" -s: specify memslot count cap (-1 means no cap; currently: %i)\n", 911 targs->nslots); 912 pr_info(" -f: specify the first test to run (currently: %i; max %zu)\n", 913 targs->tfirst, NTESTS - 1); 914 pr_info(" -e: specify the last test to run (currently: %i; max %zu)\n", 915 targs->tlast, NTESTS - 1); 916 pr_info(" -l: specify the test length in seconds (currently: %i)\n", 917 targs->seconds); 918 pr_info(" -r: specify the number of runs per test (currently: %i)\n", 919 targs->runs); 920 pr_info(" -t: specify the number of seconds for host wait timeout (currently: %i)\n", 921 wait_timeout); 922 923 pr_info("\nAvailable tests:\n"); 924 for (ctr = 0; ctr < NTESTS; ctr++) 925 pr_info("%d: %s\n", ctr, tests[ctr].name); 926 } 927 928 static bool check_memory_sizes(void) 929 { 930 u32 host_page_size = getpagesize(); 931 u32 guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size; 932 933 if (host_page_size > SZ_64K || guest_page_size > SZ_64K) { 934 pr_info("Unsupported page size on host (0x%x) or guest (0x%x)\n", 935 host_page_size, guest_page_size); 936 return false; 937 } 938 939 if (MEM_SIZE % guest_page_size || 940 MEM_TEST_SIZE % guest_page_size) { 941 pr_info("invalid MEM_SIZE or MEM_TEST_SIZE\n"); 942 return false; 943 } 944 945 if (MEM_SIZE_MAP % guest_page_size || 946 MEM_TEST_MAP_SIZE % guest_page_size || 947 (MEM_TEST_MAP_SIZE / guest_page_size) <= 2 || 948 (MEM_TEST_MAP_SIZE / guest_page_size) % 2) { 949 pr_info("invalid MEM_SIZE_MAP or MEM_TEST_MAP_SIZE\n"); 950 return false; 951 } 952 953 if (MEM_TEST_UNMAP_SIZE > MEM_TEST_SIZE || 954 MEM_TEST_UNMAP_SIZE % guest_page_size || 955 (MEM_TEST_UNMAP_SIZE / guest_page_size) % 956 (2 * MEM_TEST_UNMAP_CHUNK_SIZE / guest_page_size)) { 957 pr_info("invalid MEM_TEST_UNMAP_SIZE or MEM_TEST_UNMAP_CHUNK_SIZE\n"); 958 return false; 959 } 960 961 return true; 962 } 963 964 static bool parse_args(int argc, char *argv[], 965 struct test_args *targs) 966 { 967 u32 max_mem_slots; 968 int opt; 969 970 while ((opt = getopt(argc, argv, "hvdqs:f:e:l:r:t:")) != -1) { 971 switch (opt) { 972 case 'h': 973 default: 974 help(argv[0], targs); 975 return false; 976 case 'v': 977 verbose = true; 978 break; 979 case 'd': 980 map_unmap_verify = true; 981 break; 982 #ifdef __x86_64__ 983 case 'q': 984 disable_slot_zap_quirk = true; 985 TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & 986 KVM_X86_QUIRK_SLOT_ZAP_ALL); 987 break; 988 #endif 989 case 's': 990 targs->nslots = atoi_paranoid(optarg); 991 if (targs->nslots <= 1 && targs->nslots != -1) { 992 pr_info("Slot count cap must be larger than 1 or -1 for no cap\n"); 993 return false; 994 } 995 break; 996 case 'f': 997 targs->tfirst = atoi_non_negative("First test", optarg); 998 break; 999 case 'e': 1000 targs->tlast = atoi_non_negative("Last test", optarg); 1001 if (targs->tlast >= NTESTS) { 1002 pr_info("Last test to run has to be non-negative and less than %zu\n", 1003 NTESTS); 1004 return false; 1005 } 1006 break; 1007 case 'l': 1008 targs->seconds = atoi_non_negative("Test length", optarg); 1009 break; 1010 case 'r': 1011 targs->runs = atoi_positive("Runs per test", optarg); 1012 break; 1013 case 't': 1014 wait_timeout = atoi_positive("Host wait timeout", optarg); 1015 break; 1016 } 1017 } 1018 1019 if (optind < argc) { 1020 help(argv[0], targs); 1021 return false; 1022 } 1023 1024 if (targs->tfirst > targs->tlast) { 1025 pr_info("First test to run cannot be greater than the last test to run\n"); 1026 return false; 1027 } 1028 1029 max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS); 1030 if (max_mem_slots <= 1) { 1031 pr_info("KVM_CAP_NR_MEMSLOTS should be greater than 1\n"); 1032 return false; 1033 } 1034 1035 /* Memory slot 0 is reserved */ 1036 if (targs->nslots == -1) 1037 targs->nslots = max_mem_slots - 1; 1038 else 1039 targs->nslots = min_t(int, targs->nslots, max_mem_slots) - 1; 1040 1041 pr_info_v("Allowed Number of memory slots: %"PRIu32"\n", 1042 targs->nslots + 1); 1043 1044 return true; 1045 } 1046 1047 struct test_result { 1048 struct timespec slot_runtime, guest_runtime, iter_runtime; 1049 s64 slottimens, runtimens; 1050 u64 nloops; 1051 }; 1052 1053 static bool test_loop(const struct test_data *data, 1054 const struct test_args *targs, 1055 struct test_result *rbestslottime, 1056 struct test_result *rbestruntime) 1057 { 1058 u64 maxslots; 1059 struct test_result result = {}; 1060 1061 if (!test_execute(targs->nslots, &maxslots, targs->seconds, data, 1062 &result.nloops, 1063 &result.slot_runtime, &result.guest_runtime)) { 1064 if (maxslots) 1065 pr_info("Memslot count too high for this test, decrease the cap (max is %"PRIu64")\n", 1066 maxslots); 1067 else 1068 pr_info("Memslot count may be too high for this test, try adjusting the cap\n"); 1069 1070 return false; 1071 } 1072 1073 pr_info("Test took %ld.%.9lds for slot setup + %ld.%.9lds all iterations\n", 1074 result.slot_runtime.tv_sec, result.slot_runtime.tv_nsec, 1075 result.guest_runtime.tv_sec, result.guest_runtime.tv_nsec); 1076 if (!result.nloops) { 1077 pr_info("No full loops done - too short test time or system too loaded?\n"); 1078 return true; 1079 } 1080 1081 result.iter_runtime = timespec_div(result.guest_runtime, 1082 result.nloops); 1083 pr_info("Done %"PRIu64" iterations, avg %ld.%.9lds each\n", 1084 result.nloops, 1085 result.iter_runtime.tv_sec, 1086 result.iter_runtime.tv_nsec); 1087 result.slottimens = timespec_to_ns(result.slot_runtime); 1088 result.runtimens = timespec_to_ns(result.iter_runtime); 1089 1090 /* 1091 * Only rank the slot setup time for tests using the whole test memory 1092 * area so they are comparable 1093 */ 1094 if (!data->mem_size && 1095 (!rbestslottime->slottimens || 1096 result.slottimens < rbestslottime->slottimens)) 1097 *rbestslottime = result; 1098 if (!rbestruntime->runtimens || 1099 result.runtimens < rbestruntime->runtimens) 1100 *rbestruntime = result; 1101 1102 return true; 1103 } 1104 1105 int main(int argc, char *argv[]) 1106 { 1107 struct test_args targs = { 1108 .tfirst = 0, 1109 .tlast = NTESTS - 1, 1110 .nslots = -1, 1111 .seconds = 5, 1112 .runs = 1, 1113 }; 1114 struct test_result rbestslottime = {}; 1115 int tctr; 1116 1117 if (!check_memory_sizes()) 1118 return -1; 1119 1120 if (!parse_args(argc, argv, &targs)) 1121 return -1; 1122 1123 for (tctr = targs.tfirst; tctr <= targs.tlast; tctr++) { 1124 const struct test_data *data = &tests[tctr]; 1125 unsigned int runctr; 1126 struct test_result rbestruntime = {}; 1127 1128 if (tctr > targs.tfirst) 1129 pr_info("\n"); 1130 1131 pr_info("Testing %s performance with %i runs, %d seconds each\n", 1132 data->name, targs.runs, targs.seconds); 1133 1134 for (runctr = 0; runctr < targs.runs; runctr++) 1135 if (!test_loop(data, &targs, 1136 &rbestslottime, &rbestruntime)) 1137 break; 1138 1139 if (rbestruntime.runtimens) 1140 pr_info("Best runtime result was %ld.%.9lds per iteration (with %"PRIu64" iterations)\n", 1141 rbestruntime.iter_runtime.tv_sec, 1142 rbestruntime.iter_runtime.tv_nsec, 1143 rbestruntime.nloops); 1144 } 1145 1146 if (rbestslottime.slottimens) 1147 pr_info("Best slot setup time for the whole test area was %ld.%.9lds\n", 1148 rbestslottime.slot_runtime.tv_sec, 1149 rbestslottime.slot_runtime.tv_nsec); 1150 1151 return 0; 1152 } 1153