1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * page_fault_test.c - Test stage 2 faults. 4 * 5 * This test tries different combinations of guest accesses (e.g., write, 6 * S1PTW), backing source type (e.g., anon) and types of faults (e.g., read on 7 * hugetlbfs with a hole). It checks that the expected handling method is 8 * called (e.g., uffd faults with the right address and write/read flag). 9 */ 10 11 #define _GNU_SOURCE 12 #include <linux/bitmap.h> 13 #include <fcntl.h> 14 #include <test_util.h> 15 #include <kvm_util.h> 16 #include <processor.h> 17 #include <asm/sysreg.h> 18 #include <linux/bitfield.h> 19 #include "guest_modes.h" 20 #include "userfaultfd_util.h" 21 22 /* Guest virtual addresses that point to the test page and its PTE. */ 23 #define TEST_GVA 0xc0000000 24 #define TEST_EXEC_GVA (TEST_GVA + 0x8) 25 #define TEST_PTE_GVA 0xb0000000 26 #define TEST_DATA 0x0123456789ABCDEF 27 28 static uint64_t *guest_test_memory = (uint64_t *)TEST_GVA; 29 30 #define CMD_NONE (0) 31 #define CMD_SKIP_TEST (1ULL << 1) 32 #define CMD_HOLE_PT (1ULL << 2) 33 #define CMD_HOLE_DATA (1ULL << 3) 34 #define CMD_CHECK_WRITE_IN_DIRTY_LOG (1ULL << 4) 35 #define CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG (1ULL << 5) 36 #define CMD_CHECK_NO_WRITE_IN_DIRTY_LOG (1ULL << 6) 37 #define CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG (1ULL << 7) 38 #define CMD_SET_PTE_AF (1ULL << 8) 39 40 #define PREPARE_FN_NR 10 41 #define CHECK_FN_NR 10 42 43 static struct event_cnt { 44 int mmio_exits; 45 int fail_vcpu_runs; 46 int uffd_faults; 47 /* uffd_faults is incremented from multiple threads. 
*/ 48 pthread_mutex_t uffd_faults_mutex; 49 } events; 50 51 struct test_desc { 52 const char *name; 53 uint64_t mem_mark_cmd; 54 /* Skip the test if any prepare function returns false */ 55 bool (*guest_prepare[PREPARE_FN_NR])(void); 56 void (*guest_test)(void); 57 void (*guest_test_check[CHECK_FN_NR])(void); 58 uffd_handler_t uffd_pt_handler; 59 uffd_handler_t uffd_data_handler; 60 void (*dabt_handler)(struct ex_regs *regs); 61 void (*iabt_handler)(struct ex_regs *regs); 62 void (*mmio_handler)(struct kvm_vm *vm, struct kvm_run *run); 63 void (*fail_vcpu_run_handler)(int ret); 64 uint32_t pt_memslot_flags; 65 uint32_t data_memslot_flags; 66 bool skip; 67 struct event_cnt expected_events; 68 }; 69 70 struct test_params { 71 enum vm_mem_backing_src_type src_type; 72 struct test_desc *test_desc; 73 }; 74 75 static inline void flush_tlb_page(uint64_t vaddr) 76 { 77 uint64_t page = vaddr >> 12; 78 79 dsb(ishst); 80 asm volatile("tlbi vaae1is, %0" :: "r" (page)); 81 dsb(ish); 82 isb(); 83 } 84 85 static void guest_write64(void) 86 { 87 uint64_t val; 88 89 WRITE_ONCE(*guest_test_memory, TEST_DATA); 90 val = READ_ONCE(*guest_test_memory); 91 GUEST_ASSERT_EQ(val, TEST_DATA); 92 } 93 94 /* Check the system for atomic instructions. */ 95 static bool guest_check_lse(void) 96 { 97 uint64_t isar0 = read_sysreg(id_aa64isar0_el1); 98 uint64_t atomic; 99 100 atomic = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_ATOMICS), isar0); 101 return atomic >= 2; 102 } 103 104 static bool guest_check_dc_zva(void) 105 { 106 uint64_t dczid = read_sysreg(dczid_el0); 107 uint64_t dzp = FIELD_GET(ARM64_FEATURE_MASK(DCZID_DZP), dczid); 108 109 return dzp == 0; 110 } 111 112 /* Compare and swap instruction. 
*/ 113 static void guest_cas(void) 114 { 115 uint64_t val; 116 117 GUEST_ASSERT(guest_check_lse()); 118 asm volatile(".arch_extension lse\n" 119 "casal %0, %1, [%2]\n" 120 :: "r" (0), "r" (TEST_DATA), "r" (guest_test_memory)); 121 val = READ_ONCE(*guest_test_memory); 122 GUEST_ASSERT_EQ(val, TEST_DATA); 123 } 124 125 static void guest_read64(void) 126 { 127 uint64_t val; 128 129 val = READ_ONCE(*guest_test_memory); 130 GUEST_ASSERT_EQ(val, 0); 131 } 132 133 /* Address translation instruction */ 134 static void guest_at(void) 135 { 136 uint64_t par; 137 138 asm volatile("at s1e1r, %0" :: "r" (guest_test_memory)); 139 par = read_sysreg(par_el1); 140 isb(); 141 142 /* Bit 1 indicates whether the AT was successful */ 143 GUEST_ASSERT_EQ(par & 1, 0); 144 } 145 146 /* 147 * The size of the block written by "dc zva" is guaranteed to be between (2 << 148 * 0) and (2 << 9), which is safe in our case as we need the write to happen 149 * for at least a word, and not more than a page. 150 */ 151 static void guest_dc_zva(void) 152 { 153 uint16_t val; 154 155 asm volatile("dc zva, %0" :: "r" (guest_test_memory)); 156 dsb(ish); 157 val = READ_ONCE(*guest_test_memory); 158 GUEST_ASSERT_EQ(val, 0); 159 } 160 161 /* 162 * Pre-indexing loads and stores don't have a valid syndrome (ESR_EL2.ISV==0). 163 * And that's special because KVM must take special care with those: they 164 * should still count as accesses for dirty logging or user-faulting, but 165 * should be handled differently on mmio. 166 */ 167 static void guest_ld_preidx(void) 168 { 169 uint64_t val; 170 uint64_t addr = TEST_GVA - 8; 171 172 /* 173 * This ends up accessing "TEST_GVA + 8 - 8", where "TEST_GVA - 8" is 174 * in a gap between memslots not backing by anything. 175 */ 176 asm volatile("ldr %0, [%1, #8]!" 
177 : "=r" (val), "+r" (addr)); 178 GUEST_ASSERT_EQ(val, 0); 179 GUEST_ASSERT_EQ(addr, TEST_GVA); 180 } 181 182 static void guest_st_preidx(void) 183 { 184 uint64_t val = TEST_DATA; 185 uint64_t addr = TEST_GVA - 8; 186 187 asm volatile("str %0, [%1, #8]!" 188 : "+r" (val), "+r" (addr)); 189 190 GUEST_ASSERT_EQ(addr, TEST_GVA); 191 val = READ_ONCE(*guest_test_memory); 192 } 193 194 static bool guest_set_ha(void) 195 { 196 uint64_t mmfr1 = read_sysreg(id_aa64mmfr1_el1); 197 uint64_t hadbs, tcr; 198 199 /* Skip if HA is not supported. */ 200 hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_HADBS), mmfr1); 201 if (hadbs == 0) 202 return false; 203 204 tcr = read_sysreg(tcr_el1) | TCR_EL1_HA; 205 write_sysreg(tcr, tcr_el1); 206 isb(); 207 208 return true; 209 } 210 211 static bool guest_clear_pte_af(void) 212 { 213 *((uint64_t *)TEST_PTE_GVA) &= ~PTE_AF; 214 flush_tlb_page(TEST_GVA); 215 216 return true; 217 } 218 219 static void guest_check_pte_af(void) 220 { 221 dsb(ish); 222 GUEST_ASSERT_EQ(*((uint64_t *)TEST_PTE_GVA) & PTE_AF, PTE_AF); 223 } 224 225 static void guest_check_write_in_dirty_log(void) 226 { 227 GUEST_SYNC(CMD_CHECK_WRITE_IN_DIRTY_LOG); 228 } 229 230 static void guest_check_no_write_in_dirty_log(void) 231 { 232 GUEST_SYNC(CMD_CHECK_NO_WRITE_IN_DIRTY_LOG); 233 } 234 235 static void guest_check_s1ptw_wr_in_dirty_log(void) 236 { 237 GUEST_SYNC(CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG); 238 } 239 240 static void guest_exec(void) 241 { 242 int (*code)(void) = (int (*)(void))TEST_EXEC_GVA; 243 int ret; 244 245 ret = code(); 246 GUEST_ASSERT_EQ(ret, 0x77); 247 } 248 249 static bool guest_prepare(struct test_desc *test) 250 { 251 bool (*prepare_fn)(void); 252 int i; 253 254 for (i = 0; i < PREPARE_FN_NR; i++) { 255 prepare_fn = test->guest_prepare[i]; 256 if (prepare_fn && !prepare_fn()) 257 return false; 258 } 259 260 return true; 261 } 262 263 static void guest_test_check(struct test_desc *test) 264 { 265 void (*check_fn)(void); 266 int i; 267 268 for (i = 0; i < 
CHECK_FN_NR; i++) { 269 check_fn = test->guest_test_check[i]; 270 if (check_fn) 271 check_fn(); 272 } 273 } 274 275 static void guest_code(struct test_desc *test) 276 { 277 if (!guest_prepare(test)) 278 GUEST_SYNC(CMD_SKIP_TEST); 279 280 GUEST_SYNC(test->mem_mark_cmd); 281 282 if (test->guest_test) 283 test->guest_test(); 284 285 guest_test_check(test); 286 GUEST_DONE(); 287 } 288 289 static void no_dabt_handler(struct ex_regs *regs) 290 { 291 GUEST_ASSERT_1(false, read_sysreg(far_el1)); 292 } 293 294 static void no_iabt_handler(struct ex_regs *regs) 295 { 296 GUEST_ASSERT_1(false, regs->pc); 297 } 298 299 static struct uffd_args { 300 char *copy; 301 void *hva; 302 uint64_t paging_size; 303 } pt_args, data_args; 304 305 /* Returns true to continue the test, and false if it should be skipped. */ 306 static int uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg, 307 struct uffd_args *args, bool expect_write) 308 { 309 uint64_t addr = msg->arg.pagefault.address; 310 uint64_t flags = msg->arg.pagefault.flags; 311 struct uffdio_copy copy; 312 int ret; 313 314 TEST_ASSERT(uffd_mode == UFFDIO_REGISTER_MODE_MISSING, 315 "The only expected UFFD mode is MISSING"); 316 ASSERT_EQ(!!(flags & UFFD_PAGEFAULT_FLAG_WRITE), expect_write); 317 ASSERT_EQ(addr, (uint64_t)args->hva); 318 319 pr_debug("uffd fault: addr=%p write=%d\n", 320 (void *)addr, !!(flags & UFFD_PAGEFAULT_FLAG_WRITE)); 321 322 copy.src = (uint64_t)args->copy; 323 copy.dst = addr; 324 copy.len = args->paging_size; 325 copy.mode = 0; 326 327 ret = ioctl(uffd, UFFDIO_COPY, ©); 328 if (ret == -1) { 329 pr_info("Failed UFFDIO_COPY in 0x%lx with errno: %d\n", 330 addr, errno); 331 return ret; 332 } 333 334 pthread_mutex_lock(&events.uffd_faults_mutex); 335 events.uffd_faults += 1; 336 pthread_mutex_unlock(&events.uffd_faults_mutex); 337 return 0; 338 } 339 340 static int uffd_pt_write_handler(int mode, int uffd, struct uffd_msg *msg) 341 { 342 return uffd_generic_handler(mode, uffd, msg, &pt_args, true); 
343 } 344 345 static int uffd_data_write_handler(int mode, int uffd, struct uffd_msg *msg) 346 { 347 return uffd_generic_handler(mode, uffd, msg, &data_args, true); 348 } 349 350 static int uffd_data_read_handler(int mode, int uffd, struct uffd_msg *msg) 351 { 352 return uffd_generic_handler(mode, uffd, msg, &data_args, false); 353 } 354 355 static void setup_uffd_args(struct userspace_mem_region *region, 356 struct uffd_args *args) 357 { 358 args->hva = (void *)region->region.userspace_addr; 359 args->paging_size = region->region.memory_size; 360 361 args->copy = malloc(args->paging_size); 362 TEST_ASSERT(args->copy, "Failed to allocate data copy."); 363 memcpy(args->copy, args->hva, args->paging_size); 364 } 365 366 static void setup_uffd(struct kvm_vm *vm, struct test_params *p, 367 struct uffd_desc **pt_uffd, struct uffd_desc **data_uffd) 368 { 369 struct test_desc *test = p->test_desc; 370 int uffd_mode = UFFDIO_REGISTER_MODE_MISSING; 371 372 setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_PT), &pt_args); 373 setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_TEST_DATA), &data_args); 374 375 *pt_uffd = NULL; 376 if (test->uffd_pt_handler) 377 *pt_uffd = uffd_setup_demand_paging(uffd_mode, 0, 378 pt_args.hva, 379 pt_args.paging_size, 380 test->uffd_pt_handler); 381 382 *data_uffd = NULL; 383 if (test->uffd_data_handler) 384 *data_uffd = uffd_setup_demand_paging(uffd_mode, 0, 385 data_args.hva, 386 data_args.paging_size, 387 test->uffd_data_handler); 388 } 389 390 static void free_uffd(struct test_desc *test, struct uffd_desc *pt_uffd, 391 struct uffd_desc *data_uffd) 392 { 393 if (test->uffd_pt_handler) 394 uffd_stop_demand_paging(pt_uffd); 395 if (test->uffd_data_handler) 396 uffd_stop_demand_paging(data_uffd); 397 398 free(pt_args.copy); 399 free(data_args.copy); 400 } 401 402 static int uffd_no_handler(int mode, int uffd, struct uffd_msg *msg) 403 { 404 TEST_FAIL("There was no UFFD fault expected."); 405 return -1; 406 } 407 408 /* Returns false if the test 
should be skipped. */ 409 static bool punch_hole_in_backing_store(struct kvm_vm *vm, 410 struct userspace_mem_region *region) 411 { 412 void *hva = (void *)region->region.userspace_addr; 413 uint64_t paging_size = region->region.memory_size; 414 int ret, fd = region->fd; 415 416 if (fd != -1) { 417 ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 418 0, paging_size); 419 TEST_ASSERT(ret == 0, "fallocate failed\n"); 420 } else { 421 ret = madvise(hva, paging_size, MADV_DONTNEED); 422 TEST_ASSERT(ret == 0, "madvise failed\n"); 423 } 424 425 return true; 426 } 427 428 static void mmio_on_test_gpa_handler(struct kvm_vm *vm, struct kvm_run *run) 429 { 430 struct userspace_mem_region *region; 431 void *hva; 432 433 region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); 434 hva = (void *)region->region.userspace_addr; 435 436 ASSERT_EQ(run->mmio.phys_addr, region->region.guest_phys_addr); 437 438 memcpy(hva, run->mmio.data, run->mmio.len); 439 events.mmio_exits += 1; 440 } 441 442 static void mmio_no_handler(struct kvm_vm *vm, struct kvm_run *run) 443 { 444 uint64_t data; 445 446 memcpy(&data, run->mmio.data, sizeof(data)); 447 pr_debug("addr=%lld len=%d w=%d data=%lx\n", 448 run->mmio.phys_addr, run->mmio.len, 449 run->mmio.is_write, data); 450 TEST_FAIL("There was no MMIO exit expected."); 451 } 452 453 static bool check_write_in_dirty_log(struct kvm_vm *vm, 454 struct userspace_mem_region *region, 455 uint64_t host_pg_nr) 456 { 457 unsigned long *bmap; 458 bool first_page_dirty; 459 uint64_t size = region->region.memory_size; 460 461 /* getpage_size() is not always equal to vm->page_size */ 462 bmap = bitmap_zalloc(size / getpagesize()); 463 kvm_vm_get_dirty_log(vm, region->region.slot, bmap); 464 first_page_dirty = test_bit(host_pg_nr, bmap); 465 free(bmap); 466 return first_page_dirty; 467 } 468 469 /* Returns true to continue the test, and false if it should be skipped. 
*/ 470 static bool handle_cmd(struct kvm_vm *vm, int cmd) 471 { 472 struct userspace_mem_region *data_region, *pt_region; 473 bool continue_test = true; 474 475 data_region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); 476 pt_region = vm_get_mem_region(vm, MEM_REGION_PT); 477 478 if (cmd == CMD_SKIP_TEST) 479 continue_test = false; 480 481 if (cmd & CMD_HOLE_PT) 482 continue_test = punch_hole_in_backing_store(vm, pt_region); 483 if (cmd & CMD_HOLE_DATA) 484 continue_test = punch_hole_in_backing_store(vm, data_region); 485 if (cmd & CMD_CHECK_WRITE_IN_DIRTY_LOG) 486 TEST_ASSERT(check_write_in_dirty_log(vm, data_region, 0), 487 "Missing write in dirty log"); 488 if (cmd & CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG) 489 TEST_ASSERT(check_write_in_dirty_log(vm, pt_region, 0), 490 "Missing s1ptw write in dirty log"); 491 if (cmd & CMD_CHECK_NO_WRITE_IN_DIRTY_LOG) 492 TEST_ASSERT(!check_write_in_dirty_log(vm, data_region, 0), 493 "Unexpected write in dirty log"); 494 if (cmd & CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG) 495 TEST_ASSERT(!check_write_in_dirty_log(vm, pt_region, 0), 496 "Unexpected s1ptw write in dirty log"); 497 498 return continue_test; 499 } 500 501 void fail_vcpu_run_no_handler(int ret) 502 { 503 TEST_FAIL("Unexpected vcpu run failure\n"); 504 } 505 506 void fail_vcpu_run_mmio_no_syndrome_handler(int ret) 507 { 508 TEST_ASSERT(errno == ENOSYS, 509 "The mmio handler should have returned not implemented."); 510 events.fail_vcpu_runs += 1; 511 } 512 513 typedef uint32_t aarch64_insn_t; 514 extern aarch64_insn_t __exec_test[2]; 515 516 noinline void __return_0x77(void) 517 { 518 asm volatile("__exec_test: mov x0, #0x77\n" 519 "ret\n"); 520 } 521 522 /* 523 * Note that this function runs on the host before the test VM starts: there's 524 * no need to sync the D$ and I$ caches. 
525 */ 526 static void load_exec_code_for_test(struct kvm_vm *vm) 527 { 528 uint64_t *code; 529 struct userspace_mem_region *region; 530 void *hva; 531 532 region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); 533 hva = (void *)region->region.userspace_addr; 534 535 assert(TEST_EXEC_GVA > TEST_GVA); 536 code = hva + TEST_EXEC_GVA - TEST_GVA; 537 memcpy(code, __exec_test, sizeof(__exec_test)); 538 } 539 540 static void setup_abort_handlers(struct kvm_vm *vm, struct kvm_vcpu *vcpu, 541 struct test_desc *test) 542 { 543 vm_init_descriptor_tables(vm); 544 vcpu_init_descriptor_tables(vcpu); 545 546 vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, 547 ESR_EC_DABT, no_dabt_handler); 548 vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, 549 ESR_EC_IABT, no_iabt_handler); 550 } 551 552 static void setup_gva_maps(struct kvm_vm *vm) 553 { 554 struct userspace_mem_region *region; 555 uint64_t pte_gpa; 556 557 region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); 558 /* Map TEST_GVA first. This will install a new PTE. */ 559 virt_pg_map(vm, TEST_GVA, region->region.guest_phys_addr); 560 /* Then map TEST_PTE_GVA to the above PTE. */ 561 pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA)); 562 virt_pg_map(vm, TEST_PTE_GVA, pte_gpa); 563 } 564 565 enum pf_test_memslots { 566 CODE_AND_DATA_MEMSLOT, 567 PAGE_TABLE_MEMSLOT, 568 TEST_DATA_MEMSLOT, 569 }; 570 571 /* 572 * Create a memslot for code and data at pfn=0, and test-data and PT ones 573 * at max_gfn. 574 */ 575 static void setup_memslots(struct kvm_vm *vm, struct test_params *p) 576 { 577 uint64_t backing_src_pagesz = get_backing_src_pagesz(p->src_type); 578 uint64_t guest_page_size = vm->page_size; 579 uint64_t max_gfn = vm_compute_max_gfn(vm); 580 /* Enough for 2M of code when using 4K guest pages. */ 581 uint64_t code_npages = 512; 582 uint64_t pt_size, data_size, data_gpa; 583 584 /* 585 * This test requires 1 pgd, 2 pud, 4 pmd, and 6 pte pages when using 586 * VM_MODE_P48V48_4K. Note that the .text takes ~1.6MBs. 
That's 13 587 * pages. VM_MODE_P48V48_4K is the mode with most PT pages; let's use 588 * twice that just in case. 589 */ 590 pt_size = 26 * guest_page_size; 591 592 /* memslot sizes and gpa's must be aligned to the backing page size */ 593 pt_size = align_up(pt_size, backing_src_pagesz); 594 data_size = align_up(guest_page_size, backing_src_pagesz); 595 data_gpa = (max_gfn * guest_page_size) - data_size; 596 data_gpa = align_down(data_gpa, backing_src_pagesz); 597 598 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 599 CODE_AND_DATA_MEMSLOT, code_npages, 0); 600 vm->memslots[MEM_REGION_CODE] = CODE_AND_DATA_MEMSLOT; 601 vm->memslots[MEM_REGION_DATA] = CODE_AND_DATA_MEMSLOT; 602 603 vm_userspace_mem_region_add(vm, p->src_type, data_gpa - pt_size, 604 PAGE_TABLE_MEMSLOT, pt_size / guest_page_size, 605 p->test_desc->pt_memslot_flags); 606 vm->memslots[MEM_REGION_PT] = PAGE_TABLE_MEMSLOT; 607 608 vm_userspace_mem_region_add(vm, p->src_type, data_gpa, TEST_DATA_MEMSLOT, 609 data_size / guest_page_size, 610 p->test_desc->data_memslot_flags); 611 vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT; 612 } 613 614 static void setup_ucall(struct kvm_vm *vm) 615 { 616 struct userspace_mem_region *region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); 617 618 ucall_init(vm, region->region.guest_phys_addr + region->region.memory_size); 619 } 620 621 static void setup_default_handlers(struct test_desc *test) 622 { 623 if (!test->mmio_handler) 624 test->mmio_handler = mmio_no_handler; 625 626 if (!test->fail_vcpu_run_handler) 627 test->fail_vcpu_run_handler = fail_vcpu_run_no_handler; 628 } 629 630 static void check_event_counts(struct test_desc *test) 631 { 632 ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults); 633 ASSERT_EQ(test->expected_events.mmio_exits, events.mmio_exits); 634 ASSERT_EQ(test->expected_events.fail_vcpu_runs, events.fail_vcpu_runs); 635 } 636 637 static void print_test_banner(enum vm_guest_mode mode, struct test_params *p) 638 { 639 
struct test_desc *test = p->test_desc; 640 641 pr_debug("Test: %s\n", test->name); 642 pr_debug("Testing guest mode: %s\n", vm_guest_mode_string(mode)); 643 pr_debug("Testing memory backing src type: %s\n", 644 vm_mem_backing_src_alias(p->src_type)->name); 645 } 646 647 static void reset_event_counts(void) 648 { 649 memset(&events, 0, sizeof(events)); 650 } 651 652 /* 653 * This function either succeeds, skips the test (after setting test->skip), or 654 * fails with a TEST_FAIL that aborts all tests. 655 */ 656 static void vcpu_run_loop(struct kvm_vm *vm, struct kvm_vcpu *vcpu, 657 struct test_desc *test) 658 { 659 struct kvm_run *run; 660 struct ucall uc; 661 int ret; 662 663 run = vcpu->run; 664 665 for (;;) { 666 ret = _vcpu_run(vcpu); 667 if (ret) { 668 test->fail_vcpu_run_handler(ret); 669 goto done; 670 } 671 672 switch (get_ucall(vcpu, &uc)) { 673 case UCALL_SYNC: 674 if (!handle_cmd(vm, uc.args[1])) { 675 test->skip = true; 676 goto done; 677 } 678 break; 679 case UCALL_ABORT: 680 REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); 681 break; 682 case UCALL_DONE: 683 goto done; 684 case UCALL_NONE: 685 if (run->exit_reason == KVM_EXIT_MMIO) 686 test->mmio_handler(vm, run); 687 break; 688 default: 689 TEST_FAIL("Unknown ucall %lu", uc.cmd); 690 } 691 } 692 693 done: 694 pr_debug(test->skip ? "Skipped.\n" : "Done.\n"); 695 } 696 697 static void run_test(enum vm_guest_mode mode, void *arg) 698 { 699 struct test_params *p = (struct test_params *)arg; 700 struct test_desc *test = p->test_desc; 701 struct kvm_vm *vm; 702 struct kvm_vcpu *vcpu; 703 struct uffd_desc *pt_uffd, *data_uffd; 704 705 print_test_banner(mode, p); 706 707 vm = ____vm_create(mode); 708 setup_memslots(vm, p); 709 kvm_vm_elf_load(vm, program_invocation_name); 710 setup_ucall(vm); 711 vcpu = vm_vcpu_add(vm, 0, guest_code); 712 713 setup_gva_maps(vm); 714 715 reset_event_counts(); 716 717 /* 718 * Set some code in the data memslot for the guest to execute (only 719 * applicable to the EXEC tests). 
This has to be done before 720 * setup_uffd() as that function copies the memslot data for the uffd 721 * handler. 722 */ 723 load_exec_code_for_test(vm); 724 setup_uffd(vm, p, &pt_uffd, &data_uffd); 725 setup_abort_handlers(vm, vcpu, test); 726 setup_default_handlers(test); 727 vcpu_args_set(vcpu, 1, test); 728 729 vcpu_run_loop(vm, vcpu, test); 730 731 kvm_vm_free(vm); 732 free_uffd(test, pt_uffd, data_uffd); 733 734 /* 735 * Make sure we check the events after the uffd threads have exited, 736 * which means they updated their respective event counters. 737 */ 738 if (!test->skip) 739 check_event_counts(test); 740 } 741 742 static void help(char *name) 743 { 744 puts(""); 745 printf("usage: %s [-h] [-s mem-type]\n", name); 746 puts(""); 747 guest_modes_help(); 748 backing_src_help("-s"); 749 puts(""); 750 } 751 752 #define SNAME(s) #s 753 #define SCAT2(a, b) SNAME(a ## _ ## b) 754 #define SCAT3(a, b, c) SCAT2(a, SCAT2(b, c)) 755 #define SCAT4(a, b, c, d) SCAT2(a, SCAT3(b, c, d)) 756 757 #define _CHECK(_test) _CHECK_##_test 758 #define _PREPARE(_test) _PREPARE_##_test 759 #define _PREPARE_guest_read64 NULL 760 #define _PREPARE_guest_ld_preidx NULL 761 #define _PREPARE_guest_write64 NULL 762 #define _PREPARE_guest_st_preidx NULL 763 #define _PREPARE_guest_exec NULL 764 #define _PREPARE_guest_at NULL 765 #define _PREPARE_guest_dc_zva guest_check_dc_zva 766 #define _PREPARE_guest_cas guest_check_lse 767 768 /* With or without access flag checks */ 769 #define _PREPARE_with_af guest_set_ha, guest_clear_pte_af 770 #define _PREPARE_no_af NULL 771 #define _CHECK_with_af guest_check_pte_af 772 #define _CHECK_no_af NULL 773 774 /* Performs an access and checks that no faults were triggered. 
*/
#define TEST_ACCESS(_access, _with_af, _mark_cmd)				\
{										\
	.name			= SCAT3(_access, _with_af, #_mark_cmd),		\
	.mem_mark_cmd		= _mark_cmd,					\
	.guest_prepare		= { _PREPARE(_with_af),				\
				    _PREPARE(_access) },			\
	.guest_test		= _access,					\
	.guest_test_check	= { _CHECK(_with_af) },				\
	.expected_events	= { 0 },					\
}

/*
 * Performs an access with userfaultfd handlers installed on the data and PT
 * regions, and expects _uffd_faults userfaultfd faults in total.
 */
#define TEST_UFFD(_access, _with_af, _mark_cmd,					\
		  _uffd_data_handler, _uffd_pt_handler, _uffd_faults)		\
{										\
	.name			= SCAT4(uffd, _access, _with_af, #_mark_cmd),	\
	.mem_mark_cmd		= _mark_cmd,					\
	.guest_prepare		= { _PREPARE(_with_af),				\
				    _PREPARE(_access) },			\
	.guest_test		= _access,					\
	.guest_test_check	= { _CHECK(_with_af) },				\
	.uffd_pt_handler	= _uffd_pt_handler,				\
	.uffd_data_handler	= _uffd_data_handler,				\
	.expected_events	= { .uffd_faults = _uffd_faults, },		\
}

/*
 * Performs an access with dirty logging enabled on both the data and PT
 * memslots. _test_check is the guest-side check for the data region; the
 * S1PTW write check on the PT region is always appended.
 */
#define TEST_DIRTY_LOG(_access, _with_af, _test_check)				\
{										\
	.name			= SCAT3(dirty_log, _access, _with_af),		\
	.pt_memslot_flags	= KVM_MEM_LOG_DIRTY_PAGES,			\
	.data_memslot_flags	= KVM_MEM_LOG_DIRTY_PAGES,			\
	.guest_prepare		= { _PREPARE(_with_af),				\
				    _PREPARE(_access) },			\
	.guest_test		= _access,					\
	.guest_test_check	= { _CHECK(_with_af), _test_check,		\
				    guest_check_s1ptw_wr_in_dirty_log},		\
	.expected_events	= { 0 },					\
}

/*
 * Performs an access with holes punched in the data and PT regions, both
 * regions dirty logged, and userfaultfd handlers installed (the PT handler is
 * always uffd_pt_write_handler).
 */
#define TEST_UFFD_AND_DIRTY_LOG(_access, _with_af, _uffd_data_handler,		\
				_uffd_faults, _test_check)			\
{										\
	.name			= SCAT3(uffd_and_dirty_log, _access, _with_af),	\
	.mem_mark_cmd		= CMD_HOLE_DATA | CMD_HOLE_PT,			\
	.pt_memslot_flags	= KVM_MEM_LOG_DIRTY_PAGES,			\
	.data_memslot_flags	= KVM_MEM_LOG_DIRTY_PAGES,			\
	.guest_prepare		= { _PREPARE(_with_af),				\
				    _PREPARE(_access) },			\
	.guest_test		= _access,					\
	.guest_test_check	= { _CHECK(_with_af), _test_check },		\
	.uffd_pt_handler	= uffd_pt_write_handler,			\
	.uffd_data_handler	= _uffd_data_handler,				\
	.expected_events	= { .uffd_faults = _uffd_faults, },		\
}

#define TEST_RO_MEMSLOT(_access, _mmio_handler, _mmio_exits) \ 830 { \ 831 .name = SCAT3(ro_memslot, _access, _with_af), \ 832 .data_memslot_flags = KVM_MEM_READONLY, \ 833 .guest_prepare = { _PREPARE(_access) }, \ 834 .guest_test = _access, \ 835 .mmio_handler = _mmio_handler, \ 836 .expected_events = { .mmio_exits = _mmio_exits }, \ 837 } 838 839 #define TEST_RO_MEMSLOT_NO_SYNDROME(_access) \ 840 { \ 841 .name = SCAT2(ro_memslot_no_syndrome, _access), \ 842 .data_memslot_flags = KVM_MEM_READONLY, \ 843 .guest_test = _access, \ 844 .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \ 845 .expected_events = { .fail_vcpu_runs = 1 }, \ 846 } 847 848 #define TEST_RO_MEMSLOT_AND_DIRTY_LOG(_access, _mmio_handler, _mmio_exits, \ 849 _test_check) \ 850 { \ 851 .name = SCAT3(ro_memslot, _access, _with_af), \ 852 .data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \ 853 .pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ 854 .guest_prepare = { _PREPARE(_access) }, \ 855 .guest_test = _access, \ 856 .guest_test_check = { _test_check }, \ 857 .mmio_handler = _mmio_handler, \ 858 .expected_events = { .mmio_exits = _mmio_exits}, \ 859 } 860 861 #define TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(_access, _test_check) \ 862 { \ 863 .name = SCAT2(ro_memslot_no_syn_and_dlog, _access), \ 864 .data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \ 865 .pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ 866 .guest_test = _access, \ 867 .guest_test_check = { _test_check }, \ 868 .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \ 869 .expected_events = { .fail_vcpu_runs = 1 }, \ 870 } 871 872 #define TEST_RO_MEMSLOT_AND_UFFD(_access, _mmio_handler, _mmio_exits, \ 873 _uffd_data_handler, _uffd_faults) \ 874 { \ 875 .name = SCAT2(ro_memslot_uffd, _access), \ 876 .data_memslot_flags = KVM_MEM_READONLY, \ 877 .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \ 878 .guest_prepare = { _PREPARE(_access) }, \ 879 .guest_test = _access, \ 880 
.uffd_data_handler = _uffd_data_handler, \ 881 .uffd_pt_handler = uffd_pt_write_handler, \ 882 .mmio_handler = _mmio_handler, \ 883 .expected_events = { .mmio_exits = _mmio_exits, \ 884 .uffd_faults = _uffd_faults }, \ 885 } 886 887 #define TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(_access, _uffd_data_handler, \ 888 _uffd_faults) \ 889 { \ 890 .name = SCAT2(ro_memslot_no_syndrome, _access), \ 891 .data_memslot_flags = KVM_MEM_READONLY, \ 892 .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \ 893 .guest_test = _access, \ 894 .uffd_data_handler = _uffd_data_handler, \ 895 .uffd_pt_handler = uffd_pt_write_handler, \ 896 .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \ 897 .expected_events = { .fail_vcpu_runs = 1, \ 898 .uffd_faults = _uffd_faults }, \ 899 } 900 901 static struct test_desc tests[] = { 902 903 /* Check that HW is setting the Access Flag (AF) (sanity checks). */ 904 TEST_ACCESS(guest_read64, with_af, CMD_NONE), 905 TEST_ACCESS(guest_ld_preidx, with_af, CMD_NONE), 906 TEST_ACCESS(guest_cas, with_af, CMD_NONE), 907 TEST_ACCESS(guest_write64, with_af, CMD_NONE), 908 TEST_ACCESS(guest_st_preidx, with_af, CMD_NONE), 909 TEST_ACCESS(guest_dc_zva, with_af, CMD_NONE), 910 TEST_ACCESS(guest_exec, with_af, CMD_NONE), 911 912 /* 913 * Punch a hole in the data backing store, and then try multiple 914 * accesses: reads should rturn zeroes, and writes should 915 * re-populate the page. Moreover, the test also check that no 916 * exception was generated in the guest. Note that this 917 * reading/writing behavior is the same as reading/writing a 918 * punched page (with fallocate(FALLOC_FL_PUNCH_HOLE)) from 919 * userspace. 
920 */ 921 TEST_ACCESS(guest_read64, no_af, CMD_HOLE_DATA), 922 TEST_ACCESS(guest_cas, no_af, CMD_HOLE_DATA), 923 TEST_ACCESS(guest_ld_preidx, no_af, CMD_HOLE_DATA), 924 TEST_ACCESS(guest_write64, no_af, CMD_HOLE_DATA), 925 TEST_ACCESS(guest_st_preidx, no_af, CMD_HOLE_DATA), 926 TEST_ACCESS(guest_at, no_af, CMD_HOLE_DATA), 927 TEST_ACCESS(guest_dc_zva, no_af, CMD_HOLE_DATA), 928 929 /* 930 * Punch holes in the data and PT backing stores and mark them for 931 * userfaultfd handling. This should result in 2 faults: the access 932 * on the data backing store, and its respective S1 page table walk 933 * (S1PTW). 934 */ 935 TEST_UFFD(guest_read64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, 936 uffd_data_read_handler, uffd_pt_write_handler, 2), 937 /* no_af should also lead to a PT write. */ 938 TEST_UFFD(guest_read64, no_af, CMD_HOLE_DATA | CMD_HOLE_PT, 939 uffd_data_read_handler, uffd_pt_write_handler, 2), 940 /* Note how that cas invokes the read handler. */ 941 TEST_UFFD(guest_cas, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, 942 uffd_data_read_handler, uffd_pt_write_handler, 2), 943 /* 944 * Can't test guest_at with_af as it's IMPDEF whether the AF is set. 945 * The S1PTW fault should still be marked as a write. 
946 */ 947 TEST_UFFD(guest_at, no_af, CMD_HOLE_DATA | CMD_HOLE_PT, 948 uffd_data_read_handler, uffd_pt_write_handler, 1), 949 TEST_UFFD(guest_ld_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, 950 uffd_data_read_handler, uffd_pt_write_handler, 2), 951 TEST_UFFD(guest_write64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, 952 uffd_data_write_handler, uffd_pt_write_handler, 2), 953 TEST_UFFD(guest_dc_zva, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, 954 uffd_data_write_handler, uffd_pt_write_handler, 2), 955 TEST_UFFD(guest_st_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, 956 uffd_data_write_handler, uffd_pt_write_handler, 2), 957 TEST_UFFD(guest_exec, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, 958 uffd_data_read_handler, uffd_pt_write_handler, 2), 959 960 /* 961 * Try accesses when the data and PT memory regions are both 962 * tracked for dirty logging. 963 */ 964 TEST_DIRTY_LOG(guest_read64, with_af, guest_check_no_write_in_dirty_log), 965 /* no_af should also lead to a PT write. */ 966 TEST_DIRTY_LOG(guest_read64, no_af, guest_check_no_write_in_dirty_log), 967 TEST_DIRTY_LOG(guest_ld_preidx, with_af, guest_check_no_write_in_dirty_log), 968 TEST_DIRTY_LOG(guest_at, no_af, guest_check_no_write_in_dirty_log), 969 TEST_DIRTY_LOG(guest_exec, with_af, guest_check_no_write_in_dirty_log), 970 TEST_DIRTY_LOG(guest_write64, with_af, guest_check_write_in_dirty_log), 971 TEST_DIRTY_LOG(guest_cas, with_af, guest_check_write_in_dirty_log), 972 TEST_DIRTY_LOG(guest_dc_zva, with_af, guest_check_write_in_dirty_log), 973 TEST_DIRTY_LOG(guest_st_preidx, with_af, guest_check_write_in_dirty_log), 974 975 /* 976 * Access when the data and PT memory regions are both marked for 977 * dirty logging and UFFD at the same time. The expected result is 978 * that writes should mark the dirty log and trigger a userfaultfd 979 * write fault. Reads/execs should result in a read userfaultfd 980 * fault, and nothing in the dirty log. Any S1PTW should result in 981 * a write in the dirty log and a userfaultfd write. 
982 */ 983 TEST_UFFD_AND_DIRTY_LOG(guest_read64, with_af, uffd_data_read_handler, 2, 984 guest_check_no_write_in_dirty_log), 985 /* no_af should also lead to a PT write. */ 986 TEST_UFFD_AND_DIRTY_LOG(guest_read64, no_af, uffd_data_read_handler, 2, 987 guest_check_no_write_in_dirty_log), 988 TEST_UFFD_AND_DIRTY_LOG(guest_ld_preidx, with_af, uffd_data_read_handler, 989 2, guest_check_no_write_in_dirty_log), 990 TEST_UFFD_AND_DIRTY_LOG(guest_at, with_af, 0, 1, 991 guest_check_no_write_in_dirty_log), 992 TEST_UFFD_AND_DIRTY_LOG(guest_exec, with_af, uffd_data_read_handler, 2, 993 guest_check_no_write_in_dirty_log), 994 TEST_UFFD_AND_DIRTY_LOG(guest_write64, with_af, uffd_data_write_handler, 995 2, guest_check_write_in_dirty_log), 996 TEST_UFFD_AND_DIRTY_LOG(guest_cas, with_af, uffd_data_read_handler, 2, 997 guest_check_write_in_dirty_log), 998 TEST_UFFD_AND_DIRTY_LOG(guest_dc_zva, with_af, uffd_data_write_handler, 999 2, guest_check_write_in_dirty_log), 1000 TEST_UFFD_AND_DIRTY_LOG(guest_st_preidx, with_af, 1001 uffd_data_write_handler, 2, 1002 guest_check_write_in_dirty_log), 1003 1004 /* 1005 * Try accesses when the data memory region is marked read-only 1006 * (with KVM_MEM_READONLY). Writes with a syndrome result in an 1007 * MMIO exit, writes with no syndrome (e.g., CAS) result in a 1008 * failed vcpu run, and reads/execs with and without syndroms do 1009 * not fault. 1010 */ 1011 TEST_RO_MEMSLOT(guest_read64, 0, 0), 1012 TEST_RO_MEMSLOT(guest_ld_preidx, 0, 0), 1013 TEST_RO_MEMSLOT(guest_at, 0, 0), 1014 TEST_RO_MEMSLOT(guest_exec, 0, 0), 1015 TEST_RO_MEMSLOT(guest_write64, mmio_on_test_gpa_handler, 1), 1016 TEST_RO_MEMSLOT_NO_SYNDROME(guest_dc_zva), 1017 TEST_RO_MEMSLOT_NO_SYNDROME(guest_cas), 1018 TEST_RO_MEMSLOT_NO_SYNDROME(guest_st_preidx), 1019 1020 /* 1021 * Access when both the data region is both read-only and marked 1022 * for dirty logging at the same time. The expected result is that 1023 * for writes there should be no write in the dirty log. 
The 1024 * readonly handling is the same as if the memslot was not marked 1025 * for dirty logging: writes with a syndrome result in an MMIO 1026 * exit, and writes with no syndrome result in a failed vcpu run. 1027 */ 1028 TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_read64, 0, 0, 1029 guest_check_no_write_in_dirty_log), 1030 TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_ld_preidx, 0, 0, 1031 guest_check_no_write_in_dirty_log), 1032 TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_at, 0, 0, 1033 guest_check_no_write_in_dirty_log), 1034 TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_exec, 0, 0, 1035 guest_check_no_write_in_dirty_log), 1036 TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_write64, mmio_on_test_gpa_handler, 1037 1, guest_check_no_write_in_dirty_log), 1038 TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_dc_zva, 1039 guest_check_no_write_in_dirty_log), 1040 TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_cas, 1041 guest_check_no_write_in_dirty_log), 1042 TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_st_preidx, 1043 guest_check_no_write_in_dirty_log), 1044 1045 /* 1046 * Access when the data region is both read-only and punched with 1047 * holes tracked with userfaultfd. The expected result is the 1048 * union of both userfaultfd and read-only behaviors. For example, 1049 * write accesses result in a userfaultfd write fault and an MMIO 1050 * exit. Writes with no syndrome result in a failed vcpu run and 1051 * no userfaultfd write fault. Reads result in userfaultfd getting 1052 * triggered. 
1053 */ 1054 TEST_RO_MEMSLOT_AND_UFFD(guest_read64, 0, 0, 1055 uffd_data_read_handler, 2), 1056 TEST_RO_MEMSLOT_AND_UFFD(guest_ld_preidx, 0, 0, 1057 uffd_data_read_handler, 2), 1058 TEST_RO_MEMSLOT_AND_UFFD(guest_at, 0, 0, 1059 uffd_no_handler, 1), 1060 TEST_RO_MEMSLOT_AND_UFFD(guest_exec, 0, 0, 1061 uffd_data_read_handler, 2), 1062 TEST_RO_MEMSLOT_AND_UFFD(guest_write64, mmio_on_test_gpa_handler, 1, 1063 uffd_data_write_handler, 2), 1064 TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_cas, 1065 uffd_data_read_handler, 2), 1066 TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_dc_zva, 1067 uffd_no_handler, 1), 1068 TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_st_preidx, 1069 uffd_no_handler, 1), 1070 1071 { 0 } 1072 }; 1073 1074 static void for_each_test_and_guest_mode(enum vm_mem_backing_src_type src_type) 1075 { 1076 struct test_desc *t; 1077 1078 for (t = &tests[0]; t->name; t++) { 1079 if (t->skip) 1080 continue; 1081 1082 struct test_params p = { 1083 .src_type = src_type, 1084 .test_desc = t, 1085 }; 1086 1087 for_each_guest_mode(run_test, &p); 1088 } 1089 } 1090 1091 int main(int argc, char *argv[]) 1092 { 1093 enum vm_mem_backing_src_type src_type; 1094 int opt; 1095 1096 setbuf(stdout, NULL); 1097 1098 src_type = DEFAULT_VM_MEM_SRC; 1099 1100 while ((opt = getopt(argc, argv, "hm:s:")) != -1) { 1101 switch (opt) { 1102 case 'm': 1103 guest_modes_cmdline(optarg); 1104 break; 1105 case 's': 1106 src_type = parse_backing_src_type(optarg); 1107 break; 1108 case 'h': 1109 default: 1110 help(argv[0]); 1111 exit(0); 1112 } 1113 } 1114 1115 for_each_test_and_guest_mode(src_type); 1116 return 0; 1117 } 1118