// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright Intel Corporation, 2023
 *
 * Author: Chao Peng <chao.p.peng@linux.intel.com>
 */
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <stdio.h>
#include <fcntl.h>

#include <linux/bitmap.h>
#include <linux/falloc.h>
#include <linux/sizes.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>

#include "kvm_util.h"
#include "numaif.h"
#include "test_util.h"
#include "ucall_common.h"

static size_t page_size;

static void test_file_read_write(int fd, size_t total_size)
{
	char buf[64];

	TEST_ASSERT(read(fd, buf, sizeof(buf)) < 0,
		    "read on a guest_mem fd should fail");
	TEST_ASSERT(write(fd, buf, sizeof(buf)) < 0,
		    "write on a guest_mem fd should fail");
	TEST_ASSERT(pread(fd, buf, sizeof(buf), 0) < 0,
		    "pread on a guest_mem fd should fail");
	TEST_ASSERT(pwrite(fd, buf, sizeof(buf), 0) < 0,
		    "pwrite on a guest_mem fd should fail");
}

static void test_mmap_cow(int fd, size_t size)
{
	void *mem;

	mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	TEST_ASSERT(mem == MAP_FAILED, "Copy-on-write not allowed by guest_memfd.");
}

static void test_mmap_supported(int fd, size_t total_size)
{
	const char val = 0xaa;
	char *mem;
	size_t i;
	int ret;

	mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);

	memset(mem, val, total_size);
	for (i = 0; i < total_size; i++)
		TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);

	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0,
			page_size);
	TEST_ASSERT(!ret, "fallocate the first page should succeed.");

	for (i = 0; i < page_size; i++)
		TEST_ASSERT_EQ(READ_ONCE(mem[i]), 0x00);
	for (; i < total_size; i++)
		TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);

	memset(mem, val, page_size);
	for (i = 0; i < total_size; i++)
		TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);

	kvm_munmap(mem, total_size);
}

static void test_mbind(int fd, size_t total_size)
{
	const unsigned long nodemask_0 = 1; /* nid: 0 */
	unsigned long nodemask = 0;
	unsigned long maxnode = 8;
	int policy;
	char *mem;
	int ret;

	if (!is_multi_numa_node_system())
		return;

	mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);

	/* Test MPOL_INTERLEAVE policy */
	kvm_mbind(mem, page_size * 2, MPOL_INTERLEAVE, &nodemask_0, maxnode, 0);
	kvm_get_mempolicy(&policy, &nodemask, maxnode, mem, MPOL_F_ADDR);
	TEST_ASSERT(policy == MPOL_INTERLEAVE && nodemask == nodemask_0,
		    "Wanted MPOL_INTERLEAVE (%u) and nodemask 0x%lx, got %u and 0x%lx",
		    MPOL_INTERLEAVE, nodemask_0, policy, nodemask);

	/* Test basic MPOL_BIND policy */
	kvm_mbind(mem + page_size * 2, page_size * 2, MPOL_BIND, &nodemask_0, maxnode, 0);
	kvm_get_mempolicy(&policy, &nodemask, maxnode, mem + page_size * 2, MPOL_F_ADDR);
	TEST_ASSERT(policy == MPOL_BIND && nodemask == nodemask_0,
		    "Wanted MPOL_BIND (%u) and nodemask 0x%lx, got %u and 0x%lx",
		    MPOL_BIND, nodemask_0, policy, nodemask);

	/* Test MPOL_DEFAULT policy */
	kvm_mbind(mem, total_size, MPOL_DEFAULT, NULL, 0, 0);
	kvm_get_mempolicy(&policy, &nodemask, maxnode, mem, MPOL_F_ADDR);
	TEST_ASSERT(policy == MPOL_DEFAULT && !nodemask,
		    "Wanted MPOL_DEFAULT (%u) and nodemask 0x0, got %u and 0x%lx",
		    MPOL_DEFAULT, policy, nodemask);

	/* Test with invalid policy */
	ret = mbind(mem, page_size, 999, &nodemask_0, maxnode, 0);
	TEST_ASSERT(ret == -1 && errno == EINVAL,
		    "mbind with invalid policy should fail with EINVAL");

	kvm_munmap(mem, total_size);
}

static void test_numa_allocation(int fd, size_t total_size)
{
	unsigned long node0_mask = 1; /* Node 0 */
	unsigned long node1_mask = 2; /* Node 1 */
	unsigned long maxnode = 8;
	void *pages[4];
	int status[4];
	char *mem;
	int i;

	if (!is_multi_numa_node_system())
		return;

	mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);

	for (i = 0; i < 4; i++)
		pages[i] = (char *)mem + page_size * i;

	/* Set NUMA policy after allocation */
	memset(mem, 0xaa, page_size);
	kvm_mbind(pages[0], page_size, MPOL_BIND, &node0_mask, maxnode, 0);
	kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, page_size);

	/* Set NUMA policy before allocation */
	kvm_mbind(pages[0], page_size * 2, MPOL_BIND, &node1_mask, maxnode, 0);
	kvm_mbind(pages[2], page_size * 2, MPOL_BIND, &node0_mask, maxnode, 0);
	memset(mem, 0xaa, total_size);

	/* Validate if pages are allocated on specified NUMA nodes */
	kvm_move_pages(0, 4, pages, NULL, status, 0);
	TEST_ASSERT(status[0] == 1, "Expected page 0 on node 1, got it on node %d", status[0]);
	TEST_ASSERT(status[1] == 1, "Expected page 1 on node 1, got it on node %d", status[1]);
	TEST_ASSERT(status[2] == 0, "Expected page 2 on node 0, got it on node %d", status[2]);
	TEST_ASSERT(status[3] == 0, "Expected page 3 on node 0, got it on node %d", status[3]);

	/* Punch hole for all pages */
	kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, total_size);

	/* Change NUMA policy nodes and reallocate */
	kvm_mbind(pages[0], page_size * 2, MPOL_BIND, &node0_mask, maxnode, 0);
	kvm_mbind(pages[2], page_size * 2, MPOL_BIND, &node1_mask, maxnode, 0);
	memset(mem, 0xaa, total_size);

	kvm_move_pages(0, 4, pages, NULL, status, 0);
	TEST_ASSERT(status[0] == 0, "Expected page 0 on node 0, got it on node %d", status[0]);
	TEST_ASSERT(status[1] == 0, "Expected page 1 on node 0, got it on node %d", status[1]);
	TEST_ASSERT(status[2] == 1, "Expected page 2 on node 1, got it on node %d", status[2]);
	TEST_ASSERT(status[3] == 1, "Expected page 3 on node 1, got it on node %d", status[3]);

	kvm_munmap(mem, total_size);
}

static void test_fault_sigbus(int fd, size_t accessible_size, size_t map_size)
{
	const char val = 0xaa;
	char *mem;
	size_t i;

	mem = kvm_mmap(map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);

	TEST_EXPECT_SIGBUS(memset(mem, val, map_size));
	TEST_EXPECT_SIGBUS((void)READ_ONCE(mem[accessible_size]));

	for (i = 0; i < accessible_size; i++)
		TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);

	kvm_munmap(mem, map_size);
}

static void test_fault_overflow(int fd, size_t total_size)
{
	test_fault_sigbus(fd, total_size, total_size * 4);
}

static void test_fault_private(int fd, size_t total_size)
{
	test_fault_sigbus(fd, 0, total_size);
}

static void test_mmap_not_supported(int fd, size_t total_size)
{
	char *mem;

	mem = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	TEST_ASSERT_EQ(mem, MAP_FAILED);

	mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	TEST_ASSERT_EQ(mem, MAP_FAILED);
}

static void test_file_size(int fd, size_t total_size)
{
	struct stat sb;
	int ret;

	ret = fstat(fd, &sb);
	TEST_ASSERT(!ret, "fstat should succeed");
	TEST_ASSERT_EQ(sb.st_size, total_size);
	TEST_ASSERT_EQ(sb.st_blksize, page_size);
}

static void test_fallocate(int fd, size_t total_size)
{
	int ret;

	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, total_size);
	TEST_ASSERT(!ret, "fallocate with aligned offset and size should succeed");

	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
			page_size - 1, page_size);
	TEST_ASSERT(ret, "fallocate with unaligned offset should fail");

	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, total_size, page_size);
	TEST_ASSERT(ret, "fallocate beginning at total_size should fail");

	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, total_size + page_size, page_size);
	TEST_ASSERT(ret, "fallocate beginning after total_size should fail");

	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
			total_size, page_size);
	TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) at total_size should succeed");

	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
			total_size + page_size, page_size);
	TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) after total_size should succeed");

	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
			page_size, page_size - 1);
	TEST_ASSERT(ret, "fallocate with unaligned size should fail");

	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
			page_size, page_size);
	TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) with aligned offset and size should succeed");

	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, page_size, page_size);
	TEST_ASSERT(!ret, "fallocate to restore punched hole should succeed");
}

static void test_invalid_punch_hole(int fd, size_t total_size)
{
	struct {
		off_t offset;
		off_t len;
	} testcases[] = {
		{0, 1},
		{0, page_size - 1},
		{0, page_size + 1},

		{1, 1},
		{1, page_size - 1},
		{1, page_size},
		{1, page_size + 1},

		{page_size, 1},
		{page_size, page_size - 1},
		{page_size, page_size + 1},
	};
	int ret, i;

	for (i = 0; i < ARRAY_SIZE(testcases); i++) {
		ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
				testcases[i].offset, testcases[i].len);
		TEST_ASSERT(ret == -1 && errno == EINVAL,
			    "PUNCH_HOLE with !PAGE_SIZE offset (%lx) and/or length (%lx) should fail",
			    testcases[i].offset, testcases[i].len);
	}
}

static void test_create_guest_memfd_invalid_sizes(struct kvm_vm *vm,
						  uint64_t guest_memfd_flags)
{
	size_t size;
	int fd;

	for (size = 1; size < page_size; size++) {
		fd = __vm_create_guest_memfd(vm, size, guest_memfd_flags);
		TEST_ASSERT(fd < 0 && errno == EINVAL,
			    "guest_memfd() with non-page-aligned page size '0x%lx' should fail with EINVAL",
			    size);
	}
}

static void test_create_guest_memfd_multiple(struct kvm_vm *vm)
{
	int fd1, fd2, ret;
	struct stat st1, st2;

	fd1 = __vm_create_guest_memfd(vm, page_size, 0);
	TEST_ASSERT(fd1 != -1, "memfd creation should succeed");

	ret = fstat(fd1, &st1);
	TEST_ASSERT(ret != -1, "memfd fstat should succeed");
	TEST_ASSERT(st1.st_size == page_size, "memfd st_size should match requested size");

	fd2 = __vm_create_guest_memfd(vm, page_size * 2, 0);
	TEST_ASSERT(fd2 != -1, "memfd creation should succeed");

	ret = fstat(fd2, &st2);
	TEST_ASSERT(ret != -1, "memfd fstat should succeed");
	TEST_ASSERT(st2.st_size == page_size * 2, "second memfd st_size should match requested size");

	ret = fstat(fd1, &st1);
	TEST_ASSERT(ret != -1, "memfd fstat should succeed");
	TEST_ASSERT(st1.st_size == page_size, "first memfd st_size should still match requested size");
	TEST_ASSERT(st1.st_ino != st2.st_ino, "different memfd should have different inode numbers");

	close(fd2);
	close(fd1);
}

static void test_guest_memfd_flags(struct kvm_vm *vm)
{
	uint64_t valid_flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS);
	uint64_t flag;
	int fd;

	for (flag = BIT(0); flag; flag <<= 1) {
		fd = __vm_create_guest_memfd(vm, page_size, flag);
		if (flag & valid_flags) {
			TEST_ASSERT(fd >= 0,
				    "guest_memfd() with flag '0x%lx' should succeed",
				    flag);
			close(fd);
		} else {
			TEST_ASSERT(fd < 0 && errno == EINVAL,
				    "guest_memfd() with flag '0x%lx' should fail with EINVAL",
				    flag);
		}
	}
}

#define gmem_test(__test, __vm, __flags)				\
do {									\
	int fd = vm_create_guest_memfd(__vm, page_size * 4, __flags);	\
									\
	test_##__test(fd, page_size * 4);				\
	close(fd);							\
} while (0)

static void __test_guest_memfd(struct kvm_vm *vm, uint64_t flags)
{
	test_create_guest_memfd_multiple(vm);
	test_create_guest_memfd_invalid_sizes(vm, flags);

	gmem_test(file_read_write, vm, flags);

	if (flags & GUEST_MEMFD_FLAG_MMAP) {
		if (flags & GUEST_MEMFD_FLAG_INIT_SHARED) {
			gmem_test(mmap_supported, vm, flags);
			gmem_test(fault_overflow, vm, flags);
			gmem_test(numa_allocation, vm, flags);
		} else {
			gmem_test(fault_private, vm, flags);
		}

		gmem_test(mmap_cow, vm, flags);
		gmem_test(mbind, vm, flags);
	} else {
		gmem_test(mmap_not_supported, vm, flags);
	}

	gmem_test(file_size, vm, flags);
	gmem_test(fallocate, vm, flags);
	gmem_test(invalid_punch_hole, vm, flags);
}

static void test_guest_memfd(unsigned long vm_type)
{
	struct kvm_vm *vm = vm_create_barebones_type(vm_type);
	uint64_t flags;

	test_guest_memfd_flags(vm);

	__test_guest_memfd(vm, 0);

	flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS);
	if (flags & GUEST_MEMFD_FLAG_MMAP)
		__test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP);

	/* MMAP should always be supported if INIT_SHARED is supported. */
	if (flags & GUEST_MEMFD_FLAG_INIT_SHARED)
		__test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP |
				       GUEST_MEMFD_FLAG_INIT_SHARED);

	kvm_vm_free(vm);
}

static void guest_code(uint8_t *mem, uint64_t size)
{
	size_t i;

	for (i = 0; i < size; i++)
		__GUEST_ASSERT(mem[i] == 0xaa,
			       "Guest expected 0xaa at offset %lu, got 0x%x", i, mem[i]);

	memset(mem, 0xff, size);
	GUEST_DONE();
}

static void test_guest_memfd_guest(void)
{
	/*
	 * Skip the first 4gb and slot0.  slot0 maps <1gb and is used to back
	 * the guest's code, stack, and page tables, and low memory contains
	 * the PCI hole and other MMIO regions that need to be avoided.
	 */
	const uint64_t gpa = SZ_4G;
	const int slot = 1;

	struct kvm_vcpu *vcpu;
	struct kvm_vm *vm;
	uint8_t *mem;
	size_t size;
	int fd, i;

	if (!kvm_check_cap(KVM_CAP_GUEST_MEMFD_FLAGS))
		return;

	vm = __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, &vcpu, 1, guest_code);

	TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_MMAP,
		    "Default VM type should support MMAP, supported flags = 0x%x",
		    vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS));
	TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_INIT_SHARED,
		    "Default VM type should support INIT_SHARED, supported flags = 0x%x",
		    vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS));

	size = vm->page_size;
	fd = vm_create_guest_memfd(vm, size, GUEST_MEMFD_FLAG_MMAP |
					     GUEST_MEMFD_FLAG_INIT_SHARED);
	vm_set_user_memory_region2(vm, slot, KVM_MEM_GUEST_MEMFD, gpa, size, NULL, fd, 0);

	mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
	memset(mem, 0xaa, size);
	kvm_munmap(mem, size);

	virt_pg_map(vm, gpa, gpa);
	vcpu_args_set(vcpu, 2, gpa, size);
	vcpu_run(vcpu);

	TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);

	mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
	for (i = 0; i < size; i++)
		TEST_ASSERT_EQ(mem[i], 0xff);

	close(fd);
	kvm_vm_free(vm);
}

int main(int argc, char *argv[])
{
	unsigned long vm_types, vm_type;

	TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD));

	page_size = getpagesize();

	/*
	 * Not all architectures support KVM_CAP_VM_TYPES.  However, those that
	 * support guest_memfd have that support for the default VM type.
	 */
	vm_types = kvm_check_cap(KVM_CAP_VM_TYPES);
	if (!vm_types)
		vm_types = BIT(VM_TYPE_DEFAULT);

	for_each_set_bit(vm_type, &vm_types, BITS_PER_TYPE(vm_types))
		test_guest_memfd(vm_type);

	test_guest_memfd_guest();
}