// SPDX-License-Identifier: GPL-2.0
/*
 * KVM demand paging test
 * Adapted from dirty_log_test.c
 *
 * Copyright (C) 2018, Red Hat, Inc.
 * Copyright (C) 2019, Google, Inc.
 */
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <pthread.h>
#include <linux/userfaultfd.h>
#include <sys/syscall.h>

#include "kvm_util.h"
#include "test_util.h"
#include "memstress.h"
#include "guest_modes.h"
#include "ucall_common.h"
#include "userfaultfd_util.h"

#ifdef __NR_userfaultfd

static int nr_vcpus = 1;
static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;

static size_t demand_paging_size;
static char *guest_data_prototype;

static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
{
	struct kvm_vcpu *vcpu = vcpu_args->vcpu;
	int vcpu_idx = vcpu_args->vcpu_idx;
	struct kvm_run *run = vcpu->run;
	struct timespec start;
	struct timespec ts_diff;
	int ret;

	clock_gettime(CLOCK_MONOTONIC, &start);

	/* Let the guest access its memory */
	ret = _vcpu_run(vcpu);
	TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret);
	if (get_ucall(vcpu, NULL) != UCALL_SYNC) {
		TEST_ASSERT(false,
			    "Invalid guest sync status: exit_reason=%s",
			    exit_reason_str(run->exit_reason));
	}

	ts_diff = timespec_elapsed(start);
	PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_idx,
		       ts_diff.tv_sec, ts_diff.tv_nsec);
}

static int handle_uffd_page_request(int uffd_mode, int uffd,
				    struct uffd_msg *msg)
{
	pid_t tid = syscall(__NR_gettid);
	uint64_t addr = msg->arg.pagefault.address;
	struct timespec start;
	struct timespec ts_diff;
	int r;

	clock_gettime(CLOCK_MONOTONIC, &start);

	if (uffd_mode == UFFDIO_REGISTER_MODE_MISSING) {
		struct uffdio_copy copy;

		copy.src = (uint64_t)guest_data_prototype;
		copy.dst = addr;
		copy.len = demand_paging_size;
		copy.mode = 0;

		r = ioctl(uffd, UFFDIO_COPY, &copy);
		/*
		 * When multiple vCPU threads fault on a single page and there
		 * are multiple readers for the UFFD, at least one of the
		 * UFFDIO_COPYs will fail with EEXIST: handle that case without
		 * signaling an error.
		 *
		 * Note that this also suppresses any EEXISTs occurring from,
		 * e.g., the first UFFDIO_COPY/CONTINUEs on a page. That never
		 * happens here, but a realistic VMM might potentially maintain
		 * some external state to correctly surface EEXISTs to userspace
		 * (or prevent duplicate COPY/CONTINUEs in the first place).
		 */
		if (r == -1 && errno != EEXIST) {
			pr_info("Failed UFFDIO_COPY in 0x%lx from thread %d, errno = %d\n",
				addr, tid, errno);
			return r;
		}
	} else if (uffd_mode == UFFDIO_REGISTER_MODE_MINOR) {
		struct uffdio_continue cont = {0};

		cont.range.start = addr;
		cont.range.len = demand_paging_size;

		r = ioctl(uffd, UFFDIO_CONTINUE, &cont);
		/*
		 * When multiple vCPU threads fault on a single page and there
		 * are multiple readers for the UFFD, at least one of the
		 * UFFDIO_CONTINUEs will fail with EEXIST: handle that case
		 * without signaling an error.
		 *
		 * Note that this also suppresses any EEXISTs occurring from,
		 * e.g., the first UFFDIO_COPY/CONTINUEs on a page. That never
		 * happens here, but a realistic VMM might potentially maintain
		 * some external state to correctly surface EEXISTs to userspace
		 * (or prevent duplicate COPY/CONTINUEs in the first place).
		 */
		if (r == -1 && errno != EEXIST) {
			pr_info("Failed UFFDIO_CONTINUE in 0x%lx, thread %d, errno = %d\n",
				addr, tid, errno);
			return r;
		}
	} else {
		TEST_FAIL("Invalid uffd mode %d", uffd_mode);
	}

	ts_diff = timespec_elapsed(start);

	PER_PAGE_DEBUG("UFFD page-in %d \t%ld ns\n", tid,
		       timespec_to_ns(ts_diff));
	PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n",
		       demand_paging_size, addr, tid);

	return 0;
}
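
/*
 * Illustrative sketch only, not used by this test: the comments above note
 * that a realistic VMM might maintain external state so that only unexpected
 * EEXISTs are surfaced. One hypothetical way to do that is a per-page
 * "installed" bitmap keyed off the faulting address; the names below
 * (page_installed, region_base, page_shift) are assumptions for the sketch,
 * not part of this test or of the userfaultfd API.
 *
 *	static unsigned long *page_installed;	// one bit per demand-paged page
 *	static uint64_t region_base;		// base HVA of the registered range
 *	static unsigned int page_shift;		// log2 of demand_paging_size
 *
 *	static bool mark_page_installed(uint64_t addr)
 *	{
 *		uint64_t idx = (addr - region_base) >> page_shift;
 *		unsigned long mask = 1UL << (idx % (8 * sizeof(unsigned long)));
 *		unsigned long old;
 *
 *		// Atomically set the bit; 'old' says whether it was already set.
 *		old = __sync_fetch_and_or(&page_installed[idx / (8 * sizeof(unsigned long))],
 *					  mask);
 *		return !(old & mask);
 *	}
 *
 * A handler using this would treat EEXIST as a real error whenever
 * mark_page_installed() returned true for the faulting page, since no other
 * thread could have installed that page first.
 */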

struct test_params {
	int uffd_mode;
	bool single_uffd;
	useconds_t uffd_delay;
	int readers_per_uffd;
	enum vm_mem_backing_src_type src_type;
	bool partition_vcpu_memory_access;
};

static void prefault_mem(void *alias, uint64_t len)
{
	size_t p;

	TEST_ASSERT(alias != NULL, "Alias required for minor faults");
	for (p = 0; p < (len / demand_paging_size); ++p) {
		memcpy(alias + (p * demand_paging_size),
		       guest_data_prototype, demand_paging_size);
	}
}
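
/*
 * Drive a single run of the test for the given guest mode: create the
 * memstress VM, build the data pattern used to resolve faults, prefault the
 * backing alias when testing MINOR faults, start the userfaultfd reader
 * threads (one fd per vCPU region, or a single fd with -a), run the vCPU
 * workers until they finish their accesses, then report per-vCPU and overall
 * demand paging rates.
 */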
static void run_test(enum vm_guest_mode mode, void *arg)
{
	struct memstress_vcpu_args *vcpu_args;
	struct test_params *p = arg;
	struct uffd_desc **uffd_descs = NULL;
	uint64_t uffd_region_size;
	struct timespec start;
	struct timespec ts_diff;
	double vcpu_paging_rate;
	struct kvm_vm *vm;
	int i, num_uffds = 0;

	vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
				 p->src_type, p->partition_vcpu_memory_access);

	demand_paging_size = get_backing_src_pagesz(p->src_type);

	guest_data_prototype = malloc(demand_paging_size);
	TEST_ASSERT(guest_data_prototype,
		    "Failed to allocate buffer for guest data pattern");
	memset(guest_data_prototype, 0xAB, demand_paging_size);

	if (p->uffd_mode == UFFDIO_REGISTER_MODE_MINOR) {
		num_uffds = p->single_uffd ? 1 : nr_vcpus;
		for (i = 0; i < num_uffds; i++) {
			vcpu_args = &memstress_args.vcpu_args[i];
			prefault_mem(addr_gpa2alias(vm, vcpu_args->gpa),
				     vcpu_args->pages * memstress_args.guest_page_size);
		}
	}

	if (p->uffd_mode) {
		num_uffds = p->single_uffd ? 1 : nr_vcpus;
		uffd_region_size = nr_vcpus * guest_percpu_mem_size / num_uffds;

		uffd_descs = malloc(num_uffds * sizeof(struct uffd_desc *));
		TEST_ASSERT(uffd_descs, "Memory allocation failed");
		for (i = 0; i < num_uffds; i++) {
			struct memstress_vcpu_args *vcpu_args;
			void *vcpu_hva;

			vcpu_args = &memstress_args.vcpu_args[i];

			/* Cache the host addresses of the region */
			vcpu_hva = addr_gpa2hva(vm, vcpu_args->gpa);
			/*
			 * Set up user fault fd to handle demand paging
			 * requests.
			 */
			uffd_descs[i] = uffd_setup_demand_paging(
				p->uffd_mode, p->uffd_delay, vcpu_hva,
				uffd_region_size,
				p->readers_per_uffd,
				&handle_uffd_page_request);
		}
	}

	pr_info("Finished creating vCPUs and starting uffd threads\n");

	clock_gettime(CLOCK_MONOTONIC, &start);
	memstress_start_vcpu_threads(nr_vcpus, vcpu_worker);
	pr_info("Started all vCPUs\n");

	memstress_join_vcpu_threads(nr_vcpus);
	ts_diff = timespec_elapsed(start);
	pr_info("All vCPU threads joined\n");

	if (p->uffd_mode) {
		/* Tell the user fault fd handler threads to quit */
		for (i = 0; i < num_uffds; i++)
			uffd_stop_demand_paging(uffd_descs[i]);
	}

	pr_info("Total guest execution time:\t%ld.%.9lds\n",
		ts_diff.tv_sec, ts_diff.tv_nsec);

	vcpu_paging_rate = memstress_args.vcpu_args[0].pages /
			   ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / NSEC_PER_SEC);
	pr_info("Per-vcpu demand paging rate:\t%f pgs/sec/vcpu\n",
		vcpu_paging_rate);
	pr_info("Overall demand paging rate:\t%f pgs/sec\n",
		vcpu_paging_rate * nr_vcpus);

	memstress_destroy_vm(vm);

	free(guest_data_prototype);
	if (p->uffd_mode)
		free(uffd_descs);
}

static void help(char *name)
{
	puts("");
	printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-a]\n"
	       "          [-d uffd_delay_usec] [-r readers_per_uffd] [-b memory]\n"
	       "          [-s type] [-v vcpus] [-c cpu_list] [-o]\n", name);
	guest_modes_help();
	printf(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n"
	       "     UFFD registration mode: 'MISSING' or 'MINOR'.\n");
	kvm_print_vcpu_pinning_help();
	printf(" -a: Use a single userfaultfd for all of guest memory, instead of\n"
	       "     creating one for each region paged by a unique vCPU.\n"
	       "     Set implicitly with -o; has no effect without -u.\n");
	printf(" -d: add a delay in usec to the User Fault\n"
	       "     FD handler to simulate demand paging\n"
	       "     overheads. Ignored without -u.\n");
	printf(" -r: Set the number of reader threads per uffd.\n");
	printf(" -b: specify the size of the memory region which should be\n"
	       "     demand paged by each vCPU. e.g. 10M or 3G.\n"
	       "     Default: 1G\n");
	backing_src_help("-s");
	printf(" -v: specify the number of vCPUs to run.\n");
	printf(" -o: Overlap guest memory accesses instead of partitioning\n"
	       "     them into a separate region of memory for each vCPU.\n");
	puts("");
	exit(0);
}
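
/*
 * Example invocations, assuming the binary name matches this file
 * (demand_paging_test); adjust the path to wherever the selftest is built:
 *
 *	demand_paging_test				# no userfaultfd, just touch memory
 *	demand_paging_test -u MISSING -v 4 -b 512M	# MISSING faults, 4 vCPUs, 512M each
 *	demand_paging_test -u MINOR -s shmem -o -r 8	# MINOR faults on shmem, overlapping
 *							# accesses, 8 readers on one uffd
 */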
int main(int argc, char *argv[])
{
	int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
	const char *cpulist = NULL;
	struct test_params p = {
		.src_type = DEFAULT_VM_MEM_SRC,
		.partition_vcpu_memory_access = true,
		.readers_per_uffd = 1,
		.single_uffd = false,
	};
	int opt;

	guest_modes_append_default();

	while ((opt = getopt(argc, argv, "ahom:u:d:b:s:v:c:r:")) != -1) {
		switch (opt) {
		case 'm':
			guest_modes_cmdline(optarg);
			break;
		case 'u':
			if (!strcmp("MISSING", optarg))
				p.uffd_mode = UFFDIO_REGISTER_MODE_MISSING;
			else if (!strcmp("MINOR", optarg))
				p.uffd_mode = UFFDIO_REGISTER_MODE_MINOR;
			TEST_ASSERT(p.uffd_mode, "UFFD mode must be 'MISSING' or 'MINOR'.");
			break;
		case 'a':
			p.single_uffd = true;
			break;
		case 'd':
			p.uffd_delay = strtoul(optarg, NULL, 0);
			TEST_ASSERT(p.uffd_delay >= 0, "A negative UFFD delay is not supported.");
			break;
		case 'b':
			guest_percpu_mem_size = parse_size(optarg);
			break;
		case 's':
			p.src_type = parse_backing_src_type(optarg);
			break;
		case 'v':
			nr_vcpus = atoi_positive("Number of vCPUs", optarg);
			TEST_ASSERT(nr_vcpus <= max_vcpus,
				    "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
			break;
		case 'c':
			cpulist = optarg;
			break;
		case 'o':
			p.partition_vcpu_memory_access = false;
			p.single_uffd = true;
			break;
		case 'r':
			p.readers_per_uffd = atoi(optarg);
			TEST_ASSERT(p.readers_per_uffd >= 1,
				    "Invalid number of readers per uffd %d: must be >=1",
				    p.readers_per_uffd);
			break;
		case 'h':
		default:
			help(argv[0]);
			break;
		}
	}

	if (p.uffd_mode == UFFDIO_REGISTER_MODE_MINOR &&
	    !backing_src_is_shared(p.src_type)) {
		TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -s");
	}

	if (cpulist) {
		kvm_parse_vcpu_pinning(cpulist, memstress_args.vcpu_to_pcpu,
				       nr_vcpus);
		memstress_args.pin_vcpus = true;
	}

	for_each_guest_mode(run_test, &p);

	return 0;
}

#else /* __NR_userfaultfd */

#warning "missing __NR_userfaultfd definition"

int main(void)
{
	print_skip("__NR_userfaultfd must be present for userfaultfd test");
	return KSFT_SKIP;
}

#endif /* __NR_userfaultfd */