1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * KVM demand paging test 4 * Adapted from dirty_log_test.c 5 * 6 * Copyright (C) 2018, Red Hat, Inc. 7 * Copyright (C) 2019, Google, Inc. 8 */ 9 10 #define _GNU_SOURCE /* for pipe2 */ 11 12 #include <stdio.h> 13 #include <stdlib.h> 14 #include <time.h> 15 #include <poll.h> 16 #include <pthread.h> 17 #include <linux/userfaultfd.h> 18 #include <sys/syscall.h> 19 20 #include "kvm_util.h" 21 #include "test_util.h" 22 #include "perf_test_util.h" 23 #include "guest_modes.h" 24 25 #ifdef __NR_userfaultfd 26 27 #ifdef PRINT_PER_PAGE_UPDATES 28 #define PER_PAGE_DEBUG(...) printf(__VA_ARGS__) 29 #else 30 #define PER_PAGE_DEBUG(...) _no_printf(__VA_ARGS__) 31 #endif 32 33 #ifdef PRINT_PER_VCPU_UPDATES 34 #define PER_VCPU_DEBUG(...) printf(__VA_ARGS__) 35 #else 36 #define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__) 37 #endif 38 39 static int nr_vcpus = 1; 40 static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; 41 static char *guest_data_prototype; 42 43 static void *vcpu_worker(void *data) 44 { 45 int ret; 46 struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data; 47 int vcpu_id = vcpu_args->vcpu_id; 48 struct kvm_vm *vm = perf_test_args.vm; 49 struct kvm_run *run; 50 struct timespec start; 51 struct timespec ts_diff; 52 53 vcpu_args_set(vm, vcpu_id, 1, vcpu_id); 54 run = vcpu_state(vm, vcpu_id); 55 56 clock_gettime(CLOCK_MONOTONIC, &start); 57 58 /* Let the guest access its memory */ 59 ret = _vcpu_run(vm, vcpu_id); 60 TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); 61 if (get_ucall(vm, vcpu_id, NULL) != UCALL_SYNC) { 62 TEST_ASSERT(false, 63 "Invalid guest sync status: exit_reason=%s\n", 64 exit_reason_str(run->exit_reason)); 65 } 66 67 ts_diff = timespec_diff_now(start); 68 PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_id, 69 ts_diff.tv_sec, ts_diff.tv_nsec); 70 71 return NULL; 72 } 73 74 static int handle_uffd_page_request(int uffd, uint64_t addr) 75 { 76 pid_t tid; 77 struct timespec start; 78 struct timespec ts_diff; 79 struct uffdio_copy copy; 80 int r; 81 82 tid = syscall(__NR_gettid); 83 84 copy.src = (uint64_t)guest_data_prototype; 85 copy.dst = addr; 86 copy.len = perf_test_args.host_page_size; 87 copy.mode = 0; 88 89 clock_gettime(CLOCK_MONOTONIC, &start); 90 91 r = ioctl(uffd, UFFDIO_COPY, ©); 92 if (r == -1) { 93 pr_info("Failed Paged in 0x%lx from thread %d with errno: %d\n", 94 addr, tid, errno); 95 return r; 96 } 97 98 ts_diff = timespec_diff_now(start); 99 100 PER_PAGE_DEBUG("UFFDIO_COPY %d \t%ld ns\n", tid, 101 timespec_to_ns(ts_diff)); 102 PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n", 103 perf_test_args.host_page_size, addr, tid); 104 105 return 0; 106 } 107 108 bool quit_uffd_thread; 109 110 struct uffd_handler_args { 111 int uffd; 112 int pipefd; 113 useconds_t delay; 114 }; 115 116 static void *uffd_handler_thread_fn(void *arg) 117 { 118 struct uffd_handler_args *uffd_args = (struct uffd_handler_args *)arg; 119 int uffd = uffd_args->uffd; 120 int pipefd = uffd_args->pipefd; 121 useconds_t delay = uffd_args->delay; 122 int64_t pages = 0; 123 struct timespec start; 124 struct timespec ts_diff; 125 126 clock_gettime(CLOCK_MONOTONIC, &start); 127 while (!quit_uffd_thread) { 128 struct uffd_msg msg; 129 struct pollfd pollfd[2]; 130 char tmp_chr; 131 int r; 132 uint64_t addr; 133 134 pollfd[0].fd = uffd; 135 pollfd[0].events = POLLIN; 136 pollfd[1].fd = pipefd; 137 pollfd[1].events = POLLIN; 138 139 r = poll(pollfd, 2, -1); 140 switch (r) { 141 case -1: 142 pr_info("poll err"); 143 continue; 144 case 0: 145 continue; 146 case 1: 147 break; 148 default: 149 pr_info("Polling uffd returned %d", r); 150 return NULL; 151 } 152 153 if (pollfd[0].revents & POLLERR) { 154 pr_info("uffd revents has POLLERR"); 155 return NULL; 156 } 157 158 if (pollfd[1].revents & POLLIN) { 159 r = read(pollfd[1].fd, &tmp_chr, 1); 160 TEST_ASSERT(r == 1, 161 "Error reading pipefd in UFFD thread\n"); 162 return NULL; 163 } 164 165 if (!pollfd[0].revents & POLLIN) 166 continue; 167 168 r = read(uffd, &msg, sizeof(msg)); 169 if (r == -1) { 170 if (errno == EAGAIN) 171 continue; 172 pr_info("Read of uffd gor errno %d", errno); 173 return NULL; 174 } 175 176 if (r != sizeof(msg)) { 177 pr_info("Read on uffd returned unexpected size: %d bytes", r); 178 return NULL; 179 } 180 181 if (!(msg.event & UFFD_EVENT_PAGEFAULT)) 182 continue; 183 184 if (delay) 185 usleep(delay); 186 addr = msg.arg.pagefault.address; 187 r = handle_uffd_page_request(uffd, addr); 188 if (r < 0) 189 return NULL; 190 pages++; 191 } 192 193 ts_diff = timespec_diff_now(start); 194 PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n", 195 pages, ts_diff.tv_sec, ts_diff.tv_nsec, 196 pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); 197 198 return NULL; 199 } 200 201 static int setup_demand_paging(struct kvm_vm *vm, 202 pthread_t *uffd_handler_thread, int pipefd, 203 useconds_t uffd_delay, 204 struct uffd_handler_args *uffd_args, 205 void *hva, uint64_t len) 206 { 207 int uffd; 208 struct uffdio_api uffdio_api; 209 struct uffdio_register uffdio_register; 210 211 uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); 212 if (uffd == -1) { 213 pr_info("uffd creation failed\n"); 214 return -1; 215 } 216 217 uffdio_api.api = UFFD_API; 218 uffdio_api.features = 0; 219 if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) { 220 pr_info("ioctl uffdio_api failed\n"); 221 return -1; 222 } 223 224 uffdio_register.range.start = (uint64_t)hva; 225 uffdio_register.range.len = len; 226 uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; 227 if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) { 228 pr_info("ioctl uffdio_register failed\n"); 229 return -1; 230 } 231 232 if ((uffdio_register.ioctls & UFFD_API_RANGE_IOCTLS) != 233 UFFD_API_RANGE_IOCTLS) { 234 pr_info("unexpected userfaultfd ioctl set\n"); 235 return -1; 236 } 237 238 uffd_args->uffd = uffd; 239 uffd_args->pipefd = pipefd; 240 uffd_args->delay = uffd_delay; 241 pthread_create(uffd_handler_thread, NULL, uffd_handler_thread_fn, 242 uffd_args); 243 244 PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n", 245 hva, hva + len); 246 247 return 0; 248 } 249 250 struct test_params { 251 bool use_uffd; 252 useconds_t uffd_delay; 253 }; 254 255 static void run_test(enum vm_guest_mode mode, void *arg) 256 { 257 struct test_params *p = arg; 258 pthread_t *vcpu_threads; 259 pthread_t *uffd_handler_threads = NULL; 260 struct uffd_handler_args *uffd_args = NULL; 261 struct timespec start; 262 struct timespec ts_diff; 263 int *pipefds = NULL; 264 struct kvm_vm *vm; 265 int vcpu_id; 266 int r; 267 268 vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size); 269 270 perf_test_args.wr_fract = 1; 271 272 guest_data_prototype = malloc(perf_test_args.host_page_size); 273 TEST_ASSERT(guest_data_prototype, 274 "Failed to allocate buffer for guest data pattern"); 275 memset(guest_data_prototype, 0xAB, perf_test_args.host_page_size); 276 277 vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); 278 TEST_ASSERT(vcpu_threads, "Memory allocation failed"); 279 280 perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size); 281 282 if (p->use_uffd) { 283 uffd_handler_threads = 284 malloc(nr_vcpus * sizeof(*uffd_handler_threads)); 285 TEST_ASSERT(uffd_handler_threads, "Memory allocation failed"); 286 287 uffd_args = malloc(nr_vcpus * sizeof(*uffd_args)); 288 TEST_ASSERT(uffd_args, "Memory allocation failed"); 289 290 pipefds = malloc(sizeof(int) * nr_vcpus * 2); 291 TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd"); 292 293 for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { 294 vm_paddr_t vcpu_gpa; 295 void *vcpu_hva; 296 297 vcpu_gpa = guest_test_phys_mem + (vcpu_id * guest_percpu_mem_size); 298 PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n", 299 vcpu_id, vcpu_gpa, vcpu_gpa + guest_percpu_mem_size); 300 301 /* Cache the HVA pointer of the region */ 302 vcpu_hva = addr_gpa2hva(vm, vcpu_gpa); 303 304 /* 305 * Set up user fault fd to handle demand paging 306 * requests. 307 */ 308 r = pipe2(&pipefds[vcpu_id * 2], 309 O_CLOEXEC | O_NONBLOCK); 310 TEST_ASSERT(!r, "Failed to set up pipefd"); 311 312 r = setup_demand_paging(vm, 313 &uffd_handler_threads[vcpu_id], 314 pipefds[vcpu_id * 2], 315 p->uffd_delay, &uffd_args[vcpu_id], 316 vcpu_hva, guest_percpu_mem_size); 317 if (r < 0) 318 exit(-r); 319 } 320 } 321 322 /* Export the shared variables to the guest */ 323 sync_global_to_guest(vm, perf_test_args); 324 325 pr_info("Finished creating vCPUs and starting uffd threads\n"); 326 327 clock_gettime(CLOCK_MONOTONIC, &start); 328 329 for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { 330 pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, 331 &perf_test_args.vcpu_args[vcpu_id]); 332 } 333 334 pr_info("Started all vCPUs\n"); 335 336 /* Wait for the vcpu threads to quit */ 337 for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { 338 pthread_join(vcpu_threads[vcpu_id], NULL); 339 PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id); 340 } 341 342 ts_diff = timespec_diff_now(start); 343 344 pr_info("All vCPU threads joined\n"); 345 346 if (p->use_uffd) { 347 char c; 348 349 /* Tell the user fault fd handler threads to quit */ 350 for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { 351 r = write(pipefds[vcpu_id * 2 + 1], &c, 1); 352 TEST_ASSERT(r == 1, "Unable to write to pipefd"); 353 354 pthread_join(uffd_handler_threads[vcpu_id], NULL); 355 } 356 } 357 358 pr_info("Total guest execution time: %ld.%.9lds\n", 359 ts_diff.tv_sec, ts_diff.tv_nsec); 360 pr_info("Overall demand paging rate: %f pgs/sec\n", 361 perf_test_args.vcpu_args[0].pages * nr_vcpus / 362 ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); 363 364 perf_test_destroy_vm(vm); 365 366 free(guest_data_prototype); 367 free(vcpu_threads); 368 if (p->use_uffd) { 369 free(uffd_handler_threads); 370 free(uffd_args); 371 free(pipefds); 372 } 373 } 374 375 static void help(char *name) 376 { 377 puts(""); 378 printf("usage: %s [-h] [-m mode] [-u] [-d uffd_delay_usec]\n" 379 " [-b memory] [-v vcpus]\n", name); 380 guest_modes_help(); 381 printf(" -u: use User Fault FD to handle vCPU page\n" 382 " faults.\n"); 383 printf(" -d: add a delay in usec to the User Fault\n" 384 " FD handler to simulate demand paging\n" 385 " overheads. Ignored without -u.\n"); 386 printf(" -b: specify the size of the memory region which should be\n" 387 " demand paged by each vCPU. e.g. 10M or 3G.\n" 388 " Default: 1G\n"); 389 printf(" -v: specify the number of vCPUs to run.\n"); 390 puts(""); 391 exit(0); 392 } 393 394 int main(int argc, char *argv[]) 395 { 396 int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); 397 struct test_params p = {}; 398 int opt; 399 400 guest_modes_append_default(); 401 402 while ((opt = getopt(argc, argv, "hm:ud:b:v:")) != -1) { 403 switch (opt) { 404 case 'm': 405 guest_modes_cmdline(optarg); 406 break; 407 case 'u': 408 p.use_uffd = true; 409 break; 410 case 'd': 411 p.uffd_delay = strtoul(optarg, NULL, 0); 412 TEST_ASSERT(p.uffd_delay >= 0, "A negative UFFD delay is not supported."); 413 break; 414 case 'b': 415 guest_percpu_mem_size = parse_size(optarg); 416 break; 417 case 'v': 418 nr_vcpus = atoi(optarg); 419 TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, 420 "Invalid number of vcpus, must be between 1 and %d", max_vcpus); 421 break; 422 case 'h': 423 default: 424 help(argv[0]); 425 break; 426 } 427 } 428 429 for_each_guest_mode(run_test, &p); 430 431 return 0; 432 } 433 434 #else /* __NR_userfaultfd */ 435 436 #warning "missing __NR_userfaultfd definition" 437 438 int main(void) 439 { 440 print_skip("__NR_userfaultfd must be present for userfaultfd test"); 441 return KSFT_SKIP; 442 } 443 444 #endif /* __NR_userfaultfd */ 445