// SPDX-License-Identifier: GPL-2.0
/*
 * KVM demand paging test
 * Adapted from dirty_log_test.c
 *
 * Copyright (C) 2018, Red Hat, Inc.
 * Copyright (C) 2019, Google, Inc.
 */

#define _GNU_SOURCE /* for pipe2 */

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <poll.h>
#include <pthread.h>
#include <linux/userfaultfd.h>
#include <sys/syscall.h>

#include "kvm_util.h"
#include "test_util.h"
#include "memstress.h"
#include "guest_modes.h"
#include "userfaultfd_util.h"

#ifdef __NR_userfaultfd

static int nr_vcpus = 1;
static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;

static size_t demand_paging_size;
static char *guest_data_prototype;

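/*
 * Per-vCPU worker: run the guest until it signals completion of one pass
 * over its memory region via UCALL_SYNC. When -u is used, each first touch
 * of a page is resolved by the userfaultfd handler thread.
 */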
static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
{
	struct kvm_vcpu *vcpu = vcpu_args->vcpu;
	int vcpu_idx = vcpu_args->vcpu_idx;
	struct kvm_run *run = vcpu->run;
	struct timespec start;
	struct timespec ts_diff;
	int ret;

	clock_gettime(CLOCK_MONOTONIC, &start);

	/* Let the guest access its memory */
	ret = _vcpu_run(vcpu);
	TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
	if (get_ucall(vcpu, NULL) != UCALL_SYNC) {
		TEST_ASSERT(false,
			    "Invalid guest sync status: exit_reason=%s\n",
			    exit_reason_str(run->exit_reason));
	}

	ts_diff = timespec_elapsed(start);
	PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_idx,
		       ts_diff.tv_sec, ts_diff.tv_nsec);
}

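/*
 * Resolve a single fault reported on the userfaultfd: MISSING faults are
 * satisfied by copying the data prototype into a new page (UFFDIO_COPY),
 * MINOR faults by mapping the page already populated through the shared
 * alias (UFFDIO_CONTINUE).
 */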
static int handle_uffd_page_request(int uffd_mode, int uffd,
		struct uffd_msg *msg)
{
	pid_t tid = syscall(__NR_gettid);
	uint64_t addr = msg->arg.pagefault.address;
	struct timespec start;
	struct timespec ts_diff;
	int r;

	clock_gettime(CLOCK_MONOTONIC, &start);

	if (uffd_mode == UFFDIO_REGISTER_MODE_MISSING) {
		struct uffdio_copy copy;

		copy.src = (uint64_t)guest_data_prototype;
		copy.dst = addr;
		copy.len = demand_paging_size;
		copy.mode = 0;

		r = ioctl(uffd, UFFDIO_COPY, &copy);
		if (r == -1) {
			pr_info("Failed UFFDIO_COPY at 0x%lx from thread %d with errno: %d\n",
				addr, tid, errno);
			return r;
		}
	} else if (uffd_mode == UFFDIO_REGISTER_MODE_MINOR) {
		struct uffdio_continue cont = {0};

		cont.range.start = addr;
		cont.range.len = demand_paging_size;

		r = ioctl(uffd, UFFDIO_CONTINUE, &cont);
		if (r == -1) {
			pr_info("Failed UFFDIO_CONTINUE at 0x%lx from thread %d with errno: %d\n",
				addr, tid, errno);
			return r;
		}
	} else {
		TEST_FAIL("Invalid uffd mode %d", uffd_mode);
	}

	ts_diff = timespec_elapsed(start);

	PER_PAGE_DEBUG("UFFD page-in %d \t%ld ns\n", tid,
		       timespec_to_ns(ts_diff));
	PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n",
		       demand_paging_size, addr, tid);

	return 0;
}

struct test_params {
	int uffd_mode;
	useconds_t uffd_delay;
	enum vm_mem_backing_src_type src_type;
	bool partition_vcpu_memory_access;
};

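/*
 * Write the data pattern through the alias (second) mapping of the backing
 * source so the pages are populated there; minor faults then only need the
 * guest mapping to be installed via UFFDIO_CONTINUE.
 */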
static void prefault_mem(void *alias, uint64_t len)
{
	size_t p;

	TEST_ASSERT(alias != NULL, "Alias required for minor faults");
	for (p = 0; p < (len / demand_paging_size); ++p) {
		memcpy(alias + (p * demand_paging_size),
		       guest_data_prototype, demand_paging_size);
	}
}

static void run_test(enum vm_guest_mode mode, void *arg)
{
	struct test_params *p = arg;
	struct uffd_desc **uffd_descs = NULL;
	struct timespec start;
	struct timespec ts_diff;
	struct kvm_vm *vm;
	int i;

	vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
				 p->src_type, p->partition_vcpu_memory_access);

	demand_paging_size = get_backing_src_pagesz(p->src_type);

	guest_data_prototype = malloc(demand_paging_size);
	TEST_ASSERT(guest_data_prototype,
		    "Failed to allocate buffer for guest data pattern");
	memset(guest_data_prototype, 0xAB, demand_paging_size);

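	/*
	 * When userfaultfd is requested, register each vCPU's slice of guest
	 * memory with its own userfaultfd and handler thread so faults can
	 * be serviced in parallel.
	 */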
	if (p->uffd_mode) {
		uffd_descs = malloc(nr_vcpus * sizeof(struct uffd_desc *));
		TEST_ASSERT(uffd_descs, "Memory allocation failed");

		for (i = 0; i < nr_vcpus; i++) {
			struct memstress_vcpu_args *vcpu_args;
			void *vcpu_hva;
			void *vcpu_alias;

			vcpu_args = &memstress_args.vcpu_args[i];

			/* Cache the host addresses of the region */
			vcpu_hva = addr_gpa2hva(vm, vcpu_args->gpa);
			vcpu_alias = addr_gpa2alias(vm, vcpu_args->gpa);

			prefault_mem(vcpu_alias,
				vcpu_args->pages * memstress_args.guest_page_size);

			/*
			 * Set up user fault fd to handle demand paging
			 * requests.
			 */
			uffd_descs[i] = uffd_setup_demand_paging(
				p->uffd_mode, p->uffd_delay, vcpu_hva,
				vcpu_args->pages * memstress_args.guest_page_size,
				&handle_uffd_page_request);
		}
	}

	pr_info("Finished creating vCPUs and starting uffd threads\n");

	clock_gettime(CLOCK_MONOTONIC, &start);
	memstress_start_vcpu_threads(nr_vcpus, vcpu_worker);
	pr_info("Started all vCPUs\n");

	memstress_join_vcpu_threads(nr_vcpus);
	ts_diff = timespec_elapsed(start);
	pr_info("All vCPU threads joined\n");

	if (p->uffd_mode) {
		/* Tell the user fault fd handler threads to quit */
		for (i = 0; i < nr_vcpus; i++)
			uffd_stop_demand_paging(uffd_descs[i]);
	}

	pr_info("Total guest execution time: %ld.%.9lds\n",
		ts_diff.tv_sec, ts_diff.tv_nsec);
	pr_info("Overall demand paging rate: %f pgs/sec\n",
		memstress_args.vcpu_args[0].pages * nr_vcpus /
		((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 1000000000.0));

	memstress_destroy_vm(vm);

	free(guest_data_prototype);
	if (p->uffd_mode)
		free(uffd_descs);
}

static void help(char *name)
{
	puts("");
	printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n"
	       "          [-b memory] [-s type] [-v vcpus] [-o]\n", name);
	guest_modes_help();
	printf(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n"
	       "     UFFD registration mode: 'MISSING' or 'MINOR'.\n");
	printf(" -d: add a delay in usec to the User Fault\n"
	       "     FD handler to simulate demand paging\n"
	       "     overheads. Ignored without -u.\n");
	printf(" -b: specify the size of the memory region which should be\n"
	       "     demand paged by each vCPU. e.g. 10M or 3G.\n"
	       "     Default: 1G\n");
	backing_src_help("-s");
	printf(" -v: specify the number of vCPUs to run.\n");
	printf(" -o: Overlap guest memory accesses instead of partitioning\n"
	       "     them into a separate region of memory for each vCPU.\n");
	puts("");
	exit(0);
}

int main(int argc, char *argv[])
{
	int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
	struct test_params p = {
		.src_type = DEFAULT_VM_MEM_SRC,
		.partition_vcpu_memory_access = true,
	};
	int opt;

	guest_modes_append_default();

	while ((opt = getopt(argc, argv, "hm:u:d:b:s:v:o")) != -1) {
		switch (opt) {
		case 'm':
			guest_modes_cmdline(optarg);
			break;
		case 'u':
			if (!strcmp("MISSING", optarg))
				p.uffd_mode = UFFDIO_REGISTER_MODE_MISSING;
			else if (!strcmp("MINOR", optarg))
				p.uffd_mode = UFFDIO_REGISTER_MODE_MINOR;
			TEST_ASSERT(p.uffd_mode, "UFFD mode must be 'MISSING' or 'MINOR'.");
			break;
		case 'd':
			p.uffd_delay = strtoul(optarg, NULL, 0);
			TEST_ASSERT(p.uffd_delay >= 0, "A negative UFFD delay is not supported.");
			break;
		case 'b':
			guest_percpu_mem_size = parse_size(optarg);
			break;
		case 's':
			p.src_type = parse_backing_src_type(optarg);
			break;
		case 'v':
			nr_vcpus = atoi_positive("Number of vCPUs", optarg);
			TEST_ASSERT(nr_vcpus <= max_vcpus,
				    "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
			break;
		case 'o':
			p.partition_vcpu_memory_access = false;
			break;
		case 'h':
		default:
			help(argv[0]);
			break;
		}
	}

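	/*
	 * MINOR faults are resolved with UFFDIO_CONTINUE, which maps a page
	 * that already exists in the backing store; that requires a shared
	 * backing source that can be populated through a second mapping.
	 */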
	if (p.uffd_mode == UFFDIO_REGISTER_MODE_MINOR &&
	    !backing_src_is_shared(p.src_type)) {
		TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -s");
	}

	for_each_guest_mode(run_test, &p);

	return 0;
}

#else /* __NR_userfaultfd */

#warning "missing __NR_userfaultfd definition"

int main(void)
{
	print_skip("__NR_userfaultfd must be present for userfaultfd test");
	return KSFT_SKIP;
}

#endif /* __NR_userfaultfd */