// SPDX-License-Identifier: GPL-2.0
/*
 * KVM dirty page logging performance test
 *
 * Based on dirty_log_test.c
 *
 * Copyright (C) 2018, Red Hat, Inc.
 * Copyright (C) 2020, Google, Inc.
 */

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <pthread.h>
#include <linux/bitmap.h>

#include "kvm_util.h"
#include "test_util.h"
#include "memstress.h"
#include "guest_modes.h"
#include "ucall_common.h"

/* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop) */
#define TEST_HOST_LOOP_N		2UL

static int nr_vcpus = 1;
static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
static bool run_vcpus_while_disabling_dirty_logging;

/* Host variables */
static u64 dirty_log_manual_caps;
static bool host_quit;
static int iteration;
static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];

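/*
 * Per-vCPU worker: run the guest until it signals UCALL_SYNC, time the run,
 * publish the completed iteration, then spin until the main thread advances
 * "iteration".  The handshake is lockless: each variable has a single writer
 * (the main thread for "iteration" and "host_quit", worker i for
 * vcpu_last_completed_iteration[i]) and readers poll with READ_ONCE().
 */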
static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
{
	struct kvm_vcpu *vcpu = vcpu_args->vcpu;
	int vcpu_idx = vcpu_args->vcpu_idx;
	uint64_t pages_count = 0;
	struct kvm_run *run;
	struct timespec start;
	struct timespec ts_diff;
	struct timespec total = (struct timespec){0};
	struct timespec avg;
	int ret;

	run = vcpu->run;

	while (!READ_ONCE(host_quit)) {
		int current_iteration = READ_ONCE(iteration);

		clock_gettime(CLOCK_MONOTONIC, &start);
		ret = _vcpu_run(vcpu);
		ts_diff = timespec_elapsed(start);

		TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret);
		TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
			    "Invalid guest sync status: exit_reason=%s",
			    exit_reason_str(run->exit_reason));

		pr_debug("Got sync event from vCPU %d\n", vcpu_idx);
		vcpu_last_completed_iteration[vcpu_idx] = current_iteration;
		pr_debug("vCPU %d updated last completed iteration to %d\n",
			 vcpu_idx, vcpu_last_completed_iteration[vcpu_idx]);

		if (current_iteration) {
			pages_count += vcpu_args->pages;
			total = timespec_add(total, ts_diff);
			pr_debug("vCPU %d iteration %d dirty memory time: %ld.%.9lds\n",
				vcpu_idx, current_iteration, ts_diff.tv_sec,
				ts_diff.tv_nsec);
		} else {
			pr_debug("vCPU %d iteration %d populate memory time: %ld.%.9lds\n",
				vcpu_idx, current_iteration, ts_diff.tv_sec,
				ts_diff.tv_nsec);
		}

		/*
		 * Keep running the guest while dirty logging is being disabled
		 * (iteration is negative) so that vCPUs are accessing memory
		 * for the entire duration of zapping collapsible SPTEs.
		 */
		while (current_iteration == READ_ONCE(iteration) &&
		       READ_ONCE(iteration) >= 0 && !READ_ONCE(host_quit)) {}
	}
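	/*
	 * "total" excludes the populate pass (iteration 0), and the number of
	 * the last completed iteration doubles as the count of timed dirty
	 * passes, so this yields the average dirty-memory time per iteration
	 * (a debug statistic only).
	 */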
	avg = timespec_div(total, vcpu_last_completed_iteration[vcpu_idx]);
	pr_debug("\nvCPU %d dirtied 0x%lx pages over %d iterations in %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
		vcpu_idx, pages_count, vcpu_last_completed_iteration[vcpu_idx],
		total.tv_sec, total.tv_nsec, avg.tv_sec, avg.tv_nsec);
}

struct test_params {
	unsigned long iterations;
	uint64_t phys_offset;
	bool partition_vcpu_memory_access;
	enum vm_mem_backing_src_type backing_src;
	int slots;
	uint32_t write_percent;
	bool random_access;
};

static void run_test(enum vm_guest_mode mode, void *arg)
{
	struct test_params *p = arg;
	struct kvm_vm *vm;
	unsigned long **bitmaps;
	uint64_t guest_num_pages;
	uint64_t host_num_pages;
	uint64_t pages_per_slot;
	struct timespec start;
	struct timespec ts_diff;
	struct timespec get_dirty_log_total = (struct timespec){0};
	struct timespec vcpu_dirty_total = (struct timespec){0};
	struct timespec avg;
	struct timespec clear_dirty_log_total = (struct timespec){0};
	int i;

	vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size,
				 p->slots, p->backing_src,
				 p->partition_vcpu_memory_access);

	memstress_set_write_percent(vm, p->write_percent);

	guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm->page_shift;
	guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
	host_num_pages = vm_num_host_pages(mode, guest_num_pages);
	pages_per_slot = host_num_pages / p->slots;

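	/*
	 * Dirty bitmaps are sized in host pages; vm_num_host_pages() converts
	 * from guest pages when the two page sizes differ, so each slot's
	 * bitmap covers its share of the region.
	 */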
	bitmaps = memstress_alloc_bitmaps(p->slots, pages_per_slot);

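	/*
	 * With manual protection enabled, KVM_GET_DIRTY_LOG stops clearing
	 * the dirty bitmap; the test must instead clear it explicitly via
	 * KVM_CLEAR_DIRTY_LOG, which is timed separately below.
	 */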
	if (dirty_log_manual_caps)
		vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2,
			      dirty_log_manual_caps);

	/* Start the iterations */
	iteration = 0;
	host_quit = false;

	clock_gettime(CLOCK_MONOTONIC, &start);
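	/*
	 * Seed the completion markers to -1 so the populate wait below cannot
	 * succeed until each vCPU has actually finished iteration 0.
	 */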
	for (i = 0; i < nr_vcpus; i++)
		vcpu_last_completed_iteration[i] = -1;

	/*
	 * Use 100% writes during the population phase to ensure all
	 * memory is actually populated and not just mapped to the zero
	 * page. This prevents expensive copy-on-write faults from
	 * occurring during the dirty memory iterations below, which
	 * would pollute the performance results.
	 */
	memstress_set_write_percent(vm, 100);
	memstress_set_random_access(vm, false);
	memstress_start_vcpu_threads(nr_vcpus, vcpu_worker);

	/* Allow the vCPUs to populate memory */
	pr_debug("Starting iteration %d - Populating\n", iteration);
	for (i = 0; i < nr_vcpus; i++) {
		while (READ_ONCE(vcpu_last_completed_iteration[i]) !=
		       iteration)
			;
	}

	ts_diff = timespec_elapsed(start);
	pr_info("Populate memory time: %ld.%.9lds\n",
		ts_diff.tv_sec, ts_diff.tv_nsec);

	/* Enable dirty logging */
	clock_gettime(CLOCK_MONOTONIC, &start);
	memstress_enable_dirty_logging(vm, p->slots);
	ts_diff = timespec_elapsed(start);
	pr_info("Enabling dirty logging time: %ld.%.9lds\n\n",
		ts_diff.tv_sec, ts_diff.tv_nsec);

	memstress_set_write_percent(vm, p->write_percent);
	memstress_set_random_access(vm, p->random_access);

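	/*
	 * Each timed iteration has up to three phases: let the vCPUs dirty
	 * memory, harvest the dirty bitmaps with KVM_GET_DIRTY_LOG, and, when
	 * manual protection is in use, re-protect pages with
	 * KVM_CLEAR_DIRTY_LOG.
	 */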
	while (iteration < p->iterations) {
		/*
		 * Incrementing the iteration number will start the vCPUs
		 * dirtying memory again.
		 */
		clock_gettime(CLOCK_MONOTONIC, &start);
		iteration++;

		pr_debug("Starting iteration %d\n", iteration);
		for (i = 0; i < nr_vcpus; i++) {
			while (READ_ONCE(vcpu_last_completed_iteration[i])
			       != iteration)
				;
		}

		ts_diff = timespec_elapsed(start);
		vcpu_dirty_total = timespec_add(vcpu_dirty_total, ts_diff);
		pr_info("Iteration %d dirty memory time: %ld.%.9lds\n",
			iteration, ts_diff.tv_sec, ts_diff.tv_nsec);

		clock_gettime(CLOCK_MONOTONIC, &start);
		memstress_get_dirty_log(vm, bitmaps, p->slots);
		ts_diff = timespec_elapsed(start);
		get_dirty_log_total = timespec_add(get_dirty_log_total,
						   ts_diff);
		pr_info("Iteration %d get dirty log time: %ld.%.9lds\n",
			iteration, ts_diff.tv_sec, ts_diff.tv_nsec);

		if (dirty_log_manual_caps) {
			clock_gettime(CLOCK_MONOTONIC, &start);
			memstress_clear_dirty_log(vm, bitmaps, p->slots,
						  pages_per_slot);
			ts_diff = timespec_elapsed(start);
			clear_dirty_log_total = timespec_add(clear_dirty_log_total,
							     ts_diff);
			pr_info("Iteration %d clear dirty log time: %ld.%.9lds\n",
				iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
		}
	}

	/*
	 * Run vCPUs while dirty logging is being disabled to stress disabling
	 * in terms of both performance and correctness.  Opt-in via command
	 * line as this significantly increases time to disable dirty logging.
	 */
	if (run_vcpus_while_disabling_dirty_logging)
		WRITE_ONCE(iteration, -1);

	/* Disable dirty logging */
	clock_gettime(CLOCK_MONOTONIC, &start);
	memstress_disable_dirty_logging(vm, p->slots);
	ts_diff = timespec_elapsed(start);
	pr_info("Disabling dirty logging time: %ld.%.9lds\n",
		ts_diff.tv_sec, ts_diff.tv_nsec);

	/*
	 * Tell the vCPU threads to quit.  No need to manually check that vCPUs
	 * have stopped running after disabling dirty logging, the join will
	 * wait for them to exit.
	 */
	host_quit = true;
	memstress_join_vcpu_threads(nr_vcpus);

	avg = timespec_div(get_dirty_log_total, p->iterations);
	pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
		p->iterations, get_dirty_log_total.tv_sec,
		get_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);

	if (dirty_log_manual_caps) {
		avg = timespec_div(clear_dirty_log_total, p->iterations);
		pr_info("Clear dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
			p->iterations, clear_dirty_log_total.tv_sec,
			clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
	}

	memstress_free_bitmaps(bitmaps, p->slots);
	memstress_destroy_vm(vm);
}

static void help(char *name)
{
	puts("");
	printf("usage: %s [-h] [-a] [-i iterations] [-p offset] [-g] "
	       "[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-r random seed] [-s mem type] "
	       "[-x memslots] [-w percentage] [-c physical cpus to run test on]\n", name);
	puts("");
	printf(" -a: access memory randomly rather than in order.\n");
	printf(" -i: specify iteration counts (default: %"PRIu64")\n",
	       TEST_HOST_LOOP_N);
	printf(" -g: Do not enable KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2. This\n"
	       "     makes KVM_GET_DIRTY_LOG clear the dirty log (i.e.\n"
	       "     KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE is not enabled)\n"
	       "     and writes will be tracked as soon as dirty logging is\n"
	       "     enabled on the memslot (i.e. KVM_DIRTY_LOG_INITIALLY_SET\n"
	       "     is not enabled).\n");
	printf(" -p: specify guest physical test memory offset\n"
	       "     Warning: a low offset can conflict with the loaded test code.\n");
	guest_modes_help();
	printf(" -n: Run the vCPUs in nested mode (L2)\n");
	printf(" -e: Run vCPUs while dirty logging is being disabled.  This\n"
	       "     can significantly increase runtime, especially if there\n"
	       "     isn't a dedicated pCPU for the main thread.\n");
	printf(" -b: specify the size of the memory region which should be\n"
	       "     dirtied by each vCPU. e.g. 10M or 3G.\n"
	       "     (default: 1G)\n");
	printf(" -v: specify the number of vCPUs to run.\n");
	printf(" -o: Overlap guest memory accesses instead of partitioning\n"
	       "     them into a separate region of memory for each vCPU.\n");
	printf(" -r: specify the starting random seed.\n");
	backing_src_help("-s");
	printf(" -x: Split the memory region into this number of memslots.\n"
	       "     (default: 1)\n");
	printf(" -w: specify the percentage of pages which should be written to\n"
	       "     as an integer from 0-100 inclusive. This is probabilistic,\n"
	       "     so -w X means each page has an X%% chance of writing\n"
	       "     and a (100-X)%% chance of reading.\n"
	       "     (default: 100 i.e. all pages are written to.)\n");
	kvm_print_vcpu_pinning_help();
	puts("");
	exit(0);
}

int main(int argc, char *argv[])
{
	int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
	const char *pcpu_list = NULL;
	struct test_params p = {
		.iterations = TEST_HOST_LOOP_N,
		.partition_vcpu_memory_access = true,
		.backing_src = DEFAULT_VM_MEM_SRC,
		.slots = 1,
		.write_percent = 100,
	};
	int opt;

	/* Override the seed to be deterministic by default. */
	guest_random_seed = 1;

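	/*
	 * Limit the advertised caps to the two flags this test knows how to
	 * drive; a nonzero value selects the GET+CLEAR dirty log flow
	 * (suppressed entirely by -g).
	 */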
	dirty_log_manual_caps =
		kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
	dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
				  KVM_DIRTY_LOG_INITIALLY_SET);

	guest_modes_append_default();

	while ((opt = getopt(argc, argv, "ab:c:eghi:m:nop:r:s:v:x:w:")) != -1) {
		switch (opt) {
		case 'a':
			p.random_access = true;
			break;
		case 'b':
			guest_percpu_mem_size = parse_size(optarg);
			break;
		case 'c':
			pcpu_list = optarg;
			break;
		case 'e':
			/* 'e' is for evil. */
			run_vcpus_while_disabling_dirty_logging = true;
			break;
		case 'g':
			dirty_log_manual_caps = 0;
			break;
		case 'h':
			help(argv[0]);
			break;
		case 'i':
			p.iterations = atoi_positive("Number of iterations", optarg);
			break;
		case 'm':
			guest_modes_cmdline(optarg);
			break;
		case 'n':
			memstress_args.nested = true;
			break;
		case 'o':
			p.partition_vcpu_memory_access = false;
			break;
		case 'p':
			p.phys_offset = strtoull(optarg, NULL, 0);
			break;
		case 'r':
			guest_random_seed = atoi_positive("Random seed", optarg);
			break;
		case 's':
			p.backing_src = parse_backing_src_type(optarg);
			break;
		case 'v':
			nr_vcpus = atoi_positive("Number of vCPUs", optarg);
			TEST_ASSERT(nr_vcpus <= max_vcpus,
				    "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
			break;
		case 'w':
			p.write_percent = atoi_non_negative("Write percentage", optarg);
			TEST_ASSERT(p.write_percent <= 100,
				    "Write percentage must be between 0 and 100");
			break;
		case 'x':
			p.slots = atoi_positive("Number of slots", optarg);
			break;
		default:
			help(argv[0]);
			break;
		}
	}

	if (pcpu_list) {
		kvm_parse_vcpu_pinning(pcpu_list, memstress_args.vcpu_to_pcpu,
				       nr_vcpus);
		memstress_args.pin_vcpus = true;
	}

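	/*
	 * At least two dirty iterations are required, likely because the
	 * first pass after enabling dirty logging pays one-off
	 * write-protection faults, so only later passes measure steady-state
	 * behavior (this rationale is inferred, not stated by the test).
	 */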
	TEST_ASSERT(p.iterations >= 2, "The test should have at least two iterations");

	pr_info("Test iterations: %"PRIu64"\n", p.iterations);

	for_each_guest_mode(run_test, &p);

	return 0;
}