xref: /linux/tools/testing/selftests/kvm/memslot_perf_test.c (revision b02a4f8c4284e2cbbf539a95b27647687adae816)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * A memslot-related performance benchmark.
4  *
5  * Copyright (C) 2021 Oracle and/or its affiliates.
6  *
7  * Basic guest setup / host vCPU thread code lifted from set_memory_region_test.
8  */
9 #include <pthread.h>
10 #include <sched.h>
11 #include <semaphore.h>
12 #include <stdatomic.h>
13 #include <stdbool.h>
14 #include <stdint.h>
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include <time.h>
19 #include <unistd.h>
20 
21 #include <linux/compiler.h>
22 #include <linux/sizes.h>
23 
24 #include <test_util.h>
25 #include <kvm_syscalls.h>
26 #include <kvm_util.h>
27 #include <processor.h>
28 #include <ucall_common.h>
29 
/*
 * Guest physical layout of the test memory, starting at MEM_GPA:
 * a MEM_EXTRA_SIZE auxiliary area comes first (the host/guest sync area
 * sits at its very start, at MEM_SYNC_GPA), followed by the test area
 * proper at MEM_TEST_GPA, which is MEM_TEST_SIZE bytes long.
 */
#define MEM_EXTRA_SIZE		SZ_64K

#define MEM_SIZE		(SZ_512M + MEM_EXTRA_SIZE)
#define MEM_GPA			SZ_256M
#define MEM_AUX_GPA		MEM_GPA
#define MEM_SYNC_GPA		MEM_AUX_GPA
#define MEM_TEST_GPA		(MEM_AUX_GPA + MEM_EXTRA_SIZE)
#define MEM_TEST_SIZE		(MEM_SIZE - MEM_EXTRA_SIZE)
38 
39 /*
40  * 32 MiB is max size that gets well over 100 iterations on 509 slots.
41  * Considering that each slot needs to have at least one page up to
42  * 8194 slots in use can then be tested (although with slightly
43  * limited resolution).
44  */
45 #define MEM_SIZE_MAP		(SZ_32M + MEM_EXTRA_SIZE)
46 #define MEM_TEST_MAP_SIZE	(MEM_SIZE_MAP - MEM_EXTRA_SIZE)
47 
48 /*
49  * 128 MiB is min size that fills 32k slots with at least one page in each
50  * while at the same time gets 100+ iterations in such test
51  *
52  * 2 MiB chunk size like a typical huge page
53  */
54 #define MEM_TEST_UNMAP_SIZE		SZ_128M
55 #define MEM_TEST_UNMAP_CHUNK_SIZE	SZ_2M
56 
57 /*
58  * For the move active test the middle of the test area is placed on
59  * a memslot boundary: half lies in the memslot being moved, half in
60  * other memslot(s).
61  *
62  * We have different number of memory slots, excluding the reserved
63  * memory slot 0, on various architectures and configurations. The
64  * memory size in this test is calculated by picking the maximal
65  * last memory slot's memory size, with alignment to the largest
66  * supported page size (64KB). In this way, the selected memory
67  * size for this test is compatible with test_memslot_move_prepare().
68  *
69  * architecture   slots    memory-per-slot    memory-on-last-slot
70  * --------------------------------------------------------------
71  * x86-4KB        32763    16KB               160KB
72  * arm64-4KB      32766    16KB               112KB
73  * arm64-16KB     32766    16KB               112KB
74  * arm64-64KB     8192     64KB               128KB
75  */
76 #define MEM_TEST_MOVE_SIZE		(3 * SZ_64K)
77 #define MEM_TEST_MOVE_GPA_DEST		(MEM_GPA + MEM_SIZE)
78 static_assert(MEM_TEST_MOVE_SIZE <= MEM_TEST_SIZE,
79 	      "invalid move test region size");
80 
81 #define MEM_TEST_VAL_1 0x1122334455667788
82 #define MEM_TEST_VAL_2 0x99AABBCCDDEEFF00
83 
/* Host-side state of one test VM; allocated by alloc_vm(), filled by prepare_vm(). */
struct vm_data {
	struct kvm_vm *vm;		/* the VM under test */
	struct kvm_vcpu *vcpu;		/* its single vCPU */
	pthread_t vcpu_thread;		/* host thread running vcpu_worker() */
	u32 nslots;			/* number of test memslots (IDs 1..nslots) */
	u64 npages;			/* total guest pages spread over those slots */
	u64 pages_per_slot;		/* npages / nslots; the last slot also gets the remainder */
	void **hva_slots;		/* host address of each slot's start, indexed by slot ID - 1 */
	bool mmio_ok;			/* whether MMIO exits are acceptable in this test */
	u64 mmio_gpa_min;		/* lowest guest address an MMIO exit may report */
	u64 mmio_gpa_max;		/* highest guest address an MMIO exit may report */
};
96 
/*
 * Host <-> guest communication area. The guest accesses it at MEM_SYNC_GPA,
 * the host through the matching host virtual address.
 */
struct sync_area {
	u32    guest_page_size;		/* written by the host in prepare_vm(), read by guest code */
	atomic_bool start_flag;		/* set by the host to let the guest leave its start spin loop */
	atomic_bool exit_flag;		/* set by the host to ask the guest to finish */
	atomic_bool sync_flag;		/* set by the host, cleared by the guest at each sync point */
	void *move_area_ptr;		/* guest address of the area written by the move test */
};
104 
105 /*
106  * Technically, we need also for the atomic bool to be address-free, which
107  * is recommended, but not strictly required, by C11 for lockless
108  * implementations.
109  * However, in practice both GCC and Clang fulfill this requirement on
110  * all KVM-supported platforms.
111  */
112 static_assert(ATOMIC_BOOL_LOCK_FREE == 2, "atomic bool is not lockless");
113 
/* Seconds passed to alarm() around each host wait for the guest (-t option). */
static int wait_timeout = 10;
/* Posted by the vCPU thread each time the guest issues GUEST_SYNC(0). */
static sem_t vcpu_ready;

/* Enables the extra host-side value checks in the map/unmap tests (-d option). */
static bool map_unmap_verify;
#ifdef __x86_64__
/* Set by -q: disable KVM_X86_QUIRK_SLOT_ZAP_ALL before the move tests. */
static bool disable_slot_zap_quirk;
#endif

/* Set by -v; gates pr_info_v() output. */
static bool verbose;
/* pr_info() that only prints in verbose mode. */
#define pr_info_v(...)				\
	do {					\
		if (verbose)			\
			pr_info(__VA_ARGS__);	\
	} while (0)
128 
/*
 * Validate an MMIO exit reported in @run: MMIO must currently be allowed
 * for this test, and the access must be an 8-byte write whose address lies
 * within [data->mmio_gpa_min, data->mmio_gpa_max].
 */
static void check_mmio_access(struct vm_data *data, struct kvm_run *run)
{
	TEST_ASSERT(data->mmio_ok, "Unexpected mmio exit");
	TEST_ASSERT(run->mmio.is_write, "Unexpected mmio read");
	TEST_ASSERT(run->mmio.len == 8,
		    "Unexpected exit mmio size = %u", run->mmio.len);
	/* Both window bounds are inclusive. */
	TEST_ASSERT(run->mmio.phys_addr >= data->mmio_gpa_min &&
		    run->mmio.phys_addr <= data->mmio_gpa_max,
		    "Unexpected exit mmio address = 0x%llx",
		    run->mmio.phys_addr);
}
140 
141 static void *vcpu_worker(void *__data)
142 {
143 	struct vm_data *data = __data;
144 	struct kvm_vcpu *vcpu = data->vcpu;
145 	struct kvm_run *run = vcpu->run;
146 	struct ucall uc;
147 
148 	while (1) {
149 		vcpu_run(vcpu);
150 
151 		switch (get_ucall(vcpu, &uc)) {
152 		case UCALL_SYNC:
153 			TEST_ASSERT(uc.args[1] == 0,
154 				"Unexpected sync ucall, got %lx",
155 				(ulong)uc.args[1]);
156 			sem_post(&vcpu_ready);
157 			continue;
158 		case UCALL_NONE:
159 			if (run->exit_reason == KVM_EXIT_MMIO)
160 				check_mmio_access(data, run);
161 			else
162 				goto done;
163 			break;
164 		case UCALL_ABORT:
165 			REPORT_GUEST_ASSERT(uc);
166 			break;
167 		case UCALL_DONE:
168 			goto done;
169 		default:
170 			TEST_FAIL("Unknown ucall %lu", uc.cmd);
171 		}
172 	}
173 
174 done:
175 	return NULL;
176 }
177 
178 static void wait_for_vcpu(void)
179 {
180 	struct timespec ts;
181 
182 	TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts),
183 		    "clock_gettime() failed: %d", errno);
184 
185 	ts.tv_sec += 2;
186 	TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts),
187 		    "sem_timedwait() failed: %d", errno);
188 }
189 
190 static void *vm_gpa2hva(struct vm_data *data, gpa_t gpa, u64 *rempages)
191 {
192 	gpa_t gpage, pgoffs;
193 	u32 slot, slotoffs;
194 	void *base;
195 	u32 guest_page_size = data->vm->page_size;
196 
197 	TEST_ASSERT(gpa >= MEM_GPA, "Too low gpa to translate");
198 	TEST_ASSERT(gpa < MEM_GPA + data->npages * guest_page_size,
199 		    "Too high gpa to translate");
200 	gpa -= MEM_GPA;
201 
202 	gpage = gpa / guest_page_size;
203 	pgoffs = gpa % guest_page_size;
204 	slot = min(gpage / data->pages_per_slot, (u64)data->nslots - 1);
205 	slotoffs = gpage - (slot * data->pages_per_slot);
206 
207 	if (rempages) {
208 		u64 slotpages;
209 
210 		if (slot == data->nslots - 1)
211 			slotpages = data->npages - slot * data->pages_per_slot;
212 		else
213 			slotpages = data->pages_per_slot;
214 
215 		TEST_ASSERT(!pgoffs,
216 			    "Asking for remaining pages in slot but gpa not page aligned");
217 		*rempages = slotpages - slotoffs;
218 	}
219 
220 	base = data->hva_slots[slot];
221 	return (u8 *)base + slotoffs * guest_page_size + pgoffs;
222 }
223 
224 static u64 vm_slot2gpa(struct vm_data *data, u32 slot)
225 {
226 	u32 guest_page_size = data->vm->page_size;
227 
228 	TEST_ASSERT(slot < data->nslots, "Too high slot number");
229 
230 	return MEM_GPA + slot * data->pages_per_slot * guest_page_size;
231 }
232 
233 static struct vm_data *alloc_vm(void)
234 {
235 	struct vm_data *data;
236 
237 	data = malloc(sizeof(*data));
238 	TEST_ASSERT(data, "malloc(vmdata) failed");
239 
240 	data->vm = NULL;
241 	data->vcpu = NULL;
242 	data->hva_slots = NULL;
243 
244 	return data;
245 }
246 
/*
 * Tell whether a slot layout is usable: every slot must hold at least one
 * guest page, and both the regular slot size and the extra tail placed on
 * the last slot must be whole multiples of the host page size.
 */
static bool check_slot_pages(u32 host_page_size, u32 guest_page_size,
			     u64 pages_per_slot, u64 rempages)
{
	return pages_per_slot &&
	       !((pages_per_slot * guest_page_size) % host_page_size) &&
	       !((rempages * guest_page_size) % host_page_size);
}
261 
262 
263 static u64 get_max_slots(struct vm_data *data, u32 host_page_size)
264 {
265 	u32 guest_page_size = data->vm->page_size;
266 	u64 mempages, pages_per_slot, rempages;
267 	u64 slots;
268 
269 	mempages = data->npages;
270 	slots = data->nslots;
271 	while (--slots > 1) {
272 		pages_per_slot = mempages / slots;
273 		if (!pages_per_slot)
274 			continue;
275 
276 		rempages = mempages % pages_per_slot;
277 		if (check_slot_pages(host_page_size, guest_page_size,
278 				     pages_per_slot, rempages))
279 			return slots + 1;	/* slot 0 is reserved */
280 	}
281 
282 	return 0;
283 }
284 
285 static bool prepare_vm(struct vm_data *data, int nslots, u64 *maxslots,
286 		       void *guest_code, u64 mem_size,
287 		       struct timespec *slot_runtime)
288 {
289 	u64 mempages, rempages;
290 	u64 guest_addr;
291 	u32 slot, host_page_size, guest_page_size;
292 	struct timespec tstart;
293 	struct sync_area *sync;
294 
295 	host_page_size = getpagesize();
296 	guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;
297 	mempages = mem_size / guest_page_size;
298 
299 	data->vm = __vm_create_with_one_vcpu(&data->vcpu, mempages, guest_code);
300 	TEST_ASSERT(data->vm->page_size == guest_page_size, "Invalid VM page size");
301 
302 	data->npages = mempages;
303 	TEST_ASSERT(data->npages > 1, "Can't test without any memory");
304 	data->nslots = nslots;
305 	data->pages_per_slot = data->npages / data->nslots;
306 	rempages = data->npages % data->nslots;
307 	if (!check_slot_pages(host_page_size, guest_page_size,
308 			      data->pages_per_slot, rempages)) {
309 		*maxslots = get_max_slots(data, host_page_size);
310 		return false;
311 	}
312 
313 	data->hva_slots = malloc(sizeof(*data->hva_slots) * data->nslots);
314 	TEST_ASSERT(data->hva_slots, "malloc() fail");
315 
316 	pr_info_v("Adding slots 1..%i, each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n",
317 		data->nslots, data->pages_per_slot, rempages);
318 
319 	clock_gettime(CLOCK_MONOTONIC, &tstart);
320 	for (slot = 1, guest_addr = MEM_GPA; slot <= data->nslots; slot++) {
321 		u64 npages;
322 
323 		npages = data->pages_per_slot;
324 		if (slot == data->nslots)
325 			npages += rempages;
326 
327 		vm_userspace_mem_region_add(data->vm, VM_MEM_SRC_ANONYMOUS,
328 					    guest_addr, slot, npages,
329 					    0);
330 		guest_addr += npages * guest_page_size;
331 	}
332 	*slot_runtime = timespec_elapsed(tstart);
333 
334 	for (slot = 1, guest_addr = MEM_GPA; slot <= data->nslots; slot++) {
335 		u64 npages;
336 		gpa_t gpa;
337 
338 		npages = data->pages_per_slot;
339 		if (slot == data->nslots)
340 			npages += rempages;
341 
342 		gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr, slot);
343 		TEST_ASSERT(gpa == guest_addr,
344 			    "vm_phy_pages_alloc() failed");
345 
346 		data->hva_slots[slot - 1] = addr_gpa2hva(data->vm, guest_addr);
347 		memset(data->hva_slots[slot - 1], 0, npages * guest_page_size);
348 
349 		guest_addr += npages * guest_page_size;
350 	}
351 
352 	virt_map(data->vm, MEM_GPA, MEM_GPA, data->npages);
353 
354 	sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);
355 	sync->guest_page_size = data->vm->page_size;
356 	atomic_init(&sync->start_flag, false);
357 	atomic_init(&sync->exit_flag, false);
358 	atomic_init(&sync->sync_flag, false);
359 
360 	data->mmio_ok = false;
361 
362 	return true;
363 }
364 
365 static void launch_vm(struct vm_data *data)
366 {
367 	pr_info_v("Launching the test VM\n");
368 
369 	pthread_create(&data->vcpu_thread, NULL, vcpu_worker, data);
370 
371 	/* Ensure the guest thread is spun up. */
372 	wait_for_vcpu();
373 }
374 
375 static void free_vm(struct vm_data *data)
376 {
377 	kvm_vm_free(data->vm);
378 	free(data->hva_slots);
379 	free(data);
380 }
381 
382 static void wait_guest_exit(struct vm_data *data)
383 {
384 	pthread_join(data->vcpu_thread, NULL);
385 }
386 
387 static void let_guest_run(struct sync_area *sync)
388 {
389 	atomic_store_explicit(&sync->start_flag, true, memory_order_release);
390 }
391 
392 static void guest_spin_until_start(void)
393 {
394 	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
395 
396 	while (!atomic_load_explicit(&sync->start_flag, memory_order_acquire))
397 		;
398 }
399 
400 static void make_guest_exit(struct sync_area *sync)
401 {
402 	atomic_store_explicit(&sync->exit_flag, true, memory_order_release);
403 }
404 
405 static bool _guest_should_exit(void)
406 {
407 	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
408 
409 	return atomic_load_explicit(&sync->exit_flag, memory_order_acquire);
410 }
411 
412 #define guest_should_exit() unlikely(_guest_should_exit())
413 
414 /*
415  * noinline so we can easily see how much time the host spends waiting
416  * for the guest.
417  * For the same reason use alarm() instead of polling clock_gettime()
418  * to implement a wait timeout.
419  */
420 static noinline void host_perform_sync(struct sync_area *sync)
421 {
422 	alarm(wait_timeout);
423 
424 	atomic_store_explicit(&sync->sync_flag, true, memory_order_release);
425 	while (atomic_load_explicit(&sync->sync_flag, memory_order_acquire))
426 		;
427 
428 	alarm(0);
429 }
430 
431 static bool guest_perform_sync(void)
432 {
433 	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
434 	bool expected;
435 
436 	do {
437 		if (guest_should_exit())
438 			return false;
439 
440 		expected = true;
441 	} while (!atomic_compare_exchange_weak_explicit(&sync->sync_flag,
442 							&expected, false,
443 							memory_order_acq_rel,
444 							memory_order_relaxed));
445 
446 	return true;
447 }
448 
449 static void guest_code_test_memslot_move(void)
450 {
451 	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
452 	u32 page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size);
453 	uintptr_t base = (typeof(base))READ_ONCE(sync->move_area_ptr);
454 
455 	GUEST_SYNC(0);
456 
457 	guest_spin_until_start();
458 
459 	while (!guest_should_exit()) {
460 		uintptr_t ptr;
461 
462 		for (ptr = base; ptr < base + MEM_TEST_MOVE_SIZE;
463 		     ptr += page_size)
464 			*(u64 *)ptr = MEM_TEST_VAL_1;
465 
466 		/*
467 		 * No host sync here since the MMIO exits are so expensive
468 		 * that the host would spend most of its time waiting for
469 		 * the guest and so instead of measuring memslot move
470 		 * performance we would measure the performance and
471 		 * likelihood of MMIO exits
472 		 */
473 	}
474 
475 	GUEST_DONE();
476 }
477 
478 static void guest_code_test_memslot_map(void)
479 {
480 	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
481 	u32 page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size);
482 
483 	GUEST_SYNC(0);
484 
485 	guest_spin_until_start();
486 
487 	while (1) {
488 		uintptr_t ptr;
489 
490 		for (ptr = MEM_TEST_GPA;
491 		     ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2;
492 		     ptr += page_size)
493 			*(u64 *)ptr = MEM_TEST_VAL_1;
494 
495 		if (!guest_perform_sync())
496 			break;
497 
498 		for (ptr = MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2;
499 		     ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE;
500 		     ptr += page_size)
501 			*(u64 *)ptr = MEM_TEST_VAL_2;
502 
503 		if (!guest_perform_sync())
504 			break;
505 	}
506 
507 	GUEST_DONE();
508 }
509 
510 static void guest_code_test_memslot_unmap(void)
511 {
512 	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
513 
514 	GUEST_SYNC(0);
515 
516 	guest_spin_until_start();
517 
518 	while (1) {
519 		uintptr_t ptr = MEM_TEST_GPA;
520 
521 		/*
522 		 * We can afford to access (map) just a small number of pages
523 		 * per host sync as otherwise the host will spend
524 		 * a significant amount of its time waiting for the guest
525 		 * (instead of doing unmap operations), so this will
526 		 * effectively turn this test into a map performance test.
527 		 *
528 		 * Just access a single page to be on the safe side.
529 		 */
530 		*(u64 *)ptr = MEM_TEST_VAL_1;
531 
532 		if (!guest_perform_sync())
533 			break;
534 
535 		ptr += MEM_TEST_UNMAP_SIZE / 2;
536 		*(u64 *)ptr = MEM_TEST_VAL_2;
537 
538 		if (!guest_perform_sync())
539 			break;
540 	}
541 
542 	GUEST_DONE();
543 }
544 
545 static void guest_code_test_memslot_rw(void)
546 {
547 	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
548 	u32 page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size);
549 
550 	GUEST_SYNC(0);
551 
552 	guest_spin_until_start();
553 
554 	while (1) {
555 		uintptr_t ptr;
556 
557 		for (ptr = MEM_TEST_GPA;
558 		     ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size)
559 			*(u64 *)ptr = MEM_TEST_VAL_1;
560 
561 		if (!guest_perform_sync())
562 			break;
563 
564 		for (ptr = MEM_TEST_GPA + page_size / 2;
565 		     ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size) {
566 			u64 val = *(u64 *)ptr;
567 
568 			GUEST_ASSERT_EQ(val, MEM_TEST_VAL_2);
569 			*(u64 *)ptr = 0;
570 		}
571 
572 		if (!guest_perform_sync())
573 			break;
574 	}
575 
576 	GUEST_DONE();
577 }
578 
579 static bool test_memslot_move_prepare(struct vm_data *data,
580 				      struct sync_area *sync,
581 				      u64 *maxslots, bool isactive)
582 {
583 	u32 guest_page_size = data->vm->page_size;
584 	u64 movesrcgpa, movetestgpa;
585 
586 #ifdef __x86_64__
587 	if (disable_slot_zap_quirk)
588 		vm_enable_cap(data->vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL);
589 #endif
590 
591 	movesrcgpa = vm_slot2gpa(data, data->nslots - 1);
592 
593 	if (isactive) {
594 		u64 lastpages;
595 
596 		vm_gpa2hva(data, movesrcgpa, &lastpages);
597 		if (lastpages * guest_page_size < MEM_TEST_MOVE_SIZE / 2) {
598 			*maxslots = 0;
599 			return false;
600 		}
601 	}
602 
603 	movetestgpa = movesrcgpa - (MEM_TEST_MOVE_SIZE / (isactive ? 2 : 1));
604 	sync->move_area_ptr = (void *)movetestgpa;
605 
606 	if (isactive) {
607 		data->mmio_ok = true;
608 		data->mmio_gpa_min = movesrcgpa;
609 		data->mmio_gpa_max = movesrcgpa + MEM_TEST_MOVE_SIZE / 2 - 1;
610 	}
611 
612 	return true;
613 }
614 
615 static bool test_memslot_move_prepare_active(struct vm_data *data,
616 					     struct sync_area *sync,
617 					     u64 *maxslots)
618 {
619 	return test_memslot_move_prepare(data, sync, maxslots, true);
620 }
621 
622 static bool test_memslot_move_prepare_inactive(struct vm_data *data,
623 					       struct sync_area *sync,
624 					       u64 *maxslots)
625 {
626 	return test_memslot_move_prepare(data, sync, maxslots, false);
627 }
628 
629 static void test_memslot_move_loop(struct vm_data *data, struct sync_area *sync)
630 {
631 	u64 movesrcgpa;
632 
633 	movesrcgpa = vm_slot2gpa(data, data->nslots - 1);
634 	vm_mem_region_move(data->vm, data->nslots - 1 + 1,
635 			   MEM_TEST_MOVE_GPA_DEST);
636 	vm_mem_region_move(data->vm, data->nslots - 1 + 1, movesrcgpa);
637 }
638 
639 static void test_memslot_do_unmap(struct vm_data *data,
640 				  u64 offsp, u64 count)
641 {
642 	gpa_t gpa, ctr;
643 	u32 guest_page_size = data->vm->page_size;
644 
645 	for (gpa = MEM_TEST_GPA + offsp * guest_page_size, ctr = 0; ctr < count; ) {
646 		u64 npages;
647 		void *hva;
648 		int ret;
649 
650 		hva = vm_gpa2hva(data, gpa, &npages);
651 		TEST_ASSERT(npages, "Empty memory slot at gptr 0x%"PRIx64, gpa);
652 		npages = min(npages, count - ctr);
653 		ret = madvise(hva, npages * guest_page_size, MADV_DONTNEED);
654 		TEST_ASSERT(!ret,
655 			    "madvise(%p, MADV_DONTNEED) on VM memory should not fail for gptr 0x%"PRIx64,
656 			    hva, gpa);
657 		ctr += npages;
658 		gpa += npages * guest_page_size;
659 	}
660 	TEST_ASSERT(ctr == count,
661 		    "madvise(MADV_DONTNEED) should exactly cover all of the requested area");
662 }
663 
664 static void test_memslot_map_unmap_check(struct vm_data *data,
665 					 u64 offsp, u64 valexp)
666 {
667 	gpa_t gpa;
668 	u64 *val;
669 	u32 guest_page_size = data->vm->page_size;
670 
671 	if (!map_unmap_verify)
672 		return;
673 
674 	gpa = MEM_TEST_GPA + offsp * guest_page_size;
675 	val = (typeof(val))vm_gpa2hva(data, gpa, NULL);
676 	TEST_ASSERT(*val == valexp,
677 		    "Guest written values should read back correctly before unmap (%"PRIu64" vs %"PRIu64" @ %"PRIx64")",
678 		    *val, valexp, gpa);
679 	*val = 0;
680 }
681 
682 static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync)
683 {
684 	u32 guest_page_size = data->vm->page_size;
685 	u64 guest_pages = MEM_TEST_MAP_SIZE / guest_page_size;
686 
687 	/*
688 	 * Unmap the second half of the test area while guest writes to (maps)
689 	 * the first half.
690 	 */
691 	test_memslot_do_unmap(data, guest_pages / 2, guest_pages / 2);
692 
693 	/*
694 	 * Wait for the guest to finish writing the first half of the test
695 	 * area, verify the written value on the first and the last page of
696 	 * this area and then unmap it.
697 	 * Meanwhile, the guest is writing to (mapping) the second half of
698 	 * the test area.
699 	 */
700 	host_perform_sync(sync);
701 	test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
702 	test_memslot_map_unmap_check(data, guest_pages / 2 - 1, MEM_TEST_VAL_1);
703 	test_memslot_do_unmap(data, 0, guest_pages / 2);
704 
705 
706 	/*
707 	 * Wait for the guest to finish writing the second half of the test
708 	 * area and verify the written value on the first and the last page
709 	 * of this area.
710 	 * The area will be unmapped at the beginning of the next loop
711 	 * iteration.
712 	 * Meanwhile, the guest is writing to (mapping) the first half of
713 	 * the test area.
714 	 */
715 	host_perform_sync(sync);
716 	test_memslot_map_unmap_check(data, guest_pages / 2, MEM_TEST_VAL_2);
717 	test_memslot_map_unmap_check(data, guest_pages - 1, MEM_TEST_VAL_2);
718 }
719 
720 static void test_memslot_unmap_loop_common(struct vm_data *data,
721 					   struct sync_area *sync,
722 					   u64 chunk)
723 {
724 	u32 guest_page_size = data->vm->page_size;
725 	u64 guest_pages = MEM_TEST_UNMAP_SIZE / guest_page_size;
726 	u64 ctr;
727 
728 	/*
729 	 * Wait for the guest to finish mapping page(s) in the first half
730 	 * of the test area, verify the written value and then perform unmap
731 	 * of this area.
732 	 * Meanwhile, the guest is writing to (mapping) page(s) in the second
733 	 * half of the test area.
734 	 */
735 	host_perform_sync(sync);
736 	test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
737 	for (ctr = 0; ctr < guest_pages / 2; ctr += chunk)
738 		test_memslot_do_unmap(data, ctr, chunk);
739 
740 	/* Likewise, but for the opposite host / guest areas */
741 	host_perform_sync(sync);
742 	test_memslot_map_unmap_check(data, guest_pages / 2, MEM_TEST_VAL_2);
743 	for (ctr = guest_pages / 2; ctr < guest_pages; ctr += chunk)
744 		test_memslot_do_unmap(data, ctr, chunk);
745 }
746 
747 static void test_memslot_unmap_loop(struct vm_data *data,
748 				    struct sync_area *sync)
749 {
750 	u32 host_page_size = getpagesize();
751 	u32 guest_page_size = data->vm->page_size;
752 	u64 guest_chunk_pages = guest_page_size >= host_page_size ?
753 					1 : host_page_size / guest_page_size;
754 
755 	test_memslot_unmap_loop_common(data, sync, guest_chunk_pages);
756 }
757 
758 static void test_memslot_unmap_loop_chunked(struct vm_data *data,
759 					    struct sync_area *sync)
760 {
761 	u32 guest_page_size = data->vm->page_size;
762 	u64 guest_chunk_pages = MEM_TEST_UNMAP_CHUNK_SIZE / guest_page_size;
763 
764 	test_memslot_unmap_loop_common(data, sync, guest_chunk_pages);
765 }
766 
767 static void test_memslot_rw_loop(struct vm_data *data, struct sync_area *sync)
768 {
769 	u64 gptr;
770 	u32 guest_page_size = data->vm->page_size;
771 
772 	for (gptr = MEM_TEST_GPA + guest_page_size / 2;
773 	     gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += guest_page_size)
774 		*(u64 *)vm_gpa2hva(data, gptr, NULL) = MEM_TEST_VAL_2;
775 
776 	host_perform_sync(sync);
777 
778 	for (gptr = MEM_TEST_GPA;
779 	     gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += guest_page_size) {
780 		u64 *vptr = (typeof(vptr))vm_gpa2hva(data, gptr, NULL);
781 		u64 val = *vptr;
782 
783 		TEST_ASSERT(val == MEM_TEST_VAL_1,
784 			    "Guest written values should read back correctly (is %"PRIu64" @ %"PRIx64")",
785 			    val, gptr);
786 		*vptr = 0;
787 	}
788 
789 	host_perform_sync(sync);
790 }
791 
/* Static description of one benchmark; see the tests[] table. */
struct test_data {
	/* Name shown in the test list and in the progress output. */
	const char *name;
	/* Test memory size in bytes; 0 selects the default MEM_SIZE. */
	u64 mem_size;
	/* Guest entry point. */
	void (*guest_code)(void);
	/*
	 * Optional extra setup run after the VM has been prepared and before
	 * it is launched. Returns false if the test cannot run with the
	 * current slot count.
	 */
	bool (*prepare)(struct vm_data *data, struct sync_area *sync,
			u64 *maxslots);
	/* One host-side iteration of the test; called repeatedly. */
	void (*loop)(struct vm_data *data, struct sync_area *sync);
};
800 
801 static bool test_execute(int nslots, u64 *maxslots,
802 			 unsigned int maxtime,
803 			 const struct test_data *tdata,
804 			 u64 *nloops,
805 			 struct timespec *slot_runtime,
806 			 struct timespec *guest_runtime)
807 {
808 	u64 mem_size = tdata->mem_size ? : MEM_SIZE;
809 	struct vm_data *data;
810 	struct sync_area *sync;
811 	struct timespec tstart;
812 	bool ret = true;
813 
814 	data = alloc_vm();
815 	if (!prepare_vm(data, nslots, maxslots, tdata->guest_code,
816 			mem_size, slot_runtime)) {
817 		ret = false;
818 		goto exit_free;
819 	}
820 
821 	sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);
822 	if (tdata->prepare &&
823 	    !tdata->prepare(data, sync, maxslots)) {
824 		ret = false;
825 		goto exit_free;
826 	}
827 
828 	launch_vm(data);
829 
830 	clock_gettime(CLOCK_MONOTONIC, &tstart);
831 	let_guest_run(sync);
832 
833 	while (1) {
834 		*guest_runtime = timespec_elapsed(tstart);
835 		if (guest_runtime->tv_sec >= maxtime)
836 			break;
837 
838 		tdata->loop(data, sync);
839 
840 		(*nloops)++;
841 	}
842 
843 	make_guest_exit(sync);
844 	wait_guest_exit(data);
845 
846 exit_free:
847 	free_vm(data);
848 
849 	return ret;
850 }
851 
/*
 * Table of available tests; the array index is the test number accepted by
 * the -f / -e options. Entries without .mem_size use the default MEM_SIZE.
 */
static const struct test_data tests[] = {
	{
		.name = "map",
		.mem_size = MEM_SIZE_MAP,
		.guest_code = guest_code_test_memslot_map,
		.loop = test_memslot_map_loop,
	},
	{
		.name = "unmap",
		.mem_size = MEM_TEST_UNMAP_SIZE + MEM_EXTRA_SIZE,
		.guest_code = guest_code_test_memslot_unmap,
		.loop = test_memslot_unmap_loop,
	},
	{
		/* Same guest code as "unmap", larger host unmap chunks. */
		.name = "unmap chunked",
		.mem_size = MEM_TEST_UNMAP_SIZE + MEM_EXTRA_SIZE,
		.guest_code = guest_code_test_memslot_unmap,
		.loop = test_memslot_unmap_loop_chunked,
	},
	{
		.name = "move active area",
		.guest_code = guest_code_test_memslot_move,
		.prepare = test_memslot_move_prepare_active,
		.loop = test_memslot_move_loop,
	},
	{
		.name = "move inactive area",
		.guest_code = guest_code_test_memslot_move,
		.prepare = test_memslot_move_prepare_inactive,
		.loop = test_memslot_move_loop,
	},
	{
		.name = "RW",
		.guest_code = guest_code_test_memslot_rw,
		.loop = test_memslot_rw_loop
	},
};
889 
890 #define NTESTS ARRAY_SIZE(tests)
891 
/* Command line settings; defaults are set in main(), parse_args() overrides them. */
struct test_args {
	int tfirst;	/* index of the first test to run (-f) */
	int tlast;	/* index of the last test to run (-e) */
	int nslots;	/* memslot count: user cap (-s, -1 = none) until parse_args() resolves it */
	int seconds;	/* length of each run in seconds (-l) */
	int runs;	/* number of runs per test (-r) */
};
899 
900 static void help(char *name, struct test_args *targs)
901 {
902 	int ctr;
903 
904 	pr_info("usage: %s [-h] [-v] [-d] [-s slots] [-f first_test] [-e last_test] [-l test_length] [-r run_count] [-t wait_timeout]\n",
905 		name);
906 	pr_info(" -h: print this help screen.\n");
907 	pr_info(" -v: enable verbose mode (not for benchmarking).\n");
908 	pr_info(" -d: enable extra debug checks.\n");
909 	pr_info(" -q: Disable memslot zap quirk during memslot move.\n");
910 	pr_info(" -s: specify memslot count cap (-1 means no cap; currently: %i)\n",
911 		targs->nslots);
912 	pr_info(" -f: specify the first test to run (currently: %i; max %zu)\n",
913 		targs->tfirst, NTESTS - 1);
914 	pr_info(" -e: specify the last test to run (currently: %i; max %zu)\n",
915 		targs->tlast, NTESTS - 1);
916 	pr_info(" -l: specify the test length in seconds (currently: %i)\n",
917 		targs->seconds);
918 	pr_info(" -r: specify the number of runs per test (currently: %i)\n",
919 		targs->runs);
920 	pr_info(" -t: specify the number of seconds for host wait timeout (currently: %i)\n",
921 		wait_timeout);
922 
923 	pr_info("\nAvailable tests:\n");
924 	for (ctr = 0; ctr < NTESTS; ctr++)
925 		pr_info("%d: %s\n", ctr, tests[ctr].name);
926 }
927 
928 static bool check_memory_sizes(void)
929 {
930 	u32 host_page_size = getpagesize();
931 	u32 guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;
932 
933 	if (host_page_size > SZ_64K || guest_page_size > SZ_64K) {
934 		pr_info("Unsupported page size on host (0x%x) or guest (0x%x)\n",
935 			host_page_size, guest_page_size);
936 		return false;
937 	}
938 
939 	if (MEM_SIZE % guest_page_size ||
940 	    MEM_TEST_SIZE % guest_page_size) {
941 		pr_info("invalid MEM_SIZE or MEM_TEST_SIZE\n");
942 		return false;
943 	}
944 
945 	if (MEM_SIZE_MAP % guest_page_size		||
946 	    MEM_TEST_MAP_SIZE % guest_page_size		||
947 	    (MEM_TEST_MAP_SIZE / guest_page_size) <= 2	||
948 	    (MEM_TEST_MAP_SIZE / guest_page_size) % 2) {
949 		pr_info("invalid MEM_SIZE_MAP or MEM_TEST_MAP_SIZE\n");
950 		return false;
951 	}
952 
953 	if (MEM_TEST_UNMAP_SIZE > MEM_TEST_SIZE		||
954 	    MEM_TEST_UNMAP_SIZE % guest_page_size	||
955 	    (MEM_TEST_UNMAP_SIZE / guest_page_size) %
956 	    (2 * MEM_TEST_UNMAP_CHUNK_SIZE / guest_page_size)) {
957 		pr_info("invalid MEM_TEST_UNMAP_SIZE or MEM_TEST_UNMAP_CHUNK_SIZE\n");
958 		return false;
959 	}
960 
961 	return true;
962 }
963 
964 static bool parse_args(int argc, char *argv[],
965 		       struct test_args *targs)
966 {
967 	u32 max_mem_slots;
968 	int opt;
969 
970 	while ((opt = getopt(argc, argv, "hvdqs:f:e:l:r:t:")) != -1) {
971 		switch (opt) {
972 		case 'h':
973 		default:
974 			help(argv[0], targs);
975 			return false;
976 		case 'v':
977 			verbose = true;
978 			break;
979 		case 'd':
980 			map_unmap_verify = true;
981 			break;
982 #ifdef __x86_64__
983 		case 'q':
984 			disable_slot_zap_quirk = true;
985 			TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) &
986 				     KVM_X86_QUIRK_SLOT_ZAP_ALL);
987 			break;
988 #endif
989 		case 's':
990 			targs->nslots = atoi_paranoid(optarg);
991 			if (targs->nslots <= 1 && targs->nslots != -1) {
992 				pr_info("Slot count cap must be larger than 1 or -1 for no cap\n");
993 				return false;
994 			}
995 			break;
996 		case 'f':
997 			targs->tfirst = atoi_non_negative("First test", optarg);
998 			break;
999 		case 'e':
1000 			targs->tlast = atoi_non_negative("Last test", optarg);
1001 			if (targs->tlast >= NTESTS) {
1002 				pr_info("Last test to run has to be non-negative and less than %zu\n",
1003 					NTESTS);
1004 				return false;
1005 			}
1006 			break;
1007 		case 'l':
1008 			targs->seconds = atoi_non_negative("Test length", optarg);
1009 			break;
1010 		case 'r':
1011 			targs->runs = atoi_positive("Runs per test", optarg);
1012 			break;
1013 		case 't':
1014 			wait_timeout = atoi_positive("Host wait timeout", optarg);
1015 			break;
1016 		}
1017 	}
1018 
1019 	if (optind < argc) {
1020 		help(argv[0], targs);
1021 		return false;
1022 	}
1023 
1024 	if (targs->tfirst > targs->tlast) {
1025 		pr_info("First test to run cannot be greater than the last test to run\n");
1026 		return false;
1027 	}
1028 
1029 	max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
1030 	if (max_mem_slots <= 1) {
1031 		pr_info("KVM_CAP_NR_MEMSLOTS should be greater than 1\n");
1032 		return false;
1033 	}
1034 
1035 	/* Memory slot 0 is reserved */
1036 	if (targs->nslots == -1)
1037 		targs->nslots = max_mem_slots - 1;
1038 	else
1039 		targs->nslots = min_t(int, targs->nslots, max_mem_slots) - 1;
1040 
1041 	pr_info_v("Allowed Number of memory slots: %"PRIu32"\n",
1042 		  targs->nslots + 1);
1043 
1044 	return true;
1045 }
1046 
/* Timing results of one test run. */
struct test_result {
	/*
	 * slot_runtime:  time spent adding the memslots
	 * guest_runtime: total time of all loop iterations
	 * iter_runtime:  guest_runtime divided by nloops
	 */
	struct timespec slot_runtime, guest_runtime, iter_runtime;
	/* slot_runtime and iter_runtime converted to nanoseconds. */
	s64 slottimens, runtimens;
	/* Number of completed loop iterations. */
	u64 nloops;
};
1052 
1053 static bool test_loop(const struct test_data *data,
1054 		      const struct test_args *targs,
1055 		      struct test_result *rbestslottime,
1056 		      struct test_result *rbestruntime)
1057 {
1058 	u64 maxslots;
1059 	struct test_result result = {};
1060 
1061 	if (!test_execute(targs->nslots, &maxslots, targs->seconds, data,
1062 			  &result.nloops,
1063 			  &result.slot_runtime, &result.guest_runtime)) {
1064 		if (maxslots)
1065 			pr_info("Memslot count too high for this test, decrease the cap (max is %"PRIu64")\n",
1066 				maxslots);
1067 		else
1068 			pr_info("Memslot count may be too high for this test, try adjusting the cap\n");
1069 
1070 		return false;
1071 	}
1072 
1073 	pr_info("Test took %ld.%.9lds for slot setup + %ld.%.9lds all iterations\n",
1074 		result.slot_runtime.tv_sec, result.slot_runtime.tv_nsec,
1075 		result.guest_runtime.tv_sec, result.guest_runtime.tv_nsec);
1076 	if (!result.nloops) {
1077 		pr_info("No full loops done - too short test time or system too loaded?\n");
1078 		return true;
1079 	}
1080 
1081 	result.iter_runtime = timespec_div(result.guest_runtime,
1082 					   result.nloops);
1083 	pr_info("Done %"PRIu64" iterations, avg %ld.%.9lds each\n",
1084 		result.nloops,
1085 		result.iter_runtime.tv_sec,
1086 		result.iter_runtime.tv_nsec);
1087 	result.slottimens = timespec_to_ns(result.slot_runtime);
1088 	result.runtimens = timespec_to_ns(result.iter_runtime);
1089 
1090 	/*
1091 	 * Only rank the slot setup time for tests using the whole test memory
1092 	 * area so they are comparable
1093 	 */
1094 	if (!data->mem_size &&
1095 	    (!rbestslottime->slottimens ||
1096 	     result.slottimens < rbestslottime->slottimens))
1097 		*rbestslottime = result;
1098 	if (!rbestruntime->runtimens ||
1099 	    result.runtimens < rbestruntime->runtimens)
1100 		*rbestruntime = result;
1101 
1102 	return true;
1103 }
1104 
1105 int main(int argc, char *argv[])
1106 {
1107 	struct test_args targs = {
1108 		.tfirst = 0,
1109 		.tlast = NTESTS - 1,
1110 		.nslots = -1,
1111 		.seconds = 5,
1112 		.runs = 1,
1113 	};
1114 	struct test_result rbestslottime = {};
1115 	int tctr;
1116 
1117 	if (!check_memory_sizes())
1118 		return -1;
1119 
1120 	if (!parse_args(argc, argv, &targs))
1121 		return -1;
1122 
1123 	for (tctr = targs.tfirst; tctr <= targs.tlast; tctr++) {
1124 		const struct test_data *data = &tests[tctr];
1125 		unsigned int runctr;
1126 		struct test_result rbestruntime = {};
1127 
1128 		if (tctr > targs.tfirst)
1129 			pr_info("\n");
1130 
1131 		pr_info("Testing %s performance with %i runs, %d seconds each\n",
1132 			data->name, targs.runs, targs.seconds);
1133 
1134 		for (runctr = 0; runctr < targs.runs; runctr++)
1135 			if (!test_loop(data, &targs,
1136 				       &rbestslottime, &rbestruntime))
1137 				break;
1138 
1139 		if (rbestruntime.runtimens)
1140 			pr_info("Best runtime result was %ld.%.9lds per iteration (with %"PRIu64" iterations)\n",
1141 				rbestruntime.iter_runtime.tv_sec,
1142 				rbestruntime.iter_runtime.tv_nsec,
1143 				rbestruntime.nloops);
1144 	}
1145 
1146 	if (rbestslottime.slottimens)
1147 		pr_info("Best slot setup time for the whole test area was %ld.%.9lds\n",
1148 			rbestslottime.slot_runtime.tv_sec,
1149 			rbestslottime.slot_runtime.tv_nsec);
1150 
1151 	return 0;
1152 }
1153