// SPDX-License-Identifier: GPL-2.0
/*
 * vgic_lpi_stress - Stress test for KVM's ITS emulation
 *
 * Copyright (c) 2024 Google LLC
 */

#include <linux/sizes.h>
#include <pthread.h>
#include <stdatomic.h>
#include <sys/sysinfo.h>

#include "kvm_util.h"
#include "gic.h"
#include "gic_v3.h"
#include "gic_v3_its.h"
#include "processor.h"
#include "ucall.h"
#include "vgic.h"

#define TEST_MEMSLOT_INDEX	1

#define GIC_LPI_OFFSET	8192

static size_t nr_iterations = 1000;
static vm_paddr_t gpa_base;

static struct kvm_vm *vm;
static struct kvm_vcpu **vcpus;
static int its_fd;

static struct test_data {
	bool		request_vcpus_stop;
	u32		nr_cpus;
	u32		nr_devices;
	u32		nr_event_ids;

	vm_paddr_t	device_table;
	vm_paddr_t	collection_table;
	vm_paddr_t	cmdq_base;
	void		*cmdq_base_va;
	vm_paddr_t	itt_tables;

	vm_paddr_t	lpi_prop_table;
	vm_paddr_t	lpi_pend_tables;
} test_data = {
	.nr_cpus	= 1,
	.nr_devices	= 1,
	.nr_event_ids	= 16,
};

static void guest_irq_handler(struct ex_regs *regs)
{
	u32 intid = gic_get_and_ack_irq();

	if (intid == IAR_SPURIOUS)
		return;

	GUEST_ASSERT(intid >= GIC_LPI_OFFSET);
	gic_set_eoi(intid);
}

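/*
 * Build the ITS translation tables from within the guest: MAPC binds each
 * collection to a redistributor, MAPD registers an ITT for each device, and
 * MAPTI wires every (device_id, event_id) pair to an LPI.
 */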
static void guest_setup_its_mappings(void)
{
	u32 coll_id, device_id, event_id, intid = GIC_LPI_OFFSET;
	u32 nr_events = test_data.nr_event_ids;
	u32 nr_devices = test_data.nr_devices;
	u32 nr_cpus = test_data.nr_cpus;

	for (coll_id = 0; coll_id < nr_cpus; coll_id++)
		its_send_mapc_cmd(test_data.cmdq_base_va, coll_id, coll_id, true);

	/* Round-robin the LPIs to all of the vCPUs in the VM */
	coll_id = 0;
	for (device_id = 0; device_id < nr_devices; device_id++) {
		vm_paddr_t itt_base = test_data.itt_tables + (device_id * SZ_64K);

		its_send_mapd_cmd(test_data.cmdq_base_va, device_id,
				  itt_base, SZ_64K, true);

		for (event_id = 0; event_id < nr_events; event_id++) {
			its_send_mapti_cmd(test_data.cmdq_base_va, device_id,
					   event_id, coll_id, intid++);

			coll_id = (coll_id + 1) % nr_cpus;
		}
	}
}

static void guest_invalidate_all_rdists(void)
{
	int i;

	for (i = 0; i < test_data.nr_cpus; i++)
		its_send_invall_cmd(test_data.cmdq_base_va, i);
}

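/*
 * Per-vCPU GIC setup. Every vCPU enables LPIs at its own redistributor;
 * vCPU 0 then waits for all of its peers before initializing the ITS and
 * issuing the mapping commands, since the ITS only needs to be set up once.
 */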
static void guest_setup_gic(void)
{
	static atomic_int nr_cpus_ready = 0;
	u32 cpuid = guest_get_vcpuid();

	gic_init(GIC_V3, test_data.nr_cpus);
	gic_rdist_enable_lpis(test_data.lpi_prop_table, SZ_64K,
			      test_data.lpi_pend_tables + (cpuid * SZ_64K));

	atomic_fetch_add(&nr_cpus_ready, 1);

	if (cpuid > 0)
		return;

	while (atomic_load(&nr_cpus_ready) < test_data.nr_cpus)
		cpu_relax();

	its_init(test_data.collection_table, SZ_64K,
		 test_data.device_table, SZ_64K,
		 test_data.cmdq_base, SZ_64K);

	guest_setup_its_mappings();
	guest_invalidate_all_rdists();
}

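/*
 * Guest entry point: bring up the GIC, rendezvous with the host via
 * GUEST_SYNC(0), then spin until the host requests a stop.
 */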
static void guest_code(size_t nr_lpis)
{
	guest_setup_gic();

	GUEST_SYNC(0);

	/*
	 * Don't use WFI here to avoid blocking the vCPU thread indefinitely and
	 * never getting the stop signal.
	 */
	while (!READ_ONCE(test_data.request_vcpus_stop))
		cpu_relax();

	GUEST_DONE();
}

static void setup_memslot(void)
{
	size_t pages;
	size_t sz;

	/*
	 * For the ITS:
	 *  - A single level device table
	 *  - A single level collection table
	 *  - The command queue
	 *  - An ITT for each device
	 */
	sz = (3 + test_data.nr_devices) * SZ_64K;

	/*
	 * For the redistributors:
	 *  - A shared LPI configuration table
	 *  - An LPI pending table for each vCPU
	 */
	sz += (1 + test_data.nr_cpus) * SZ_64K;

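	/*
	 * Carve the backing for these tables out of the very top of the
	 * guest physical address space, in a dedicated memslot.
	 */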
	pages = sz / vm->page_size;
	gpa_base = ((vm_compute_max_gfn(vm) + 1) * vm->page_size) - sz;
	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa_base,
				    TEST_MEMSLOT_INDEX, pages, 0);
}

#define LPI_PROP_DEFAULT_PRIO	0xa0

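/*
 * The LPI configuration table holds one byte per LPI: a priority in the
 * upper bits plus group/enable flags in the low bits. Mark every LPI used
 * by the test as enabled.
 */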
static void configure_lpis(void)
{
	size_t nr_lpis = test_data.nr_devices * test_data.nr_event_ids;
	u8 *tbl = addr_gpa2hva(vm, test_data.lpi_prop_table);
	size_t i;

	for (i = 0; i < nr_lpis; i++) {
		tbl[i] = LPI_PROP_DEFAULT_PRIO |
			 LPI_PROP_GROUP1 |
			 LPI_PROP_ENABLED;
	}
}

static void setup_test_data(void)
{
	size_t pages_per_64k = vm_calc_num_guest_pages(vm->mode, SZ_64K);
	u32 nr_devices = test_data.nr_devices;
	u32 nr_cpus = test_data.nr_cpus;
	vm_paddr_t cmdq_base;

	test_data.device_table = vm_phy_pages_alloc(vm, pages_per_64k,
						    gpa_base,
						    TEST_MEMSLOT_INDEX);

	test_data.collection_table = vm_phy_pages_alloc(vm, pages_per_64k,
							gpa_base,
							TEST_MEMSLOT_INDEX);

	cmdq_base = vm_phy_pages_alloc(vm, pages_per_64k, gpa_base,
				       TEST_MEMSLOT_INDEX);
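	/*
	 * Identity-map the command queue so the guest can treat the GPA
	 * handed to the ITS as a virtual address when writing commands.
	 */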
	virt_map(vm, cmdq_base, cmdq_base, pages_per_64k);
	test_data.cmdq_base = cmdq_base;
	test_data.cmdq_base_va = (void *)cmdq_base;

	test_data.itt_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_devices,
						  gpa_base, TEST_MEMSLOT_INDEX);

	test_data.lpi_prop_table = vm_phy_pages_alloc(vm, pages_per_64k,
						      gpa_base, TEST_MEMSLOT_INDEX);
	configure_lpis();

	test_data.lpi_pend_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_cpus,
						       gpa_base, TEST_MEMSLOT_INDEX);

	sync_global_to_guest(vm, test_data);
}

static void setup_gic(void)
{
	its_fd = vgic_its_setup(vm);
}

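/*
 * Inject an LPI from userspace by firing a KVM_SIGNAL_MSI at the ITS
 * doorbell (GITS_TRANSLATER), exercising KVM's MSI-to-LPI translation.
 */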
static void signal_lpi(u32 device_id, u32 event_id)
{
	vm_paddr_t db_addr = GITS_BASE_GPA + GITS_TRANSLATER;

	struct kvm_msi msi = {
		.address_lo	= db_addr,
		.address_hi	= db_addr >> 32,
		.data		= event_id,
		.devid		= device_id,
		.flags		= KVM_MSI_VALID_DEVID,
	};

	/*
	 * KVM_SIGNAL_MSI returns 1 if the MSI wasn't 'blocked' by the VM,
	 * which for arm64 implies having a valid translation in the ITS.
	 */
	TEST_ASSERT(__vm_ioctl(vm, KVM_SIGNAL_MSI, &msi) == 1,
		    "KVM_SIGNAL_MSI ioctl failed");
}

static pthread_barrier_t test_setup_barrier;

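/*
 * One worker thread per device, each hammering every event ID it owns for
 * the requested number of iterations.
 */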
static void *lpi_worker_thread(void *data)
{
	u32 device_id = (size_t)data;
	u32 event_id;
	size_t i;

	pthread_barrier_wait(&test_setup_barrier);

	for (i = 0; i < nr_iterations; i++)
		for (event_id = 0; event_id < test_data.nr_event_ids; event_id++)
			signal_lpi(device_id, event_id);

	return NULL;
}

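/*
 * Drive a single vCPU: the first UCALL_SYNC releases the setup barrier,
 * after which the vCPU spins in the guest until the host flags a stop and
 * the guest replies with UCALL_DONE.
 */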
static void *vcpu_worker_thread(void *data)
{
	struct kvm_vcpu *vcpu = data;
	struct ucall uc;

	while (true) {
		vcpu_run(vcpu);

		switch (get_ucall(vcpu, &uc)) {
		case UCALL_SYNC:
			pthread_barrier_wait(&test_setup_barrier);
			continue;
		case UCALL_DONE:
			return NULL;
		case UCALL_ABORT:
			REPORT_GUEST_ASSERT(uc);
			break;
		default:
			TEST_FAIL("Unknown ucall: %lu", uc.cmd);
		}
	}

	return NULL;
}

static void report_stats(struct timespec delta)
{
	double nr_lpis;
	double time;

	nr_lpis = test_data.nr_devices * test_data.nr_event_ids * nr_iterations;

	time = delta.tv_sec;
	time += ((double)delta.tv_nsec) / NSEC_PER_SEC;

	pr_info("Rate: %.2f LPIs/sec\n", nr_lpis / time);
}

static void run_test(void)
{
	u32 nr_devices = test_data.nr_devices;
	u32 nr_vcpus = test_data.nr_cpus;
	pthread_t *lpi_threads = malloc(nr_devices * sizeof(pthread_t));
	pthread_t *vcpu_threads = malloc(nr_vcpus * sizeof(pthread_t));
	struct timespec start, delta;
	size_t i;

	TEST_ASSERT(lpi_threads && vcpu_threads, "Failed to allocate pthread arrays");

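	/*
	 * The setup barrier is crossed once by each vCPU thread (at the
	 * guest's GUEST_SYNC), once by each LPI worker thread, and once by
	 * the main thread just before it starts the clock.
	 */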
	pthread_barrier_init(&test_setup_barrier, NULL, nr_vcpus + nr_devices + 1);

	for (i = 0; i < nr_vcpus; i++)
		pthread_create(&vcpu_threads[i], NULL, vcpu_worker_thread, vcpus[i]);

	for (i = 0; i < nr_devices; i++)
		pthread_create(&lpi_threads[i], NULL, lpi_worker_thread, (void *)i);

	pthread_barrier_wait(&test_setup_barrier);

	clock_gettime(CLOCK_MONOTONIC, &start);

	for (i = 0; i < nr_devices; i++)
		pthread_join(lpi_threads[i], NULL);

	delta = timespec_elapsed(start);
	write_guest_global(vm, test_data.request_vcpus_stop, true);

	for (i = 0; i < nr_vcpus; i++)
		pthread_join(vcpu_threads[i], NULL);

	report_stats(delta);
}

static void setup_vm(void)
{
	int i;

	vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu *));
	TEST_ASSERT(vcpus, "Failed to allocate vCPU array");

	vm = vm_create_with_vcpus(test_data.nr_cpus, guest_code, vcpus);

	vm_init_descriptor_tables(vm);
	for (i = 0; i < test_data.nr_cpus; i++)
		vcpu_init_descriptor_tables(vcpus[i]);

	vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);

	setup_memslot();

	setup_gic();

	setup_test_data();
}

static void destroy_vm(void)
{
	close(its_fd);
	kvm_vm_free(vm);
	free(vcpus);
}

static void pr_usage(const char *name)
{
	pr_info("%s [-v NR_VCPUS] [-d NR_DEVICES] [-e NR_EVENTS] [-i ITERS] [-h]\n", name);
	pr_info("  -v:\tnumber of vCPUs (default: %u)\n", test_data.nr_cpus);
	pr_info("  -d:\tnumber of devices (default: %u)\n", test_data.nr_devices);
	pr_info("  -e:\tnumber of event IDs per device (default: %u)\n", test_data.nr_event_ids);
	pr_info("  -i:\tnumber of iterations (default: %lu)\n", nr_iterations);
}

int main(int argc, char **argv)
{
	u32 nr_threads;
	int c;

	TEST_REQUIRE(kvm_supports_vgic_v3());

	while ((c = getopt(argc, argv, "hv:d:e:i:")) != -1) {
		switch (c) {
		case 'v':
			test_data.nr_cpus = atoi(optarg);
			break;
		case 'd':
			test_data.nr_devices = atoi(optarg);
			break;
		case 'e':
			test_data.nr_event_ids = atoi(optarg);
			break;
		case 'i':
			nr_iterations = strtoul(optarg, NULL, 0);
			break;
		case 'h':
		default:
			pr_usage(argv[0]);
			return 1;
		}
	}

	nr_threads = test_data.nr_cpus + test_data.nr_devices;
	if (nr_threads > get_nprocs())
		pr_info("WARNING: running %u threads on %d CPUs; performance may be degraded.\n",
			nr_threads, get_nprocs());

	setup_vm();

	run_test();

	destroy_vm();

	return 0;
}