1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * vgic_lpi_stress - Stress test for KVM's ITS emulation
4 *
5 * Copyright (c) 2024 Google LLC
6 */
7
8 #include <linux/sizes.h>
9 #include <pthread.h>
10 #include <stdatomic.h>
11 #include <sys/sysinfo.h>
12
13 #include "kvm_util.h"
14 #include "gic.h"
15 #include "gic_v3.h"
16 #include "gic_v3_its.h"
17 #include "processor.h"
18 #include "ucall.h"
19 #include "vgic.h"
20
21 #define TEST_MEMSLOT_INDEX 1
22
23 #define GIC_LPI_OFFSET 8192
24
/* Number of times each device thread replays its full set of event IDs. */
static size_t nr_iterations = 1000;
/* Base GPA of the test memslot backing all ITS/redistributor tables. */
static vm_paddr_t gpa_base;

static struct kvm_vm *vm;
static struct kvm_vcpu **vcpus;
static int its_fd;

/*
 * Test parameters and table locations, shared with the guest via
 * sync_global_to_guest() once setup_test_data() has populated it.
 */
static struct test_data {
	bool request_vcpus_stop;	/* set by userspace to break the guest spin loop */
	u32 nr_cpus;
	u32 nr_devices;
	u32 nr_event_ids;		/* event IDs (and thus LPIs) per device */

	vm_paddr_t device_table;
	vm_paddr_t collection_table;
	vm_paddr_t cmdq_base;
	void *cmdq_base_va;		/* cmdq is identity-mapped, so VA == GPA */
	vm_paddr_t itt_tables;		/* nr_devices contiguous 64K ITTs */

	vm_paddr_t lpi_prop_table;	/* single LPI config table shared by all vCPUs */
	vm_paddr_t lpi_pend_tables;	/* nr_cpus contiguous 64K pending tables */
} test_data = {
	.nr_cpus = 1,
	.nr_devices = 1,
	.nr_event_ids = 16,
};
51
guest_irq_handler(struct ex_regs * regs)52 static void guest_irq_handler(struct ex_regs *regs)
53 {
54 u32 intid = gic_get_and_ack_irq();
55
56 if (intid == IAR_SPURIOUS)
57 return;
58
59 GUEST_ASSERT(intid >= GIC_LPI_OFFSET);
60 gic_set_eoi(intid);
61 }
62
guest_setup_its_mappings(void)63 static void guest_setup_its_mappings(void)
64 {
65 u32 coll_id, device_id, event_id, intid = GIC_LPI_OFFSET;
66 u32 nr_events = test_data.nr_event_ids;
67 u32 nr_devices = test_data.nr_devices;
68 u32 nr_cpus = test_data.nr_cpus;
69
70 for (coll_id = 0; coll_id < nr_cpus; coll_id++)
71 its_send_mapc_cmd(test_data.cmdq_base_va, coll_id, coll_id, true);
72
73 /* Round-robin the LPIs to all of the vCPUs in the VM */
74 coll_id = 0;
75 for (device_id = 0; device_id < nr_devices; device_id++) {
76 vm_paddr_t itt_base = test_data.itt_tables + (device_id * SZ_64K);
77
78 its_send_mapd_cmd(test_data.cmdq_base_va, device_id,
79 itt_base, SZ_64K, true);
80
81 for (event_id = 0; event_id < nr_events; event_id++) {
82 its_send_mapti_cmd(test_data.cmdq_base_va, device_id,
83 event_id, coll_id, intid++);
84
85 coll_id = (coll_id + 1) % test_data.nr_cpus;
86 }
87 }
88 }
89
guest_invalidate_all_rdists(void)90 static void guest_invalidate_all_rdists(void)
91 {
92 int i;
93
94 for (i = 0; i < test_data.nr_cpus; i++)
95 its_send_invall_cmd(test_data.cmdq_base_va, i);
96 }
97
/*
 * Per-vCPU GIC bring-up. Every vCPU enables LPIs on its own redistributor;
 * vCPU0 alone then initializes the ITS, installs the LPI mappings, and
 * invalidates the redistributors — but only after all vCPUs are ready.
 */
static void guest_setup_gic(void)
{
	/* Count of vCPUs that have enabled LPIs on their redistributor. */
	static atomic_int nr_cpus_ready = 0;
	u32 cpuid = guest_get_vcpuid();

	gic_init(GIC_V3, test_data.nr_cpus);
	/* Shared 64K config table; each vCPU gets its own 64K pending table. */
	gic_rdist_enable_lpis(test_data.lpi_prop_table, SZ_64K,
	test_data.lpi_pend_tables + (cpuid * SZ_64K));

	atomic_fetch_add(&nr_cpus_ready, 1);

	/* Secondary vCPUs are done; ITS setup is vCPU0's job. */
	if (cpuid > 0)
		return;

	/* Wait for every redistributor to be LPI-enabled before mapping/INVALL. */
	while (atomic_load(&nr_cpus_ready) < test_data.nr_cpus)
		cpu_relax();

	its_init(test_data.collection_table, SZ_64K,
	test_data.device_table, SZ_64K,
	test_data.cmdq_base, SZ_64K);

	guest_setup_its_mappings();
	guest_invalidate_all_rdists();
}
122
/* Guest entry point: bring up the GIC/ITS, then spin until told to stop. */
static void guest_code(size_t nr_lpis)
{
	guest_setup_gic();
	local_irq_enable();

	/* Tell userspace that LPI delivery can begin. */
	GUEST_SYNC(0);

	/*
	 * Don't use WFI here to avoid blocking the vCPU thread indefinitely and
	 * never getting the stop signal.
	 */
	for (;;) {
		if (READ_ONCE(test_data.request_vcpus_stop))
			break;
		cpu_relax();
	}

	GUEST_DONE();
}
139
setup_memslot(void)140 static void setup_memslot(void)
141 {
142 size_t pages;
143 size_t sz;
144
145 /*
146 * For the ITS:
147 * - A single level device table
148 * - A single level collection table
149 * - The command queue
150 * - An ITT for each device
151 */
152 sz = (3 + test_data.nr_devices) * SZ_64K;
153
154 /*
155 * For the redistributors:
156 * - A shared LPI configuration table
157 * - An LPI pending table for each vCPU
158 */
159 sz += (1 + test_data.nr_cpus) * SZ_64K;
160
161 pages = sz / vm->page_size;
162 gpa_base = ((vm_compute_max_gfn(vm) + 1) * vm->page_size) - sz;
163 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa_base,
164 TEST_MEMSLOT_INDEX, pages, 0);
165 }
166
167 #define LPI_PROP_DEFAULT_PRIO 0xa0
168
configure_lpis(void)169 static void configure_lpis(void)
170 {
171 size_t nr_lpis = test_data.nr_devices * test_data.nr_event_ids;
172 u8 *tbl = addr_gpa2hva(vm, test_data.lpi_prop_table);
173 size_t i;
174
175 for (i = 0; i < nr_lpis; i++) {
176 tbl[i] = LPI_PROP_DEFAULT_PRIO |
177 LPI_PROP_GROUP1 |
178 LPI_PROP_ENABLED;
179 }
180 }
181
/*
 * Allocate every ITS/redistributor table out of the test memslot, record the
 * addresses in test_data, and publish the lot to the guest.
 */
static void setup_test_data(void)
{
	size_t pages_per_64k = vm_calc_num_guest_pages(vm->mode, SZ_64K);
	u32 nr_devices = test_data.nr_devices;
	u32 nr_cpus = test_data.nr_cpus;
	vm_paddr_t cmdq_base;

	/* Single-level device and collection tables, 64K each. */
	test_data.device_table = vm_phy_pages_alloc(vm, pages_per_64k,
						    gpa_base,
						    TEST_MEMSLOT_INDEX);

	test_data.collection_table = vm_phy_pages_alloc(vm, pages_per_64k,
							gpa_base,
							TEST_MEMSLOT_INDEX);

	/*
	 * Identity-map the command queue so the guest can use the GPA directly
	 * as its virtual address when writing ITS commands.
	 */
	cmdq_base = vm_phy_pages_alloc(vm, pages_per_64k, gpa_base,
				       TEST_MEMSLOT_INDEX);
	virt_map(vm, cmdq_base, cmdq_base, pages_per_64k);
	test_data.cmdq_base = cmdq_base;
	test_data.cmdq_base_va = (void *)cmdq_base;

	/* One contiguous 64K ITT per device. */
	test_data.itt_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_devices,
						  gpa_base, TEST_MEMSLOT_INDEX);

	/* Shared LPI config table; populate it before the guest starts. */
	test_data.lpi_prop_table = vm_phy_pages_alloc(vm, pages_per_64k,
						      gpa_base, TEST_MEMSLOT_INDEX);
	configure_lpis();

	/* One contiguous 64K pending table per vCPU. */
	test_data.lpi_pend_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_cpus,
						       gpa_base, TEST_MEMSLOT_INDEX);

	/* Everything is in place; hand the addresses to the guest. */
	sync_global_to_guest(vm, test_data);
}
215
/* Create the in-kernel ITS device; the fd is closed in destroy_vm(). */
static void setup_gic(void)
{
	its_fd = vgic_its_setup(vm);
}
220
/* Inject one MSI at the ITS doorbell for the given (device, event) pair. */
static void signal_lpi(u32 device_id, u32 event_id)
{
	vm_paddr_t doorbell = GITS_BASE_GPA + GITS_TRANSLATER;
	int ret;

	struct kvm_msi msi = {
		.address_lo	= doorbell,
		.address_hi	= doorbell >> 32,
		.data		= event_id,
		.devid		= device_id,
		.flags		= KVM_MSI_VALID_DEVID,
	};

	/*
	 * KVM_SIGNAL_MSI returns 1 if the MSI wasn't 'blocked' by the VM,
	 * which for arm64 implies having a valid translation in the ITS.
	 */
	ret = __vm_ioctl(vm, KVM_SIGNAL_MSI, &msi);
	TEST_ASSERT(ret == 1, "KVM_SIGNAL_MSI ioctl failed");
}
240
241 static pthread_barrier_t test_setup_barrier;
242
lpi_worker_thread(void * data)243 static void *lpi_worker_thread(void *data)
244 {
245 u32 device_id = (size_t)data;
246 u32 event_id;
247 size_t i;
248
249 pthread_barrier_wait(&test_setup_barrier);
250
251 for (i = 0; i < nr_iterations; i++)
252 for (event_id = 0; event_id < test_data.nr_event_ids; event_id++)
253 signal_lpi(device_id, event_id);
254
255 return NULL;
256 }
257
vcpu_worker_thread(void * data)258 static void *vcpu_worker_thread(void *data)
259 {
260 struct kvm_vcpu *vcpu = data;
261 struct ucall uc;
262
263 while (true) {
264 vcpu_run(vcpu);
265
266 switch (get_ucall(vcpu, &uc)) {
267 case UCALL_SYNC:
268 pthread_barrier_wait(&test_setup_barrier);
269 continue;
270 case UCALL_DONE:
271 return NULL;
272 case UCALL_ABORT:
273 REPORT_GUEST_ASSERT(uc);
274 break;
275 default:
276 TEST_FAIL("Unknown ucall: %lu", uc.cmd);
277 }
278 }
279
280 return NULL;
281 }
282
report_stats(struct timespec delta)283 static void report_stats(struct timespec delta)
284 {
285 double nr_lpis;
286 double time;
287
288 nr_lpis = test_data.nr_devices * test_data.nr_event_ids * nr_iterations;
289
290 time = delta.tv_sec;
291 time += ((double)delta.tv_nsec) / NSEC_PER_SEC;
292
293 pr_info("Rate: %.2f LPIs/sec\n", nr_lpis / time);
294 }
295
run_test(void)296 static void run_test(void)
297 {
298 u32 nr_devices = test_data.nr_devices;
299 u32 nr_vcpus = test_data.nr_cpus;
300 pthread_t *lpi_threads = malloc(nr_devices * sizeof(pthread_t));
301 pthread_t *vcpu_threads = malloc(nr_vcpus * sizeof(pthread_t));
302 struct timespec start, delta;
303 size_t i;
304
305 TEST_ASSERT(lpi_threads && vcpu_threads, "Failed to allocate pthread arrays");
306
307 pthread_barrier_init(&test_setup_barrier, NULL, nr_vcpus + nr_devices + 1);
308
309 for (i = 0; i < nr_vcpus; i++)
310 pthread_create(&vcpu_threads[i], NULL, vcpu_worker_thread, vcpus[i]);
311
312 for (i = 0; i < nr_devices; i++)
313 pthread_create(&lpi_threads[i], NULL, lpi_worker_thread, (void *)i);
314
315 pthread_barrier_wait(&test_setup_barrier);
316
317 clock_gettime(CLOCK_MONOTONIC, &start);
318
319 for (i = 0; i < nr_devices; i++)
320 pthread_join(lpi_threads[i], NULL);
321
322 delta = timespec_elapsed(start);
323 write_guest_global(vm, test_data.request_vcpus_stop, true);
324
325 for (i = 0; i < nr_vcpus; i++)
326 pthread_join(vcpu_threads[i], NULL);
327
328 report_stats(delta);
329 }
330
setup_vm(void)331 static void setup_vm(void)
332 {
333 int i;
334
335 vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu *));
336 TEST_ASSERT(vcpus, "Failed to allocate vCPU array");
337
338 vm = vm_create_with_vcpus(test_data.nr_cpus, guest_code, vcpus);
339
340 vm_init_descriptor_tables(vm);
341 for (i = 0; i < test_data.nr_cpus; i++)
342 vcpu_init_descriptor_tables(vcpus[i]);
343
344 vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);
345
346 setup_memslot();
347
348 setup_gic();
349
350 setup_test_data();
351 }
352
/* Tear down everything set up by setup_vm()/setup_gic(). */
static void destroy_vm(void)
{
	close(its_fd);
	kvm_vm_free(vm);
	free(vcpus);
}
359
pr_usage(const char * name)360 static void pr_usage(const char *name)
361 {
362 pr_info("%s [-v NR_VCPUS] [-d NR_DEVICES] [-e NR_EVENTS] [-i ITERS] -h\n", name);
363 pr_info(" -v:\tnumber of vCPUs (default: %u)\n", test_data.nr_cpus);
364 pr_info(" -d:\tnumber of devices (default: %u)\n", test_data.nr_devices);
365 pr_info(" -e:\tnumber of event IDs per device (default: %u)\n", test_data.nr_event_ids);
366 pr_info(" -i:\tnumber of iterations (default: %lu)\n", nr_iterations);
367 }
368
main(int argc,char ** argv)369 int main(int argc, char **argv)
370 {
371 u32 nr_threads;
372 int c;
373
374 TEST_REQUIRE(kvm_supports_vgic_v3());
375
376 while ((c = getopt(argc, argv, "hv:d:e:i:")) != -1) {
377 switch (c) {
378 case 'v':
379 test_data.nr_cpus = atoi(optarg);
380 break;
381 case 'd':
382 test_data.nr_devices = atoi(optarg);
383 break;
384 case 'e':
385 test_data.nr_event_ids = atoi(optarg);
386 break;
387 case 'i':
388 nr_iterations = strtoul(optarg, NULL, 0);
389 break;
390 case 'h':
391 default:
392 pr_usage(argv[0]);
393 return 1;
394 }
395 }
396
397 nr_threads = test_data.nr_cpus + test_data.nr_devices;
398 if (nr_threads > get_nprocs())
399 pr_info("WARNING: running %u threads on %d CPUs; performance is degraded.\n",
400 nr_threads, get_nprocs());
401
402 setup_vm();
403
404 run_test();
405
406 destroy_vm();
407
408 return 0;
409 }
410