// SPDX-License-Identifier: GPL-2.0
/*
 * vgic_lpi_stress - Stress test for KVM's ITS emulation
 *
 * Copyright (c) 2024 Google LLC
 */

#include <linux/sizes.h>
#include <pthread.h>
#include <stdatomic.h>
#include <sys/sysinfo.h>

#include "kvm_util.h"
#include "gic.h"
#include "gic_v3.h"
#include "gic_v3_its.h"
#include "processor.h"
#include "ucall.h"
#include "vgic.h"

#define TEST_MEMSLOT_INDEX	1

#define GIC_LPI_OFFSET		8192

static size_t nr_iterations = 1000;
static vm_paddr_t gpa_base;

static struct kvm_vm *vm;
static struct kvm_vcpu **vcpus;
static int gic_fd, its_fd;

static struct test_data {
	bool		request_vcpus_stop;
	u32		nr_cpus;
	u32		nr_devices;
	u32		nr_event_ids;

	vm_paddr_t	device_table;
	vm_paddr_t	collection_table;
	vm_paddr_t	cmdq_base;
	void		*cmdq_base_va;
	vm_paddr_t	itt_tables;

	vm_paddr_t	lpi_prop_table;
	vm_paddr_t	lpi_pend_tables;
} test_data = {
	.nr_cpus	= 1,
	.nr_devices	= 1,
	.nr_event_ids	= 16,
};

static void guest_irq_handler(struct ex_regs *regs)
{
	u32 intid = gic_get_and_ack_irq();

	if (intid == IAR_SPURIOUS)
		return;

	GUEST_ASSERT(intid >= GIC_LPI_OFFSET);
	gic_set_eoi(intid);
}

static void guest_setup_its_mappings(void)
{
	u32 coll_id, device_id, event_id, intid = GIC_LPI_OFFSET;
	u32 nr_events = test_data.nr_event_ids;
	u32 nr_devices = test_data.nr_devices;
	u32 nr_cpus = test_data.nr_cpus;

	/* Map one collection per vCPU, identified by the vCPU's number */
	for (coll_id = 0; coll_id < nr_cpus; coll_id++)
		its_send_mapc_cmd(test_data.cmdq_base_va, coll_id, coll_id, true);

	/* Round-robin the LPIs to all of the vCPUs in the VM */
	coll_id = 0;
	for (device_id = 0; device_id < nr_devices; device_id++) {
		vm_paddr_t itt_base = test_data.itt_tables + (device_id * SZ_64K);

		its_send_mapd_cmd(test_data.cmdq_base_va, device_id,
				  itt_base, SZ_64K, true);

		for (event_id = 0; event_id < nr_events; event_id++) {
			its_send_mapti_cmd(test_data.cmdq_base_va, device_id,
					   event_id, coll_id, intid++);

			coll_id = (coll_id + 1) % nr_cpus;
		}
	}
}

static void guest_invalidate_all_rdists(void)
{
	int i;

	for (i = 0; i < test_data.nr_cpus; i++)
		its_send_invall_cmd(test_data.cmdq_base_va, i);
}

static void guest_setup_gic(void)
{
	static atomic_int nr_cpus_ready = 0;
	u32 cpuid = guest_get_vcpuid();

	gic_init(GIC_V3, test_data.nr_cpus);
	gic_rdist_enable_lpis(test_data.lpi_prop_table, SZ_64K,
			      test_data.lpi_pend_tables + (cpuid * SZ_64K));

	atomic_fetch_add(&nr_cpus_ready, 1);

	if (cpuid > 0)
		return;

	/* vCPU 0 performs the one-time ITS setup once all vCPUs are ready */
	while (atomic_load(&nr_cpus_ready) < test_data.nr_cpus)
		cpu_relax();

	its_init(test_data.collection_table, SZ_64K,
		 test_data.device_table, SZ_64K,
		 test_data.cmdq_base, SZ_64K);

	guest_setup_its_mappings();
	guest_invalidate_all_rdists();
}

static void guest_code(size_t nr_lpis)
{
	guest_setup_gic();

	GUEST_SYNC(0);

	/*
	 * Don't use WFI here to avoid blocking the vCPU thread indefinitely and
	 * never getting the stop signal.
	 */
	while (!READ_ONCE(test_data.request_vcpus_stop))
		cpu_relax();

	GUEST_DONE();
}
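
/*
 * The test carves its memslot out of the top of the guest physical address
 * space, then suballocates the ITS tables, the command queue, the per-device
 * ITTs, and the redistributors' LPI configuration/pending tables from it in
 * 64K-aligned chunks.
 */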

static void setup_memslot(void)
{
	size_t pages;
	size_t sz;

	/*
	 * For the ITS:
	 * - A single level device table
	 * - A single level collection table
	 * - The command queue
	 * - An ITT for each device
	 */
	sz = (3 + test_data.nr_devices) * SZ_64K;

	/*
	 * For the redistributors:
	 * - A shared LPI configuration table
	 * - An LPI pending table for each vCPU
	 */
	sz += (1 + test_data.nr_cpus) * SZ_64K;

	pages = sz / vm->page_size;
	gpa_base = ((vm_compute_max_gfn(vm) + 1) * vm->page_size) - sz;
	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa_base,
				    TEST_MEMSLOT_INDEX, pages, 0);
}

#define LPI_PROP_DEFAULT_PRIO	0xa0

static void configure_lpis(void)
{
	size_t nr_lpis = test_data.nr_devices * test_data.nr_event_ids;
	u8 *tbl = addr_gpa2hva(vm, test_data.lpi_prop_table);
	size_t i;

	for (i = 0; i < nr_lpis; i++) {
		tbl[i] = LPI_PROP_DEFAULT_PRIO |
			 LPI_PROP_GROUP1 |
			 LPI_PROP_ENABLED;
	}
}

static void setup_test_data(void)
{
	size_t pages_per_64k = vm_calc_num_guest_pages(vm->mode, SZ_64K);
	u32 nr_devices = test_data.nr_devices;
	u32 nr_cpus = test_data.nr_cpus;
	vm_paddr_t cmdq_base;

	test_data.device_table = vm_phy_pages_alloc(vm, pages_per_64k,
						    gpa_base,
						    TEST_MEMSLOT_INDEX);

	test_data.collection_table = vm_phy_pages_alloc(vm, pages_per_64k,
							gpa_base,
							TEST_MEMSLOT_INDEX);

	cmdq_base = vm_phy_pages_alloc(vm, pages_per_64k, gpa_base,
				       TEST_MEMSLOT_INDEX);
	/* The guest writes ITS commands through an identity (VA == PA) mapping */
	virt_map(vm, cmdq_base, cmdq_base, pages_per_64k);
	test_data.cmdq_base = cmdq_base;
	test_data.cmdq_base_va = (void *)cmdq_base;

	test_data.itt_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_devices,
						  gpa_base, TEST_MEMSLOT_INDEX);

	test_data.lpi_prop_table = vm_phy_pages_alloc(vm, pages_per_64k,
						      gpa_base, TEST_MEMSLOT_INDEX);
	configure_lpis();

	test_data.lpi_pend_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_cpus,
						       gpa_base, TEST_MEMSLOT_INDEX);

	sync_global_to_guest(vm, test_data);
}

static void setup_gic(void)
{
	/* The ITS is a child of a GICv3 distributor, which must exist first */
	gic_fd = vgic_v3_setup(vm, test_data.nr_cpus, 64);
	__TEST_REQUIRE(gic_fd >= 0, "Failed to create GICv3");

	its_fd = vgic_its_setup(vm);
}

static void signal_lpi(u32 device_id, u32 event_id)
{
	vm_paddr_t db_addr = GITS_BASE_GPA + GITS_TRANSLATER;

	struct kvm_msi msi = {
		.address_lo	= db_addr,
		.address_hi	= db_addr >> 32,
		.data		= event_id,
		.devid		= device_id,
		.flags		= KVM_MSI_VALID_DEVID,
	};

	/*
	 * KVM_SIGNAL_MSI returns 1 if the MSI wasn't 'blocked' by the VM,
	 * which for arm64 implies having a valid translation in the ITS.
	 */
	TEST_ASSERT(__vm_ioctl(vm, KVM_SIGNAL_MSI, &msi) == 1,
		    "KVM_SIGNAL_MSI ioctl failed");
}
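
/*
 * Worker model: one injector thread per device and one runner thread per
 * vCPU, plus the main thread, all synchronized on test_setup_barrier so
 * that no MSIs are injected before every vCPU has finished its GIC setup.
 */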

static pthread_barrier_t test_setup_barrier;

static void *lpi_worker_thread(void *data)
{
	u32 device_id = (size_t)data;
	u32 event_id;
	size_t i;

	pthread_barrier_wait(&test_setup_barrier);

	for (i = 0; i < nr_iterations; i++)
		for (event_id = 0; event_id < test_data.nr_event_ids; event_id++)
			signal_lpi(device_id, event_id);

	return NULL;
}

static void *vcpu_worker_thread(void *data)
{
	struct kvm_vcpu *vcpu = data;
	struct ucall uc;

	while (true) {
		vcpu_run(vcpu);

		switch (get_ucall(vcpu, &uc)) {
		case UCALL_SYNC:
			pthread_barrier_wait(&test_setup_barrier);
			continue;
		case UCALL_DONE:
			return NULL;
		case UCALL_ABORT:
			REPORT_GUEST_ASSERT(uc);
			break;
		default:
			TEST_FAIL("Unknown ucall: %lu", uc.cmd);
		}
	}

	return NULL;
}

static void report_stats(struct timespec delta)
{
	double nr_lpis;
	double time;

	nr_lpis = test_data.nr_devices * test_data.nr_event_ids * nr_iterations;

	time = delta.tv_sec;
	time += ((double)delta.tv_nsec) / NSEC_PER_SEC;

	pr_info("Rate: %.2f LPIs/sec\n", nr_lpis / time);
}

static void run_test(void)
{
	u32 nr_devices = test_data.nr_devices;
	u32 nr_vcpus = test_data.nr_cpus;
	pthread_t *lpi_threads = malloc(nr_devices * sizeof(pthread_t));
	pthread_t *vcpu_threads = malloc(nr_vcpus * sizeof(pthread_t));
	struct timespec start, delta;
	size_t i;

	TEST_ASSERT(lpi_threads && vcpu_threads, "Failed to allocate pthread arrays");

	pthread_barrier_init(&test_setup_barrier, NULL, nr_vcpus + nr_devices + 1);

	for (i = 0; i < nr_vcpus; i++)
		pthread_create(&vcpu_threads[i], NULL, vcpu_worker_thread, vcpus[i]);

	for (i = 0; i < nr_devices; i++)
		pthread_create(&lpi_threads[i], NULL, lpi_worker_thread, (void *)i);

	pthread_barrier_wait(&test_setup_barrier);

	clock_gettime(CLOCK_MONOTONIC, &start);

	for (i = 0; i < nr_devices; i++)
		pthread_join(lpi_threads[i], NULL);

	delta = timespec_elapsed(start);
	write_guest_global(vm, test_data.request_vcpus_stop, true);

	for (i = 0; i < nr_vcpus; i++)
		pthread_join(vcpu_threads[i], NULL);

	report_stats(delta);
}

static void setup_vm(void)
{
	int i;

	vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu *));
	TEST_ASSERT(vcpus, "Failed to allocate vCPU array");

	vm = vm_create_with_vcpus(test_data.nr_cpus, guest_code, vcpus);

	vm_init_descriptor_tables(vm);
	for (i = 0; i < test_data.nr_cpus; i++)
		vcpu_init_descriptor_tables(vcpus[i]);

	vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);

	setup_memslot();

	setup_gic();

	setup_test_data();
}

static void destroy_vm(void)
{
	close(its_fd);
	close(gic_fd);
	kvm_vm_free(vm);
	free(vcpus);
}

static void pr_usage(const char *name)
{
	pr_info("%s [-v NR_VCPUS] [-d NR_DEVICES] [-e NR_EVENTS] [-i ITERS] [-h]\n", name);
	pr_info("  -v:\tnumber of vCPUs (default: %u)\n", test_data.nr_cpus);
	pr_info("  -d:\tnumber of devices (default: %u)\n", test_data.nr_devices);
	pr_info("  -e:\tnumber of event IDs per device (default: %u)\n", test_data.nr_event_ids);
	pr_info("  -i:\tnumber of iterations (default: %lu)\n", nr_iterations);
}
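
/*
 * Example invocation (values are illustrative):
 *
 *   ./vgic_lpi_stress -v 4 -d 16 -e 16 -i 1000
 *
 * runs a 4-vCPU VM with 16 devices x 16 event IDs = 256 LPIs, signaling
 * each event 1000 times for a total of 256,000 injected MSIs.
 */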

int main(int argc, char **argv)
{
	u32 nr_threads;
	int c;

	TEST_REQUIRE(kvm_supports_vgic_v3());

	while ((c = getopt(argc, argv, "hv:d:e:i:")) != -1) {
		switch (c) {
		case 'v':
			test_data.nr_cpus = atoi(optarg);
			break;
		case 'd':
			test_data.nr_devices = atoi(optarg);
			break;
		case 'e':
			test_data.nr_event_ids = atoi(optarg);
			break;
		case 'i':
			nr_iterations = strtoul(optarg, NULL, 0);
			break;
		case 'h':
		default:
			pr_usage(argv[0]);
			return 1;
		}
	}

	nr_threads = test_data.nr_cpus + test_data.nr_devices;
	if (nr_threads > get_nprocs())
		pr_info("WARNING: running %u threads on %d CPUs; performance is degraded.\n",
			nr_threads, get_nprocs());

	setup_vm();

	run_test();

	destroy_vm();

	return 0;
}