// SPDX-License-Identifier: GPL-2.0
/*
 * vgic_lpi_stress - Stress test for KVM's ITS emulation
 *
 * Copyright (c) 2024 Google LLC
 */

#include <linux/sizes.h>
#include <pthread.h>
#include <stdatomic.h>
#include <sys/sysinfo.h>

#include "kvm_util.h"
#include "gic.h"
#include "gic_v3.h"
#include "gic_v3_its.h"
#include "processor.h"
#include "ucall.h"
#include "vgic.h"

#define TEST_MEMSLOT_INDEX	1

#define GIC_LPI_OFFSET		8192

static size_t nr_iterations = 1000;
static vm_paddr_t gpa_base;

static struct kvm_vm *vm;
static struct kvm_vcpu **vcpus;
static int gic_fd, its_fd;

static struct test_data {
	bool		request_vcpus_stop;
	u32		nr_cpus;
	u32		nr_devices;
	u32		nr_event_ids;

	vm_paddr_t	device_table;
	vm_paddr_t	collection_table;
	vm_paddr_t	cmdq_base;
	void		*cmdq_base_va;
	vm_paddr_t	itt_tables;

	vm_paddr_t	lpi_prop_table;
	vm_paddr_t	lpi_pend_tables;
} test_data = {
	.nr_cpus	= 1,
	.nr_devices	= 1,
	.nr_event_ids	= 16,
};

static void guest_irq_handler(struct ex_regs *regs)
{
	u32 intid = gic_get_and_ack_irq();

	if (intid == IAR_SPURIOUS)
		return;

	GUEST_ASSERT(intid >= GIC_LPI_OFFSET);
	gic_set_eoi(intid);
}

static void guest_setup_its_mappings(void)
{
	u32 coll_id, device_id, event_id, intid = GIC_LPI_OFFSET;
	u32 nr_events = test_data.nr_event_ids;
	u32 nr_devices = test_data.nr_devices;
	u32 nr_cpus = test_data.nr_cpus;

	for (coll_id = 0; coll_id < nr_cpus; coll_id++)
		its_send_mapc_cmd(test_data.cmdq_base_va, coll_id, coll_id, true);

	/* Round-robin the LPIs to all of the vCPUs in the VM */
	coll_id = 0;
	for (device_id = 0; device_id < nr_devices; device_id++) {
		vm_paddr_t itt_base = test_data.itt_tables + (device_id * SZ_64K);

		its_send_mapd_cmd(test_data.cmdq_base_va, device_id,
				  itt_base, SZ_64K, true);

		for (event_id = 0; event_id < nr_events; event_id++) {
			its_send_mapti_cmd(test_data.cmdq_base_va, device_id,
					   event_id, coll_id, intid++);

			coll_id = (coll_id + 1) % nr_cpus;
		}
	}
}

static void guest_invalidate_all_rdists(void)
{
	int i;

	for (i = 0; i < test_data.nr_cpus; i++)
		its_send_invall_cmd(test_data.cmdq_base_va, i);
}

static void guest_setup_gic(void)
{
	static atomic_int nr_cpus_ready = 0;
	u32 cpuid = guest_get_vcpuid();

	gic_init(GIC_V3, test_data.nr_cpus);
	gic_rdist_enable_lpis(test_data.lpi_prop_table, SZ_64K,
			      test_data.lpi_pend_tables + (cpuid * SZ_64K));

	atomic_fetch_add(&nr_cpus_ready, 1);

	if (cpuid > 0)
		return;

	while (atomic_load(&nr_cpus_ready) < test_data.nr_cpus)
		cpu_relax();

	its_init(test_data.collection_table, SZ_64K,
		 test_data.device_table, SZ_64K,
		 test_data.cmdq_base, SZ_64K);

	guest_setup_its_mappings();
	guest_invalidate_all_rdists();
}

static void guest_code(size_t nr_lpis)
{
	guest_setup_gic();

	GUEST_SYNC(0);

	/*
	 * Don't use WFI here to avoid blocking the vCPU thread indefinitely
	 * and never getting the stop signal.
	 */
	while (!READ_ONCE(test_data.request_vcpus_stop))
		cpu_relax();

	GUEST_DONE();
}
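
/*
 * A note on the numbering the mapping loop above establishes (derived from
 * the code, stated here for convenience): LPI INTIDs are handed out
 * contiguously in device-major order, i.e.
 *
 *	intid = GIC_LPI_OFFSET + device_id * nr_event_ids + event_id
 *
 * With the default 16 events per device, (device_id = 1, event_id = 3)
 * lands on INTID 8192 + 16 + 3 = 8211. Collections (and hence vCPUs) are
 * walked round-robin per event, not per device.
 */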
static void setup_memslot(void)
{
	size_t pages;
	size_t sz;

	/*
	 * For the ITS:
	 * - A single level device table
	 * - A single level collection table
	 * - The command queue
	 * - An ITT for each device
	 */
	sz = (3 + test_data.nr_devices) * SZ_64K;

	/*
	 * For the redistributors:
	 * - A shared LPI configuration table
	 * - An LPI pending table for each vCPU
	 */
	sz += (1 + test_data.nr_cpus) * SZ_64K;

	pages = sz / vm->page_size;
	gpa_base = ((vm_compute_max_gfn(vm) + 1) * vm->page_size) - sz;
	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa_base,
				    TEST_MEMSLOT_INDEX, pages, 0);
}

#define LPI_PROP_DEFAULT_PRIO	0xa0

static void configure_lpis(void)
{
	size_t nr_lpis = test_data.nr_devices * test_data.nr_event_ids;
	u8 *tbl = addr_gpa2hva(vm, test_data.lpi_prop_table);
	size_t i;

	for (i = 0; i < nr_lpis; i++) {
		tbl[i] = LPI_PROP_DEFAULT_PRIO |
			 LPI_PROP_GROUP1 |
			 LPI_PROP_ENABLED;
	}
}

static void setup_test_data(void)
{
	size_t pages_per_64k = vm_calc_num_guest_pages(vm->mode, SZ_64K);
	u32 nr_devices = test_data.nr_devices;
	u32 nr_cpus = test_data.nr_cpus;
	vm_paddr_t cmdq_base;

	test_data.device_table = vm_phy_pages_alloc(vm, pages_per_64k,
						    gpa_base,
						    TEST_MEMSLOT_INDEX);

	test_data.collection_table = vm_phy_pages_alloc(vm, pages_per_64k,
							gpa_base,
							TEST_MEMSLOT_INDEX);

	cmdq_base = vm_phy_pages_alloc(vm, pages_per_64k, gpa_base,
				       TEST_MEMSLOT_INDEX);
	virt_map(vm, cmdq_base, cmdq_base, pages_per_64k);
	test_data.cmdq_base = cmdq_base;
	test_data.cmdq_base_va = (void *)cmdq_base;

	test_data.itt_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_devices,
						  gpa_base, TEST_MEMSLOT_INDEX);

	test_data.lpi_prop_table = vm_phy_pages_alloc(vm, pages_per_64k,
						      gpa_base, TEST_MEMSLOT_INDEX);
	configure_lpis();

	test_data.lpi_pend_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_cpus,
						       gpa_base, TEST_MEMSLOT_INDEX);

	sync_global_to_guest(vm, test_data);
}

static void setup_gic(void)
{
	gic_fd = vgic_v3_setup(vm, test_data.nr_cpus, 64);
	__TEST_REQUIRE(gic_fd >= 0, "Failed to create GICv3");

	its_fd = vgic_its_setup(vm);
}

static void signal_lpi(u32 device_id, u32 event_id)
{
	vm_paddr_t db_addr = GITS_BASE_GPA + GITS_TRANSLATER;

	struct kvm_msi msi = {
		.address_lo	= db_addr,
		.address_hi	= db_addr >> 32,
		.data		= event_id,
		.devid		= device_id,
		.flags		= KVM_MSI_VALID_DEVID,
	};

	/*
	 * KVM_SIGNAL_MSI returns 1 if the MSI wasn't 'blocked' by the VM,
	 * which for arm64 implies having a valid translation in the ITS.
	 */
	TEST_ASSERT(__vm_ioctl(vm, KVM_SIGNAL_MSI, &msi) == 1,
		    "KVM_SIGNAL_MSI ioctl failed");
}
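
/*
 * For example (illustrative only; assumes the guest has already run
 * guest_setup_its_mappings()): signal_lpi(0, 0) writes event 0 to the
 * GITS_TRANSLATER doorbell on behalf of device 0, which the emulated ITS
 * translates to INTID 8192 on collection 0, i.e. vCPU 0.
 */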
static pthread_barrier_t test_setup_barrier;

static void *lpi_worker_thread(void *data)
{
	u32 device_id = (size_t)data;
	u32 event_id;
	size_t i;

	pthread_barrier_wait(&test_setup_barrier);

	for (i = 0; i < nr_iterations; i++)
		for (event_id = 0; event_id < test_data.nr_event_ids; event_id++)
			signal_lpi(device_id, event_id);

	return NULL;
}

static void *vcpu_worker_thread(void *data)
{
	struct kvm_vcpu *vcpu = data;
	struct ucall uc;

	while (true) {
		vcpu_run(vcpu);

		switch (get_ucall(vcpu, &uc)) {
		case UCALL_SYNC:
			pthread_barrier_wait(&test_setup_barrier);
			continue;
		case UCALL_DONE:
			return NULL;
		case UCALL_ABORT:
			REPORT_GUEST_ASSERT(uc);
			break;
		default:
			TEST_FAIL("Unknown ucall: %lu", uc.cmd);
		}
	}

	return NULL;
}

static void report_stats(struct timespec delta)
{
	double nr_lpis;
	double time;

	nr_lpis = test_data.nr_devices * test_data.nr_event_ids * nr_iterations;

	time = delta.tv_sec;
	time += ((double)delta.tv_nsec) / NSEC_PER_SEC;

	pr_info("Rate: %.2f LPIs/sec\n", nr_lpis / time);
}

static void run_test(void)
{
	u32 nr_devices = test_data.nr_devices;
	u32 nr_vcpus = test_data.nr_cpus;
	pthread_t *lpi_threads = malloc(nr_devices * sizeof(pthread_t));
	pthread_t *vcpu_threads = malloc(nr_vcpus * sizeof(pthread_t));
	struct timespec start, delta;
	size_t i;

	TEST_ASSERT(lpi_threads && vcpu_threads, "Failed to allocate pthread arrays");

	pthread_barrier_init(&test_setup_barrier, NULL, nr_vcpus + nr_devices + 1);

	for (i = 0; i < nr_vcpus; i++)
		pthread_create(&vcpu_threads[i], NULL, vcpu_worker_thread, vcpus[i]);

	for (i = 0; i < nr_devices; i++)
		pthread_create(&lpi_threads[i], NULL, lpi_worker_thread, (void *)i);

	pthread_barrier_wait(&test_setup_barrier);

	clock_gettime(CLOCK_MONOTONIC, &start);

	for (i = 0; i < nr_devices; i++)
		pthread_join(lpi_threads[i], NULL);

	delta = timespec_elapsed(start);
	write_guest_global(vm, test_data.request_vcpus_stop, true);

	for (i = 0; i < nr_vcpus; i++)
		pthread_join(vcpu_threads[i], NULL);

	report_stats(delta);

	free(lpi_threads);
	free(vcpu_threads);
}

static void setup_vm(void)
{
	int i;

	vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu *));
	TEST_ASSERT(vcpus, "Failed to allocate vCPU array");

	vm = vm_create_with_vcpus(test_data.nr_cpus, guest_code, vcpus);

	vm_init_descriptor_tables(vm);
	for (i = 0; i < test_data.nr_cpus; i++)
		vcpu_init_descriptor_tables(vcpus[i]);

	vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);

	setup_memslot();

	setup_gic();

	setup_test_data();
}

static void destroy_vm(void)
{
	close(its_fd);
	close(gic_fd);
	kvm_vm_free(vm);
	free(vcpus);
}

static void pr_usage(const char *name)
{
	pr_info("%s [-v NR_VCPUS] [-d NR_DEVICES] [-e NR_EVENTS] [-i ITERS] [-h]\n", name);
	pr_info("  -v:\tnumber of vCPUs (default: %u)\n", test_data.nr_cpus);
	pr_info("  -d:\tnumber of devices (default: %u)\n", test_data.nr_devices);
	pr_info("  -e:\tnumber of event IDs per device (default: %u)\n", test_data.nr_event_ids);
	pr_info("  -i:\tnumber of iterations (default: %lu)\n", nr_iterations);
}
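
/*
 * Example invocation (the binary name follows this test's source file name;
 * adjust for your build):
 *
 *	./vgic_lpi_stress -v 4 -d 16 -e 16 -i 1000
 *
 * injects 16 devices * 16 events * 1000 iterations = 256,000 LPIs, spread
 * round-robin across 4 vCPUs, and reports the resulting injection rate.
 */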
int main(int argc, char **argv)
{
	u32 nr_threads;
	int c;

	while ((c = getopt(argc, argv, "hv:d:e:i:")) != -1) {
		switch (c) {
		case 'v':
			test_data.nr_cpus = atoi(optarg);
			break;
		case 'd':
			test_data.nr_devices = atoi(optarg);
			break;
		case 'e':
			test_data.nr_event_ids = atoi(optarg);
			break;
		case 'i':
			nr_iterations = strtoul(optarg, NULL, 0);
			break;
		case 'h':
		default:
			pr_usage(argv[0]);
			return 1;
		}
	}

	nr_threads = test_data.nr_cpus + test_data.nr_devices;
	if (nr_threads > get_nprocs())
		pr_info("WARNING: running %u threads on %d CPUs; performance is degraded.\n",
			nr_threads, get_nprocs());

	setup_vm();

	run_test();

	destroy_vm();

	return 0;
}