1 // SPDX-License-Identifier: GPL-2.0 2 3 /* 4 * Test module for stress and analyze performance of vmalloc allocator. 5 * (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com> 6 */ 7 #include <linux/init.h> 8 #include <linux/kernel.h> 9 #include <linux/module.h> 10 #include <linux/vmalloc.h> 11 #include <linux/random.h> 12 #include <linux/kthread.h> 13 #include <linux/moduleparam.h> 14 #include <linux/completion.h> 15 #include <linux/delay.h> 16 #include <linux/rwsem.h> 17 #include <linux/mm.h> 18 #include <linux/rcupdate.h> 19 #include <linux/slab.h> 20 21 #define __param(type, name, init, msg) \ 22 static type name = init; \ 23 module_param(name, type, 0444); \ 24 MODULE_PARM_DESC(name, msg) \ 25 26 __param(bool, single_cpu_test, false, 27 "Use single first online CPU to run tests"); 28 29 __param(bool, sequential_test_order, false, 30 "Use sequential stress tests order"); 31 32 __param(int, test_repeat_count, 1, 33 "Set test repeat counter"); 34 35 __param(int, test_loop_count, 1000000, 36 "Set test loop counter"); 37 38 __param(int, run_test_mask, INT_MAX, 39 "Set tests specified in the mask.\n\n" 40 "\t\tid: 1, name: fix_size_alloc_test\n" 41 "\t\tid: 2, name: full_fit_alloc_test\n" 42 "\t\tid: 4, name: long_busy_list_alloc_test\n" 43 "\t\tid: 8, name: random_size_alloc_test\n" 44 "\t\tid: 16, name: fix_align_alloc_test\n" 45 "\t\tid: 32, name: random_size_align_alloc_test\n" 46 "\t\tid: 64, name: align_shift_alloc_test\n" 47 "\t\tid: 128, name: pcpu_alloc_test\n" 48 "\t\tid: 256, name: kvfree_rcu_1_arg_vmalloc_test\n" 49 "\t\tid: 512, name: kvfree_rcu_2_arg_vmalloc_test\n" 50 "\t\tid: 1024, name: kvfree_rcu_1_arg_slab_test\n" 51 "\t\tid: 2048, name: kvfree_rcu_2_arg_slab_test\n" 52 /* Add a new test case description here. */ 53 ); 54 55 /* 56 * Depends on single_cpu_test parameter. If it is true, then 57 * use first online CPU to trigger a test on, otherwise go with 58 * all online CPUs. 59 */ 60 static cpumask_t cpus_run_test_mask = CPU_MASK_NONE; 61 62 /* 63 * Read write semaphore for synchronization of setup 64 * phase that is done in main thread and workers. 65 */ 66 static DECLARE_RWSEM(prepare_for_test_rwsem); 67 68 /* 69 * Completion tracking for worker threads. 70 */ 71 static DECLARE_COMPLETION(test_all_done_comp); 72 static atomic_t test_n_undone = ATOMIC_INIT(0); 73 74 static inline void 75 test_report_one_done(void) 76 { 77 if (atomic_dec_and_test(&test_n_undone)) 78 complete(&test_all_done_comp); 79 } 80 81 static int random_size_align_alloc_test(void) 82 { 83 unsigned long size, align, rnd; 84 void *ptr; 85 int i; 86 87 for (i = 0; i < test_loop_count; i++) { 88 get_random_bytes(&rnd, sizeof(rnd)); 89 90 /* 91 * Maximum 1024 pages, if PAGE_SIZE is 4096. 92 */ 93 align = 1 << (rnd % 23); 94 95 /* 96 * Maximum 10 pages. 97 */ 98 size = ((rnd % 10) + 1) * PAGE_SIZE; 99 100 ptr = __vmalloc_node(size, align, GFP_KERNEL | __GFP_ZERO, 0, 101 __builtin_return_address(0)); 102 if (!ptr) 103 return -1; 104 105 vfree(ptr); 106 } 107 108 return 0; 109 } 110 111 /* 112 * This test case is supposed to be failed. 113 */ 114 static int align_shift_alloc_test(void) 115 { 116 unsigned long align; 117 void *ptr; 118 int i; 119 120 for (i = 0; i < BITS_PER_LONG; i++) { 121 align = ((unsigned long) 1) << i; 122 123 ptr = __vmalloc_node(PAGE_SIZE, align, GFP_KERNEL|__GFP_ZERO, 0, 124 __builtin_return_address(0)); 125 if (!ptr) 126 return -1; 127 128 vfree(ptr); 129 } 130 131 return 0; 132 } 133 134 static int fix_align_alloc_test(void) 135 { 136 void *ptr; 137 int i; 138 139 for (i = 0; i < test_loop_count; i++) { 140 ptr = __vmalloc_node(5 * PAGE_SIZE, THREAD_ALIGN << 1, 141 GFP_KERNEL | __GFP_ZERO, 0, 142 __builtin_return_address(0)); 143 if (!ptr) 144 return -1; 145 146 vfree(ptr); 147 } 148 149 return 0; 150 } 151 152 static int random_size_alloc_test(void) 153 { 154 unsigned int n; 155 void *p; 156 int i; 157 158 for (i = 0; i < test_loop_count; i++) { 159 get_random_bytes(&n, sizeof(i)); 160 n = (n % 100) + 1; 161 162 p = vmalloc(n * PAGE_SIZE); 163 164 if (!p) 165 return -1; 166 167 *((__u8 *)p) = 1; 168 vfree(p); 169 } 170 171 return 0; 172 } 173 174 static int long_busy_list_alloc_test(void) 175 { 176 void *ptr_1, *ptr_2; 177 void **ptr; 178 int rv = -1; 179 int i; 180 181 ptr = vmalloc(sizeof(void *) * 15000); 182 if (!ptr) 183 return rv; 184 185 for (i = 0; i < 15000; i++) 186 ptr[i] = vmalloc(1 * PAGE_SIZE); 187 188 for (i = 0; i < test_loop_count; i++) { 189 ptr_1 = vmalloc(100 * PAGE_SIZE); 190 if (!ptr_1) 191 goto leave; 192 193 ptr_2 = vmalloc(1 * PAGE_SIZE); 194 if (!ptr_2) { 195 vfree(ptr_1); 196 goto leave; 197 } 198 199 *((__u8 *)ptr_1) = 0; 200 *((__u8 *)ptr_2) = 1; 201 202 vfree(ptr_1); 203 vfree(ptr_2); 204 } 205 206 /* Success */ 207 rv = 0; 208 209 leave: 210 for (i = 0; i < 15000; i++) 211 vfree(ptr[i]); 212 213 vfree(ptr); 214 return rv; 215 } 216 217 static int full_fit_alloc_test(void) 218 { 219 void **ptr, **junk_ptr, *tmp; 220 int junk_length; 221 int rv = -1; 222 int i; 223 224 junk_length = fls(num_online_cpus()); 225 junk_length *= (32 * 1024 * 1024 / PAGE_SIZE); 226 227 ptr = vmalloc(sizeof(void *) * junk_length); 228 if (!ptr) 229 return rv; 230 231 junk_ptr = vmalloc(sizeof(void *) * junk_length); 232 if (!junk_ptr) { 233 vfree(ptr); 234 return rv; 235 } 236 237 for (i = 0; i < junk_length; i++) { 238 ptr[i] = vmalloc(1 * PAGE_SIZE); 239 junk_ptr[i] = vmalloc(1 * PAGE_SIZE); 240 } 241 242 for (i = 0; i < junk_length; i++) 243 vfree(junk_ptr[i]); 244 245 for (i = 0; i < test_loop_count; i++) { 246 tmp = vmalloc(1 * PAGE_SIZE); 247 248 if (!tmp) 249 goto error; 250 251 *((__u8 *)tmp) = 1; 252 vfree(tmp); 253 } 254 255 /* Success */ 256 rv = 0; 257 258 error: 259 for (i = 0; i < junk_length; i++) 260 vfree(ptr[i]); 261 262 vfree(ptr); 263 vfree(junk_ptr); 264 265 return rv; 266 } 267 268 static int fix_size_alloc_test(void) 269 { 270 void *ptr; 271 int i; 272 273 for (i = 0; i < test_loop_count; i++) { 274 ptr = vmalloc(3 * PAGE_SIZE); 275 276 if (!ptr) 277 return -1; 278 279 *((__u8 *)ptr) = 0; 280 281 vfree(ptr); 282 } 283 284 return 0; 285 } 286 287 static int 288 pcpu_alloc_test(void) 289 { 290 int rv = 0; 291 #ifndef CONFIG_NEED_PER_CPU_KM 292 void __percpu **pcpu; 293 size_t size, align; 294 int i; 295 296 pcpu = vmalloc(sizeof(void __percpu *) * 35000); 297 if (!pcpu) 298 return -1; 299 300 for (i = 0; i < 35000; i++) { 301 unsigned int r; 302 303 get_random_bytes(&r, sizeof(i)); 304 size = (r % (PAGE_SIZE / 4)) + 1; 305 306 /* 307 * Maximum PAGE_SIZE 308 */ 309 get_random_bytes(&r, sizeof(i)); 310 align = 1 << ((i % 11) + 1); 311 312 pcpu[i] = __alloc_percpu(size, align); 313 if (!pcpu[i]) 314 rv = -1; 315 } 316 317 for (i = 0; i < 35000; i++) 318 free_percpu(pcpu[i]); 319 320 vfree(pcpu); 321 #endif 322 return rv; 323 } 324 325 struct test_kvfree_rcu { 326 struct rcu_head rcu; 327 unsigned char array[20]; 328 }; 329 330 static int 331 kvfree_rcu_1_arg_vmalloc_test(void) 332 { 333 struct test_kvfree_rcu *p; 334 int i; 335 336 for (i = 0; i < test_loop_count; i++) { 337 p = vmalloc(1 * PAGE_SIZE); 338 if (!p) 339 return -1; 340 341 p->array[0] = 'a'; 342 kvfree_rcu(p); 343 } 344 345 return 0; 346 } 347 348 static int 349 kvfree_rcu_2_arg_vmalloc_test(void) 350 { 351 struct test_kvfree_rcu *p; 352 int i; 353 354 for (i = 0; i < test_loop_count; i++) { 355 p = vmalloc(1 * PAGE_SIZE); 356 if (!p) 357 return -1; 358 359 p->array[0] = 'a'; 360 kvfree_rcu(p, rcu); 361 } 362 363 return 0; 364 } 365 366 static int 367 kvfree_rcu_1_arg_slab_test(void) 368 { 369 struct test_kvfree_rcu *p; 370 int i; 371 372 for (i = 0; i < test_loop_count; i++) { 373 p = kmalloc(sizeof(*p), GFP_KERNEL); 374 if (!p) 375 return -1; 376 377 p->array[0] = 'a'; 378 kvfree_rcu(p); 379 } 380 381 return 0; 382 } 383 384 static int 385 kvfree_rcu_2_arg_slab_test(void) 386 { 387 struct test_kvfree_rcu *p; 388 int i; 389 390 for (i = 0; i < test_loop_count; i++) { 391 p = kmalloc(sizeof(*p), GFP_KERNEL); 392 if (!p) 393 return -1; 394 395 p->array[0] = 'a'; 396 kvfree_rcu(p, rcu); 397 } 398 399 return 0; 400 } 401 402 struct test_case_desc { 403 const char *test_name; 404 int (*test_func)(void); 405 }; 406 407 static struct test_case_desc test_case_array[] = { 408 { "fix_size_alloc_test", fix_size_alloc_test }, 409 { "full_fit_alloc_test", full_fit_alloc_test }, 410 { "long_busy_list_alloc_test", long_busy_list_alloc_test }, 411 { "random_size_alloc_test", random_size_alloc_test }, 412 { "fix_align_alloc_test", fix_align_alloc_test }, 413 { "random_size_align_alloc_test", random_size_align_alloc_test }, 414 { "align_shift_alloc_test", align_shift_alloc_test }, 415 { "pcpu_alloc_test", pcpu_alloc_test }, 416 { "kvfree_rcu_1_arg_vmalloc_test", kvfree_rcu_1_arg_vmalloc_test }, 417 { "kvfree_rcu_2_arg_vmalloc_test", kvfree_rcu_2_arg_vmalloc_test }, 418 { "kvfree_rcu_1_arg_slab_test", kvfree_rcu_1_arg_slab_test }, 419 { "kvfree_rcu_2_arg_slab_test", kvfree_rcu_2_arg_slab_test }, 420 /* Add a new test case here. */ 421 }; 422 423 struct test_case_data { 424 int test_failed; 425 int test_passed; 426 u64 time; 427 }; 428 429 /* Split it to get rid of: WARNING: line over 80 characters */ 430 static struct test_case_data 431 per_cpu_test_data[NR_CPUS][ARRAY_SIZE(test_case_array)]; 432 433 static struct test_driver { 434 struct task_struct *task; 435 unsigned long start; 436 unsigned long stop; 437 int cpu; 438 } per_cpu_test_driver[NR_CPUS]; 439 440 static void shuffle_array(int *arr, int n) 441 { 442 unsigned int rnd; 443 int i, j, x; 444 445 for (i = n - 1; i > 0; i--) { 446 get_random_bytes(&rnd, sizeof(rnd)); 447 448 /* Cut the range. */ 449 j = rnd % i; 450 451 /* Swap indexes. */ 452 x = arr[i]; 453 arr[i] = arr[j]; 454 arr[j] = x; 455 } 456 } 457 458 static int test_func(void *private) 459 { 460 struct test_driver *t = private; 461 int random_array[ARRAY_SIZE(test_case_array)]; 462 int index, i, j; 463 ktime_t kt; 464 u64 delta; 465 466 if (set_cpus_allowed_ptr(current, cpumask_of(t->cpu)) < 0) 467 pr_err("Failed to set affinity to %d CPU\n", t->cpu); 468 469 for (i = 0; i < ARRAY_SIZE(test_case_array); i++) 470 random_array[i] = i; 471 472 if (!sequential_test_order) 473 shuffle_array(random_array, ARRAY_SIZE(test_case_array)); 474 475 /* 476 * Block until initialization is done. 477 */ 478 down_read(&prepare_for_test_rwsem); 479 480 t->start = get_cycles(); 481 for (i = 0; i < ARRAY_SIZE(test_case_array); i++) { 482 index = random_array[i]; 483 484 /* 485 * Skip tests if run_test_mask has been specified. 486 */ 487 if (!((run_test_mask & (1 << index)) >> index)) 488 continue; 489 490 kt = ktime_get(); 491 for (j = 0; j < test_repeat_count; j++) { 492 if (!test_case_array[index].test_func()) 493 per_cpu_test_data[t->cpu][index].test_passed++; 494 else 495 per_cpu_test_data[t->cpu][index].test_failed++; 496 } 497 498 /* 499 * Take an average time that test took. 500 */ 501 delta = (u64) ktime_us_delta(ktime_get(), kt); 502 do_div(delta, (u32) test_repeat_count); 503 504 per_cpu_test_data[t->cpu][index].time = delta; 505 } 506 t->stop = get_cycles(); 507 508 up_read(&prepare_for_test_rwsem); 509 test_report_one_done(); 510 511 /* 512 * Wait for the kthread_stop() call. 513 */ 514 while (!kthread_should_stop()) 515 msleep(10); 516 517 return 0; 518 } 519 520 static void 521 init_test_configurtion(void) 522 { 523 /* 524 * Reset all data of all CPUs. 525 */ 526 memset(per_cpu_test_data, 0, sizeof(per_cpu_test_data)); 527 528 if (single_cpu_test) 529 cpumask_set_cpu(cpumask_first(cpu_online_mask), 530 &cpus_run_test_mask); 531 else 532 cpumask_and(&cpus_run_test_mask, cpu_online_mask, 533 cpu_online_mask); 534 535 if (test_repeat_count <= 0) 536 test_repeat_count = 1; 537 538 if (test_loop_count <= 0) 539 test_loop_count = 1; 540 } 541 542 static void do_concurrent_test(void) 543 { 544 int cpu, ret; 545 546 /* 547 * Set some basic configurations plus sanity check. 548 */ 549 init_test_configurtion(); 550 551 /* 552 * Put on hold all workers. 553 */ 554 down_write(&prepare_for_test_rwsem); 555 556 for_each_cpu(cpu, &cpus_run_test_mask) { 557 struct test_driver *t = &per_cpu_test_driver[cpu]; 558 559 t->cpu = cpu; 560 t->task = kthread_run(test_func, t, "vmalloc_test/%d", cpu); 561 562 if (!IS_ERR(t->task)) 563 /* Success. */ 564 atomic_inc(&test_n_undone); 565 else 566 pr_err("Failed to start kthread for %d CPU\n", cpu); 567 } 568 569 /* 570 * Now let the workers do their job. 571 */ 572 up_write(&prepare_for_test_rwsem); 573 574 /* 575 * Sleep quiet until all workers are done with 1 second 576 * interval. Since the test can take a lot of time we 577 * can run into a stack trace of the hung task. That is 578 * why we go with completion_timeout and HZ value. 579 */ 580 do { 581 ret = wait_for_completion_timeout(&test_all_done_comp, HZ); 582 } while (!ret); 583 584 for_each_cpu(cpu, &cpus_run_test_mask) { 585 struct test_driver *t = &per_cpu_test_driver[cpu]; 586 int i; 587 588 if (!IS_ERR(t->task)) 589 kthread_stop(t->task); 590 591 for (i = 0; i < ARRAY_SIZE(test_case_array); i++) { 592 if (!((run_test_mask & (1 << i)) >> i)) 593 continue; 594 595 pr_info( 596 "Summary: %s passed: %d failed: %d repeat: %d loops: %d avg: %llu usec\n", 597 test_case_array[i].test_name, 598 per_cpu_test_data[cpu][i].test_passed, 599 per_cpu_test_data[cpu][i].test_failed, 600 test_repeat_count, test_loop_count, 601 per_cpu_test_data[cpu][i].time); 602 } 603 604 pr_info("All test took CPU%d=%lu cycles\n", 605 cpu, t->stop - t->start); 606 } 607 } 608 609 static int vmalloc_test_init(void) 610 { 611 do_concurrent_test(); 612 return -EAGAIN; /* Fail will directly unload the module */ 613 } 614 615 static void vmalloc_test_exit(void) 616 { 617 } 618 619 module_init(vmalloc_test_init) 620 module_exit(vmalloc_test_exit) 621 622 MODULE_LICENSE("GPL"); 623 MODULE_AUTHOR("Uladzislau Rezki"); 624 MODULE_DESCRIPTION("vmalloc test module"); 625