1 // SPDX-License-Identifier: GPL-2.0 2 3 /* 4 * Test module for stress and analyze performance of vmalloc allocator. 5 * (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com> 6 */ 7 #include <linux/init.h> 8 #include <linux/kernel.h> 9 #include <linux/module.h> 10 #include <linux/vmalloc.h> 11 #include <linux/random.h> 12 #include <linux/kthread.h> 13 #include <linux/moduleparam.h> 14 #include <linux/completion.h> 15 #include <linux/delay.h> 16 #include <linux/mm.h> 17 #include <linux/rcupdate.h> 18 #include <linux/srcu.h> 19 #include <linux/slab.h> 20 21 #define __param(type, name, init, msg) \ 22 static type name = init; \ 23 module_param(name, type, 0444); \ 24 MODULE_PARM_DESC(name, msg) \ 25 26 __param(int, nr_threads, 0, 27 "Number of workers to perform tests(min: 1 max: USHRT_MAX)"); 28 29 __param(bool, sequential_test_order, false, 30 "Use sequential stress tests order"); 31 32 __param(int, test_repeat_count, 1, 33 "Set test repeat counter"); 34 35 __param(int, test_loop_count, 1000000, 36 "Set test loop counter"); 37 38 __param(int, nr_pages, 0, 39 "Set number of pages for fix_size_alloc_test(default: 1)"); 40 41 __param(bool, use_huge, false, 42 "Use vmalloc_huge in fix_size_alloc_test"); 43 44 __param(int, run_test_mask, 7, 45 "Set tests specified in the mask.\n\n" 46 "\t\tid: 1, name: fix_size_alloc_test\n" 47 "\t\tid: 2, name: full_fit_alloc_test\n" 48 "\t\tid: 4, name: long_busy_list_alloc_test\n" 49 "\t\tid: 8, name: random_size_alloc_test\n" 50 "\t\tid: 16, name: fix_align_alloc_test\n" 51 "\t\tid: 32, name: random_size_align_alloc_test\n" 52 "\t\tid: 64, name: align_shift_alloc_test\n" 53 "\t\tid: 128, name: pcpu_alloc_test\n" 54 "\t\tid: 256, name: kvfree_rcu_1_arg_vmalloc_test\n" 55 "\t\tid: 512, name: kvfree_rcu_2_arg_vmalloc_test\n" 56 "\t\tid: 1024, name: vm_map_ram_test\n" 57 "\t\tid: 2048, name: no_block_alloc_test\n" 58 "\t\tid: 4096, name: vrealloc_test\n" 59 /* Add a new test case description here. */ 60 ); 61 62 __param(int, nr_pcpu_objects, 35000, 63 "Number of pcpu objects to allocate for pcpu_alloc_test"); 64 65 /* 66 * This is for synchronization of setup phase. 67 */ 68 DEFINE_STATIC_SRCU(prepare_for_test_srcu); 69 70 /* 71 * Completion tracking for worker threads. 72 */ 73 static DECLARE_COMPLETION(test_all_done_comp); 74 static atomic_t test_n_undone = ATOMIC_INIT(0); 75 76 static inline void 77 test_report_one_done(void) 78 { 79 if (atomic_dec_and_test(&test_n_undone)) 80 complete(&test_all_done_comp); 81 } 82 83 static int random_size_align_alloc_test(void) 84 { 85 unsigned long size, align; 86 unsigned int rnd; 87 void *ptr; 88 int i; 89 90 for (i = 0; i < test_loop_count; i++) { 91 rnd = get_random_u8(); 92 93 /* 94 * Maximum 1024 pages, if PAGE_SIZE is 4096. 95 */ 96 align = 1 << (rnd % 23); 97 98 /* 99 * Maximum 10 pages. 100 */ 101 size = ((rnd % 10) + 1) * PAGE_SIZE; 102 103 ptr = __vmalloc_node(size, align, GFP_KERNEL | __GFP_ZERO, 0, 104 __builtin_return_address(0)); 105 if (!ptr) 106 return -1; 107 108 vfree(ptr); 109 } 110 111 return 0; 112 } 113 114 /* 115 * This test case is supposed to be failed. 116 */ 117 static int align_shift_alloc_test(void) 118 { 119 unsigned long align; 120 void *ptr; 121 int i; 122 123 for (i = 0; i < BITS_PER_LONG; i++) { 124 align = 1UL << i; 125 126 ptr = __vmalloc_node(PAGE_SIZE, align, GFP_KERNEL|__GFP_ZERO, 0, 127 __builtin_return_address(0)); 128 if (!ptr) 129 return -1; 130 131 vfree(ptr); 132 } 133 134 return 0; 135 } 136 137 static int fix_align_alloc_test(void) 138 { 139 void *ptr; 140 int i; 141 142 for (i = 0; i < test_loop_count; i++) { 143 ptr = __vmalloc_node(5 * PAGE_SIZE, THREAD_ALIGN << 1, 144 GFP_KERNEL | __GFP_ZERO, 0, 145 __builtin_return_address(0)); 146 if (!ptr) 147 return -1; 148 149 vfree(ptr); 150 } 151 152 return 0; 153 } 154 155 static int random_size_alloc_test(void) 156 { 157 unsigned int n; 158 void *p; 159 int i; 160 161 for (i = 0; i < test_loop_count; i++) { 162 n = get_random_u32_inclusive(1, 100); 163 p = vmalloc(n * PAGE_SIZE); 164 165 if (!p) 166 return -1; 167 168 *((__u8 *)p) = 1; 169 vfree(p); 170 } 171 172 return 0; 173 } 174 175 static int long_busy_list_alloc_test(void) 176 { 177 void *ptr_1, *ptr_2; 178 void **ptr; 179 int rv = -1; 180 int i; 181 182 ptr = vmalloc(sizeof(void *) * 15000); 183 if (!ptr) 184 return rv; 185 186 for (i = 0; i < 15000; i++) 187 ptr[i] = vmalloc(1 * PAGE_SIZE); 188 189 for (i = 0; i < test_loop_count; i++) { 190 ptr_1 = vmalloc(100 * PAGE_SIZE); 191 if (!ptr_1) 192 goto leave; 193 194 ptr_2 = vmalloc(1 * PAGE_SIZE); 195 if (!ptr_2) { 196 vfree(ptr_1); 197 goto leave; 198 } 199 200 *((__u8 *)ptr_1) = 0; 201 *((__u8 *)ptr_2) = 1; 202 203 vfree(ptr_1); 204 vfree(ptr_2); 205 } 206 207 /* Success */ 208 rv = 0; 209 210 leave: 211 for (i = 0; i < 15000; i++) 212 vfree(ptr[i]); 213 214 vfree(ptr); 215 return rv; 216 } 217 218 static int full_fit_alloc_test(void) 219 { 220 void **ptr, **junk_ptr, *tmp; 221 int junk_length; 222 int rv = -1; 223 int i; 224 225 junk_length = fls(num_online_cpus()); 226 junk_length *= (32 * 1024 * 1024 / PAGE_SIZE); 227 228 ptr = vmalloc(sizeof(void *) * junk_length); 229 if (!ptr) 230 return rv; 231 232 junk_ptr = vmalloc(sizeof(void *) * junk_length); 233 if (!junk_ptr) { 234 vfree(ptr); 235 return rv; 236 } 237 238 for (i = 0; i < junk_length; i++) { 239 ptr[i] = vmalloc(1 * PAGE_SIZE); 240 junk_ptr[i] = vmalloc(1 * PAGE_SIZE); 241 } 242 243 for (i = 0; i < junk_length; i++) 244 vfree(junk_ptr[i]); 245 246 for (i = 0; i < test_loop_count; i++) { 247 tmp = vmalloc(1 * PAGE_SIZE); 248 249 if (!tmp) 250 goto error; 251 252 *((__u8 *)tmp) = 1; 253 vfree(tmp); 254 } 255 256 /* Success */ 257 rv = 0; 258 259 error: 260 for (i = 0; i < junk_length; i++) 261 vfree(ptr[i]); 262 263 vfree(ptr); 264 vfree(junk_ptr); 265 266 return rv; 267 } 268 269 static int fix_size_alloc_test(void) 270 { 271 void *ptr; 272 int i; 273 274 for (i = 0; i < test_loop_count; i++) { 275 if (use_huge) 276 ptr = vmalloc_huge((nr_pages > 0 ? nr_pages:1) * PAGE_SIZE, GFP_KERNEL); 277 else 278 ptr = vmalloc((nr_pages > 0 ? nr_pages:1) * PAGE_SIZE); 279 280 if (!ptr) 281 return -1; 282 283 *((__u8 *)ptr) = 0; 284 285 vfree(ptr); 286 } 287 288 return 0; 289 } 290 291 static int no_block_alloc_test(void) 292 { 293 void *ptr; 294 int i; 295 296 for (i = 0; i < test_loop_count; i++) { 297 bool use_atomic = !!(get_random_u8() % 2); 298 gfp_t gfp = use_atomic ? GFP_ATOMIC : GFP_NOWAIT; 299 unsigned long size = (nr_pages > 0 ? nr_pages : 1) * PAGE_SIZE; 300 301 preempt_disable(); 302 ptr = __vmalloc(size, gfp); 303 preempt_enable(); 304 305 if (!ptr) 306 return -1; 307 308 *((__u8 *)ptr) = 0; 309 vfree(ptr); 310 } 311 312 return 0; 313 } 314 315 static int 316 pcpu_alloc_test(void) 317 { 318 int rv = 0; 319 #ifndef CONFIG_NEED_PER_CPU_KM 320 void __percpu **pcpu; 321 size_t size, align; 322 int i; 323 324 pcpu = vmalloc(sizeof(void __percpu *) * nr_pcpu_objects); 325 if (!pcpu) 326 return -1; 327 328 for (i = 0; i < nr_pcpu_objects; i++) { 329 size = get_random_u32_inclusive(1, PAGE_SIZE / 4); 330 331 /* 332 * Maximum PAGE_SIZE 333 */ 334 align = 1 << get_random_u32_inclusive(1, PAGE_SHIFT - 1); 335 336 pcpu[i] = __alloc_percpu(size, align); 337 if (!pcpu[i]) 338 rv = -1; 339 } 340 341 for (i = 0; i < nr_pcpu_objects; i++) 342 free_percpu(pcpu[i]); 343 344 vfree(pcpu); 345 #endif 346 return rv; 347 } 348 349 struct test_kvfree_rcu { 350 struct rcu_head rcu; 351 unsigned char array[20]; 352 }; 353 354 static int 355 kvfree_rcu_1_arg_vmalloc_test(void) 356 { 357 struct test_kvfree_rcu *p; 358 int i; 359 360 for (i = 0; i < test_loop_count; i++) { 361 p = vmalloc(1 * PAGE_SIZE); 362 if (!p) 363 return -1; 364 365 p->array[0] = 'a'; 366 kvfree_rcu_mightsleep(p); 367 } 368 369 return 0; 370 } 371 372 static int 373 kvfree_rcu_2_arg_vmalloc_test(void) 374 { 375 struct test_kvfree_rcu *p; 376 int i; 377 378 for (i = 0; i < test_loop_count; i++) { 379 p = vmalloc(1 * PAGE_SIZE); 380 if (!p) 381 return -1; 382 383 p->array[0] = 'a'; 384 kvfree_rcu(p, rcu); 385 } 386 387 return 0; 388 } 389 390 static int 391 vm_map_ram_test(void) 392 { 393 unsigned long nr_allocated; 394 unsigned int map_nr_pages; 395 unsigned char *v_ptr; 396 struct page **pages; 397 int i; 398 399 map_nr_pages = nr_pages > 0 ? nr_pages:1; 400 pages = kzalloc_objs(struct page *, map_nr_pages); 401 if (!pages) 402 return -1; 403 404 nr_allocated = alloc_pages_bulk(GFP_KERNEL, map_nr_pages, pages); 405 if (nr_allocated != map_nr_pages) 406 goto cleanup; 407 408 /* Run the test loop. */ 409 for (i = 0; i < test_loop_count; i++) { 410 v_ptr = vm_map_ram(pages, map_nr_pages, NUMA_NO_NODE); 411 *v_ptr = 'a'; 412 vm_unmap_ram(v_ptr, map_nr_pages); 413 } 414 415 cleanup: 416 for (i = 0; i < nr_allocated; i++) 417 __free_page(pages[i]); 418 419 kfree(pages); 420 421 /* 0 indicates success. */ 422 return nr_allocated != map_nr_pages; 423 } 424 425 static int vrealloc_test(void) 426 { 427 void *ptr, *tmp; 428 int i; 429 430 for (i = 0; i < test_loop_count; i++) { 431 int err = -1; 432 433 ptr = vrealloc(NULL, PAGE_SIZE, GFP_KERNEL); 434 if (!ptr) 435 return -1; 436 437 *((__u8 *)ptr) = 'a'; 438 439 /* Grow: beyond allocated pages, triggers full realloc. */ 440 tmp = vrealloc(ptr, 4 * PAGE_SIZE, GFP_KERNEL); 441 if (!tmp) 442 goto error; 443 ptr = tmp; 444 445 if (*((__u8 *)ptr) != 'a') 446 goto error; 447 448 /* Shrink: crosses page boundary, frees tail pages. */ 449 tmp = vrealloc(ptr, PAGE_SIZE, GFP_KERNEL); 450 if (!tmp) 451 goto error; 452 ptr = tmp; 453 454 if (*((__u8 *)ptr) != 'a') 455 goto error; 456 457 /* Shrink: within same page, no page freeing. */ 458 tmp = vrealloc(ptr, PAGE_SIZE / 2, GFP_KERNEL); 459 if (!tmp) 460 goto error; 461 ptr = tmp; 462 463 if (*((__u8 *)ptr) != 'a') 464 goto error; 465 466 /* Grow: within allocated page, in-place, no realloc. */ 467 tmp = vrealloc(ptr, PAGE_SIZE, GFP_KERNEL); 468 if (!tmp) 469 goto error; 470 ptr = tmp; 471 472 if (*((__u8 *)ptr) != 'a') 473 goto error; 474 475 err = 0; 476 error: 477 vfree(ptr); 478 if (err) 479 return err; 480 } 481 482 return 0; 483 } 484 485 struct test_case_desc { 486 const char *test_name; 487 int (*test_func)(void); 488 bool xfail; 489 }; 490 491 static struct test_case_desc test_case_array[] = { 492 { "fix_size_alloc_test", fix_size_alloc_test, }, 493 { "full_fit_alloc_test", full_fit_alloc_test, }, 494 { "long_busy_list_alloc_test", long_busy_list_alloc_test, }, 495 { "random_size_alloc_test", random_size_alloc_test, }, 496 { "fix_align_alloc_test", fix_align_alloc_test, }, 497 { "random_size_align_alloc_test", random_size_align_alloc_test, }, 498 { "align_shift_alloc_test", align_shift_alloc_test, true }, 499 { "pcpu_alloc_test", pcpu_alloc_test, }, 500 { "kvfree_rcu_1_arg_vmalloc_test", kvfree_rcu_1_arg_vmalloc_test, }, 501 { "kvfree_rcu_2_arg_vmalloc_test", kvfree_rcu_2_arg_vmalloc_test, }, 502 { "vm_map_ram_test", vm_map_ram_test, }, 503 { "no_block_alloc_test", no_block_alloc_test, true }, 504 { "vrealloc_test", vrealloc_test, }, 505 /* Add a new test case here. */ 506 }; 507 508 struct test_case_data { 509 int test_failed; 510 int test_xfailed; 511 int test_passed; 512 u64 time; 513 }; 514 515 static struct test_driver { 516 struct task_struct *task; 517 struct test_case_data data[ARRAY_SIZE(test_case_array)]; 518 519 unsigned long start; 520 unsigned long stop; 521 } *tdriver; 522 523 static void shuffle_array(int *arr, int n) 524 { 525 int i, j; 526 527 for (i = n - 1; i > 0; i--) { 528 /* Cut the range. */ 529 j = get_random_u32_below(i); 530 531 /* Swap indexes. */ 532 swap(arr[i], arr[j]); 533 } 534 } 535 536 static int test_func(void *private) 537 { 538 struct test_driver *t = private; 539 int random_array[ARRAY_SIZE(test_case_array)]; 540 int index, i, j, ret; 541 ktime_t kt; 542 u64 delta; 543 544 for (i = 0; i < ARRAY_SIZE(test_case_array); i++) 545 random_array[i] = i; 546 547 if (!sequential_test_order) 548 shuffle_array(random_array, ARRAY_SIZE(test_case_array)); 549 550 /* 551 * Block until initialization is done. 552 */ 553 synchronize_srcu(&prepare_for_test_srcu); 554 555 t->start = get_cycles(); 556 for (i = 0; i < ARRAY_SIZE(test_case_array); i++) { 557 index = random_array[i]; 558 559 /* 560 * Skip tests if run_test_mask has been specified. 561 */ 562 if (!((run_test_mask & (1 << index)) >> index)) 563 continue; 564 kt = ktime_get(); 565 for (j = 0; j < test_repeat_count; j++) { 566 ret = test_case_array[index].test_func(); 567 568 if (!ret) 569 t->data[index].test_passed++; 570 else if (ret && test_case_array[index].xfail) 571 t->data[index].test_xfailed++; 572 else 573 t->data[index].test_failed++; 574 } 575 576 /* 577 * Take an average time that test took. 578 */ 579 delta = (u64) ktime_us_delta(ktime_get(), kt); 580 do_div(delta, (u32) test_repeat_count); 581 582 t->data[index].time = delta; 583 } 584 t->stop = get_cycles(); 585 test_report_one_done(); 586 587 /* 588 * Wait for the kthread_stop() call. 589 */ 590 while (!kthread_should_stop()) 591 msleep(10); 592 593 return 0; 594 } 595 596 static int 597 init_test_configuration(void) 598 { 599 /* 600 * A maximum number of workers is defined as hard-coded 601 * value and set to USHRT_MAX. We add such gap just in 602 * case and for potential heavy stressing. 603 */ 604 nr_threads = clamp(nr_threads, 1, (int) USHRT_MAX); 605 606 /* Allocate the space for test instances. */ 607 tdriver = kvzalloc_objs(*tdriver, nr_threads); 608 if (tdriver == NULL) 609 return -1; 610 611 if (test_repeat_count <= 0) 612 test_repeat_count = 1; 613 614 if (test_loop_count <= 0) 615 test_loop_count = 1; 616 617 return 0; 618 } 619 620 static void do_concurrent_test(void) 621 { 622 int i, ret, idx; 623 624 /* 625 * Set some basic configurations plus sanity check. 626 */ 627 ret = init_test_configuration(); 628 if (ret < 0) 629 return; 630 631 /* 632 * Put on hold all workers. 633 */ 634 idx = srcu_read_lock(&prepare_for_test_srcu); 635 636 for (i = 0; i < nr_threads; i++) { 637 struct test_driver *t = &tdriver[i]; 638 639 t->task = kthread_run(test_func, t, "vmalloc_test/%d", i); 640 641 if (!IS_ERR(t->task)) 642 /* Success. */ 643 atomic_inc(&test_n_undone); 644 else 645 pr_err("Failed to start %d kthread\n", i); 646 } 647 648 /* 649 * Now let the workers do their job. 650 */ 651 srcu_read_unlock(&prepare_for_test_srcu, idx); 652 653 /* 654 * Sleep quiet until all workers are done with 1 second 655 * interval. Since the test can take a lot of time we 656 * can run into a stack trace of the hung task. That is 657 * why we go with completion_timeout and HZ value. 658 */ 659 do { 660 ret = wait_for_completion_timeout(&test_all_done_comp, HZ); 661 } while (!ret); 662 663 for (i = 0; i < nr_threads; i++) { 664 struct test_driver *t = &tdriver[i]; 665 int j; 666 667 if (!IS_ERR(t->task)) 668 kthread_stop(t->task); 669 670 for (j = 0; j < ARRAY_SIZE(test_case_array); j++) { 671 if (!((run_test_mask & (1 << j)) >> j)) 672 continue; 673 674 pr_info( 675 "Summary: %s passed: %d failed: %d xfailed: %d repeat: %d loops: %d avg: %llu usec\n", 676 test_case_array[j].test_name, 677 t->data[j].test_passed, 678 t->data[j].test_failed, 679 t->data[j].test_xfailed, 680 test_repeat_count, test_loop_count, 681 t->data[j].time); 682 } 683 684 pr_info("All test took worker%d=%lu cycles\n", 685 i, t->stop - t->start); 686 } 687 688 kvfree(tdriver); 689 } 690 691 static int __init vmalloc_test_init(void) 692 { 693 do_concurrent_test(); 694 /* Fail will directly unload the module */ 695 return IS_BUILTIN(CONFIG_TEST_VMALLOC) ? 0:-EAGAIN; 696 } 697 698 #ifdef MODULE 699 module_init(vmalloc_test_init) 700 #else 701 late_initcall(vmalloc_test_init); 702 #endif 703 704 MODULE_LICENSE("GPL"); 705 MODULE_AUTHOR("Uladzislau Rezki"); 706 MODULE_DESCRIPTION("vmalloc test module"); 707