1 // SPDX-License-Identifier: GPL-2.0 2 3 /* 4 * Test module for stress and analyze performance of vmalloc allocator. 5 * (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com> 6 */ 7 #include <linux/init.h> 8 #include <linux/kernel.h> 9 #include <linux/module.h> 10 #include <linux/vmalloc.h> 11 #include <linux/random.h> 12 #include <linux/kthread.h> 13 #include <linux/moduleparam.h> 14 #include <linux/completion.h> 15 #include <linux/delay.h> 16 #include <linux/mm.h> 17 #include <linux/rcupdate.h> 18 #include <linux/srcu.h> 19 #include <linux/slab.h> 20 21 #define __param(type, name, init, msg) \ 22 static type name = init; \ 23 module_param(name, type, 0444); \ 24 MODULE_PARM_DESC(name, msg) \ 25 26 __param(int, nr_threads, 0, 27 "Number of workers to perform tests(min: 1 max: USHRT_MAX)"); 28 29 __param(bool, sequential_test_order, false, 30 "Use sequential stress tests order"); 31 32 __param(int, test_repeat_count, 1, 33 "Set test repeat counter"); 34 35 __param(int, test_loop_count, 1000000, 36 "Set test loop counter"); 37 38 __param(int, nr_pages, 0, 39 "Set number of pages for fix_size_alloc_test(default: 1)"); 40 41 __param(bool, use_huge, false, 42 "Use vmalloc_huge in fix_size_alloc_test"); 43 44 __param(int, run_test_mask, 7, 45 "Set tests specified in the mask.\n\n" 46 "\t\tid: 1, name: fix_size_alloc_test\n" 47 "\t\tid: 2, name: full_fit_alloc_test\n" 48 "\t\tid: 4, name: long_busy_list_alloc_test\n" 49 "\t\tid: 8, name: random_size_alloc_test\n" 50 "\t\tid: 16, name: fix_align_alloc_test\n" 51 "\t\tid: 32, name: random_size_align_alloc_test\n" 52 "\t\tid: 64, name: align_shift_alloc_test\n" 53 "\t\tid: 128, name: pcpu_alloc_test\n" 54 "\t\tid: 256, name: kvfree_rcu_1_arg_vmalloc_test\n" 55 "\t\tid: 512, name: kvfree_rcu_2_arg_vmalloc_test\n" 56 "\t\tid: 1024, name: vm_map_ram_test\n" 57 "\t\tid: 2048, name: no_block_alloc_test\n" 58 /* Add a new test case description here. */ 59 ); 60 61 __param(int, nr_pcpu_objects, 35000, 62 "Number of pcpu objects to allocate for pcpu_alloc_test"); 63 64 /* 65 * This is for synchronization of setup phase. 66 */ 67 DEFINE_STATIC_SRCU(prepare_for_test_srcu); 68 69 /* 70 * Completion tracking for worker threads. 71 */ 72 static DECLARE_COMPLETION(test_all_done_comp); 73 static atomic_t test_n_undone = ATOMIC_INIT(0); 74 75 static inline void 76 test_report_one_done(void) 77 { 78 if (atomic_dec_and_test(&test_n_undone)) 79 complete(&test_all_done_comp); 80 } 81 82 static int random_size_align_alloc_test(void) 83 { 84 unsigned long size, align; 85 unsigned int rnd; 86 void *ptr; 87 int i; 88 89 for (i = 0; i < test_loop_count; i++) { 90 rnd = get_random_u8(); 91 92 /* 93 * Maximum 1024 pages, if PAGE_SIZE is 4096. 94 */ 95 align = 1 << (rnd % 23); 96 97 /* 98 * Maximum 10 pages. 99 */ 100 size = ((rnd % 10) + 1) * PAGE_SIZE; 101 102 ptr = __vmalloc_node(size, align, GFP_KERNEL | __GFP_ZERO, 0, 103 __builtin_return_address(0)); 104 if (!ptr) 105 return -1; 106 107 vfree(ptr); 108 } 109 110 return 0; 111 } 112 113 /* 114 * This test case is supposed to be failed. 115 */ 116 static int align_shift_alloc_test(void) 117 { 118 unsigned long align; 119 void *ptr; 120 int i; 121 122 for (i = 0; i < BITS_PER_LONG; i++) { 123 align = 1UL << i; 124 125 ptr = __vmalloc_node(PAGE_SIZE, align, GFP_KERNEL|__GFP_ZERO, 0, 126 __builtin_return_address(0)); 127 if (!ptr) 128 return -1; 129 130 vfree(ptr); 131 } 132 133 return 0; 134 } 135 136 static int fix_align_alloc_test(void) 137 { 138 void *ptr; 139 int i; 140 141 for (i = 0; i < test_loop_count; i++) { 142 ptr = __vmalloc_node(5 * PAGE_SIZE, THREAD_ALIGN << 1, 143 GFP_KERNEL | __GFP_ZERO, 0, 144 __builtin_return_address(0)); 145 if (!ptr) 146 return -1; 147 148 vfree(ptr); 149 } 150 151 return 0; 152 } 153 154 static int random_size_alloc_test(void) 155 { 156 unsigned int n; 157 void *p; 158 int i; 159 160 for (i = 0; i < test_loop_count; i++) { 161 n = get_random_u32_inclusive(1, 100); 162 p = vmalloc(n * PAGE_SIZE); 163 164 if (!p) 165 return -1; 166 167 *((__u8 *)p) = 1; 168 vfree(p); 169 } 170 171 return 0; 172 } 173 174 static int long_busy_list_alloc_test(void) 175 { 176 void *ptr_1, *ptr_2; 177 void **ptr; 178 int rv = -1; 179 int i; 180 181 ptr = vmalloc(sizeof(void *) * 15000); 182 if (!ptr) 183 return rv; 184 185 for (i = 0; i < 15000; i++) 186 ptr[i] = vmalloc(1 * PAGE_SIZE); 187 188 for (i = 0; i < test_loop_count; i++) { 189 ptr_1 = vmalloc(100 * PAGE_SIZE); 190 if (!ptr_1) 191 goto leave; 192 193 ptr_2 = vmalloc(1 * PAGE_SIZE); 194 if (!ptr_2) { 195 vfree(ptr_1); 196 goto leave; 197 } 198 199 *((__u8 *)ptr_1) = 0; 200 *((__u8 *)ptr_2) = 1; 201 202 vfree(ptr_1); 203 vfree(ptr_2); 204 } 205 206 /* Success */ 207 rv = 0; 208 209 leave: 210 for (i = 0; i < 15000; i++) 211 vfree(ptr[i]); 212 213 vfree(ptr); 214 return rv; 215 } 216 217 static int full_fit_alloc_test(void) 218 { 219 void **ptr, **junk_ptr, *tmp; 220 int junk_length; 221 int rv = -1; 222 int i; 223 224 junk_length = fls(num_online_cpus()); 225 junk_length *= (32 * 1024 * 1024 / PAGE_SIZE); 226 227 ptr = vmalloc(sizeof(void *) * junk_length); 228 if (!ptr) 229 return rv; 230 231 junk_ptr = vmalloc(sizeof(void *) * junk_length); 232 if (!junk_ptr) { 233 vfree(ptr); 234 return rv; 235 } 236 237 for (i = 0; i < junk_length; i++) { 238 ptr[i] = vmalloc(1 * PAGE_SIZE); 239 junk_ptr[i] = vmalloc(1 * PAGE_SIZE); 240 } 241 242 for (i = 0; i < junk_length; i++) 243 vfree(junk_ptr[i]); 244 245 for (i = 0; i < test_loop_count; i++) { 246 tmp = vmalloc(1 * PAGE_SIZE); 247 248 if (!tmp) 249 goto error; 250 251 *((__u8 *)tmp) = 1; 252 vfree(tmp); 253 } 254 255 /* Success */ 256 rv = 0; 257 258 error: 259 for (i = 0; i < junk_length; i++) 260 vfree(ptr[i]); 261 262 vfree(ptr); 263 vfree(junk_ptr); 264 265 return rv; 266 } 267 268 static int fix_size_alloc_test(void) 269 { 270 void *ptr; 271 int i; 272 273 for (i = 0; i < test_loop_count; i++) { 274 if (use_huge) 275 ptr = vmalloc_huge((nr_pages > 0 ? nr_pages:1) * PAGE_SIZE, GFP_KERNEL); 276 else 277 ptr = vmalloc((nr_pages > 0 ? nr_pages:1) * PAGE_SIZE); 278 279 if (!ptr) 280 return -1; 281 282 *((__u8 *)ptr) = 0; 283 284 vfree(ptr); 285 } 286 287 return 0; 288 } 289 290 static int no_block_alloc_test(void) 291 { 292 void *ptr; 293 int i; 294 295 for (i = 0; i < test_loop_count; i++) { 296 bool use_atomic = !!(get_random_u8() % 2); 297 gfp_t gfp = use_atomic ? GFP_ATOMIC : GFP_NOWAIT; 298 unsigned long size = (nr_pages > 0 ? nr_pages : 1) * PAGE_SIZE; 299 300 preempt_disable(); 301 ptr = __vmalloc(size, gfp); 302 preempt_enable(); 303 304 if (!ptr) 305 return -1; 306 307 *((__u8 *)ptr) = 0; 308 vfree(ptr); 309 } 310 311 return 0; 312 } 313 314 static int 315 pcpu_alloc_test(void) 316 { 317 int rv = 0; 318 #ifndef CONFIG_NEED_PER_CPU_KM 319 void __percpu **pcpu; 320 size_t size, align; 321 int i; 322 323 pcpu = vmalloc(sizeof(void __percpu *) * nr_pcpu_objects); 324 if (!pcpu) 325 return -1; 326 327 for (i = 0; i < nr_pcpu_objects; i++) { 328 size = get_random_u32_inclusive(1, PAGE_SIZE / 4); 329 330 /* 331 * Maximum PAGE_SIZE 332 */ 333 align = 1 << get_random_u32_inclusive(1, PAGE_SHIFT - 1); 334 335 pcpu[i] = __alloc_percpu(size, align); 336 if (!pcpu[i]) 337 rv = -1; 338 } 339 340 for (i = 0; i < nr_pcpu_objects; i++) 341 free_percpu(pcpu[i]); 342 343 vfree(pcpu); 344 #endif 345 return rv; 346 } 347 348 struct test_kvfree_rcu { 349 struct rcu_head rcu; 350 unsigned char array[20]; 351 }; 352 353 static int 354 kvfree_rcu_1_arg_vmalloc_test(void) 355 { 356 struct test_kvfree_rcu *p; 357 int i; 358 359 for (i = 0; i < test_loop_count; i++) { 360 p = vmalloc(1 * PAGE_SIZE); 361 if (!p) 362 return -1; 363 364 p->array[0] = 'a'; 365 kvfree_rcu_mightsleep(p); 366 } 367 368 return 0; 369 } 370 371 static int 372 kvfree_rcu_2_arg_vmalloc_test(void) 373 { 374 struct test_kvfree_rcu *p; 375 int i; 376 377 for (i = 0; i < test_loop_count; i++) { 378 p = vmalloc(1 * PAGE_SIZE); 379 if (!p) 380 return -1; 381 382 p->array[0] = 'a'; 383 kvfree_rcu(p, rcu); 384 } 385 386 return 0; 387 } 388 389 static int 390 vm_map_ram_test(void) 391 { 392 unsigned long nr_allocated; 393 unsigned int map_nr_pages; 394 unsigned char *v_ptr; 395 struct page **pages; 396 int i; 397 398 map_nr_pages = nr_pages > 0 ? nr_pages:1; 399 pages = kcalloc(map_nr_pages, sizeof(struct page *), GFP_KERNEL); 400 if (!pages) 401 return -1; 402 403 nr_allocated = alloc_pages_bulk(GFP_KERNEL, map_nr_pages, pages); 404 if (nr_allocated != map_nr_pages) 405 goto cleanup; 406 407 /* Run the test loop. */ 408 for (i = 0; i < test_loop_count; i++) { 409 v_ptr = vm_map_ram(pages, map_nr_pages, NUMA_NO_NODE); 410 *v_ptr = 'a'; 411 vm_unmap_ram(v_ptr, map_nr_pages); 412 } 413 414 cleanup: 415 for (i = 0; i < nr_allocated; i++) 416 __free_page(pages[i]); 417 418 kfree(pages); 419 420 /* 0 indicates success. */ 421 return nr_allocated != map_nr_pages; 422 } 423 424 struct test_case_desc { 425 const char *test_name; 426 int (*test_func)(void); 427 bool xfail; 428 }; 429 430 static struct test_case_desc test_case_array[] = { 431 { "fix_size_alloc_test", fix_size_alloc_test, }, 432 { "full_fit_alloc_test", full_fit_alloc_test, }, 433 { "long_busy_list_alloc_test", long_busy_list_alloc_test, }, 434 { "random_size_alloc_test", random_size_alloc_test, }, 435 { "fix_align_alloc_test", fix_align_alloc_test, }, 436 { "random_size_align_alloc_test", random_size_align_alloc_test, }, 437 { "align_shift_alloc_test", align_shift_alloc_test, true }, 438 { "pcpu_alloc_test", pcpu_alloc_test, }, 439 { "kvfree_rcu_1_arg_vmalloc_test", kvfree_rcu_1_arg_vmalloc_test, }, 440 { "kvfree_rcu_2_arg_vmalloc_test", kvfree_rcu_2_arg_vmalloc_test, }, 441 { "vm_map_ram_test", vm_map_ram_test, }, 442 { "no_block_alloc_test", no_block_alloc_test, true }, 443 /* Add a new test case here. */ 444 }; 445 446 struct test_case_data { 447 int test_failed; 448 int test_xfailed; 449 int test_passed; 450 u64 time; 451 }; 452 453 static struct test_driver { 454 struct task_struct *task; 455 struct test_case_data data[ARRAY_SIZE(test_case_array)]; 456 457 unsigned long start; 458 unsigned long stop; 459 } *tdriver; 460 461 static void shuffle_array(int *arr, int n) 462 { 463 int i, j; 464 465 for (i = n - 1; i > 0; i--) { 466 /* Cut the range. */ 467 j = get_random_u32_below(i); 468 469 /* Swap indexes. */ 470 swap(arr[i], arr[j]); 471 } 472 } 473 474 static int test_func(void *private) 475 { 476 struct test_driver *t = private; 477 int random_array[ARRAY_SIZE(test_case_array)]; 478 int index, i, j, ret; 479 ktime_t kt; 480 u64 delta; 481 482 for (i = 0; i < ARRAY_SIZE(test_case_array); i++) 483 random_array[i] = i; 484 485 if (!sequential_test_order) 486 shuffle_array(random_array, ARRAY_SIZE(test_case_array)); 487 488 /* 489 * Block until initialization is done. 490 */ 491 synchronize_srcu(&prepare_for_test_srcu); 492 493 t->start = get_cycles(); 494 for (i = 0; i < ARRAY_SIZE(test_case_array); i++) { 495 index = random_array[i]; 496 497 /* 498 * Skip tests if run_test_mask has been specified. 499 */ 500 if (!((run_test_mask & (1 << index)) >> index)) 501 continue; 502 kt = ktime_get(); 503 for (j = 0; j < test_repeat_count; j++) { 504 ret = test_case_array[index].test_func(); 505 506 if (!ret) 507 t->data[index].test_passed++; 508 else if (ret && test_case_array[index].xfail) 509 t->data[index].test_xfailed++; 510 else 511 t->data[index].test_failed++; 512 } 513 514 /* 515 * Take an average time that test took. 516 */ 517 delta = (u64) ktime_us_delta(ktime_get(), kt); 518 do_div(delta, (u32) test_repeat_count); 519 520 t->data[index].time = delta; 521 } 522 t->stop = get_cycles(); 523 test_report_one_done(); 524 525 /* 526 * Wait for the kthread_stop() call. 527 */ 528 while (!kthread_should_stop()) 529 msleep(10); 530 531 return 0; 532 } 533 534 static int 535 init_test_configuration(void) 536 { 537 /* 538 * A maximum number of workers is defined as hard-coded 539 * value and set to USHRT_MAX. We add such gap just in 540 * case and for potential heavy stressing. 541 */ 542 nr_threads = clamp(nr_threads, 1, (int) USHRT_MAX); 543 544 /* Allocate the space for test instances. */ 545 tdriver = kvcalloc(nr_threads, sizeof(*tdriver), GFP_KERNEL); 546 if (tdriver == NULL) 547 return -1; 548 549 if (test_repeat_count <= 0) 550 test_repeat_count = 1; 551 552 if (test_loop_count <= 0) 553 test_loop_count = 1; 554 555 return 0; 556 } 557 558 static void do_concurrent_test(void) 559 { 560 int i, ret, idx; 561 562 /* 563 * Set some basic configurations plus sanity check. 564 */ 565 ret = init_test_configuration(); 566 if (ret < 0) 567 return; 568 569 /* 570 * Put on hold all workers. 571 */ 572 idx = srcu_read_lock(&prepare_for_test_srcu); 573 574 for (i = 0; i < nr_threads; i++) { 575 struct test_driver *t = &tdriver[i]; 576 577 t->task = kthread_run(test_func, t, "vmalloc_test/%d", i); 578 579 if (!IS_ERR(t->task)) 580 /* Success. */ 581 atomic_inc(&test_n_undone); 582 else 583 pr_err("Failed to start %d kthread\n", i); 584 } 585 586 /* 587 * Now let the workers do their job. 588 */ 589 srcu_read_unlock(&prepare_for_test_srcu, idx); 590 591 /* 592 * Sleep quiet until all workers are done with 1 second 593 * interval. Since the test can take a lot of time we 594 * can run into a stack trace of the hung task. That is 595 * why we go with completion_timeout and HZ value. 596 */ 597 do { 598 ret = wait_for_completion_timeout(&test_all_done_comp, HZ); 599 } while (!ret); 600 601 for (i = 0; i < nr_threads; i++) { 602 struct test_driver *t = &tdriver[i]; 603 int j; 604 605 if (!IS_ERR(t->task)) 606 kthread_stop(t->task); 607 608 for (j = 0; j < ARRAY_SIZE(test_case_array); j++) { 609 if (!((run_test_mask & (1 << j)) >> j)) 610 continue; 611 612 pr_info( 613 "Summary: %s passed: %d failed: %d xfailed: %d repeat: %d loops: %d avg: %llu usec\n", 614 test_case_array[j].test_name, 615 t->data[j].test_passed, 616 t->data[j].test_failed, 617 t->data[j].test_xfailed, 618 test_repeat_count, test_loop_count, 619 t->data[j].time); 620 } 621 622 pr_info("All test took worker%d=%lu cycles\n", 623 i, t->stop - t->start); 624 } 625 626 kvfree(tdriver); 627 } 628 629 static int __init vmalloc_test_init(void) 630 { 631 do_concurrent_test(); 632 /* Fail will directly unload the module */ 633 return IS_BUILTIN(CONFIG_TEST_VMALLOC) ? 0:-EAGAIN; 634 } 635 636 #ifdef MODULE 637 module_init(vmalloc_test_init) 638 #else 639 late_initcall(vmalloc_test_init); 640 #endif 641 642 MODULE_LICENSE("GPL"); 643 MODULE_AUTHOR("Uladzislau Rezki"); 644 MODULE_DESCRIPTION("vmalloc test module"); 645