// SPDX-License-Identifier: GPL-2.0

/*
 * Test module to stress and analyze the performance of the vmalloc allocator.
 * (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com>
 */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/kthread.h>
#include <linux/moduleparam.h>
#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/mm.h>
#include <linux/rcupdate.h>
#include <linux/srcu.h>
#include <linux/slab.h>

#define __param(type, name, init, msg)		\
	static type name = init;		\
	module_param(name, type, 0444);		\
	MODULE_PARM_DESC(name, msg)		\

__param(int, nr_threads, 0,
	"Number of workers to perform tests (min: 1 max: USHRT_MAX)");

__param(bool, sequential_test_order, false,
	"Use sequential stress tests order");

__param(int, test_repeat_count, 1,
	"Set test repeat counter");

__param(int, test_loop_count, 1000000,
	"Set test loop counter");

__param(int, nr_pages, 0,
	"Set number of pages for fix_size_alloc_test (default: 1)");

__param(bool, use_huge, false,
	"Use vmalloc_huge in fix_size_alloc_test");

__param(int, run_test_mask, 7,
	"Set tests specified in the mask.\n\n"
		"\t\tid: 1, name: fix_size_alloc_test\n"
		"\t\tid: 2, name: full_fit_alloc_test\n"
		"\t\tid: 4, name: long_busy_list_alloc_test\n"
		"\t\tid: 8, name: random_size_alloc_test\n"
		"\t\tid: 16, name: fix_align_alloc_test\n"
		"\t\tid: 32, name: random_size_align_alloc_test\n"
		"\t\tid: 64, name: align_shift_alloc_test\n"
		"\t\tid: 128, name: pcpu_alloc_test\n"
		"\t\tid: 256, name: kvfree_rcu_1_arg_vmalloc_test\n"
		"\t\tid: 512, name: kvfree_rcu_2_arg_vmalloc_test\n"
		"\t\tid: 1024, name: vm_map_ram_test\n"
		"\t\tid: 2048, name: no_block_alloc_test\n"
		/* Add a new test case description here. */
);
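/*
 * Example usage (illustrative): parameters are passed at module load time,
 * e.g. to run fix_size_alloc_test (id 1) and random_size_alloc_test (id 8)
 * with two workers:
 *
 *	modprobe test_vmalloc nr_threads=2 run_test_mask=9
 *
 * A per-test summary is printed to the kernel log once all workers finish.
 */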
/*
 * This is for synchronization of the setup phase.
 */
DEFINE_STATIC_SRCU(prepare_for_test_srcu);

/*
 * Completion tracking for worker threads.
 */
static DECLARE_COMPLETION(test_all_done_comp);
static atomic_t test_n_undone = ATOMIC_INIT(0);

static inline void
test_report_one_done(void)
{
	if (atomic_dec_and_test(&test_n_undone))
		complete(&test_all_done_comp);
}

static int random_size_align_alloc_test(void)
{
	unsigned long size, align;
	unsigned int rnd;
	void *ptr;
	int i;

	for (i = 0; i < test_loop_count; i++) {
		rnd = get_random_u8();

		/*
		 * Maximum 1024 pages, if PAGE_SIZE is 4096.
		 */
		align = 1 << (rnd % 23);

		/*
		 * Maximum 10 pages.
		 */
		size = ((rnd % 10) + 1) * PAGE_SIZE;

		ptr = __vmalloc_node(size, align, GFP_KERNEL | __GFP_ZERO, 0,
				__builtin_return_address(0));
		if (!ptr)
			return -1;

		vfree(ptr);
	}

	return 0;
}

/*
 * This test case is supposed to fail.
 */
static int align_shift_alloc_test(void)
{
	unsigned long align;
	void *ptr;
	int i;

	for (i = 0; i < BITS_PER_LONG; i++) {
		align = 1UL << i;

		ptr = __vmalloc_node(PAGE_SIZE, align, GFP_KERNEL|__GFP_ZERO, 0,
				__builtin_return_address(0));
		if (!ptr)
			return -1;

		vfree(ptr);
	}

	return 0;
}

static int fix_align_alloc_test(void)
{
	void *ptr;
	int i;

	for (i = 0; i < test_loop_count; i++) {
		ptr = __vmalloc_node(5 * PAGE_SIZE, THREAD_ALIGN << 1,
				GFP_KERNEL | __GFP_ZERO, 0,
				__builtin_return_address(0));
		if (!ptr)
			return -1;

		vfree(ptr);
	}

	return 0;
}

static int random_size_alloc_test(void)
{
	unsigned int n;
	void *p;
	int i;

	for (i = 0; i < test_loop_count; i++) {
		n = get_random_u32_inclusive(1, 100);
		p = vmalloc(n * PAGE_SIZE);

		if (!p)
			return -1;

		*((__u8 *)p) = 1;
		vfree(p);
	}

	return 0;
}

static int long_busy_list_alloc_test(void)
{
	void *ptr_1, *ptr_2;
	void **ptr;
	int rv = -1;
	int i;

	ptr = vmalloc(sizeof(void *) * 15000);
	if (!ptr)
		return rv;

	for (i = 0; i < 15000; i++)
		ptr[i] = vmalloc(1 * PAGE_SIZE);

	for (i = 0; i < test_loop_count; i++) {
		ptr_1 = vmalloc(100 * PAGE_SIZE);
		if (!ptr_1)
			goto leave;

		ptr_2 = vmalloc(1 * PAGE_SIZE);
		if (!ptr_2) {
			vfree(ptr_1);
			goto leave;
		}

		*((__u8 *)ptr_1) = 0;
		*((__u8 *)ptr_2) = 1;

		vfree(ptr_1);
		vfree(ptr_2);
	}

	/* Success */
	rv = 0;

leave:
	for (i = 0; i < 15000; i++)
		vfree(ptr[i]);

	vfree(ptr);
	return rv;
}

static int full_fit_alloc_test(void)
{
	void **ptr, **junk_ptr, *tmp;
	int junk_length;
	int rv = -1;
	int i;

	junk_length = fls(num_online_cpus());
	junk_length *= (32 * 1024 * 1024 / PAGE_SIZE);

	ptr = vmalloc(sizeof(void *) * junk_length);
	if (!ptr)
		return rv;

	junk_ptr = vmalloc(sizeof(void *) * junk_length);
	if (!junk_ptr) {
		vfree(ptr);
		return rv;
	}

	for (i = 0; i < junk_length; i++) {
		ptr[i] = vmalloc(1 * PAGE_SIZE);
		junk_ptr[i] = vmalloc(1 * PAGE_SIZE);
	}

	for (i = 0; i < junk_length; i++)
		vfree(junk_ptr[i]);

	for (i = 0; i < test_loop_count; i++) {
		tmp = vmalloc(1 * PAGE_SIZE);

		if (!tmp)
			goto error;

		*((__u8 *)tmp) = 1;
		vfree(tmp);
	}

	/* Success */
	rv = 0;

error:
	for (i = 0; i < junk_length; i++)
		vfree(ptr[i]);

	vfree(ptr);
	vfree(junk_ptr);

	return rv;
}

static int fix_size_alloc_test(void)
{
	void *ptr;
	int i;

	for (i = 0; i < test_loop_count; i++) {
		if (use_huge)
			ptr = vmalloc_huge((nr_pages > 0 ? nr_pages : 1) * PAGE_SIZE, GFP_KERNEL);
		else
			ptr = vmalloc((nr_pages > 0 ? nr_pages : 1) * PAGE_SIZE);

		if (!ptr)
			return -1;

		*((__u8 *)ptr) = 0;

		vfree(ptr);
	}

	return 0;
}
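/*
 * Exercise allocations that are not allowed to block: preemption is disabled
 * around __vmalloc(), and only the non-blocking GFP_ATOMIC/GFP_NOWAIT flags
 * are used, so the allocator must not sleep here. The test is marked "xfail"
 * in test_case_array below; a non-blocking request may legitimately fail when
 * memory cannot be obtained without sleeping.
 */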
static int no_block_alloc_test(void)
{
	void *ptr;
	int i;

	for (i = 0; i < test_loop_count; i++) {
		bool use_atomic = !!(get_random_u8() % 2);
		gfp_t gfp = use_atomic ? GFP_ATOMIC : GFP_NOWAIT;
		unsigned long size = (nr_pages > 0 ? nr_pages : 1) * PAGE_SIZE;

		preempt_disable();
		ptr = __vmalloc(size, gfp);
		preempt_enable();

		if (!ptr)
			return -1;

		*((__u8 *)ptr) = 0;
		vfree(ptr);
	}

	return 0;
}

static int
pcpu_alloc_test(void)
{
	int rv = 0;
#ifndef CONFIG_NEED_PER_CPU_KM
	void __percpu **pcpu;
	size_t size, align;
	int i;

	pcpu = vmalloc(sizeof(void __percpu *) * 35000);
	if (!pcpu)
		return -1;

	for (i = 0; i < 35000; i++) {
		size = get_random_u32_inclusive(1, PAGE_SIZE / 4);

		/*
		 * Maximum PAGE_SIZE
		 */
		align = 1 << get_random_u32_inclusive(1, 11);

		pcpu[i] = __alloc_percpu(size, align);
		if (!pcpu[i])
			rv = -1;
	}

	for (i = 0; i < 35000; i++)
		free_percpu(pcpu[i]);

	vfree(pcpu);
#endif
	return rv;
}

struct test_kvfree_rcu {
	struct rcu_head rcu;
	unsigned char array[20];
};

static int
kvfree_rcu_1_arg_vmalloc_test(void)
{
	struct test_kvfree_rcu *p;
	int i;

	for (i = 0; i < test_loop_count; i++) {
		p = vmalloc(1 * PAGE_SIZE);
		if (!p)
			return -1;

		p->array[0] = 'a';
		kvfree_rcu_mightsleep(p);
	}

	return 0;
}

static int
kvfree_rcu_2_arg_vmalloc_test(void)
{
	struct test_kvfree_rcu *p;
	int i;

	for (i = 0; i < test_loop_count; i++) {
		p = vmalloc(1 * PAGE_SIZE);
		if (!p)
			return -1;

		p->array[0] = 'a';
		kvfree_rcu(p, rcu);
	}

	return 0;
}

static int
vm_map_ram_test(void)
{
	unsigned long nr_allocated;
	unsigned int map_nr_pages;
	unsigned char *v_ptr;
	struct page **pages;
	int i;

	map_nr_pages = nr_pages > 0 ? nr_pages : 1;
	pages = kcalloc(map_nr_pages, sizeof(struct page *), GFP_KERNEL);
	if (!pages)
		return -1;

	nr_allocated = alloc_pages_bulk(GFP_KERNEL, map_nr_pages, pages);
	if (nr_allocated != map_nr_pages)
		goto cleanup;

	/* Run the test loop. */
	for (i = 0; i < test_loop_count; i++) {
		v_ptr = vm_map_ram(pages, map_nr_pages, NUMA_NO_NODE);
		*v_ptr = 'a';
		vm_unmap_ram(v_ptr, map_nr_pages);
	}

cleanup:
	for (i = 0; i < nr_allocated; i++)
		__free_page(pages[i]);

	kfree(pages);

	/* 0 indicates success. */
	return nr_allocated != map_nr_pages;
}
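/*
 * Each entry below maps one run_test_mask bit (1 << array index) to a test
 * function; a test marked "xfail" is expected to return non-zero. A new test
 * follows the same pattern as the existing ones (hypothetical sketch,
 * example_alloc_test is not part of this module):
 *
 *	static int example_alloc_test(void)
 *	{
 *		void *p = vmalloc(PAGE_SIZE);
 *
 *		if (!p)
 *			return -1;
 *
 *		vfree(p);
 *		return 0;
 *	}
 *
 * plus an entry in test_case_array and a new id line in the run_test_mask
 * parameter description.
 */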
struct test_case_desc {
	const char *test_name;
	int (*test_func)(void);
	bool xfail;
};

static struct test_case_desc test_case_array[] = {
	{ "fix_size_alloc_test", fix_size_alloc_test, },
	{ "full_fit_alloc_test", full_fit_alloc_test, },
	{ "long_busy_list_alloc_test", long_busy_list_alloc_test, },
	{ "random_size_alloc_test", random_size_alloc_test, },
	{ "fix_align_alloc_test", fix_align_alloc_test, },
	{ "random_size_align_alloc_test", random_size_align_alloc_test, },
	{ "align_shift_alloc_test", align_shift_alloc_test, true },
	{ "pcpu_alloc_test", pcpu_alloc_test, },
	{ "kvfree_rcu_1_arg_vmalloc_test", kvfree_rcu_1_arg_vmalloc_test, },
	{ "kvfree_rcu_2_arg_vmalloc_test", kvfree_rcu_2_arg_vmalloc_test, },
	{ "vm_map_ram_test", vm_map_ram_test, },
	{ "no_block_alloc_test", no_block_alloc_test, true },
	/* Add a new test case here. */
};

struct test_case_data {
	int test_failed;
	int test_xfailed;
	int test_passed;
	u64 time;
};

static struct test_driver {
	struct task_struct *task;
	struct test_case_data data[ARRAY_SIZE(test_case_array)];

	unsigned long start;
	unsigned long stop;
} *tdriver;

static void shuffle_array(int *arr, int n)
{
	int i, j;

	for (i = n - 1; i > 0; i--) {
		/* Pick a random index in [0, i). */
		j = get_random_u32_below(i);

		/* Swap indexes. */
		swap(arr[i], arr[j]);
	}
}

static int test_func(void *private)
{
	struct test_driver *t = private;
	int random_array[ARRAY_SIZE(test_case_array)];
	int index, i, j, ret;
	ktime_t kt;
	u64 delta;

	for (i = 0; i < ARRAY_SIZE(test_case_array); i++)
		random_array[i] = i;

	if (!sequential_test_order)
		shuffle_array(random_array, ARRAY_SIZE(test_case_array));

	/*
	 * Block until initialization is done.
	 */
	synchronize_srcu(&prepare_for_test_srcu);

	t->start = get_cycles();
	for (i = 0; i < ARRAY_SIZE(test_case_array); i++) {
		index = random_array[i];

		/*
		 * Skip tests that are not selected in run_test_mask.
		 */
		if (!((run_test_mask & (1 << index)) >> index))
			continue;

		kt = ktime_get();
		for (j = 0; j < test_repeat_count; j++) {
			ret = test_case_array[index].test_func();

			if (!ret)
				t->data[index].test_passed++;
			else if (ret && test_case_array[index].xfail)
				t->data[index].test_xfailed++;
			else
				t->data[index].test_failed++;
		}

		/*
		 * Take the average time the test took.
		 */
		delta = (u64) ktime_us_delta(ktime_get(), kt);
		do_div(delta, (u32) test_repeat_count);

		t->data[index].time = delta;
	}
	t->stop = get_cycles();
	test_report_one_done();

	/*
	 * Wait for the kthread_stop() call.
	 */
	while (!kthread_should_stop())
		msleep(10);

	return 0;
}

static int
init_test_configuration(void)
{
	/*
	 * The maximum number of workers is hard-coded to USHRT_MAX. Such a
	 * large cap is kept just in case and for potential heavy stressing.
	 */
	nr_threads = clamp(nr_threads, 1, (int) USHRT_MAX);

	/* Allocate the space for test instances. */
	tdriver = kvcalloc(nr_threads, sizeof(*tdriver), GFP_KERNEL);
	if (tdriver == NULL)
		return -1;

	if (test_repeat_count <= 0)
		test_repeat_count = 1;

	if (test_loop_count <= 0)
		test_loop_count = 1;

	return 0;
}
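/*
 * Worker start-up is gated with SRCU: the launcher enters an SRCU read-side
 * critical section before spawning the kthreads, and every worker calls
 * synchronize_srcu() in test_func() before running its tests. Since
 * synchronize_srcu() cannot return until the read-side section is left via
 * srcu_read_unlock(), no worker starts testing before all of them have been
 * created, so they begin roughly at the same time.
 */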
static void do_concurrent_test(void)
{
	int i, ret, idx;

	/*
	 * Set some basic configurations plus sanity check.
	 */
	ret = init_test_configuration();
	if (ret < 0)
		return;

	/*
	 * Put all workers on hold.
	 */
	idx = srcu_read_lock(&prepare_for_test_srcu);

	for (i = 0; i < nr_threads; i++) {
		struct test_driver *t = &tdriver[i];

		t->task = kthread_run(test_func, t, "vmalloc_test/%d", i);

		if (!IS_ERR(t->task))
			/* Success. */
			atomic_inc(&test_n_undone);
		else
			pr_err("Failed to start %d kthread\n", i);
	}

	/*
	 * Now let the workers do their job.
	 */
	srcu_read_unlock(&prepare_for_test_srcu, idx);

	/*
	 * Sleep quietly until all workers are done, polling with a 1 second
	 * interval. Since the test can take a lot of time, the hung-task
	 * detector could otherwise print a stack trace; that is why
	 * wait_for_completion_timeout() with an HZ timeout is used.
	 */
	do {
		ret = wait_for_completion_timeout(&test_all_done_comp, HZ);
	} while (!ret);

	for (i = 0; i < nr_threads; i++) {
		struct test_driver *t = &tdriver[i];
		int j;

		if (!IS_ERR(t->task))
			kthread_stop(t->task);

		for (j = 0; j < ARRAY_SIZE(test_case_array); j++) {
			if (!((run_test_mask & (1 << j)) >> j))
				continue;

			pr_info(
				"Summary: %s passed: %d failed: %d xfailed: %d repeat: %d loops: %d avg: %llu usec\n",
				test_case_array[j].test_name,
				t->data[j].test_passed,
				t->data[j].test_failed,
				t->data[j].test_xfailed,
				test_repeat_count, test_loop_count,
				t->data[j].time);
		}

		pr_info("All test took worker%d=%lu cycles\n",
			i, t->stop - t->start);
	}

	kvfree(tdriver);
}

static int __init vmalloc_test_init(void)
{
	do_concurrent_test();
	/* Returning an error here makes the module load fail, so it is unloaded right away. */
	return IS_BUILTIN(CONFIG_TEST_VMALLOC) ? 0 : -EAGAIN;
}

#ifdef MODULE
module_init(vmalloc_test_init)
#else
late_initcall(vmalloc_test_init);
#endif

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Uladzislau Rezki");
MODULE_DESCRIPTION("vmalloc test module");
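/*
 * Illustrative kernel log output for one worker (the values are made up; the
 * format matches the pr_info() calls in do_concurrent_test(), and with the
 * default run_test_mask=7 only the first three tests run):
 *
 *   Summary: fix_size_alloc_test passed: 1 failed: 0 xfailed: 0 repeat: 1 loops: 1000000 avg: 2721026 usec
 *   Summary: full_fit_alloc_test passed: 1 failed: 0 xfailed: 0 repeat: 1 loops: 1000000 avg: 3128671 usec
 *   Summary: long_busy_list_alloc_test passed: 1 failed: 0 xfailed: 0 repeat: 1 loops: 1000000 avg: 11078751 usec
 *   All test took worker0=51242101026 cycles
 */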