// SPDX-License-Identifier: GPL-2.0

/*
 * Test module to stress and analyze the performance of the vmalloc
 * allocator.
 * (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com>
 */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/kthread.h>
#include <linux/moduleparam.h>
#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/mm.h>
#include <linux/rcupdate.h>
#include <linux/srcu.h>
#include <linux/slab.h>

#define __param(type, name, init, msg)		\
	static type name = init;		\
	module_param(name, type, 0444);		\
	MODULE_PARM_DESC(name, msg)		\

__param(int, nr_threads, 0,
	"Number of workers to perform tests (min: 1, max: USHRT_MAX)");

__param(bool, sequential_test_order, false,
	"Use sequential stress tests order");

__param(int, test_repeat_count, 1,
	"Set test repeat counter");

__param(int, test_loop_count, 1000000,
	"Set test loop counter");

__param(int, nr_pages, 0,
	"Set number of pages for fix_size_alloc_test (default: 1)");

__param(bool, use_huge, false,
	"Use vmalloc_huge in fix_size_alloc_test");

__param(int, run_test_mask, 7,
	"Set tests specified in the mask.\n\n"
		"\t\tid: 1, name: fix_size_alloc_test\n"
		"\t\tid: 2, name: full_fit_alloc_test\n"
		"\t\tid: 4, name: long_busy_list_alloc_test\n"
		"\t\tid: 8, name: random_size_alloc_test\n"
		"\t\tid: 16, name: fix_align_alloc_test\n"
		"\t\tid: 32, name: random_size_align_alloc_test\n"
		"\t\tid: 64, name: align_shift_alloc_test\n"
		"\t\tid: 128, name: pcpu_alloc_test\n"
		"\t\tid: 256, name: kvfree_rcu_1_arg_vmalloc_test\n"
		"\t\tid: 512, name: kvfree_rcu_2_arg_vmalloc_test\n"
		"\t\tid: 1024, name: vm_map_ram_test\n"
		/* Add a new test case description here. */
);
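/*
 * An illustrative invocation (parameter values are arbitrary):
 *
 *	modprobe test_vmalloc nr_threads=4 run_test_mask=5 test_repeat_count=10
 *
 * spawns four workers, each running fix_size_alloc_test (id: 1) and
 * long_busy_list_alloc_test (id: 4) ten times. Results are printed to
 * the kernel log; see do_concurrent_test() below.
 */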
/*
 * SRCU is used to synchronize the setup phase: the main thread enters
 * an SRCU read-side critical section before spawning the workers, and
 * each worker blocks in synchronize_srcu() until that section is left,
 * so all workers start at the same time.
 */
DEFINE_STATIC_SRCU(prepare_for_test_srcu);

/*
 * Completion tracking for worker threads.
 */
static DECLARE_COMPLETION(test_all_done_comp);
static atomic_t test_n_undone = ATOMIC_INIT(0);

static inline void
test_report_one_done(void)
{
	if (atomic_dec_and_test(&test_n_undone))
		complete(&test_all_done_comp);
}

static int random_size_align_alloc_test(void)
{
	unsigned long size, align;
	unsigned int rnd;
	void *ptr;
	int i;

	for (i = 0; i < test_loop_count; i++) {
		rnd = get_random_u8();

		/*
		 * Maximum 1024 pages, if PAGE_SIZE is 4096.
		 */
		align = 1 << (rnd % 23);

		/*
		 * Maximum 10 pages.
		 */
		size = ((rnd % 10) + 1) * PAGE_SIZE;

		ptr = __vmalloc_node(size, align, GFP_KERNEL | __GFP_ZERO, 0,
				__builtin_return_address(0));
		if (!ptr)
			return -1;

		vfree(ptr);
	}

	return 0;
}

/*
 * This test case is supposed to fail.
 */
static int align_shift_alloc_test(void)
{
	unsigned long align;
	void *ptr;
	int i;

	for (i = 0; i < BITS_PER_LONG; i++) {
		align = 1UL << i;

		ptr = __vmalloc_node(PAGE_SIZE, align, GFP_KERNEL|__GFP_ZERO, 0,
				__builtin_return_address(0));
		if (!ptr)
			return -1;

		vfree(ptr);
	}

	return 0;
}

static int fix_align_alloc_test(void)
{
	void *ptr;
	int i;

	for (i = 0; i < test_loop_count; i++) {
		ptr = __vmalloc_node(5 * PAGE_SIZE, THREAD_ALIGN << 1,
				GFP_KERNEL | __GFP_ZERO, 0,
				__builtin_return_address(0));
		if (!ptr)
			return -1;

		vfree(ptr);
	}

	return 0;
}

static int random_size_alloc_test(void)
{
	unsigned int n;
	void *p;
	int i;

	for (i = 0; i < test_loop_count; i++) {
		n = get_random_u32_inclusive(1, 100);
		p = vmalloc(n * PAGE_SIZE);

		if (!p)
			return -1;

		*((__u8 *)p) = 1;
		vfree(p);
	}

	return 0;
}

static int long_busy_list_alloc_test(void)
{
	void *ptr_1, *ptr_2;
	void **ptr;
	int rv = -1;
	int i;

	ptr = vmalloc(sizeof(void *) * 15000);
	if (!ptr)
		return rv;

	for (i = 0; i < 15000; i++)
		ptr[i] = vmalloc(1 * PAGE_SIZE);

	for (i = 0; i < test_loop_count; i++) {
		ptr_1 = vmalloc(100 * PAGE_SIZE);
		if (!ptr_1)
			goto leave;

		ptr_2 = vmalloc(1 * PAGE_SIZE);
		if (!ptr_2) {
			vfree(ptr_1);
			goto leave;
		}

		*((__u8 *)ptr_1) = 0;
		*((__u8 *)ptr_2) = 1;

		vfree(ptr_1);
		vfree(ptr_2);
	}

	/* Success */
	rv = 0;

leave:
	for (i = 0; i < 15000; i++)
		vfree(ptr[i]);

	vfree(ptr);
	return rv;
}

static int full_fit_alloc_test(void)
{
	void **ptr, **junk_ptr, *tmp;
	int junk_length;
	int rv = -1;
	int i;

	junk_length = fls(num_online_cpus());
	junk_length *= (32 * 1024 * 1024 / PAGE_SIZE);

	ptr = vmalloc(sizeof(void *) * junk_length);
	if (!ptr)
		return rv;

	junk_ptr = vmalloc(sizeof(void *) * junk_length);
	if (!junk_ptr) {
		vfree(ptr);
		return rv;
	}

	for (i = 0; i < junk_length; i++) {
		ptr[i] = vmalloc(1 * PAGE_SIZE);
		junk_ptr[i] = vmalloc(1 * PAGE_SIZE);
	}

	for (i = 0; i < junk_length; i++)
		vfree(junk_ptr[i]);

	for (i = 0; i < test_loop_count; i++) {
		tmp = vmalloc(1 * PAGE_SIZE);

		if (!tmp)
			goto error;

		*((__u8 *)tmp) = 1;
		vfree(tmp);
	}

	/* Success */
	rv = 0;

error:
	for (i = 0; i < junk_length; i++)
		vfree(ptr[i]);

	vfree(ptr);
	vfree(junk_ptr);

	return rv;
}

static int fix_size_alloc_test(void)
{
	void *ptr;
	int i;

	for (i = 0; i < test_loop_count; i++) {
		if (use_huge)
			ptr = vmalloc_huge((nr_pages > 0 ? nr_pages : 1) * PAGE_SIZE,
					GFP_KERNEL);
		else
			ptr = vmalloc((nr_pages > 0 ? nr_pages : 1) * PAGE_SIZE);

		if (!ptr)
			return -1;

		*((__u8 *)ptr) = 0;

		vfree(ptr);
	}

	return 0;
}
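/*
 * Note: with use_huge=1, a huge mapping is only possible when the
 * requested size reaches a huge-page boundary. For example, on x86_64
 * with 4K base pages, nr_pages=512 (2MB) would allow vmalloc_huge() to
 * use a PMD-sized mapping, assuming huge vmalloc is enabled on the
 * running kernel.
 */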
static int
pcpu_alloc_test(void)
{
	int rv = 0;
#ifndef CONFIG_NEED_PER_CPU_KM
	void __percpu **pcpu;
	size_t size, align;
	int i;

	pcpu = vmalloc(sizeof(void __percpu *) * 35000);
	if (!pcpu)
		return -1;

	for (i = 0; i < 35000; i++) {
		size = get_random_u32_inclusive(1, PAGE_SIZE / 4);

		/*
		 * Align is a power of two in the [2, 2048] range.
		 */
		align = 1 << get_random_u32_inclusive(1, 11);

		pcpu[i] = __alloc_percpu(size, align);
		if (!pcpu[i])
			rv = -1;
	}

	for (i = 0; i < 35000; i++)
		free_percpu(pcpu[i]);

	vfree(pcpu);
#endif
	return rv;
}

struct test_kvfree_rcu {
	struct rcu_head rcu;
	unsigned char array[20];
};

static int
kvfree_rcu_1_arg_vmalloc_test(void)
{
	struct test_kvfree_rcu *p;
	int i;

	for (i = 0; i < test_loop_count; i++) {
		p = vmalloc(1 * PAGE_SIZE);
		if (!p)
			return -1;

		p->array[0] = 'a';
		kvfree_rcu_mightsleep(p);
	}

	return 0;
}

static int
kvfree_rcu_2_arg_vmalloc_test(void)
{
	struct test_kvfree_rcu *p;
	int i;

	for (i = 0; i < test_loop_count; i++) {
		p = vmalloc(1 * PAGE_SIZE);
		if (!p)
			return -1;

		p->array[0] = 'a';
		kvfree_rcu(p, rcu);
	}

	return 0;
}

static int
vm_map_ram_test(void)
{
	unsigned long nr_allocated;
	unsigned int map_nr_pages;
	unsigned char *v_ptr;
	struct page **pages;
	int i;

	map_nr_pages = nr_pages > 0 ? nr_pages : 1;
	pages = kcalloc(map_nr_pages, sizeof(struct page *), GFP_KERNEL);
	if (!pages)
		return -1;

	nr_allocated = alloc_pages_bulk(GFP_KERNEL, map_nr_pages, pages);
	if (nr_allocated != map_nr_pages)
		goto cleanup;

	/* Run the test loop. */
	for (i = 0; i < test_loop_count; i++) {
		v_ptr = vm_map_ram(pages, map_nr_pages, NUMA_NO_NODE);
		*v_ptr = 'a';
		vm_unmap_ram(v_ptr, map_nr_pages);
	}

cleanup:
	for (i = 0; i < nr_allocated; i++)
		__free_page(pages[i]);

	kfree(pages);

	/* 0 indicates success. */
	return nr_allocated != map_nr_pages;
}

struct test_case_desc {
	const char *test_name;
	int (*test_func)(void);
	bool xfail;
};

static struct test_case_desc test_case_array[] = {
	{ "fix_size_alloc_test", fix_size_alloc_test, },
	{ "full_fit_alloc_test", full_fit_alloc_test, },
	{ "long_busy_list_alloc_test", long_busy_list_alloc_test, },
	{ "random_size_alloc_test", random_size_alloc_test, },
	{ "fix_align_alloc_test", fix_align_alloc_test, },
	{ "random_size_align_alloc_test", random_size_align_alloc_test, },
	{ "align_shift_alloc_test", align_shift_alloc_test, true },
	{ "pcpu_alloc_test", pcpu_alloc_test, },
	{ "kvfree_rcu_1_arg_vmalloc_test", kvfree_rcu_1_arg_vmalloc_test, },
	{ "kvfree_rcu_2_arg_vmalloc_test", kvfree_rcu_2_arg_vmalloc_test, },
	{ "vm_map_ram_test", vm_map_ram_test, },
	/* Add a new test case here. */
};
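/*
 * A new test case would follow the same pattern as the entries above:
 * a function that returns 0 on success and non-zero on failure, plus an
 * entry in test_case_array and an "id: 2048" line in the run_test_mask
 * description. A minimal, hypothetical sketch:
 *
 *	static int example_alloc_test(void)
 *	{
 *		void *p = vmalloc(PAGE_SIZE);
 *
 *		if (!p)
 *			return -1;
 *
 *		*((__u8 *)p) = 1;
 *		vfree(p);
 *		return 0;
 *	}
 *
 * registered as { "example_alloc_test", example_alloc_test, }.
 */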
struct test_case_data {
	int test_failed;
	int test_xfailed;
	int test_passed;
	u64 time;
};

static struct test_driver {
	struct task_struct *task;
	struct test_case_data data[ARRAY_SIZE(test_case_array)];

	unsigned long start;
	unsigned long stop;
} *tdriver;

static void shuffle_array(int *arr, int n)
{
	int i, j;

	for (i = n - 1; i > 0; i--) {
		/* Pick j uniformly from the [0, i - 1] range. */
		j = get_random_u32_below(i);

		/* Swap indexes. */
		swap(arr[i], arr[j]);
	}
}

static int test_func(void *private)
{
	struct test_driver *t = private;
	int random_array[ARRAY_SIZE(test_case_array)];
	int index, i, j, ret;
	ktime_t kt;
	u64 delta;

	for (i = 0; i < ARRAY_SIZE(test_case_array); i++)
		random_array[i] = i;

	if (!sequential_test_order)
		shuffle_array(random_array, ARRAY_SIZE(test_case_array));

	/*
	 * Block until initialization is done.
	 */
	synchronize_srcu(&prepare_for_test_srcu);

	t->start = get_cycles();
	for (i = 0; i < ARRAY_SIZE(test_case_array); i++) {
		index = random_array[i];

		/*
		 * Skip tests that are not set in run_test_mask.
		 */
		if (!(run_test_mask & (1 << index)))
			continue;

		kt = ktime_get();
		for (j = 0; j < test_repeat_count; j++) {
			ret = test_case_array[index].test_func();

			if (!ret && !test_case_array[index].xfail)
				t->data[index].test_passed++;
			else if (ret && test_case_array[index].xfail)
				t->data[index].test_xfailed++;
			else
				t->data[index].test_failed++;
		}

		/*
		 * Compute the average time one repeat of the test took.
		 */
		delta = (u64) ktime_us_delta(ktime_get(), kt);
		do_div(delta, (u32) test_repeat_count);

		t->data[index].time = delta;
	}
	t->stop = get_cycles();
	test_report_one_done();

	/*
	 * Wait for the kthread_stop() call.
	 */
	while (!kthread_should_stop())
		msleep(10);

	return 0;
}

static int
init_test_configuration(void)
{
	/*
	 * The number of workers is capped at the hard-coded value
	 * USHRT_MAX; such a wide range is kept on purpose, just in
	 * case and for potential heavy stressing.
	 */
	nr_threads = clamp(nr_threads, 1, (int) USHRT_MAX);

	/* Allocate the space for test instances. */
	tdriver = kvcalloc(nr_threads, sizeof(*tdriver), GFP_KERNEL);
	if (tdriver == NULL)
		return -1;

	if (test_repeat_count <= 0)
		test_repeat_count = 1;

	if (test_loop_count <= 0)
		test_loop_count = 1;

	return 0;
}

static void do_concurrent_test(void)
{
	int i, ret, idx;

	/*
	 * Set some basic configuration plus a sanity check.
	 */
	ret = init_test_configuration();
	if (ret < 0)
		return;

	/*
	 * Put all workers on hold.
	 */
	idx = srcu_read_lock(&prepare_for_test_srcu);

	for (i = 0; i < nr_threads; i++) {
		struct test_driver *t = &tdriver[i];

		t->task = kthread_run(test_func, t, "vmalloc_test/%d", i);

		if (!IS_ERR(t->task))
			/* Success. */
			atomic_inc(&test_n_undone);
		else
			pr_err("Failed to start kthread %d\n", i);
	}

	/*
	 * Now let the workers do their job.
	 */
	srcu_read_unlock(&prepare_for_test_srcu, idx);

	/*
	 * Sleep quietly until all workers are done, re-checking every
	 * second. A test can take a long time, and waiting on the
	 * completion without a timeout could trigger a hung-task stack
	 * trace; that is why we use wait_for_completion_timeout() with
	 * an HZ (one second) interval.
	 */
	do {
		ret = wait_for_completion_timeout(&test_all_done_comp, HZ);
	} while (!ret);

	for (i = 0; i < nr_threads; i++) {
		struct test_driver *t = &tdriver[i];
		int j;

		if (!IS_ERR(t->task))
			kthread_stop(t->task);

		for (j = 0; j < ARRAY_SIZE(test_case_array); j++) {
			if (!(run_test_mask & (1 << j)))
				continue;

			pr_info(
				"Summary: %s passed: %d failed: %d xfailed: %d repeat: %d loops: %d avg: %llu usec\n",
				test_case_array[j].test_name,
				t->data[j].test_passed,
				t->data[j].test_failed,
				t->data[j].test_xfailed,
				test_repeat_count, test_loop_count,
				t->data[j].time);
		}

		pr_info("All tests took worker%d=%lu cycles\n",
			i, t->stop - t->start);
	}

	kvfree(tdriver);
}
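/*
 * Each worker prints one summary line per selected test, following the
 * pr_info() format strings above; e.g. (illustrative values only):
 *
 *	Summary: fix_size_alloc_test passed: 1 failed: 0 xfailed: 0 repeat: 1 loops: 1000000 avg: 10 usec
 *	All tests took worker0=123456789 cycles
 */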
static int __init vmalloc_test_init(void)
{
	do_concurrent_test();
	/* Failing here will directly unload the module. */
	return IS_BUILTIN(CONFIG_TEST_VMALLOC) ? 0 : -EAGAIN;
}

#ifdef MODULE
module_init(vmalloc_test_init)
#else
late_initcall(vmalloc_test_init);
#endif

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Uladzislau Rezki");
MODULE_DESCRIPTION("vmalloc test module");
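/*
 * Note: recent kernel trees also carry a wrapper script for this module
 * under tools/testing/selftests/ (test_vmalloc.sh), which drives common
 * smoke/stress parameter combinations so the module does not have to be
 * loaded by hand.
 */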