1 // SPDX-License-Identifier: GPL-2.0 2 3 /* 4 * Test module for stress and analyze performance of vmalloc allocator. 5 * (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com> 6 */ 7 #include <linux/init.h> 8 #include <linux/kernel.h> 9 #include <linux/module.h> 10 #include <linux/vmalloc.h> 11 #include <linux/random.h> 12 #include <linux/kthread.h> 13 #include <linux/moduleparam.h> 14 #include <linux/completion.h> 15 #include <linux/delay.h> 16 #include <linux/rwsem.h> 17 #include <linux/mm.h> 18 #include <linux/rcupdate.h> 19 #include <linux/slab.h> 20 21 #define __param(type, name, init, msg) \ 22 static type name = init; \ 23 module_param(name, type, 0444); \ 24 MODULE_PARM_DESC(name, msg) \ 25 26 __param(int, nr_threads, 0, 27 "Number of workers to perform tests(min: 1 max: USHRT_MAX)"); 28 29 __param(bool, sequential_test_order, false, 30 "Use sequential stress tests order"); 31 32 __param(int, test_repeat_count, 1, 33 "Set test repeat counter"); 34 35 __param(int, test_loop_count, 1000000, 36 "Set test loop counter"); 37 38 __param(int, run_test_mask, INT_MAX, 39 "Set tests specified in the mask.\n\n" 40 "\t\tid: 1, name: fix_size_alloc_test\n" 41 "\t\tid: 2, name: full_fit_alloc_test\n" 42 "\t\tid: 4, name: long_busy_list_alloc_test\n" 43 "\t\tid: 8, name: random_size_alloc_test\n" 44 "\t\tid: 16, name: fix_align_alloc_test\n" 45 "\t\tid: 32, name: random_size_align_alloc_test\n" 46 "\t\tid: 64, name: align_shift_alloc_test\n" 47 "\t\tid: 128, name: pcpu_alloc_test\n" 48 "\t\tid: 256, name: kvfree_rcu_1_arg_vmalloc_test\n" 49 "\t\tid: 512, name: kvfree_rcu_2_arg_vmalloc_test\n" 50 /* Add a new test case description here. */ 51 ); 52 53 /* 54 * Read write semaphore for synchronization of setup 55 * phase that is done in main thread and workers. 56 */ 57 static DECLARE_RWSEM(prepare_for_test_rwsem); 58 59 /* 60 * Completion tracking for worker threads. 61 */ 62 static DECLARE_COMPLETION(test_all_done_comp); 63 static atomic_t test_n_undone = ATOMIC_INIT(0); 64 65 static inline void 66 test_report_one_done(void) 67 { 68 if (atomic_dec_and_test(&test_n_undone)) 69 complete(&test_all_done_comp); 70 } 71 72 static int random_size_align_alloc_test(void) 73 { 74 unsigned long size, align, rnd; 75 void *ptr; 76 int i; 77 78 for (i = 0; i < test_loop_count; i++) { 79 get_random_bytes(&rnd, sizeof(rnd)); 80 81 /* 82 * Maximum 1024 pages, if PAGE_SIZE is 4096. 83 */ 84 align = 1 << (rnd % 23); 85 86 /* 87 * Maximum 10 pages. 88 */ 89 size = ((rnd % 10) + 1) * PAGE_SIZE; 90 91 ptr = __vmalloc_node(size, align, GFP_KERNEL | __GFP_ZERO, 0, 92 __builtin_return_address(0)); 93 if (!ptr) 94 return -1; 95 96 vfree(ptr); 97 } 98 99 return 0; 100 } 101 102 /* 103 * This test case is supposed to be failed. 104 */ 105 static int align_shift_alloc_test(void) 106 { 107 unsigned long align; 108 void *ptr; 109 int i; 110 111 for (i = 0; i < BITS_PER_LONG; i++) { 112 align = ((unsigned long) 1) << i; 113 114 ptr = __vmalloc_node(PAGE_SIZE, align, GFP_KERNEL|__GFP_ZERO, 0, 115 __builtin_return_address(0)); 116 if (!ptr) 117 return -1; 118 119 vfree(ptr); 120 } 121 122 return 0; 123 } 124 125 static int fix_align_alloc_test(void) 126 { 127 void *ptr; 128 int i; 129 130 for (i = 0; i < test_loop_count; i++) { 131 ptr = __vmalloc_node(5 * PAGE_SIZE, THREAD_ALIGN << 1, 132 GFP_KERNEL | __GFP_ZERO, 0, 133 __builtin_return_address(0)); 134 if (!ptr) 135 return -1; 136 137 vfree(ptr); 138 } 139 140 return 0; 141 } 142 143 static int random_size_alloc_test(void) 144 { 145 unsigned int n; 146 void *p; 147 int i; 148 149 for (i = 0; i < test_loop_count; i++) { 150 get_random_bytes(&n, sizeof(i)); 151 n = (n % 100) + 1; 152 153 p = vmalloc(n * PAGE_SIZE); 154 155 if (!p) 156 return -1; 157 158 *((__u8 *)p) = 1; 159 vfree(p); 160 } 161 162 return 0; 163 } 164 165 static int long_busy_list_alloc_test(void) 166 { 167 void *ptr_1, *ptr_2; 168 void **ptr; 169 int rv = -1; 170 int i; 171 172 ptr = vmalloc(sizeof(void *) * 15000); 173 if (!ptr) 174 return rv; 175 176 for (i = 0; i < 15000; i++) 177 ptr[i] = vmalloc(1 * PAGE_SIZE); 178 179 for (i = 0; i < test_loop_count; i++) { 180 ptr_1 = vmalloc(100 * PAGE_SIZE); 181 if (!ptr_1) 182 goto leave; 183 184 ptr_2 = vmalloc(1 * PAGE_SIZE); 185 if (!ptr_2) { 186 vfree(ptr_1); 187 goto leave; 188 } 189 190 *((__u8 *)ptr_1) = 0; 191 *((__u8 *)ptr_2) = 1; 192 193 vfree(ptr_1); 194 vfree(ptr_2); 195 } 196 197 /* Success */ 198 rv = 0; 199 200 leave: 201 for (i = 0; i < 15000; i++) 202 vfree(ptr[i]); 203 204 vfree(ptr); 205 return rv; 206 } 207 208 static int full_fit_alloc_test(void) 209 { 210 void **ptr, **junk_ptr, *tmp; 211 int junk_length; 212 int rv = -1; 213 int i; 214 215 junk_length = fls(num_online_cpus()); 216 junk_length *= (32 * 1024 * 1024 / PAGE_SIZE); 217 218 ptr = vmalloc(sizeof(void *) * junk_length); 219 if (!ptr) 220 return rv; 221 222 junk_ptr = vmalloc(sizeof(void *) * junk_length); 223 if (!junk_ptr) { 224 vfree(ptr); 225 return rv; 226 } 227 228 for (i = 0; i < junk_length; i++) { 229 ptr[i] = vmalloc(1 * PAGE_SIZE); 230 junk_ptr[i] = vmalloc(1 * PAGE_SIZE); 231 } 232 233 for (i = 0; i < junk_length; i++) 234 vfree(junk_ptr[i]); 235 236 for (i = 0; i < test_loop_count; i++) { 237 tmp = vmalloc(1 * PAGE_SIZE); 238 239 if (!tmp) 240 goto error; 241 242 *((__u8 *)tmp) = 1; 243 vfree(tmp); 244 } 245 246 /* Success */ 247 rv = 0; 248 249 error: 250 for (i = 0; i < junk_length; i++) 251 vfree(ptr[i]); 252 253 vfree(ptr); 254 vfree(junk_ptr); 255 256 return rv; 257 } 258 259 static int fix_size_alloc_test(void) 260 { 261 void *ptr; 262 int i; 263 264 for (i = 0; i < test_loop_count; i++) { 265 ptr = vmalloc(3 * PAGE_SIZE); 266 267 if (!ptr) 268 return -1; 269 270 *((__u8 *)ptr) = 0; 271 272 vfree(ptr); 273 } 274 275 return 0; 276 } 277 278 static int 279 pcpu_alloc_test(void) 280 { 281 int rv = 0; 282 #ifndef CONFIG_NEED_PER_CPU_KM 283 void __percpu **pcpu; 284 size_t size, align; 285 int i; 286 287 pcpu = vmalloc(sizeof(void __percpu *) * 35000); 288 if (!pcpu) 289 return -1; 290 291 for (i = 0; i < 35000; i++) { 292 unsigned int r; 293 294 get_random_bytes(&r, sizeof(i)); 295 size = (r % (PAGE_SIZE / 4)) + 1; 296 297 /* 298 * Maximum PAGE_SIZE 299 */ 300 get_random_bytes(&r, sizeof(i)); 301 align = 1 << ((i % 11) + 1); 302 303 pcpu[i] = __alloc_percpu(size, align); 304 if (!pcpu[i]) 305 rv = -1; 306 } 307 308 for (i = 0; i < 35000; i++) 309 free_percpu(pcpu[i]); 310 311 vfree(pcpu); 312 #endif 313 return rv; 314 } 315 316 struct test_kvfree_rcu { 317 struct rcu_head rcu; 318 unsigned char array[20]; 319 }; 320 321 static int 322 kvfree_rcu_1_arg_vmalloc_test(void) 323 { 324 struct test_kvfree_rcu *p; 325 int i; 326 327 for (i = 0; i < test_loop_count; i++) { 328 p = vmalloc(1 * PAGE_SIZE); 329 if (!p) 330 return -1; 331 332 p->array[0] = 'a'; 333 kvfree_rcu(p); 334 } 335 336 return 0; 337 } 338 339 static int 340 kvfree_rcu_2_arg_vmalloc_test(void) 341 { 342 struct test_kvfree_rcu *p; 343 int i; 344 345 for (i = 0; i < test_loop_count; i++) { 346 p = vmalloc(1 * PAGE_SIZE); 347 if (!p) 348 return -1; 349 350 p->array[0] = 'a'; 351 kvfree_rcu(p, rcu); 352 } 353 354 return 0; 355 } 356 357 struct test_case_desc { 358 const char *test_name; 359 int (*test_func)(void); 360 }; 361 362 static struct test_case_desc test_case_array[] = { 363 { "fix_size_alloc_test", fix_size_alloc_test }, 364 { "full_fit_alloc_test", full_fit_alloc_test }, 365 { "long_busy_list_alloc_test", long_busy_list_alloc_test }, 366 { "random_size_alloc_test", random_size_alloc_test }, 367 { "fix_align_alloc_test", fix_align_alloc_test }, 368 { "random_size_align_alloc_test", random_size_align_alloc_test }, 369 { "align_shift_alloc_test", align_shift_alloc_test }, 370 { "pcpu_alloc_test", pcpu_alloc_test }, 371 { "kvfree_rcu_1_arg_vmalloc_test", kvfree_rcu_1_arg_vmalloc_test }, 372 { "kvfree_rcu_2_arg_vmalloc_test", kvfree_rcu_2_arg_vmalloc_test }, 373 /* Add a new test case here. */ 374 }; 375 376 struct test_case_data { 377 int test_failed; 378 int test_passed; 379 u64 time; 380 }; 381 382 static struct test_driver { 383 struct task_struct *task; 384 struct test_case_data data[ARRAY_SIZE(test_case_array)]; 385 386 unsigned long start; 387 unsigned long stop; 388 } *tdriver; 389 390 static void shuffle_array(int *arr, int n) 391 { 392 unsigned int rnd; 393 int i, j, x; 394 395 for (i = n - 1; i > 0; i--) { 396 get_random_bytes(&rnd, sizeof(rnd)); 397 398 /* Cut the range. */ 399 j = rnd % i; 400 401 /* Swap indexes. */ 402 x = arr[i]; 403 arr[i] = arr[j]; 404 arr[j] = x; 405 } 406 } 407 408 static int test_func(void *private) 409 { 410 struct test_driver *t = private; 411 int random_array[ARRAY_SIZE(test_case_array)]; 412 int index, i, j; 413 ktime_t kt; 414 u64 delta; 415 416 for (i = 0; i < ARRAY_SIZE(test_case_array); i++) 417 random_array[i] = i; 418 419 if (!sequential_test_order) 420 shuffle_array(random_array, ARRAY_SIZE(test_case_array)); 421 422 /* 423 * Block until initialization is done. 424 */ 425 down_read(&prepare_for_test_rwsem); 426 427 t->start = get_cycles(); 428 for (i = 0; i < ARRAY_SIZE(test_case_array); i++) { 429 index = random_array[i]; 430 431 /* 432 * Skip tests if run_test_mask has been specified. 433 */ 434 if (!((run_test_mask & (1 << index)) >> index)) 435 continue; 436 437 kt = ktime_get(); 438 for (j = 0; j < test_repeat_count; j++) { 439 if (!test_case_array[index].test_func()) 440 t->data[index].test_passed++; 441 else 442 t->data[index].test_failed++; 443 } 444 445 /* 446 * Take an average time that test took. 447 */ 448 delta = (u64) ktime_us_delta(ktime_get(), kt); 449 do_div(delta, (u32) test_repeat_count); 450 451 t->data[index].time = delta; 452 } 453 t->stop = get_cycles(); 454 455 up_read(&prepare_for_test_rwsem); 456 test_report_one_done(); 457 458 /* 459 * Wait for the kthread_stop() call. 460 */ 461 while (!kthread_should_stop()) 462 msleep(10); 463 464 return 0; 465 } 466 467 static int 468 init_test_configurtion(void) 469 { 470 /* 471 * A maximum number of workers is defined as hard-coded 472 * value and set to USHRT_MAX. We add such gap just in 473 * case and for potential heavy stressing. 474 */ 475 nr_threads = clamp(nr_threads, 1, (int) USHRT_MAX); 476 477 /* Allocate the space for test instances. */ 478 tdriver = kvcalloc(nr_threads, sizeof(*tdriver), GFP_KERNEL); 479 if (tdriver == NULL) 480 return -1; 481 482 if (test_repeat_count <= 0) 483 test_repeat_count = 1; 484 485 if (test_loop_count <= 0) 486 test_loop_count = 1; 487 488 return 0; 489 } 490 491 static void do_concurrent_test(void) 492 { 493 int i, ret; 494 495 /* 496 * Set some basic configurations plus sanity check. 497 */ 498 ret = init_test_configurtion(); 499 if (ret < 0) 500 return; 501 502 /* 503 * Put on hold all workers. 504 */ 505 down_write(&prepare_for_test_rwsem); 506 507 for (i = 0; i < nr_threads; i++) { 508 struct test_driver *t = &tdriver[i]; 509 510 t->task = kthread_run(test_func, t, "vmalloc_test/%d", i); 511 512 if (!IS_ERR(t->task)) 513 /* Success. */ 514 atomic_inc(&test_n_undone); 515 else 516 pr_err("Failed to start %d kthread\n", i); 517 } 518 519 /* 520 * Now let the workers do their job. 521 */ 522 up_write(&prepare_for_test_rwsem); 523 524 /* 525 * Sleep quiet until all workers are done with 1 second 526 * interval. Since the test can take a lot of time we 527 * can run into a stack trace of the hung task. That is 528 * why we go with completion_timeout and HZ value. 529 */ 530 do { 531 ret = wait_for_completion_timeout(&test_all_done_comp, HZ); 532 } while (!ret); 533 534 for (i = 0; i < nr_threads; i++) { 535 struct test_driver *t = &tdriver[i]; 536 int j; 537 538 if (!IS_ERR(t->task)) 539 kthread_stop(t->task); 540 541 for (j = 0; j < ARRAY_SIZE(test_case_array); j++) { 542 if (!((run_test_mask & (1 << j)) >> j)) 543 continue; 544 545 pr_info( 546 "Summary: %s passed: %d failed: %d repeat: %d loops: %d avg: %llu usec\n", 547 test_case_array[j].test_name, 548 t->data[j].test_passed, 549 t->data[j].test_failed, 550 test_repeat_count, test_loop_count, 551 t->data[j].time); 552 } 553 554 pr_info("All test took worker%d=%lu cycles\n", 555 i, t->stop - t->start); 556 } 557 558 kvfree(tdriver); 559 } 560 561 static int vmalloc_test_init(void) 562 { 563 do_concurrent_test(); 564 return -EAGAIN; /* Fail will directly unload the module */ 565 } 566 567 static void vmalloc_test_exit(void) 568 { 569 } 570 571 module_init(vmalloc_test_init) 572 module_exit(vmalloc_test_exit) 573 574 MODULE_LICENSE("GPL"); 575 MODULE_AUTHOR("Uladzislau Rezki"); 576 MODULE_DESCRIPTION("vmalloc test module"); 577