// SPDX-License-Identifier: GPL-2.0

/*
 * Test module for lockless object pool
 *
 * Copyright: wuqiang.matt@bytedance.com
 */

#include <linux/errno.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/completion.h>
#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/delay.h>
#include <linux/hrtimer.h>
#include <linux/objpool.h>

#define OT_NR_MAX_BULK	(16)

/* memory usage */
struct ot_mem_stat {
	atomic_long_t alloc;
	atomic_long_t free;
};

/* object allocation results */
struct ot_obj_stat {
	unsigned long nhits;
	unsigned long nmiss;
};

/* control & results per testcase */
struct ot_data {
	struct rw_semaphore start;
	struct completion wait;
	struct completion rcu;
	atomic_t nthreads ____cacheline_aligned_in_smp;
	atomic_t stop ____cacheline_aligned_in_smp;
	struct ot_mem_stat kmalloc;
	struct ot_mem_stat vmalloc;
	struct ot_obj_stat objects;
	u64 duration;
};

/* testcase */
struct ot_test {
	int async; /* synchronous or asynchronous */
	int mode; /* only mode 0 supported */
	int objsz; /* object size */
	int duration; /* ms */
	int delay; /* ms */
	int bulk_normal;
	int bulk_irq;
	unsigned long hrtimer; /* ms */
	const char *name;
	struct ot_data data;
};

/* per-cpu worker */
struct ot_item {
	struct objpool_head *pool; /* pool head */
	struct ot_test *test; /* test parameters */

	void (*worker)(struct ot_item *item, int irq);

	/* hrtimer control */
	ktime_t hrtcycle;
	struct hrtimer hrtimer;

	int bulk[2]; /* for thread and irq */
	int delay;
	u32 niters;

	/* summary per thread */
	struct ot_obj_stat stat[2]; /* thread and irq */
	u64 duration;
};

/*
 * memory leakage checking
 */

static void *ot_kzalloc(struct ot_test *test, long size)
{
	void *ptr = kzalloc(size, GFP_KERNEL);

	if (ptr)
		atomic_long_add(size, &test->data.kmalloc.alloc);
	return ptr;
}

static void ot_kfree(struct ot_test *test, void *ptr, long size)
{
	if (!ptr)
		return;
	atomic_long_add(size, &test->data.kmalloc.free);
	kfree(ptr);
}

static void ot_mem_report(struct ot_test *test)
{
	long alloc, free;

	pr_info("memory allocation summary for %s\n", test->name);

	alloc = atomic_long_read(&test->data.kmalloc.alloc);
	free = atomic_long_read(&test->data.kmalloc.free);
	pr_info(" kmalloc: %lu - %lu = %lu\n", alloc, free, alloc - free);

	alloc = atomic_long_read(&test->data.vmalloc.alloc);
	free = atomic_long_read(&test->data.vmalloc.free);
	pr_info(" vmalloc: %lu - %lu = %lu\n", alloc, free, alloc - free);
}

/* user object instance */
struct ot_node {
	void *owner;
	unsigned long data;
	unsigned long refs;
	unsigned long payload[32];
};

/* user objpool manager */
struct ot_context {
	struct objpool_head pool; /* objpool head */
	struct ot_test *test; /* test parameters */
	void *ptr; /* user pool buffer */
	unsigned long size; /* buffer size */
	struct rcu_head rcu;
};

static DEFINE_PER_CPU(struct ot_item, ot_pcup_items);

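/*
 * Per-testcase synchronization scheme (as used below):
 *
 * - data->start is a rw_semaphore acting as a start gate: the control
 *   thread holds it for write while spawning workers, and each worker
 *   blocks in down_read() until the gate is released.
 * - data->nthreads is seeded to 1 so the control thread holds one
 *   reference; whoever performs the final decrement completes data->wait.
 * - data->stop is the shared stop flag, set with release semantics and
 *   read with acquire semantics by workers and hrtimer handlers.
 * - data->rcu is completed by the asynchronous finalizer once its RCU
 *   callback has run.
 */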
static int ot_init_data(struct ot_data *data)
{
	memset(data, 0, sizeof(*data));
	init_rwsem(&data->start);
	init_completion(&data->wait);
	init_completion(&data->rcu);
	atomic_set(&data->nthreads, 1);

	return 0;
}

static int ot_init_node(void *nod, void *context)
{
	struct ot_context *sop = context;
	struct ot_node *on = nod;

	on->owner = &sop->pool;
	return 0;
}

static enum hrtimer_restart ot_hrtimer_handler(struct hrtimer *hrt)
{
	struct ot_item *item = container_of(hrt, struct ot_item, hrtimer);
	struct ot_test *test = item->test;

	if (atomic_read_acquire(&test->data.stop))
		return HRTIMER_NORESTART;

	/* do bulk-testing for object pop/push */
	item->worker(item, 1);

	hrtimer_forward(hrt, hrt->base->get_time(), item->hrtcycle);
	return HRTIMER_RESTART;
}

static void ot_start_hrtimer(struct ot_item *item)
{
	if (!item->test->hrtimer)
		return;
	hrtimer_start(&item->hrtimer, item->hrtcycle, HRTIMER_MODE_REL);
}

static void ot_stop_hrtimer(struct ot_item *item)
{
	if (!item->test->hrtimer)
		return;
	hrtimer_cancel(&item->hrtimer);
}

static int ot_init_hrtimer(struct ot_item *item, unsigned long hrtimer)
{
	struct hrtimer *hrt = &item->hrtimer;

	if (!hrtimer)
		return -ENOENT;

	item->hrtcycle = ktime_set(0, hrtimer * 1000000UL);
	hrtimer_init(hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	hrt->function = ot_hrtimer_handler;
	return 0;
}

static int ot_init_cpu_item(struct ot_item *item,
			struct ot_test *test,
			struct objpool_head *pool,
			void (*worker)(struct ot_item *, int))
{
	memset(item, 0, sizeof(*item));
	item->pool = pool;
	item->test = test;
	item->worker = worker;

	item->bulk[0] = test->bulk_normal;
	item->bulk[1] = test->bulk_irq;
	item->delay = test->delay;

	/* initialize hrtimer */
	ot_init_hrtimer(item, item->test->hrtimer);
	return 0;
}

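/*
 * Per-cpu worker thread: registers itself in data->nthreads, parks on
 * the read side of the start rwsem until the control thread opens the
 * gate, then repeatedly invokes item->worker() (optionally alongside a
 * per-cpu hrtimer emulating irq context) until the stop flag is seen.
 * The last thread to drop data->nthreads signals data->wait.
 */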
static int ot_thread_worker(void *arg)
{
	struct ot_item *item = arg;
	struct ot_test *test = item->test;
	ktime_t start;

	atomic_inc(&test->data.nthreads);
	down_read(&test->data.start);
	up_read(&test->data.start);
	start = ktime_get();
	ot_start_hrtimer(item);
	do {
		if (atomic_read_acquire(&test->data.stop))
			break;
		/* do bulk-testing for object pop/push */
		item->worker(item, 0);
	} while (!kthread_should_stop());
	ot_stop_hrtimer(item);
	item->duration = (u64) ktime_us_delta(ktime_get(), start);
	if (atomic_dec_and_test(&test->data.nthreads))
		complete(&test->data.wait);

	return 0;
}

static void ot_perf_report(struct ot_test *test, u64 duration)
{
	struct ot_obj_stat total, normal = {0}, irq = {0};
	int cpu, nthreads = 0;

	pr_info("\n");
	pr_info("Testing summary for %s\n", test->name);

	for_each_possible_cpu(cpu) {
		struct ot_item *item = per_cpu_ptr(&ot_pcup_items, cpu);
		if (!item->duration)
			continue;
		normal.nhits += item->stat[0].nhits;
		normal.nmiss += item->stat[0].nmiss;
		irq.nhits += item->stat[1].nhits;
		irq.nmiss += item->stat[1].nmiss;
		pr_info("CPU: %d duration: %lluus\n", cpu, item->duration);
		pr_info("\tthread:\t%16lu hits \t%16lu miss\n",
			item->stat[0].nhits, item->stat[0].nmiss);
		pr_info("\tirq: \t%16lu hits \t%16lu miss\n",
			item->stat[1].nhits, item->stat[1].nmiss);
		pr_info("\ttotal: \t%16lu hits \t%16lu miss\n",
			item->stat[0].nhits + item->stat[1].nhits,
			item->stat[0].nmiss + item->stat[1].nmiss);
		nthreads++;
	}

	total.nhits = normal.nhits + irq.nhits;
	total.nmiss = normal.nmiss + irq.nmiss;

	pr_info("ALL: \tnthreads: %d duration: %lluus\n", nthreads, duration);
	pr_info("SUM: \t%16lu hits \t%16lu miss\n",
		total.nhits, total.nmiss);

	test->data.objects = total;
	test->data.duration = duration;
}

/*
 * synchronous test cases for objpool manipulation
 */

/* objpool manipulation for synchronous mode (percpu objpool) */
static struct ot_context *ot_init_sync_m0(struct ot_test *test)
{
	struct ot_context *sop = NULL;
	int max = num_possible_cpus() << 3;
	gfp_t gfp = GFP_KERNEL;

	sop = (struct ot_context *)ot_kzalloc(test, sizeof(*sop));
	if (!sop)
		return NULL;
	sop->test = test;
	if (test->objsz < 512)
		gfp = GFP_ATOMIC;

	if (objpool_init(&sop->pool, max, test->objsz,
			 gfp, sop, ot_init_node, NULL)) {
		ot_kfree(test, sop, sizeof(*sop));
		return NULL;
	}
	WARN_ON(max != sop->pool.nr_objs);

	return sop;
}

static void ot_fini_sync(struct ot_context *sop)
{
	objpool_fini(&sop->pool);
	ot_kfree(sop->test, sop, sizeof(*sop));
}

static struct {
	struct ot_context * (*init)(struct ot_test *oc);
	void (*fini)(struct ot_context *sop);
} g_ot_sync_ops[] = {
	{.init = ot_init_sync_m0, .fini = ot_fini_sync},
};

/*
 * synchronous test cases: performance mode
 */

static void ot_bulk_sync(struct ot_item *item, int irq)
{
	struct ot_node *nods[OT_NR_MAX_BULK];
	int i;

	for (i = 0; i < item->bulk[irq]; i++)
		nods[i] = objpool_pop(item->pool);

	if (!irq && (item->delay || !(++(item->niters) & 0x7FFF)))
		msleep(item->delay);

	while (i-- > 0) {
		struct ot_node *on = nods[i];
		if (on) {
			on->refs++;
			objpool_push(on, item->pool);
			item->stat[irq].nhits++;
		} else {
			item->stat[irq].nmiss++;
		}
	}
}

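/*
 * Control flow of a synchronous testcase: initialize the objpool, hold
 * the start gate for write, spawn one worker kthread per online cpu,
 * release the gate to start all workers at once, sleep for
 * test->duration ms, set the stop flag, wait for all workers to finish,
 * then finalize the pool and print the performance and memory reports.
 */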
static int ot_start_sync(struct ot_test *test)
{
	struct ot_context *sop;
	ktime_t start;
	u64 duration;
	unsigned long timeout;
	int cpu;

	/* initialize objpool for the synchronous testcase */
	sop = g_ot_sync_ops[test->mode].init(test);
	if (!sop)
		return -ENOMEM;

	/* grab rwsem to block testing threads */
	down_write(&test->data.start);

	for_each_possible_cpu(cpu) {
		struct ot_item *item = per_cpu_ptr(&ot_pcup_items, cpu);
		struct task_struct *work;

		ot_init_cpu_item(item, test, &sop->pool, ot_bulk_sync);

		/* skip offline cpus */
		if (!cpu_online(cpu))
			continue;

		work = kthread_run_on_cpu(ot_thread_worker, item,
					  cpu, "ot_worker_%d");
		if (IS_ERR(work))
			pr_err("failed to create thread for cpu %d\n", cpu);
	}

	/* wait a while to make sure all threads are waiting at the start line */
	msleep(20);

	/* in case no threads were created: insufficient memory? */
	if (atomic_dec_and_test(&test->data.nthreads))
		complete(&test->data.wait);

	// sched_set_fifo_low(current);

	/* start objpool testing threads */
	start = ktime_get();
	up_write(&test->data.start);

	/* yield cpu to worker threads for duration ms */
	timeout = msecs_to_jiffies(test->duration);
	schedule_timeout_interruptible(timeout);

	/* tell worker threads to quit */
	atomic_set_release(&test->data.stop, 1);

	/* wait for all worker threads to finish and quit */
	wait_for_completion(&test->data.wait);
	duration = (u64) ktime_us_delta(ktime_get(), start);

	/* cleanup objpool */
	g_ot_sync_ops[test->mode].fini(sop);

	/* report testing summary and performance results */
	ot_perf_report(test, duration);

	/* report memory allocation summary */
	ot_mem_report(test);

	return 0;
}

/*
 * asynchronous test cases: pool lifecycle controlled by refcount
 */

static void ot_fini_async_rcu(struct rcu_head *rcu)
{
	struct ot_context *sop = container_of(rcu, struct ot_context, rcu);
	struct ot_test *test = sop->test;

	/* here all cpus are aware of the stop event: test->data.stop = 1 */
	WARN_ON(!atomic_read_acquire(&test->data.stop));

	objpool_fini(&sop->pool);
	complete(&test->data.rcu);
}

static void ot_fini_async(struct ot_context *sop)
{
	/* make sure the stop event is acknowledged by all cores */
	call_rcu(&sop->rcu, ot_fini_async_rcu);
}

static int ot_objpool_release(struct objpool_head *head, void *context)
{
	struct ot_context *sop = context;

	WARN_ON(!head || !sop || head != &sop->pool);

	/* do context cleaning if needed */
	if (sop)
		ot_kfree(sop->test, sop, sizeof(*sop));

	return 0;
}

static struct ot_context *ot_init_async_m0(struct ot_test *test)
{
	struct ot_context *sop = NULL;
	int max = num_possible_cpus() << 3;
	gfp_t gfp = GFP_KERNEL;

	sop = (struct ot_context *)ot_kzalloc(test, sizeof(*sop));
	if (!sop)
		return NULL;
	sop->test = test;
	if (test->objsz < 512)
		gfp = GFP_ATOMIC;

	if (objpool_init(&sop->pool, max, test->objsz, gfp, sop,
			 ot_init_node, ot_objpool_release)) {
		ot_kfree(test, sop, sizeof(*sop));
		return NULL;
	}
	WARN_ON(max != sop->pool.nr_objs);

	return sop;
}

static struct {
	struct ot_context * (*init)(struct ot_test *oc);
	void (*fini)(struct ot_context *sop);
} g_ot_async_ops[] = {
	{.init = ot_init_async_m0, .fini = ot_fini_async},
};

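/*
 * Recycling path for the asynchronous testcases: while the test is
 * running, popped objects are pushed back for reuse; once the stop
 * flag has been observed, each object is removed for good with
 * objpool_drop(), which also drops a pool reference so the last drop
 * (or the RCU finalizer) releases the pool via ot_objpool_release().
 */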
static void ot_nod_recycle(struct ot_node *on, struct objpool_head *pool,
			int release)
{
	struct ot_context *sop;

	on->refs++;

	if (!release) {
		/* push object back to objpool for reuse */
		objpool_push(on, pool);
		return;
	}

	sop = container_of(pool, struct ot_context, pool);
	WARN_ON(sop != pool->context);

	/* unref objpool with the node removed forever */
	objpool_drop(on, pool);
}

static void ot_bulk_async(struct ot_item *item, int irq)
{
	struct ot_test *test = item->test;
	struct ot_node *nods[OT_NR_MAX_BULK];
	int i, stop;

	for (i = 0; i < item->bulk[irq]; i++)
		nods[i] = objpool_pop(item->pool);

	if (!irq) {
		if (item->delay || !(++(item->niters) & 0x7FFF))
			msleep(item->delay);
		get_cpu();
	}

	stop = atomic_read_acquire(&test->data.stop);

	/* drop all objects and deref objpool */
	while (i-- > 0) {
		struct ot_node *on = nods[i];

		if (on) {
			on->refs++;
			ot_nod_recycle(on, item->pool, stop);
			item->stat[irq].nhits++;
		} else {
			item->stat[irq].nmiss++;
		}
	}

	if (!irq)
		put_cpu();
}

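/*
 * Control flow of an asynchronous testcase: same start/stop protocol as
 * the synchronous case, except that finalization is deferred: once the
 * stop flag is set, ot_fini_async() schedules an RCU callback which
 * calls objpool_fini(), and the pool memory is only released after the
 * last outstanding reference has been dropped.
 */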
static int ot_start_async(struct ot_test *test)
{
	struct ot_context *sop;
	ktime_t start;
	u64 duration;
	unsigned long timeout;
	int cpu;

	/* initialize objpool for the asynchronous testcase */
	sop = g_ot_async_ops[test->mode].init(test);
	if (!sop)
		return -ENOMEM;

	/* grab rwsem to block testing threads */
	down_write(&test->data.start);

	for_each_possible_cpu(cpu) {
		struct ot_item *item = per_cpu_ptr(&ot_pcup_items, cpu);
		struct task_struct *work;

		ot_init_cpu_item(item, test, &sop->pool, ot_bulk_async);

		/* skip offline cpus */
		if (!cpu_online(cpu))
			continue;

		work = kthread_run_on_cpu(ot_thread_worker, item, cpu, "ot_worker_%d");
		if (IS_ERR(work))
			pr_err("failed to create thread for cpu %d\n", cpu);
	}

	/* wait a while to make sure all threads are waiting at the start line */
	msleep(20);

	/* in case no threads were created: insufficient memory? */
	if (atomic_dec_and_test(&test->data.nthreads))
		complete(&test->data.wait);

	/* start objpool testing threads */
	start = ktime_get();
	up_write(&test->data.start);

	/* yield cpu to worker threads for duration ms */
	timeout = msecs_to_jiffies(test->duration);
	schedule_timeout_interruptible(timeout);

	/* tell worker threads to quit */
	atomic_set_release(&test->data.stop, 1);

	/* do async-finalization */
	g_ot_async_ops[test->mode].fini(sop);

	/* wait for all worker threads to finish and quit */
	wait_for_completion(&test->data.wait);
	duration = (u64) ktime_us_delta(ktime_get(), start);

	/* make sure the rcu callback has been triggered */
	wait_for_completion(&test->data.rcu);

	/*
	 * now we are sure that the objpool is finalized either
	 * by the rcu callback or by the worker threads
	 */

	/* report testing summary and performance results */
	ot_perf_report(test, duration);

	/* report memory allocation summary */
	ot_mem_report(test);

	return 0;
}

/*
 * predefined testing cases:
 * synchronous case / overrun case / async case
 *
 * async: 0 for synchronous, non-zero for asynchronous testing
 * mode: only mode 0 supported
 * objsz: object size
 * duration: int, total test time in ms
 * delay: int, delay (in ms) between each iteration
 * bulk_normal: int, objects popped/pushed per iteration in thread context
 * bulk_irq: int, objects popped/pushed per iteration in (hrtimer-emulated) irq context
 * hrtimer: unsigned long, hrtimer interval in ms (0: disabled)
 * name: const char *, tag for the current testcase
 */

#define NODE_COMPACT sizeof(struct ot_node)
#define NODE_VMALLOC (512)

static struct ot_test g_testcases[] = {

	/* sync & normal */
	{0, 0, NODE_COMPACT, 1000, 0, 1, 0, 0, "sync: percpu objpool"},
	{0, 0, NODE_VMALLOC, 1000, 0, 1, 0, 0, "sync: percpu objpool from vmalloc"},

	/* sync & hrtimer */
	{0, 0, NODE_COMPACT, 1000, 0, 1, 1, 4, "sync & hrtimer: percpu objpool"},
	{0, 0, NODE_VMALLOC, 1000, 0, 1, 1, 4, "sync & hrtimer: percpu objpool from vmalloc"},

	/* sync & overrun */
	{0, 0, NODE_COMPACT, 1000, 0, 16, 0, 0, "sync overrun: percpu objpool"},
	{0, 0, NODE_VMALLOC, 1000, 0, 16, 0, 0, "sync overrun: percpu objpool from vmalloc"},

	/* async mode */
	{1, 0, NODE_COMPACT, 1000, 100, 1, 0, 0, "async: percpu objpool"},
	{1, 0, NODE_VMALLOC, 1000, 100, 1, 0, 0, "async: percpu objpool from vmalloc"},

	/* async + hrtimer mode */
	{1, 0, NODE_COMPACT, 1000, 0, 4, 4, 4, "async & hrtimer: percpu objpool"},
	{1, 0, NODE_VMALLOC, 1000, 0, 4, 4, 4, "async & hrtimer: percpu objpool from vmalloc"},
};

static int __init ot_mod_init(void)
{
	int i;

	/* perform tests */
	for (i = 0; i < ARRAY_SIZE(g_testcases); i++) {
		ot_init_data(&g_testcases[i].data);
		if (g_testcases[i].async)
			ot_start_async(&g_testcases[i]);
		else
			ot_start_sync(&g_testcases[i]);
	}

	/* show tests summary */
	pr_info("\n");
	pr_info("Summary of testcases:\n");
	for (i = 0; i < ARRAY_SIZE(g_testcases); i++) {
		pr_info(" duration: %lluus \thits: %10lu \tmiss: %10lu \t%s\n",
			g_testcases[i].data.duration, g_testcases[i].data.objects.nhits,
			g_testcases[i].data.objects.nmiss, g_testcases[i].name);
	}

	/* always fail the load so the module does not stay resident */
	return -EAGAIN;
}

static void __exit ot_mod_exit(void)
{
}

module_init(ot_mod_init);
module_exit(ot_mod_exit);

MODULE_DESCRIPTION("Test module for lockless object pool");
MODULE_LICENSE("GPL");