// SPDX-License-Identifier: GPL-2.0

/*
 * Test module for lockless object pool
 *
 * Copyright: wuqiang.matt@bytedance.com
 */

#include <linux/errno.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/completion.h>
#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/delay.h>
#include <linux/hrtimer.h>
#include <linux/objpool.h>

#define OT_NR_MAX_BULK (16)

/* memory usage */
struct ot_mem_stat {
	atomic_long_t alloc;
	atomic_long_t free;
};

/* object allocation results */
struct ot_obj_stat {
	unsigned long nhits;
	unsigned long nmiss;
};

/* control & results per testcase */
struct ot_data {
	struct rw_semaphore start;
	struct completion wait;
	struct completion rcu;
	atomic_t nthreads ____cacheline_aligned_in_smp;
	atomic_t stop ____cacheline_aligned_in_smp;
	struct ot_mem_stat kmalloc;
	struct ot_mem_stat vmalloc;
	struct ot_obj_stat objects;
	u64 duration;
};

/* testcase */
struct ot_test {
	int async; /* synchronous or asynchronous */
	int mode; /* only mode 0 supported */
	int objsz; /* object size */
	int duration; /* ms */
	int delay; /* ms */
	int bulk_normal;
	int bulk_irq;
	unsigned long hrtimer; /* ms */
	const char *name;
	struct ot_data data;
};

/* per-cpu worker */
struct ot_item {
	struct objpool_head *pool; /* pool head */
	struct ot_test *test; /* test parameters */

	void (*worker)(struct ot_item *item, int irq);

	/* hrtimer control */
	ktime_t hrtcycle;
	struct hrtimer hrtimer;

	int bulk[2]; /* for thread and irq */
	int delay;
	u32 niters;

	/* summary per thread */
	struct ot_obj_stat stat[2]; /* thread and irq */
	u64 duration;
};

/*
 * memory leakage checking
 */

static void *ot_kzalloc(struct ot_test *test, long size)
{
	void *ptr = kzalloc(size, GFP_KERNEL);

	if (ptr)
		atomic_long_add(size, &test->data.kmalloc.alloc);
	return ptr;
}

static void ot_kfree(struct ot_test *test, void *ptr, long size)
{
	if (!ptr)
		return;
	atomic_long_add(size, &test->data.kmalloc.free);
	kfree(ptr);
}

static void ot_mem_report(struct ot_test *test)
{
	long alloc, free;

	pr_info("memory allocation summary for %s\n", test->name);

	alloc = atomic_long_read(&test->data.kmalloc.alloc);
	free = atomic_long_read(&test->data.kmalloc.free);
	pr_info(" kmalloc: %lu - %lu = %lu\n", alloc, free, alloc - free);

	alloc = atomic_long_read(&test->data.vmalloc.alloc);
	free = atomic_long_read(&test->data.vmalloc.free);
	pr_info(" vmalloc: %lu - %lu = %lu\n", alloc, free, alloc - free);
}
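
/*
 * For reference, the report above prints one line per allocator in the
 * form "alloc - free = outstanding", e.g. (numbers purely illustrative):
 *
 *	memory allocation summary for sync: percpu objpool
 *	 kmalloc: 168 - 168 = 0
 *	 vmalloc: 0 - 0 = 0
 *
 * A nonzero difference would indicate that this module leaked some of
 * its own bookkeeping allocations. Only the kmalloc counters are ever
 * updated by the helpers in this file; the vmalloc pair always reads 0.
 */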

/* user object instance */
struct ot_node {
	void *owner;
	unsigned long data;
	unsigned long refs;
	unsigned long payload[32];
};

/* user objpool manager */
struct ot_context {
	struct objpool_head pool; /* objpool head */
	struct ot_test *test; /* test parameters */
	void *ptr; /* user pool buffer */
	unsigned long size; /* buffer size */
	struct rcu_head rcu;
};

static DEFINE_PER_CPU(struct ot_item, ot_pcup_items);

static int ot_init_data(struct ot_data *data)
{
	memset(data, 0, sizeof(*data));
	init_rwsem(&data->start);
	init_completion(&data->wait);
	init_completion(&data->rcu);
	/* start from 1: the launcher drops this bias after spawning workers */
	atomic_set(&data->nthreads, 1);

	return 0;
}

static int ot_init_node(void *nod, void *context)
{
	struct ot_context *sop = context;
	struct ot_node *on = nod;

	on->owner = &sop->pool;
	return 0;
}

static enum hrtimer_restart ot_hrtimer_handler(struct hrtimer *hrt)
{
	struct ot_item *item = container_of(hrt, struct ot_item, hrtimer);
	struct ot_test *test = item->test;

	if (atomic_read_acquire(&test->data.stop))
		return HRTIMER_NORESTART;

	/* do a bulk round of object pop/push tests */
	item->worker(item, 1);

	hrtimer_forward(hrt, hrt->base->get_time(), item->hrtcycle);
	return HRTIMER_RESTART;
}

static void ot_start_hrtimer(struct ot_item *item)
{
	if (!item->test->hrtimer)
		return;
	hrtimer_start(&item->hrtimer, item->hrtcycle, HRTIMER_MODE_REL);
}

static void ot_stop_hrtimer(struct ot_item *item)
{
	if (!item->test->hrtimer)
		return;
	hrtimer_cancel(&item->hrtimer);
}

static int ot_init_hrtimer(struct ot_item *item, unsigned long hrtimer)
{
	struct hrtimer *hrt = &item->hrtimer;

	if (!hrtimer)
		return -ENOENT;

	item->hrtcycle = ktime_set(0, hrtimer * 1000000UL);
	hrtimer_setup(hrt, ot_hrtimer_handler, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	return 0;
}

static int ot_init_cpu_item(struct ot_item *item,
			    struct ot_test *test,
			    struct objpool_head *pool,
			    void (*worker)(struct ot_item *, int))
{
	memset(item, 0, sizeof(*item));
	item->pool = pool;
	item->test = test;
	item->worker = worker;

	item->bulk[0] = test->bulk_normal;
	item->bulk[1] = test->bulk_irq;
	item->delay = test->delay;

	/* initialize hrtimer */
	ot_init_hrtimer(item, item->test->hrtimer);
	return 0;
}

static int ot_thread_worker(void *arg)
{
	struct ot_item *item = arg;
	struct ot_test *test = item->test;
	ktime_t start;

	atomic_inc(&test->data.nthreads);
	/* wait at the start line until the launcher releases the rwsem */
	down_read(&test->data.start);
	up_read(&test->data.start);
	start = ktime_get();
	ot_start_hrtimer(item);
	do {
		if (atomic_read_acquire(&test->data.stop))
			break;
		/* do a bulk round of object pop/push tests */
		item->worker(item, 0);
	} while (!kthread_should_stop());
	ot_stop_hrtimer(item);
	item->duration = (u64) ktime_us_delta(ktime_get(), start);
	if (atomic_dec_and_test(&test->data.nthreads))
		complete(&test->data.wait);

	return 0;
}

static void ot_perf_report(struct ot_test *test, u64 duration)
{
	struct ot_obj_stat total, normal = {0}, irq = {0};
	int cpu, nthreads = 0;

	pr_info("\n");
	pr_info("Testing summary for %s\n", test->name);

	for_each_possible_cpu(cpu) {
		struct ot_item *item = per_cpu_ptr(&ot_pcup_items, cpu);
		if (!item->duration)
			continue;
		normal.nhits += item->stat[0].nhits;
		normal.nmiss += item->stat[0].nmiss;
		irq.nhits += item->stat[1].nhits;
		irq.nmiss += item->stat[1].nmiss;
		pr_info("CPU: %d duration: %lluus\n", cpu, item->duration);
		pr_info("\tthread:\t%16lu hits \t%16lu miss\n",
			item->stat[0].nhits, item->stat[0].nmiss);
		pr_info("\tirq: \t%16lu hits \t%16lu miss\n",
			item->stat[1].nhits, item->stat[1].nmiss);
		pr_info("\ttotal: \t%16lu hits \t%16lu miss\n",
			item->stat[0].nhits + item->stat[1].nhits,
			item->stat[0].nmiss + item->stat[1].nmiss);
		nthreads++;
	}

	total.nhits = normal.nhits + irq.nhits;
	total.nmiss = normal.nmiss + irq.nmiss;

	pr_info("ALL: \tnthreads: %d duration: %lluus\n",
		nthreads, duration);
	pr_info("SUM: \t%16lu hits \t%16lu miss\n",
		total.nhits, total.nmiss);

	test->data.objects = total;
	test->data.duration = duration;
}
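
/*
 * Illustrative shape of the output of ot_perf_report() above (all
 * numbers are made up):
 *
 *	Testing summary for sync: percpu objpool
 *	CPU: 0 duration: 1000074us
 *		thread:          1523410 hits                  0 miss
 *		irq:                   0 hits                  0 miss
 *		total:           1523410 hits                  0 miss
 *	ALL:	nthreads: 4 duration: 1000215us
 *	SUM:	         6093640 hits                  0 miss
 *
 * A "miss" is an objpool_pop() that returned NULL, i.e. the pool was
 * momentarily empty; misses are to be expected in the "sync overrun"
 * cases further below, where every worker pops 16 objects per round
 * from a pool sized at 8 objects per possible cpu.
 */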

/*
 * synchronous test cases for objpool manipulation
 */

/* objpool manipulation for synchronous mode (percpu objpool) */
static struct ot_context *ot_init_sync_m0(struct ot_test *test)
{
	struct ot_context *sop = NULL;
	int max = num_possible_cpus() << 3;
	gfp_t gfp = GFP_KERNEL;

	sop = (struct ot_context *)ot_kzalloc(test, sizeof(*sop));
	if (!sop)
		return NULL;
	sop->test = test;
	if (test->objsz < 512)
		gfp = GFP_ATOMIC;

	if (objpool_init(&sop->pool, max, test->objsz,
			 gfp, sop, ot_init_node, NULL)) {
		ot_kfree(test, sop, sizeof(*sop));
		return NULL;
	}
	WARN_ON(max != sop->pool.nr_objs);

	return sop;
}

static void ot_fini_sync(struct ot_context *sop)
{
	objpool_fini(&sop->pool);
	ot_kfree(sop->test, sop, sizeof(*sop));
}

static struct {
	struct ot_context * (*init)(struct ot_test *oc);
	void (*fini)(struct ot_context *sop);
} g_ot_sync_ops[] = {
	{.init = ot_init_sync_m0, .fini = ot_fini_sync},
};

/*
 * synchronous test cases: performance mode
 */

static void ot_bulk_sync(struct ot_item *item, int irq)
{
	struct ot_node *nods[OT_NR_MAX_BULK];
	int i;

	for (i = 0; i < item->bulk[irq]; i++)
		nods[i] = objpool_pop(item->pool);

	if (!irq && (item->delay || !(++(item->niters) & 0x7FFF)))
		msleep(item->delay);

	while (i-- > 0) {
		struct ot_node *on = nods[i];
		if (on) {
			on->refs++;
			objpool_push(on, item->pool);
			item->stat[irq].nhits++;
		} else {
			item->stat[irq].nmiss++;
		}
	}
}

static int ot_start_sync(struct ot_test *test)
{
	struct ot_context *sop;
	ktime_t start;
	u64 duration;
	unsigned long timeout;
	int cpu;

	/* initialize objpool for the synchronous testcase */
	sop = g_ot_sync_ops[test->mode].init(test);
	if (!sop)
		return -ENOMEM;

	/* grab rwsem to block testing threads */
	down_write(&test->data.start);

	for_each_possible_cpu(cpu) {
		struct ot_item *item = per_cpu_ptr(&ot_pcup_items, cpu);
		struct task_struct *work;

		ot_init_cpu_item(item, test, &sop->pool, ot_bulk_sync);

		/* skip offline cpus */
		if (!cpu_online(cpu))
			continue;

		work = kthread_run_on_cpu(ot_thread_worker, item,
					  cpu, "ot_worker_%d");
		if (IS_ERR(work))
			pr_err("failed to create thread for cpu %d\n", cpu);
	}

	/* wait a while to make sure all threads are waiting at the start line */
	msleep(20);

	/* in case no threads were created: memory insufficient? */
	if (atomic_dec_and_test(&test->data.nthreads))
		complete(&test->data.wait);

	// sched_set_fifo_low(current);

	/* start objpool testing threads */
	start = ktime_get();
	up_write(&test->data.start);

	/* yield the cpu to worker threads for 'duration' ms */
	timeout = msecs_to_jiffies(test->duration);
	schedule_timeout_interruptible(timeout);

	/* tell worker threads to quit */
	atomic_set_release(&test->data.stop, 1);

	/* wait for all worker threads to finish and quit */
	wait_for_completion(&test->data.wait);
	duration = (u64) ktime_us_delta(ktime_get(), start);

	/* cleanup objpool */
	g_ot_sync_ops[test->mode].fini(sop);

	/* report testing summary and performance results */
	ot_perf_report(test, duration);

	/* report memory allocation summary */
	ot_mem_report(test);

	return 0;
}

/*
 * asynchronous test cases: pool lifecycle controlled by refcount
 */
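
/*
 * Rough sketch of the pool/object lifecycle exercised below, assuming
 * objpool's reference scheme (one reference per live object plus one
 * for the pool itself; the release callback runs once the last
 * reference is dropped):
 *
 *	sop = ot_init_async_m0(test);	// pool created, references held
 *	on  = objpool_pop(&sop->pool);	// worker borrows an object
 *	objpool_push(on, &sop->pool);	// normal path: return it for reuse
 *	...
 *	atomic_set_release(&test->data.stop, 1);
 *	objpool_drop(on, &sop->pool);	// stop path: object retired, ref--
 *	objpool_fini(&sop->pool);	// from the rcu callback, ref--
 *	// last reference gone -> ot_objpool_release() frees the context
 */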

static void ot_fini_async_rcu(struct rcu_head *rcu)
{
	struct ot_context *sop = container_of(rcu, struct ot_context, rcu);
	struct ot_test *test = sop->test;

	/* here all cpus are aware of the stop event: test->data.stop = 1 */
	WARN_ON(!atomic_read_acquire(&test->data.stop));

	objpool_fini(&sop->pool);
	complete(&test->data.rcu);
}

static void ot_fini_async(struct ot_context *sop)
{
	/* make sure the stop event is acknowledged by all cores */
	call_rcu(&sop->rcu, ot_fini_async_rcu);
}

static int ot_objpool_release(struct objpool_head *head, void *context)
{
	struct ot_context *sop = context;

	WARN_ON(!head || !sop || head != &sop->pool);

	/* do context cleanup if needed */
	if (sop)
		ot_kfree(sop->test, sop, sizeof(*sop));

	return 0;
}

static struct ot_context *ot_init_async_m0(struct ot_test *test)
{
	struct ot_context *sop = NULL;
	int max = num_possible_cpus() << 3;
	gfp_t gfp = GFP_KERNEL;

	sop = (struct ot_context *)ot_kzalloc(test, sizeof(*sop));
	if (!sop)
		return NULL;
	sop->test = test;
	if (test->objsz < 512)
		gfp = GFP_ATOMIC;

	if (objpool_init(&sop->pool, max, test->objsz, gfp, sop,
			 ot_init_node, ot_objpool_release)) {
		ot_kfree(test, sop, sizeof(*sop));
		return NULL;
	}
	WARN_ON(max != sop->pool.nr_objs);

	return sop;
}

static struct {
	struct ot_context * (*init)(struct ot_test *oc);
	void (*fini)(struct ot_context *sop);
} g_ot_async_ops[] = {
	{.init = ot_init_async_m0, .fini = ot_fini_async},
};

static void ot_nod_recycle(struct ot_node *on, struct objpool_head *pool,
			   int release)
{
	struct ot_context *sop;

	on->refs++;

	if (!release) {
		/* push object back to the objpool for reuse */
		objpool_push(on, pool);
		return;
	}

	sop = container_of(pool, struct ot_context, pool);
	WARN_ON(sop != pool->context);

	/* drop the object for good and unref the objpool */
	objpool_drop(on, pool);
}

static void ot_bulk_async(struct ot_item *item, int irq)
{
	struct ot_test *test = item->test;
	struct ot_node *nods[OT_NR_MAX_BULK];
	int i, stop;

	for (i = 0; i < item->bulk[irq]; i++)
		nods[i] = objpool_pop(item->pool);

	if (!irq) {
		if (item->delay || !(++(item->niters) & 0x7FFF))
			msleep(item->delay);
		get_cpu();
	}

	stop = atomic_read_acquire(&test->data.stop);

	/* drop all objects and deref objpool */
	while (i-- > 0) {
		struct ot_node *on = nods[i];

		if (on) {
			on->refs++;
			ot_nod_recycle(on, item->pool, stop);
			item->stat[irq].nhits++;
		} else {
			item->stat[irq].nmiss++;
		}
	}

	if (!irq)
		put_cpu();
}

static int ot_start_async(struct ot_test *test)
{
	struct ot_context *sop;
	ktime_t start;
	u64 duration;
	unsigned long timeout;
	int cpu;

	/* initialize objpool for the asynchronous testcase */
	sop = g_ot_async_ops[test->mode].init(test);
	if (!sop)
		return -ENOMEM;

	/* grab rwsem to block testing threads */
	down_write(&test->data.start);

	for_each_possible_cpu(cpu) {
		struct ot_item *item = per_cpu_ptr(&ot_pcup_items, cpu);
		struct task_struct *work;

		ot_init_cpu_item(item, test, &sop->pool, ot_bulk_async);

		/* skip offline cpus */
		if (!cpu_online(cpu))
			continue;

		work = kthread_run_on_cpu(ot_thread_worker, item, cpu, "ot_worker_%d");
		if (IS_ERR(work))
			pr_err("failed to create thread for cpu %d\n", cpu);
	}

	/* wait a while to make sure all threads are waiting at the start line */
	msleep(20);

	/* in case no threads were created: memory insufficient? */
	if (atomic_dec_and_test(&test->data.nthreads))
		complete(&test->data.wait);

	/* start objpool testing threads */
	start = ktime_get();
	up_write(&test->data.start);

	/* yield the cpu to worker threads for 'duration' ms */
	timeout = msecs_to_jiffies(test->duration);
	schedule_timeout_interruptible(timeout);

	/* tell worker threads to quit */
	atomic_set_release(&test->data.stop, 1);

	/* do async-finalization */
	g_ot_async_ops[test->mode].fini(sop);

	/* wait for all worker threads to finish and quit */
	wait_for_completion(&test->data.wait);
	duration = (u64) ktime_us_delta(ktime_get(), start);

	/* make sure the rcu callback has been triggered */
	wait_for_completion(&test->data.rcu);

	/*
	 * now we are sure that the objpool has been finalized, either
	 * by the rcu callback or by the worker threads
	 */

	/* report testing summary and performance results */
	ot_perf_report(test, duration);

	/* report memory allocation summary */
	ot_mem_report(test);

	return 0;
}

/*
 * predefined test cases:
 * synchronous case / overrun case / async case
 *
 * async: synchronous or asynchronous testing
 * mode: only mode 0 supported
 * objsz: object size
 * duration: int, total test time in ms
 * delay: int, delay (in ms) between each iteration
 * bulk_normal: int, repeat times for thread worker
 * bulk_irq: int, repeat times for irq consumer
 * hrtimer: unsigned long, hrtimer interval in ms
 * name: char *, tag for the current testcase
 */

#define NODE_COMPACT sizeof(struct ot_node)
#define NODE_VMALLOC (512)
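
/*
 * A worked example of how the positional initializers below map onto
 * struct ot_test: the first entry,
 *
 *	{0, 0, NODE_COMPACT, 1000, 0, 1, 0, 0, "sync: percpu objpool"},
 *
 * is equivalent to the designated-initializer form
 *
 *	{.async = 0, .mode = 0, .objsz = NODE_COMPACT, .duration = 1000,
 *	 .delay = 0, .bulk_normal = 1, .bulk_irq = 0, .hrtimer = 0,
 *	 .name = "sync: percpu objpool"},
 */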

static struct ot_test g_testcases[] = {

	/* sync & normal */
	{0, 0, NODE_COMPACT, 1000, 0, 1, 0, 0, "sync: percpu objpool"},
	{0, 0, NODE_VMALLOC, 1000, 0, 1, 0, 0, "sync: percpu objpool from vmalloc"},

	/* sync & hrtimer */
	{0, 0, NODE_COMPACT, 1000, 0, 1, 1, 4, "sync & hrtimer: percpu objpool"},
	{0, 0, NODE_VMALLOC, 1000, 0, 1, 1, 4, "sync & hrtimer: percpu objpool from vmalloc"},

	/* sync & overrun */
	{0, 0, NODE_COMPACT, 1000, 0, 16, 0, 0, "sync overrun: percpu objpool"},
	{0, 0, NODE_VMALLOC, 1000, 0, 16, 0, 0, "sync overrun: percpu objpool from vmalloc"},

	/* async mode */
	{1, 0, NODE_COMPACT, 1000, 100, 1, 0, 0, "async: percpu objpool"},
	{1, 0, NODE_VMALLOC, 1000, 100, 1, 0, 0, "async: percpu objpool from vmalloc"},

	/* async + hrtimer mode */
	{1, 0, NODE_COMPACT, 1000, 0, 4, 4, 4, "async & hrtimer: percpu objpool"},
	{1, 0, NODE_VMALLOC, 1000, 0, 4, 4, 4, "async & hrtimer: percpu objpool from vmalloc"},
};

static int __init ot_mod_init(void)
{
	int i;

	/* run all testcases */
	for (i = 0; i < ARRAY_SIZE(g_testcases); i++) {
		ot_init_data(&g_testcases[i].data);
		if (g_testcases[i].async)
			ot_start_async(&g_testcases[i]);
		else
			ot_start_sync(&g_testcases[i]);
	}

	/* show test summary */
	pr_info("\n");
	pr_info("Summary of testcases:\n");
	for (i = 0; i < ARRAY_SIZE(g_testcases); i++) {
		pr_info(" duration: %lluus \thits: %10lu \tmiss: %10lu \t%s\n",
			g_testcases[i].data.duration, g_testcases[i].data.objects.nhits,
			g_testcases[i].data.objects.nmiss, g_testcases[i].name);
	}

	return -EAGAIN;
}

static void __exit ot_mod_exit(void)
{
}

module_init(ot_mod_init);
module_exit(ot_mod_exit);

MODULE_DESCRIPTION("Test module for lockless object pool");
MODULE_LICENSE("GPL");
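
/*
 * Usage sketch (the module name below is an assumption; it depends on
 * how this file is hooked into the kernel build):
 *
 *	$ insmod test_objpool.ko	# runs every testcase, then "fails"
 *	$ dmesg				# per-test reports plus the summary
 *
 * The load failure is intentional: ot_mod_init() returns -EAGAIN once
 * all testcases have run, so the module never stays resident and the
 * tests can be re-run simply by loading it again.
 */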