1 // SPDX-License-Identifier: GPL-2.0 2 3 /* 4 * Test module for lockless object pool 5 * 6 * Copyright: wuqiang.matt@bytedance.com 7 */ 8 9 #include <linux/errno.h> 10 #include <linux/module.h> 11 #include <linux/moduleparam.h> 12 #include <linux/completion.h> 13 #include <linux/kthread.h> 14 #include <linux/slab.h> 15 #include <linux/vmalloc.h> 16 #include <linux/delay.h> 17 #include <linux/hrtimer.h> 18 #include <linux/objpool.h> 19 20 #define OT_NR_MAX_BULK (16) 21 22 /* memory usage */ 23 struct ot_mem_stat { 24 atomic_long_t alloc; 25 atomic_long_t free; 26 }; 27 28 /* object allocation results */ 29 struct ot_obj_stat { 30 unsigned long nhits; 31 unsigned long nmiss; 32 }; 33 34 /* control & results per testcase */ 35 struct ot_data { 36 struct rw_semaphore start; 37 struct completion wait; 38 struct completion rcu; 39 atomic_t nthreads ____cacheline_aligned_in_smp; 40 atomic_t stop ____cacheline_aligned_in_smp; 41 struct ot_mem_stat kmalloc; 42 struct ot_mem_stat vmalloc; 43 struct ot_obj_stat objects; 44 u64 duration; 45 }; 46 47 /* testcase */ 48 struct ot_test { 49 int async; /* synchronous or asynchronous */ 50 int mode; /* only mode 0 supported */ 51 int objsz; /* object size */ 52 int duration; /* ms */ 53 int delay; /* ms */ 54 int bulk_normal; 55 int bulk_irq; 56 unsigned long hrtimer; /* ms */ 57 const char *name; 58 struct ot_data data; 59 }; 60 61 /* per-cpu worker */ 62 struct ot_item { 63 struct objpool_head *pool; /* pool head */ 64 struct ot_test *test; /* test parameters */ 65 66 void (*worker)(struct ot_item *item, int irq); 67 68 /* hrtimer control */ 69 ktime_t hrtcycle; 70 struct hrtimer hrtimer; 71 72 int bulk[2]; /* for thread and irq */ 73 int delay; 74 u32 niters; 75 76 /* summary per thread */ 77 struct ot_obj_stat stat[2]; /* thread and irq */ 78 u64 duration; 79 }; 80 81 /* 82 * memory leakage checking 83 */ 84 85 static void *ot_kzalloc(struct ot_test *test, long size) 86 { 87 void *ptr = kzalloc(size, GFP_KERNEL); 88 89 if (ptr) 90 atomic_long_add(size, &test->data.kmalloc.alloc); 91 return ptr; 92 } 93 94 static void ot_kfree(struct ot_test *test, void *ptr, long size) 95 { 96 if (!ptr) 97 return; 98 atomic_long_add(size, &test->data.kmalloc.free); 99 kfree(ptr); 100 } 101 102 static void ot_mem_report(struct ot_test *test) 103 { 104 long alloc, free; 105 106 pr_info("memory allocation summary for %s\n", test->name); 107 108 alloc = atomic_long_read(&test->data.kmalloc.alloc); 109 free = atomic_long_read(&test->data.kmalloc.free); 110 pr_info(" kmalloc: %lu - %lu = %lu\n", alloc, free, alloc - free); 111 112 alloc = atomic_long_read(&test->data.vmalloc.alloc); 113 free = atomic_long_read(&test->data.vmalloc.free); 114 pr_info(" vmalloc: %lu - %lu = %lu\n", alloc, free, alloc - free); 115 } 116 117 /* user object instance */ 118 struct ot_node { 119 void *owner; 120 unsigned long data; 121 unsigned long refs; 122 unsigned long payload[32]; 123 }; 124 125 /* user objpool manager */ 126 struct ot_context { 127 struct objpool_head pool; /* objpool head */ 128 struct ot_test *test; /* test parameters */ 129 void *ptr; /* user pool buffer */ 130 unsigned long size; /* buffer size */ 131 struct rcu_head rcu; 132 }; 133 134 static DEFINE_PER_CPU(struct ot_item, ot_pcup_items); 135 136 static int ot_init_data(struct ot_data *data) 137 { 138 memset(data, 0, sizeof(*data)); 139 init_rwsem(&data->start); 140 init_completion(&data->wait); 141 init_completion(&data->rcu); 142 atomic_set(&data->nthreads, 1); 143 144 return 0; 145 } 146 147 static int ot_init_node(void *nod, void *context) 148 { 149 struct ot_context *sop = context; 150 struct ot_node *on = nod; 151 152 on->owner = &sop->pool; 153 return 0; 154 } 155 156 static enum hrtimer_restart ot_hrtimer_handler(struct hrtimer *hrt) 157 { 158 struct ot_item *item = container_of(hrt, struct ot_item, hrtimer); 159 struct ot_test *test = item->test; 160 161 if (atomic_read_acquire(&test->data.stop)) 162 return HRTIMER_NORESTART; 163 164 /* do bulk-testings for objects pop/push */ 165 item->worker(item, 1); 166 167 hrtimer_forward(hrt, hrt->base->get_time(), item->hrtcycle); 168 return HRTIMER_RESTART; 169 } 170 171 static void ot_start_hrtimer(struct ot_item *item) 172 { 173 if (!item->test->hrtimer) 174 return; 175 hrtimer_start(&item->hrtimer, item->hrtcycle, HRTIMER_MODE_REL); 176 } 177 178 static void ot_stop_hrtimer(struct ot_item *item) 179 { 180 if (!item->test->hrtimer) 181 return; 182 hrtimer_cancel(&item->hrtimer); 183 } 184 185 static int ot_init_hrtimer(struct ot_item *item, unsigned long hrtimer) 186 { 187 struct hrtimer *hrt = &item->hrtimer; 188 189 if (!hrtimer) 190 return -ENOENT; 191 192 item->hrtcycle = ktime_set(0, hrtimer * 1000000UL); 193 hrtimer_init(hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 194 hrt->function = ot_hrtimer_handler; 195 return 0; 196 } 197 198 static int ot_init_cpu_item(struct ot_item *item, 199 struct ot_test *test, 200 struct objpool_head *pool, 201 void (*worker)(struct ot_item *, int)) 202 { 203 memset(item, 0, sizeof(*item)); 204 item->pool = pool; 205 item->test = test; 206 item->worker = worker; 207 208 item->bulk[0] = test->bulk_normal; 209 item->bulk[1] = test->bulk_irq; 210 item->delay = test->delay; 211 212 /* initialize hrtimer */ 213 ot_init_hrtimer(item, item->test->hrtimer); 214 return 0; 215 } 216 217 static int ot_thread_worker(void *arg) 218 { 219 struct ot_item *item = arg; 220 struct ot_test *test = item->test; 221 ktime_t start; 222 223 atomic_inc(&test->data.nthreads); 224 down_read(&test->data.start); 225 up_read(&test->data.start); 226 start = ktime_get(); 227 ot_start_hrtimer(item); 228 do { 229 if (atomic_read_acquire(&test->data.stop)) 230 break; 231 /* do bulk-testings for objects pop/push */ 232 item->worker(item, 0); 233 } while (!kthread_should_stop()); 234 ot_stop_hrtimer(item); 235 item->duration = (u64) ktime_us_delta(ktime_get(), start); 236 if (atomic_dec_and_test(&test->data.nthreads)) 237 complete(&test->data.wait); 238 239 return 0; 240 } 241 242 static void ot_perf_report(struct ot_test *test, u64 duration) 243 { 244 struct ot_obj_stat total, normal = {0}, irq = {0}; 245 int cpu, nthreads = 0; 246 247 pr_info("\n"); 248 pr_info("Testing summary for %s\n", test->name); 249 250 for_each_possible_cpu(cpu) { 251 struct ot_item *item = per_cpu_ptr(&ot_pcup_items, cpu); 252 if (!item->duration) 253 continue; 254 normal.nhits += item->stat[0].nhits; 255 normal.nmiss += item->stat[0].nmiss; 256 irq.nhits += item->stat[1].nhits; 257 irq.nmiss += item->stat[1].nmiss; 258 pr_info("CPU: %d duration: %lluus\n", cpu, item->duration); 259 pr_info("\tthread:\t%16lu hits \t%16lu miss\n", 260 item->stat[0].nhits, item->stat[0].nmiss); 261 pr_info("\tirq: \t%16lu hits \t%16lu miss\n", 262 item->stat[1].nhits, item->stat[1].nmiss); 263 pr_info("\ttotal: \t%16lu hits \t%16lu miss\n", 264 item->stat[0].nhits + item->stat[1].nhits, 265 item->stat[0].nmiss + item->stat[1].nmiss); 266 nthreads++; 267 } 268 269 total.nhits = normal.nhits + irq.nhits; 270 total.nmiss = normal.nmiss + irq.nmiss; 271 272 pr_info("ALL: \tnthreads: %d duration: %lluus\n", nthreads, duration); 273 pr_info("SUM: \t%16lu hits \t%16lu miss\n", 274 total.nhits, total.nmiss); 275 276 test->data.objects = total; 277 test->data.duration = duration; 278 } 279 280 /* 281 * synchronous test cases for objpool manipulation 282 */ 283 284 /* objpool manipulation for synchronous mode (percpu objpool) */ 285 static struct ot_context *ot_init_sync_m0(struct ot_test *test) 286 { 287 struct ot_context *sop = NULL; 288 int max = num_possible_cpus() << 3; 289 gfp_t gfp = GFP_KERNEL; 290 291 sop = (struct ot_context *)ot_kzalloc(test, sizeof(*sop)); 292 if (!sop) 293 return NULL; 294 sop->test = test; 295 if (test->objsz < 512) 296 gfp = GFP_ATOMIC; 297 298 if (objpool_init(&sop->pool, max, test->objsz, 299 gfp, sop, ot_init_node, NULL)) { 300 ot_kfree(test, sop, sizeof(*sop)); 301 return NULL; 302 } 303 WARN_ON(max != sop->pool.nr_objs); 304 305 return sop; 306 } 307 308 static void ot_fini_sync(struct ot_context *sop) 309 { 310 objpool_fini(&sop->pool); 311 ot_kfree(sop->test, sop, sizeof(*sop)); 312 } 313 314 static struct { 315 struct ot_context * (*init)(struct ot_test *oc); 316 void (*fini)(struct ot_context *sop); 317 } g_ot_sync_ops[] = { 318 {.init = ot_init_sync_m0, .fini = ot_fini_sync}, 319 }; 320 321 /* 322 * synchronous test cases: performance mode 323 */ 324 325 static void ot_bulk_sync(struct ot_item *item, int irq) 326 { 327 struct ot_node *nods[OT_NR_MAX_BULK]; 328 int i; 329 330 for (i = 0; i < item->bulk[irq]; i++) 331 nods[i] = objpool_pop(item->pool); 332 333 if (!irq && (item->delay || !(++(item->niters) & 0x7FFF))) 334 msleep(item->delay); 335 336 while (i-- > 0) { 337 struct ot_node *on = nods[i]; 338 if (on) { 339 on->refs++; 340 objpool_push(on, item->pool); 341 item->stat[irq].nhits++; 342 } else { 343 item->stat[irq].nmiss++; 344 } 345 } 346 } 347 348 static int ot_start_sync(struct ot_test *test) 349 { 350 struct ot_context *sop; 351 ktime_t start; 352 u64 duration; 353 unsigned long timeout; 354 int cpu; 355 356 /* initialize objpool for syncrhonous testcase */ 357 sop = g_ot_sync_ops[test->mode].init(test); 358 if (!sop) 359 return -ENOMEM; 360 361 /* grab rwsem to block testing threads */ 362 down_write(&test->data.start); 363 364 for_each_possible_cpu(cpu) { 365 struct ot_item *item = per_cpu_ptr(&ot_pcup_items, cpu); 366 struct task_struct *work; 367 368 ot_init_cpu_item(item, test, &sop->pool, ot_bulk_sync); 369 370 /* skip offline cpus */ 371 if (!cpu_online(cpu)) 372 continue; 373 374 work = kthread_create_on_node(ot_thread_worker, item, 375 cpu_to_node(cpu), "ot_worker_%d", cpu); 376 if (IS_ERR(work)) { 377 pr_err("failed to create thread for cpu %d\n", cpu); 378 } else { 379 kthread_bind(work, cpu); 380 wake_up_process(work); 381 } 382 } 383 384 /* wait a while to make sure all threads waiting at start line */ 385 msleep(20); 386 387 /* in case no threads were created: memory insufficient ? */ 388 if (atomic_dec_and_test(&test->data.nthreads)) 389 complete(&test->data.wait); 390 391 // sched_set_fifo_low(current); 392 393 /* start objpool testing threads */ 394 start = ktime_get(); 395 up_write(&test->data.start); 396 397 /* yeild cpu to worker threads for duration ms */ 398 timeout = msecs_to_jiffies(test->duration); 399 schedule_timeout_interruptible(timeout); 400 401 /* tell workers threads to quit */ 402 atomic_set_release(&test->data.stop, 1); 403 404 /* wait all workers threads finish and quit */ 405 wait_for_completion(&test->data.wait); 406 duration = (u64) ktime_us_delta(ktime_get(), start); 407 408 /* cleanup objpool */ 409 g_ot_sync_ops[test->mode].fini(sop); 410 411 /* report testing summary and performance results */ 412 ot_perf_report(test, duration); 413 414 /* report memory allocation summary */ 415 ot_mem_report(test); 416 417 return 0; 418 } 419 420 /* 421 * asynchronous test cases: pool lifecycle controlled by refcount 422 */ 423 424 static void ot_fini_async_rcu(struct rcu_head *rcu) 425 { 426 struct ot_context *sop = container_of(rcu, struct ot_context, rcu); 427 struct ot_test *test = sop->test; 428 429 /* here all cpus are aware of the stop event: test->data.stop = 1 */ 430 WARN_ON(!atomic_read_acquire(&test->data.stop)); 431 432 objpool_fini(&sop->pool); 433 complete(&test->data.rcu); 434 } 435 436 static void ot_fini_async(struct ot_context *sop) 437 { 438 /* make sure the stop event is acknowledged by all cores */ 439 call_rcu(&sop->rcu, ot_fini_async_rcu); 440 } 441 442 static int ot_objpool_release(struct objpool_head *head, void *context) 443 { 444 struct ot_context *sop = context; 445 446 WARN_ON(!head || !sop || head != &sop->pool); 447 448 /* do context cleaning if needed */ 449 if (sop) 450 ot_kfree(sop->test, sop, sizeof(*sop)); 451 452 return 0; 453 } 454 455 static struct ot_context *ot_init_async_m0(struct ot_test *test) 456 { 457 struct ot_context *sop = NULL; 458 int max = num_possible_cpus() << 3; 459 gfp_t gfp = GFP_KERNEL; 460 461 sop = (struct ot_context *)ot_kzalloc(test, sizeof(*sop)); 462 if (!sop) 463 return NULL; 464 sop->test = test; 465 if (test->objsz < 512) 466 gfp = GFP_ATOMIC; 467 468 if (objpool_init(&sop->pool, max, test->objsz, gfp, sop, 469 ot_init_node, ot_objpool_release)) { 470 ot_kfree(test, sop, sizeof(*sop)); 471 return NULL; 472 } 473 WARN_ON(max != sop->pool.nr_objs); 474 475 return sop; 476 } 477 478 static struct { 479 struct ot_context * (*init)(struct ot_test *oc); 480 void (*fini)(struct ot_context *sop); 481 } g_ot_async_ops[] = { 482 {.init = ot_init_async_m0, .fini = ot_fini_async}, 483 }; 484 485 static void ot_nod_recycle(struct ot_node *on, struct objpool_head *pool, 486 int release) 487 { 488 struct ot_context *sop; 489 490 on->refs++; 491 492 if (!release) { 493 /* push object back to opjpool for reuse */ 494 objpool_push(on, pool); 495 return; 496 } 497 498 sop = container_of(pool, struct ot_context, pool); 499 WARN_ON(sop != pool->context); 500 501 /* unref objpool with nod removed forever */ 502 objpool_drop(on, pool); 503 } 504 505 static void ot_bulk_async(struct ot_item *item, int irq) 506 { 507 struct ot_test *test = item->test; 508 struct ot_node *nods[OT_NR_MAX_BULK]; 509 int i, stop; 510 511 for (i = 0; i < item->bulk[irq]; i++) 512 nods[i] = objpool_pop(item->pool); 513 514 if (!irq) { 515 if (item->delay || !(++(item->niters) & 0x7FFF)) 516 msleep(item->delay); 517 get_cpu(); 518 } 519 520 stop = atomic_read_acquire(&test->data.stop); 521 522 /* drop all objects and deref objpool */ 523 while (i-- > 0) { 524 struct ot_node *on = nods[i]; 525 526 if (on) { 527 on->refs++; 528 ot_nod_recycle(on, item->pool, stop); 529 item->stat[irq].nhits++; 530 } else { 531 item->stat[irq].nmiss++; 532 } 533 } 534 535 if (!irq) 536 put_cpu(); 537 } 538 539 static int ot_start_async(struct ot_test *test) 540 { 541 struct ot_context *sop; 542 ktime_t start; 543 u64 duration; 544 unsigned long timeout; 545 int cpu; 546 547 /* initialize objpool for syncrhonous testcase */ 548 sop = g_ot_async_ops[test->mode].init(test); 549 if (!sop) 550 return -ENOMEM; 551 552 /* grab rwsem to block testing threads */ 553 down_write(&test->data.start); 554 555 for_each_possible_cpu(cpu) { 556 struct ot_item *item = per_cpu_ptr(&ot_pcup_items, cpu); 557 struct task_struct *work; 558 559 ot_init_cpu_item(item, test, &sop->pool, ot_bulk_async); 560 561 /* skip offline cpus */ 562 if (!cpu_online(cpu)) 563 continue; 564 565 work = kthread_create_on_node(ot_thread_worker, item, 566 cpu_to_node(cpu), "ot_worker_%d", cpu); 567 if (IS_ERR(work)) { 568 pr_err("failed to create thread for cpu %d\n", cpu); 569 } else { 570 kthread_bind(work, cpu); 571 wake_up_process(work); 572 } 573 } 574 575 /* wait a while to make sure all threads waiting at start line */ 576 msleep(20); 577 578 /* in case no threads were created: memory insufficient ? */ 579 if (atomic_dec_and_test(&test->data.nthreads)) 580 complete(&test->data.wait); 581 582 /* start objpool testing threads */ 583 start = ktime_get(); 584 up_write(&test->data.start); 585 586 /* yeild cpu to worker threads for duration ms */ 587 timeout = msecs_to_jiffies(test->duration); 588 schedule_timeout_interruptible(timeout); 589 590 /* tell workers threads to quit */ 591 atomic_set_release(&test->data.stop, 1); 592 593 /* do async-finalization */ 594 g_ot_async_ops[test->mode].fini(sop); 595 596 /* wait all workers threads finish and quit */ 597 wait_for_completion(&test->data.wait); 598 duration = (u64) ktime_us_delta(ktime_get(), start); 599 600 /* assure rcu callback is triggered */ 601 wait_for_completion(&test->data.rcu); 602 603 /* 604 * now we are sure that objpool is finalized either 605 * by rcu callback or by worker threads 606 */ 607 608 /* report testing summary and performance results */ 609 ot_perf_report(test, duration); 610 611 /* report memory allocation summary */ 612 ot_mem_report(test); 613 614 return 0; 615 } 616 617 /* 618 * predefined testing cases: 619 * synchronous case / overrun case / async case 620 * 621 * async: synchronous or asynchronous testing 622 * mode: only mode 0 supported 623 * objsz: object size 624 * duration: int, total test time in ms 625 * delay: int, delay (in ms) between each iteration 626 * bulk_normal: int, repeat times for thread worker 627 * bulk_irq: int, repeat times for irq consumer 628 * hrtimer: unsigned long, hrtimer intervnal in ms 629 * name: char *, tag for current test ot_item 630 */ 631 632 #define NODE_COMPACT sizeof(struct ot_node) 633 #define NODE_VMALLOC (512) 634 635 static struct ot_test g_testcases[] = { 636 637 /* sync & normal */ 638 {0, 0, NODE_COMPACT, 1000, 0, 1, 0, 0, "sync: percpu objpool"}, 639 {0, 0, NODE_VMALLOC, 1000, 0, 1, 0, 0, "sync: percpu objpool from vmalloc"}, 640 641 /* sync & hrtimer */ 642 {0, 0, NODE_COMPACT, 1000, 0, 1, 1, 4, "sync & hrtimer: percpu objpool"}, 643 {0, 0, NODE_VMALLOC, 1000, 0, 1, 1, 4, "sync & hrtimer: percpu objpool from vmalloc"}, 644 645 /* sync & overrun */ 646 {0, 0, NODE_COMPACT, 1000, 0, 16, 0, 0, "sync overrun: percpu objpool"}, 647 {0, 0, NODE_VMALLOC, 1000, 0, 16, 0, 0, "sync overrun: percpu objpool from vmalloc"}, 648 649 /* async mode */ 650 {1, 0, NODE_COMPACT, 1000, 100, 1, 0, 0, "async: percpu objpool"}, 651 {1, 0, NODE_VMALLOC, 1000, 100, 1, 0, 0, "async: percpu objpool from vmalloc"}, 652 653 /* async + hrtimer mode */ 654 {1, 0, NODE_COMPACT, 1000, 0, 4, 4, 4, "async & hrtimer: percpu objpool"}, 655 {1, 0, NODE_VMALLOC, 1000, 0, 4, 4, 4, "async & hrtimer: percpu objpool from vmalloc"}, 656 }; 657 658 static int __init ot_mod_init(void) 659 { 660 int i; 661 662 /* perform testings */ 663 for (i = 0; i < ARRAY_SIZE(g_testcases); i++) { 664 ot_init_data(&g_testcases[i].data); 665 if (g_testcases[i].async) 666 ot_start_async(&g_testcases[i]); 667 else 668 ot_start_sync(&g_testcases[i]); 669 } 670 671 /* show tests summary */ 672 pr_info("\n"); 673 pr_info("Summary of testcases:\n"); 674 for (i = 0; i < ARRAY_SIZE(g_testcases); i++) { 675 pr_info(" duration: %lluus \thits: %10lu \tmiss: %10lu \t%s\n", 676 g_testcases[i].data.duration, g_testcases[i].data.objects.nhits, 677 g_testcases[i].data.objects.nmiss, g_testcases[i].name); 678 } 679 680 return -EAGAIN; 681 } 682 683 static void __exit ot_mod_exit(void) 684 { 685 } 686 687 module_init(ot_mod_init); 688 module_exit(ot_mod_exit); 689 690 MODULE_LICENSE("GPL");