xref: /linux/lib/test_objpool.c (revision 7f81907b7e3f93dfed2e903af52659baa4944341)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Test module for lockless object pool
5  *
6  * Copyright: wuqiang.matt@bytedance.com
7  */
8 
9 #include <linux/errno.h>
10 #include <linux/module.h>
11 #include <linux/moduleparam.h>
12 #include <linux/completion.h>
13 #include <linux/kthread.h>
14 #include <linux/slab.h>
15 #include <linux/vmalloc.h>
16 #include <linux/delay.h>
17 #include <linux/hrtimer.h>
18 #include <linux/objpool.h>
19 
/* capacity of the on-stack object array used by one bulk pop/push round */
#define OT_NR_MAX_BULK	16
21 
22 /* memory usage */
23 struct ot_mem_stat {
24 	atomic_long_t alloc;
25 	atomic_long_t free;
26 };
27 
/*
 * Outcome counters for object pops: a hit is a pop that returned an
 * object, a miss is a pop that returned NULL.
 */
struct ot_obj_stat {
	unsigned long nhits;	/* successful pops */
	unsigned long nmiss;	/* failed pops */
};
33 
34 /* control & results per testcase */
35 struct ot_data {
36 	struct rw_semaphore start;
37 	struct completion wait;
38 	struct completion rcu;
39 	atomic_t nthreads ____cacheline_aligned_in_smp;
40 	atomic_t stop ____cacheline_aligned_in_smp;
41 	struct ot_mem_stat kmalloc;
42 	struct ot_mem_stat vmalloc;
43 	struct ot_obj_stat objects;
44 	u64    duration;
45 };
46 
47 /* testcase */
48 struct ot_test {
49 	int async; /* synchronous or asynchronous */
50 	int mode; /* only mode 0 supported */
51 	int objsz; /* object size */
52 	int duration; /* ms */
53 	int delay; /* ms */
54 	int bulk_normal;
55 	int bulk_irq;
56 	unsigned long hrtimer; /* ms */
57 	const char *name;
58 	struct ot_data data;
59 };
60 
61 /* per-cpu worker */
62 struct ot_item {
63 	struct objpool_head *pool; /* pool head */
64 	struct ot_test *test; /* test parameters */
65 
66 	void (*worker)(struct ot_item *item, int irq);
67 
68 	/* hrtimer control */
69 	ktime_t hrtcycle;
70 	struct hrtimer hrtimer;
71 
72 	int bulk[2]; /* for thread and irq */
73 	int delay;
74 	u32 niters;
75 
76 	/* summary per thread */
77 	struct ot_obj_stat stat[2]; /* thread and irq */
78 	u64 duration;
79 };
80 
81 /*
82  * memory leakage checking
83  */
84 
85 static void *ot_kzalloc(struct ot_test *test, long size)
86 {
87 	void *ptr = kzalloc(size, GFP_KERNEL);
88 
89 	if (ptr)
90 		atomic_long_add(size, &test->data.kmalloc.alloc);
91 	return ptr;
92 }
93 
94 static void ot_kfree(struct ot_test *test, void *ptr, long size)
95 {
96 	if (!ptr)
97 		return;
98 	atomic_long_add(size, &test->data.kmalloc.free);
99 	kfree(ptr);
100 }
101 
102 static void ot_mem_report(struct ot_test *test)
103 {
104 	long alloc, free;
105 
106 	pr_info("memory allocation summary for %s\n", test->name);
107 
108 	alloc = atomic_long_read(&test->data.kmalloc.alloc);
109 	free = atomic_long_read(&test->data.kmalloc.free);
110 	pr_info("  kmalloc: %lu - %lu = %lu\n", alloc, free, alloc - free);
111 
112 	alloc = atomic_long_read(&test->data.vmalloc.alloc);
113 	free = atomic_long_read(&test->data.vmalloc.free);
114 	pr_info("  vmalloc: %lu - %lu = %lu\n", alloc, free, alloc - free);
115 }
116 
/* the object type stored in the pool under test */
struct ot_node {
	void *owner;			/* set to the owning pool by ot_init_node() */
	unsigned long data;		/* not touched by the visible code */
	unsigned long refs;		/* bumped whenever the node is recycled */
	unsigned long payload[32];	/* filler to give the object some size */
};
124 
125 /* user objpool manager */
126 struct ot_context {
127 	struct objpool_head pool; /* objpool head */
128 	struct ot_test *test; /* test parameters */
129 	void *ptr; /* user pool buffer */
130 	unsigned long size; /* buffer size */
131 	struct rcu_head rcu;
132 };
133 
134 static DEFINE_PER_CPU(struct ot_item, ot_pcup_items);
135 
136 static int ot_init_data(struct ot_data *data)
137 {
138 	memset(data, 0, sizeof(*data));
139 	init_rwsem(&data->start);
140 	init_completion(&data->wait);
141 	init_completion(&data->rcu);
142 	atomic_set(&data->nthreads, 1);
143 
144 	return 0;
145 }
146 
147 static int ot_init_node(void *nod, void *context)
148 {
149 	struct ot_context *sop = context;
150 	struct ot_node *on = nod;
151 
152 	on->owner = &sop->pool;
153 	return 0;
154 }
155 
156 static enum hrtimer_restart ot_hrtimer_handler(struct hrtimer *hrt)
157 {
158 	struct ot_item *item = container_of(hrt, struct ot_item, hrtimer);
159 	struct ot_test *test = item->test;
160 
161 	if (atomic_read_acquire(&test->data.stop))
162 		return HRTIMER_NORESTART;
163 
164 	/* do bulk-testings for objects pop/push */
165 	item->worker(item, 1);
166 
167 	hrtimer_forward(hrt, hrt->base->get_time(), item->hrtcycle);
168 	return HRTIMER_RESTART;
169 }
170 
171 static void ot_start_hrtimer(struct ot_item *item)
172 {
173 	if (!item->test->hrtimer)
174 		return;
175 	hrtimer_start(&item->hrtimer, item->hrtcycle, HRTIMER_MODE_REL);
176 }
177 
178 static void ot_stop_hrtimer(struct ot_item *item)
179 {
180 	if (!item->test->hrtimer)
181 		return;
182 	hrtimer_cancel(&item->hrtimer);
183 }
184 
185 static int ot_init_hrtimer(struct ot_item *item, unsigned long hrtimer)
186 {
187 	struct hrtimer *hrt = &item->hrtimer;
188 
189 	if (!hrtimer)
190 		return -ENOENT;
191 
192 	item->hrtcycle = ktime_set(0, hrtimer * 1000000UL);
193 	hrtimer_setup(hrt, ot_hrtimer_handler, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
194 	return 0;
195 }
196 
197 static int ot_init_cpu_item(struct ot_item *item,
198 			struct ot_test *test,
199 			struct objpool_head *pool,
200 			void (*worker)(struct ot_item *, int))
201 {
202 	memset(item, 0, sizeof(*item));
203 	item->pool = pool;
204 	item->test = test;
205 	item->worker = worker;
206 
207 	item->bulk[0] = test->bulk_normal;
208 	item->bulk[1] = test->bulk_irq;
209 	item->delay = test->delay;
210 
211 	/* initialize hrtimer */
212 	ot_init_hrtimer(item, item->test->hrtimer);
213 	return 0;
214 }
215 
216 static int ot_thread_worker(void *arg)
217 {
218 	struct ot_item *item = arg;
219 	struct ot_test *test = item->test;
220 	ktime_t start;
221 
222 	atomic_inc(&test->data.nthreads);
223 	down_read(&test->data.start);
224 	up_read(&test->data.start);
225 	start = ktime_get();
226 	ot_start_hrtimer(item);
227 	do {
228 		if (atomic_read_acquire(&test->data.stop))
229 			break;
230 		/* do bulk-testings for objects pop/push */
231 		item->worker(item, 0);
232 	} while (!kthread_should_stop());
233 	ot_stop_hrtimer(item);
234 	item->duration = (u64) ktime_us_delta(ktime_get(), start);
235 	if (atomic_dec_and_test(&test->data.nthreads))
236 		complete(&test->data.wait);
237 
238 	return 0;
239 }
240 
241 static void ot_perf_report(struct ot_test *test, u64 duration)
242 {
243 	struct ot_obj_stat total, normal = {0}, irq = {0};
244 	int cpu, nthreads = 0;
245 
246 	pr_info("\n");
247 	pr_info("Testing summary for %s\n", test->name);
248 
249 	for_each_possible_cpu(cpu) {
250 		struct ot_item *item = per_cpu_ptr(&ot_pcup_items, cpu);
251 		if (!item->duration)
252 			continue;
253 		normal.nhits += item->stat[0].nhits;
254 		normal.nmiss += item->stat[0].nmiss;
255 		irq.nhits += item->stat[1].nhits;
256 		irq.nmiss += item->stat[1].nmiss;
257 		pr_info("CPU: %d  duration: %lluus\n", cpu, item->duration);
258 		pr_info("\tthread:\t%16lu hits \t%16lu miss\n",
259 			item->stat[0].nhits, item->stat[0].nmiss);
260 		pr_info("\tirq:   \t%16lu hits \t%16lu miss\n",
261 			item->stat[1].nhits, item->stat[1].nmiss);
262 		pr_info("\ttotal: \t%16lu hits \t%16lu miss\n",
263 			item->stat[0].nhits + item->stat[1].nhits,
264 			item->stat[0].nmiss + item->stat[1].nmiss);
265 		nthreads++;
266 	}
267 
268 	total.nhits = normal.nhits + irq.nhits;
269 	total.nmiss = normal.nmiss + irq.nmiss;
270 
271 	pr_info("ALL: \tnthreads: %d  duration: %lluus\n", nthreads, duration);
272 	pr_info("SUM: \t%16lu hits \t%16lu miss\n",
273 		total.nhits, total.nmiss);
274 
275 	test->data.objects = total;
276 	test->data.duration = duration;
277 }
278 
279 /*
280  * synchronous test cases for objpool manipulation
281  */
282 
283 /* objpool manipulation for synchronous mode (percpu objpool) */
284 static struct ot_context *ot_init_sync_m0(struct ot_test *test)
285 {
286 	struct ot_context *sop = NULL;
287 	int max = num_possible_cpus() << 3;
288 	gfp_t gfp = GFP_KERNEL;
289 
290 	sop = (struct ot_context *)ot_kzalloc(test, sizeof(*sop));
291 	if (!sop)
292 		return NULL;
293 	sop->test = test;
294 	if (test->objsz < 512)
295 		gfp = GFP_ATOMIC;
296 
297 	if (objpool_init(&sop->pool, max, test->objsz,
298 			 gfp, sop, ot_init_node, NULL)) {
299 		ot_kfree(test, sop, sizeof(*sop));
300 		return NULL;
301 	}
302 	WARN_ON(max != sop->pool.nr_objs);
303 
304 	return sop;
305 }
306 
307 static void ot_fini_sync(struct ot_context *sop)
308 {
309 	objpool_fini(&sop->pool);
310 	ot_kfree(sop->test, sop, sizeof(*sop));
311 }
312 
313 static struct {
314 	struct ot_context * (*init)(struct ot_test *oc);
315 	void (*fini)(struct ot_context *sop);
316 } g_ot_sync_ops[] = {
317 	{.init = ot_init_sync_m0, .fini = ot_fini_sync},
318 };
319 
320 /*
321  * synchronous test cases: performance mode
322  */
323 
324 static void ot_bulk_sync(struct ot_item *item, int irq)
325 {
326 	struct ot_node *nods[OT_NR_MAX_BULK];
327 	int i;
328 
329 	for (i = 0; i < item->bulk[irq]; i++)
330 		nods[i] = objpool_pop(item->pool);
331 
332 	if (!irq && (item->delay || !(++(item->niters) & 0x7FFF)))
333 		msleep(item->delay);
334 
335 	while (i-- > 0) {
336 		struct ot_node *on = nods[i];
337 		if (on) {
338 			on->refs++;
339 			objpool_push(on, item->pool);
340 			item->stat[irq].nhits++;
341 		} else {
342 			item->stat[irq].nmiss++;
343 		}
344 	}
345 }
346 
347 static int ot_start_sync(struct ot_test *test)
348 {
349 	struct ot_context *sop;
350 	ktime_t start;
351 	u64 duration;
352 	unsigned long timeout;
353 	int cpu;
354 
355 	/* initialize objpool for syncrhonous testcase */
356 	sop = g_ot_sync_ops[test->mode].init(test);
357 	if (!sop)
358 		return -ENOMEM;
359 
360 	/* grab rwsem to block testing threads */
361 	down_write(&test->data.start);
362 
363 	for_each_possible_cpu(cpu) {
364 		struct ot_item *item = per_cpu_ptr(&ot_pcup_items, cpu);
365 		struct task_struct *work;
366 
367 		ot_init_cpu_item(item, test, &sop->pool, ot_bulk_sync);
368 
369 		/* skip offline cpus */
370 		if (!cpu_online(cpu))
371 			continue;
372 
373 		work = kthread_run_on_cpu(ot_thread_worker, item,
374 					  cpu, "ot_worker_%d");
375 		if (IS_ERR(work))
376 			pr_err("failed to create thread for cpu %d\n", cpu);
377 	}
378 
379 	/* wait a while to make sure all threads waiting at start line */
380 	msleep(20);
381 
382 	/* in case no threads were created: memory insufficient ? */
383 	if (atomic_dec_and_test(&test->data.nthreads))
384 		complete(&test->data.wait);
385 
386 	// sched_set_fifo_low(current);
387 
388 	/* start objpool testing threads */
389 	start = ktime_get();
390 	up_write(&test->data.start);
391 
392 	/* yeild cpu to worker threads for duration ms */
393 	timeout = msecs_to_jiffies(test->duration);
394 	schedule_timeout_interruptible(timeout);
395 
396 	/* tell workers threads to quit */
397 	atomic_set_release(&test->data.stop, 1);
398 
399 	/* wait all workers threads finish and quit */
400 	wait_for_completion(&test->data.wait);
401 	duration = (u64) ktime_us_delta(ktime_get(), start);
402 
403 	/* cleanup objpool */
404 	g_ot_sync_ops[test->mode].fini(sop);
405 
406 	/* report testing summary and performance results */
407 	ot_perf_report(test, duration);
408 
409 	/* report memory allocation summary */
410 	ot_mem_report(test);
411 
412 	return 0;
413 }
414 
415 /*
416  * asynchronous test cases: pool lifecycle controlled by refcount
417  */
418 
419 static void ot_fini_async_rcu(struct rcu_head *rcu)
420 {
421 	struct ot_context *sop = container_of(rcu, struct ot_context, rcu);
422 	struct ot_test *test = sop->test;
423 
424 	/* here all cpus are aware of the stop event: test->data.stop = 1 */
425 	WARN_ON(!atomic_read_acquire(&test->data.stop));
426 
427 	objpool_fini(&sop->pool);
428 	complete(&test->data.rcu);
429 }
430 
431 static void ot_fini_async(struct ot_context *sop)
432 {
433 	/* make sure the stop event is acknowledged by all cores */
434 	call_rcu(&sop->rcu, ot_fini_async_rcu);
435 }
436 
437 static int ot_objpool_release(struct objpool_head *head, void *context)
438 {
439 	struct ot_context *sop = context;
440 
441 	WARN_ON(!head || !sop || head != &sop->pool);
442 
443 	/* do context cleaning if needed */
444 	if (sop)
445 		ot_kfree(sop->test, sop, sizeof(*sop));
446 
447 	return 0;
448 }
449 
450 static struct ot_context *ot_init_async_m0(struct ot_test *test)
451 {
452 	struct ot_context *sop = NULL;
453 	int max = num_possible_cpus() << 3;
454 	gfp_t gfp = GFP_KERNEL;
455 
456 	sop = (struct ot_context *)ot_kzalloc(test, sizeof(*sop));
457 	if (!sop)
458 		return NULL;
459 	sop->test = test;
460 	if (test->objsz < 512)
461 		gfp = GFP_ATOMIC;
462 
463 	if (objpool_init(&sop->pool, max, test->objsz, gfp, sop,
464 			 ot_init_node, ot_objpool_release)) {
465 		ot_kfree(test, sop, sizeof(*sop));
466 		return NULL;
467 	}
468 	WARN_ON(max != sop->pool.nr_objs);
469 
470 	return sop;
471 }
472 
473 static struct {
474 	struct ot_context * (*init)(struct ot_test *oc);
475 	void (*fini)(struct ot_context *sop);
476 } g_ot_async_ops[] = {
477 	{.init = ot_init_async_m0, .fini = ot_fini_async},
478 };
479 
480 static void ot_nod_recycle(struct ot_node *on, struct objpool_head *pool,
481 			int release)
482 {
483 	struct ot_context *sop;
484 
485 	on->refs++;
486 
487 	if (!release) {
488 		/* push object back to opjpool for reuse */
489 		objpool_push(on, pool);
490 		return;
491 	}
492 
493 	sop = container_of(pool, struct ot_context, pool);
494 	WARN_ON(sop != pool->context);
495 
496 	/* unref objpool with nod removed forever */
497 	objpool_drop(on, pool);
498 }
499 
500 static void ot_bulk_async(struct ot_item *item, int irq)
501 {
502 	struct ot_test *test = item->test;
503 	struct ot_node *nods[OT_NR_MAX_BULK];
504 	int i, stop;
505 
506 	for (i = 0; i < item->bulk[irq]; i++)
507 		nods[i] = objpool_pop(item->pool);
508 
509 	if (!irq) {
510 		if (item->delay || !(++(item->niters) & 0x7FFF))
511 			msleep(item->delay);
512 		get_cpu();
513 	}
514 
515 	stop = atomic_read_acquire(&test->data.stop);
516 
517 	/* drop all objects and deref objpool */
518 	while (i-- > 0) {
519 		struct ot_node *on = nods[i];
520 
521 		if (on) {
522 			on->refs++;
523 			ot_nod_recycle(on, item->pool, stop);
524 			item->stat[irq].nhits++;
525 		} else {
526 			item->stat[irq].nmiss++;
527 		}
528 	}
529 
530 	if (!irq)
531 		put_cpu();
532 }
533 
534 static int ot_start_async(struct ot_test *test)
535 {
536 	struct ot_context *sop;
537 	ktime_t start;
538 	u64 duration;
539 	unsigned long timeout;
540 	int cpu;
541 
542 	/* initialize objpool for syncrhonous testcase */
543 	sop = g_ot_async_ops[test->mode].init(test);
544 	if (!sop)
545 		return -ENOMEM;
546 
547 	/* grab rwsem to block testing threads */
548 	down_write(&test->data.start);
549 
550 	for_each_possible_cpu(cpu) {
551 		struct ot_item *item = per_cpu_ptr(&ot_pcup_items, cpu);
552 		struct task_struct *work;
553 
554 		ot_init_cpu_item(item, test, &sop->pool, ot_bulk_async);
555 
556 		/* skip offline cpus */
557 		if (!cpu_online(cpu))
558 			continue;
559 
560 		work = kthread_run_on_cpu(ot_thread_worker, item, cpu, "ot_worker_%d");
561 		if (IS_ERR(work))
562 			pr_err("failed to create thread for cpu %d\n", cpu);
563 	}
564 
565 	/* wait a while to make sure all threads waiting at start line */
566 	msleep(20);
567 
568 	/* in case no threads were created: memory insufficient ? */
569 	if (atomic_dec_and_test(&test->data.nthreads))
570 		complete(&test->data.wait);
571 
572 	/* start objpool testing threads */
573 	start = ktime_get();
574 	up_write(&test->data.start);
575 
576 	/* yeild cpu to worker threads for duration ms */
577 	timeout = msecs_to_jiffies(test->duration);
578 	schedule_timeout_interruptible(timeout);
579 
580 	/* tell workers threads to quit */
581 	atomic_set_release(&test->data.stop, 1);
582 
583 	/* do async-finalization */
584 	g_ot_async_ops[test->mode].fini(sop);
585 
586 	/* wait all workers threads finish and quit */
587 	wait_for_completion(&test->data.wait);
588 	duration = (u64) ktime_us_delta(ktime_get(), start);
589 
590 	/* assure rcu callback is triggered */
591 	wait_for_completion(&test->data.rcu);
592 
593 	/*
594 	 * now we are sure that objpool is finalized either
595 	 * by rcu callback or by worker threads
596 	 */
597 
598 	/* report testing summary and performance results */
599 	ot_perf_report(test, duration);
600 
601 	/* report memory allocation summary */
602 	ot_mem_report(test);
603 
604 	return 0;
605 }
606 
607 /*
608  * predefined testing cases:
609  *   synchronous case / overrun case / async case
610  *
611  * async: synchronous or asynchronous testing
612  * mode: only mode 0 supported
613  * objsz: object size
614  * duration: int, total test time in ms
615  * delay: int, delay (in ms) between each iteration
616  * bulk_normal: int, repeat times for thread worker
617  * bulk_irq: int, repeat times for irq consumer
618  * hrtimer: unsigned long, hrtimer interval in ms
619  * name: char *, tag for current test ot_item
620  */
621 
622 #define NODE_COMPACT sizeof(struct ot_node)
623 #define NODE_VMALLOC (512)
624 
625 static struct ot_test g_testcases[] = {
626 
627 	/* sync & normal */
628 	{0, 0, NODE_COMPACT, 1000, 0,  1,  0,  0, "sync: percpu objpool"},
629 	{0, 0, NODE_VMALLOC, 1000, 0,  1,  0,  0, "sync: percpu objpool from vmalloc"},
630 
631 	/* sync & hrtimer */
632 	{0, 0, NODE_COMPACT, 1000, 0,  1,  1,  4, "sync & hrtimer: percpu objpool"},
633 	{0, 0, NODE_VMALLOC, 1000, 0,  1,  1,  4, "sync & hrtimer: percpu objpool from vmalloc"},
634 
635 	/* sync & overrun */
636 	{0, 0, NODE_COMPACT, 1000, 0, 16,  0,  0, "sync overrun: percpu objpool"},
637 	{0, 0, NODE_VMALLOC, 1000, 0, 16,  0,  0, "sync overrun: percpu objpool from vmalloc"},
638 
639 	/* async mode */
640 	{1, 0, NODE_COMPACT, 1000, 100,  1,  0,  0, "async: percpu objpool"},
641 	{1, 0, NODE_VMALLOC, 1000, 100,  1,  0,  0, "async: percpu objpool from vmalloc"},
642 
643 	/* async + hrtimer mode */
644 	{1, 0, NODE_COMPACT, 1000, 0,  4,  4,  4, "async & hrtimer: percpu objpool"},
645 	{1, 0, NODE_VMALLOC, 1000, 0,  4,  4,  4, "async & hrtimer: percpu objpool from vmalloc"},
646 };
647 
648 static int __init ot_mod_init(void)
649 {
650 	int i;
651 
652 	/* perform testings */
653 	for (i = 0; i < ARRAY_SIZE(g_testcases); i++) {
654 		ot_init_data(&g_testcases[i].data);
655 		if (g_testcases[i].async)
656 			ot_start_async(&g_testcases[i]);
657 		else
658 			ot_start_sync(&g_testcases[i]);
659 	}
660 
661 	/* show tests summary */
662 	pr_info("\n");
663 	pr_info("Summary of testcases:\n");
664 	for (i = 0; i < ARRAY_SIZE(g_testcases); i++) {
665 		pr_info("    duration: %lluus \thits: %10lu \tmiss: %10lu \t%s\n",
666 			g_testcases[i].data.duration, g_testcases[i].data.objects.nhits,
667 			g_testcases[i].data.objects.nmiss, g_testcases[i].name);
668 	}
669 
670 	return -EAGAIN;
671 }
672 
673 static void __exit ot_mod_exit(void)
674 {
675 }
676 
677 module_init(ot_mod_init);
678 module_exit(ot_mod_exit);
679 
680 MODULE_DESCRIPTION("Test module for lockless object pool");
681 MODULE_LICENSE("GPL");
682