xref: /linux/lib/test_objpool.c (revision f96a974170b749e3a56844e25b31d46a7233b6f6)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Test module for lockless object pool
5  *
6  * Copyright: wuqiang.matt@bytedance.com
7  */
8 
9 #include <linux/errno.h>
10 #include <linux/module.h>
11 #include <linux/moduleparam.h>
12 #include <linux/completion.h>
13 #include <linux/kthread.h>
14 #include <linux/slab.h>
15 #include <linux/vmalloc.h>
16 #include <linux/delay.h>
17 #include <linux/hrtimer.h>
18 #include <linux/objpool.h>
19 
20 #define OT_NR_MAX_BULK (16)
21 
/* memory usage accounting for one allocator (kmalloc or vmalloc) */
struct ot_mem_stat {
	atomic_long_t alloc;	/* total bytes handed out */
	atomic_long_t free;	/* total bytes returned; alloc - free != 0 => leak */
};
27 
/* object allocation results */
struct ot_obj_stat {
	unsigned long nhits;	/* pops that returned an object */
	unsigned long nmiss;	/* pops that found the pool empty */
};
33 
/* control & results per testcase */
struct ot_data {
	struct rw_semaphore start;	/* write-held to line workers up at the start */
	struct completion wait;		/* completed by the last worker to finish */
	struct completion rcu;		/* completed by the async RCU finalizer */
	/* live-worker count; the controller holds one initial reference */
	atomic_t nthreads ____cacheline_aligned_in_smp;
	/* stop flag; separate cacheline to limit bouncing between cpus */
	atomic_t stop ____cacheline_aligned_in_smp;
	struct ot_mem_stat kmalloc;	/* kmalloc accounting */
	struct ot_mem_stat vmalloc;	/* vmalloc accounting */
	struct ot_obj_stat objects;	/* aggregated pop hit/miss totals */
	u64    duration;		/* wall-clock test duration in us */
};
46 
/* testcase descriptor: static parameters plus runtime control/results */
struct ot_test {
	int async; /* synchronous or asynchronous */
	int mode; /* only mode 0 supported */
	int objsz; /* object size */
	int duration; /* ms */
	int delay; /* ms */
	int bulk_normal; /* objects popped per iteration in thread context */
	int bulk_irq; /* objects popped per iteration in hrtimer context */
	unsigned long hrtimer; /* ms; 0 disables the hrtimer consumer */
	const char *name; /* tag printed in all reports */
	struct ot_data data; /* runtime control & results */
};
60 
/* per-cpu worker state */
struct ot_item {
	struct objpool_head *pool; /* pool head */
	struct ot_test *test; /* test parameters */

	/* bulk pop/push routine; irq=1 when called from the hrtimer */
	void (*worker)(struct ot_item *item, int irq);

	/* hrtimer control */
	ktime_t hrtcycle;	/* timer period derived from test->hrtimer */
	struct hrtimer hrtimer;	/* emulates an irq-context consumer */

	int bulk[2]; /* for thread and irq */
	int delay;   /* ms slept by the thread-context worker */
	u32 niters;  /* iteration counter driving periodic throttling */

	/* summary per thread */
	struct ot_obj_stat stat[2]; /* thread and irq */
	u64 duration;	/* us this worker ran; 0 means it never started */
};
80 
81 /*
82  * memory leakage checking
83  */
84 
85 static void *ot_kzalloc(struct ot_test *test, long size)
86 {
87 	void *ptr = kzalloc(size, GFP_KERNEL);
88 
89 	if (ptr)
90 		atomic_long_add(size, &test->data.kmalloc.alloc);
91 	return ptr;
92 }
93 
94 static void ot_kfree(struct ot_test *test, void *ptr, long size)
95 {
96 	if (!ptr)
97 		return;
98 	atomic_long_add(size, &test->data.kmalloc.free);
99 	kfree(ptr);
100 }
101 
102 static void ot_mem_report(struct ot_test *test)
103 {
104 	long alloc, free;
105 
106 	pr_info("memory allocation summary for %s\n", test->name);
107 
108 	alloc = atomic_long_read(&test->data.kmalloc.alloc);
109 	free = atomic_long_read(&test->data.kmalloc.free);
110 	pr_info("  kmalloc: %lu - %lu = %lu\n", alloc, free, alloc - free);
111 
112 	alloc = atomic_long_read(&test->data.vmalloc.alloc);
113 	free = atomic_long_read(&test->data.vmalloc.free);
114 	pr_info("  vmalloc: %lu - %lu = %lu\n", alloc, free, alloc - free);
115 }
116 
/* user object instance */
struct ot_node {
	void *owner;		/* owning pool; set by ot_init_node() */
	unsigned long data;
	unsigned long refs;	/* times this node was recycled */
	unsigned long payload[32];	/* filler to give the object realistic size */
};
124 
/* user objpool manager */
struct ot_context {
	struct objpool_head pool; /* objpool head */
	struct ot_test *test; /* test parameters */
	void *ptr; /* user pool buffer; unused by the testcases in this file */
	unsigned long size; /* buffer size; unused by the testcases in this file */
	struct rcu_head rcu; /* for deferred teardown in the async cases */
};
133 
/* per-cpu worker state, reset and reused by every testcase */
static DEFINE_PER_CPU(struct ot_item, ot_pcup_items);
135 
/*
 * Reset the per-testcase control/result block.
 *
 * nthreads starts at 1: the controlling thread holds one reference so
 * data->wait cannot complete before all workers have been launched.
 * Always returns 0.
 */
static int ot_init_data(struct ot_data *data)
{
	memset(data, 0, sizeof(*data));
	init_rwsem(&data->start);
	init_completion(&data->wait);
	init_completion(&data->rcu);
	atomic_set(&data->nthreads, 1);

	return 0;
}
146 
147 static int ot_init_node(void *nod, void *context)
148 {
149 	struct ot_context *sop = context;
150 	struct ot_node *on = nod;
151 
152 	on->owner = &sop->pool;
153 	return 0;
154 }
155 
/*
 * hrtimer callback: emulates an irq-context consumer by running the
 * bulk worker with irq=1, then re-arms itself for the next period
 * until the stop flag is observed.
 */
static enum hrtimer_restart ot_hrtimer_handler(struct hrtimer *hrt)
{
	struct ot_item *item = container_of(hrt, struct ot_item, hrtimer);
	struct ot_test *test = item->test;

	/* acquire pairs with the controller's atomic_set_release() */
	if (atomic_read_acquire(&test->data.stop))
		return HRTIMER_NORESTART;

	/* do bulk-testings for objects pop/push */
	item->worker(item, 1);

	hrtimer_forward(hrt, hrt->base->get_time(), item->hrtcycle);
	return HRTIMER_RESTART;
}
170 
171 static void ot_start_hrtimer(struct ot_item *item)
172 {
173 	if (!item->test->hrtimer)
174 		return;
175 	hrtimer_start(&item->hrtimer, item->hrtcycle, HRTIMER_MODE_REL);
176 }
177 
178 static void ot_stop_hrtimer(struct ot_item *item)
179 {
180 	if (!item->test->hrtimer)
181 		return;
182 	hrtimer_cancel(&item->hrtimer);
183 }
184 
185 static int ot_init_hrtimer(struct ot_item *item, unsigned long hrtimer)
186 {
187 	struct hrtimer *hrt = &item->hrtimer;
188 
189 	if (!hrtimer)
190 		return -ENOENT;
191 
192 	item->hrtcycle = ktime_set(0, hrtimer * 1000000UL);
193 	hrtimer_init(hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
194 	hrt->function = ot_hrtimer_handler;
195 	return 0;
196 }
197 
/*
 * Reset one per-cpu worker item and bind it to @test, @pool and the
 * bulk @worker routine. The hrtimer is prepared here but only armed
 * later, by the worker thread itself. Always returns 0.
 */
static int ot_init_cpu_item(struct ot_item *item,
			struct ot_test *test,
			struct objpool_head *pool,
			void (*worker)(struct ot_item *, int))
{
	memset(item, 0, sizeof(*item));
	item->pool = pool;
	item->test = test;
	item->worker = worker;

	item->bulk[0] = test->bulk_normal;
	item->bulk[1] = test->bulk_irq;
	item->delay = test->delay;

	/* initialize hrtimer; -ENOENT (no hrtimer requested) is deliberately
	 * ignored since ot_start_hrtimer() re-checks test->hrtimer anyway */
	ot_init_hrtimer(item, item->test->hrtimer);
	return 0;
}
216 
/*
 * Per-cpu test thread: registers itself, waits at the start line,
 * then repeatedly runs the configured bulk worker until the stop
 * flag is raised. The last worker to exit completes data->wait.
 */
static int ot_thread_worker(void *arg)
{
	struct ot_item *item = arg;
	struct ot_test *test = item->test;
	ktime_t start;

	atomic_inc(&test->data.nthreads);
	/* blocks here until the controller releases its write lock */
	down_read(&test->data.start);
	up_read(&test->data.start);
	start = ktime_get();
	ot_start_hrtimer(item);
	do {
		if (atomic_read_acquire(&test->data.stop))
			break;
		/* do bulk-testings for objects pop/push */
		item->worker(item, 0);
	} while (!kthread_should_stop());
	ot_stop_hrtimer(item);
	item->duration = (u64) ktime_us_delta(ktime_get(), start);
	/* the controller's initial reference keeps this from firing early */
	if (atomic_dec_and_test(&test->data.nthreads))
		complete(&test->data.wait);

	return 0;
}
241 
/*
 * Aggregate and print per-cpu hit/miss statistics for @test, and
 * store the totals and the duration into test->data for the final
 * module-level summary. Cpus whose worker never ran
 * (item->duration == 0) are skipped.
 */
static void ot_perf_report(struct ot_test *test, u64 duration)
{
	struct ot_obj_stat total, normal = {0}, irq = {0};
	int cpu, nthreads = 0;

	pr_info("\n");
	pr_info("Testing summary for %s\n", test->name);

	for_each_possible_cpu(cpu) {
		struct ot_item *item = per_cpu_ptr(&ot_pcup_items, cpu);
		if (!item->duration)
			continue;
		normal.nhits += item->stat[0].nhits;
		normal.nmiss += item->stat[0].nmiss;
		irq.nhits += item->stat[1].nhits;
		irq.nmiss += item->stat[1].nmiss;
		pr_info("CPU: %d  duration: %lluus\n", cpu, item->duration);
		pr_info("\tthread:\t%16lu hits \t%16lu miss\n",
			item->stat[0].nhits, item->stat[0].nmiss);
		pr_info("\tirq:   \t%16lu hits \t%16lu miss\n",
			item->stat[1].nhits, item->stat[1].nmiss);
		pr_info("\ttotal: \t%16lu hits \t%16lu miss\n",
			item->stat[0].nhits + item->stat[1].nhits,
			item->stat[0].nmiss + item->stat[1].nmiss);
		/* counts cpus that actually ran a worker */
		nthreads++;
	}

	total.nhits = normal.nhits + irq.nhits;
	total.nmiss = normal.nmiss + irq.nmiss;

	pr_info("ALL: \tnthreads: %d  duration: %lluus\n", nthreads, duration);
	pr_info("SUM: \t%16lu hits \t%16lu miss\n",
		total.nhits, total.nmiss);

	/* saved for the summary loop in ot_mod_init() */
	test->data.objects = total;
	test->data.duration = duration;
}
279 
280 /*
281  * synchronous test cases for objpool manipulation
282  */
283 
284 /* objpool manipulation for synchronous mode (percpu objpool) */
285 static struct ot_context *ot_init_sync_m0(struct ot_test *test)
286 {
287 	struct ot_context *sop = NULL;
288 	int max = num_possible_cpus() << 3;
289 	gfp_t gfp = GFP_KERNEL;
290 
291 	sop = (struct ot_context *)ot_kzalloc(test, sizeof(*sop));
292 	if (!sop)
293 		return NULL;
294 	sop->test = test;
295 	if (test->objsz < 512)
296 		gfp = GFP_ATOMIC;
297 
298 	if (objpool_init(&sop->pool, max, test->objsz,
299 			 gfp, sop, ot_init_node, NULL)) {
300 		ot_kfree(test, sop, sizeof(*sop));
301 		return NULL;
302 	}
303 	WARN_ON(max != sop->pool.nr_objs);
304 
305 	return sop;
306 }
307 
/* synchronous teardown: finalize the pool, then free the context */
static void ot_fini_sync(struct ot_context *sop)
{
	objpool_fini(&sop->pool);
	ot_kfree(sop->test, sop, sizeof(*sop));
}
313 
/* init/fini dispatch for synchronous testcases, indexed by test->mode */
static struct {
	struct ot_context * (*init)(struct ot_test *oc);
	void (*fini)(struct ot_context *sop);
} g_ot_sync_ops[] = {
	{.init = ot_init_sync_m0, .fini = ot_fini_sync},
};
320 
321 /*
322  * synchronous test cases: performance mode
323  */
324 
325 static void ot_bulk_sync(struct ot_item *item, int irq)
326 {
327 	struct ot_node *nods[OT_NR_MAX_BULK];
328 	int i;
329 
330 	for (i = 0; i < item->bulk[irq]; i++)
331 		nods[i] = objpool_pop(item->pool);
332 
333 	if (!irq && (item->delay || !(++(item->niters) & 0x7FFF)))
334 		msleep(item->delay);
335 
336 	while (i-- > 0) {
337 		struct ot_node *on = nods[i];
338 		if (on) {
339 			on->refs++;
340 			objpool_push(on, item->pool);
341 			item->stat[irq].nhits++;
342 		} else {
343 			item->stat[irq].nmiss++;
344 		}
345 	}
346 }
347 
/*
 * Run one synchronous testcase: spawn a worker thread per online cpu,
 * release them all at once, let them hammer the pool for
 * test->duration ms, stop them, tear the pool down and print the
 * reports. Returns 0, or -ENOMEM if the pool could not be built.
 */
static int ot_start_sync(struct ot_test *test)
{
	struct ot_context *sop;
	ktime_t start;
	u64 duration;
	unsigned long timeout;
	int cpu;

	/* initialize objpool for synchronous testcase */
	sop = g_ot_sync_ops[test->mode].init(test);
	if (!sop)
		return -ENOMEM;

	/* grab rwsem to block testing threads */
	down_write(&test->data.start);

	for_each_possible_cpu(cpu) {
		struct ot_item *item = per_cpu_ptr(&ot_pcup_items, cpu);
		struct task_struct *work;

		/* reset every item (even offline cpus) so stale stats are cleared */
		ot_init_cpu_item(item, test, &sop->pool, ot_bulk_sync);

		/* skip offline cpus */
		if (!cpu_online(cpu))
			continue;

		work = kthread_run_on_cpu(ot_thread_worker, item,
					  cpu, "ot_worker_%d");
		if (IS_ERR(work))
			pr_err("failed to create thread for cpu %d\n", cpu);
	}

	/* wait a while to make sure all threads waiting at start line */
	msleep(20);

	/* drop the controller's initial nthreads reference;
	 * in case no threads were created: memory insufficient ? */
	if (atomic_dec_and_test(&test->data.nthreads))
		complete(&test->data.wait);

	/* start objpool testing threads */
	start = ktime_get();
	up_write(&test->data.start);

	/* yield cpu to worker threads for duration ms */
	timeout = msecs_to_jiffies(test->duration);
	schedule_timeout_interruptible(timeout);

	/* tell workers threads to quit */
	atomic_set_release(&test->data.stop, 1);

	/* wait all workers threads finish and quit */
	wait_for_completion(&test->data.wait);
	duration = (u64) ktime_us_delta(ktime_get(), start);

	/* cleanup objpool */
	g_ot_sync_ops[test->mode].fini(sop);

	/* report testing summary and performance results */
	ot_perf_report(test, duration);

	/* report memory allocation summary */
	ot_mem_report(test);

	return 0;
}
415 
416 /*
417  * asynchronous test cases: pool lifecycle controlled by refcount
418  */
419 
/*
 * RCU callback for asynchronous teardown: by the time this runs, a
 * grace period has elapsed, so every cpu has observed the stop flag
 * and no worker can still be popping from the pool.
 */
static void ot_fini_async_rcu(struct rcu_head *rcu)
{
	struct ot_context *sop = container_of(rcu, struct ot_context, rcu);
	struct ot_test *test = sop->test;

	/* here all cpus are aware of the stop event: test->data.stop = 1 */
	WARN_ON(!atomic_read_acquire(&test->data.stop));

	objpool_fini(&sop->pool);
	/* let ot_start_async() know the rcu-side teardown has run */
	complete(&test->data.rcu);
}
431 
/* schedule asynchronous teardown after the current RCU grace period */
static void ot_fini_async(struct ot_context *sop)
{
	/* make sure the stop event is acknowledged by all cores */
	call_rcu(&sop->rcu, ot_fini_async_rcu);
}
437 
/*
 * Release callback passed to objpool_init() for the async testcases:
 * frees the test context once the pool itself is being released.
 * Always returns 0.
 */
static int ot_objpool_release(struct objpool_head *head, void *context)
{
	struct ot_context *sop = context;

	/* sanity check: context must point back at the pool being released */
	WARN_ON(!head || !sop || head != &sop->pool);

	/* do context cleaning if needed */
	if (sop)
		ot_kfree(sop->test, sop, sizeof(*sop));

	return 0;
}
450 
451 static struct ot_context *ot_init_async_m0(struct ot_test *test)
452 {
453 	struct ot_context *sop = NULL;
454 	int max = num_possible_cpus() << 3;
455 	gfp_t gfp = GFP_KERNEL;
456 
457 	sop = (struct ot_context *)ot_kzalloc(test, sizeof(*sop));
458 	if (!sop)
459 		return NULL;
460 	sop->test = test;
461 	if (test->objsz < 512)
462 		gfp = GFP_ATOMIC;
463 
464 	if (objpool_init(&sop->pool, max, test->objsz, gfp, sop,
465 			 ot_init_node, ot_objpool_release)) {
466 		ot_kfree(test, sop, sizeof(*sop));
467 		return NULL;
468 	}
469 	WARN_ON(max != sop->pool.nr_objs);
470 
471 	return sop;
472 }
473 
/* init/fini dispatch for asynchronous testcases, indexed by test->mode */
static struct {
	struct ot_context * (*init)(struct ot_test *oc);
	void (*fini)(struct ot_context *sop);
} g_ot_async_ops[] = {
	{.init = ot_init_async_m0, .fini = ot_fini_async},
};
480 
/*
 * Return one node to the pool or, when @release is set, drop it for
 * good — each dropped node dereferences the pool, and the last drop
 * triggers ot_objpool_release() via the objpool core.
 */
static void ot_nod_recycle(struct ot_node *on, struct objpool_head *pool,
			int release)
{
	on->refs++;

	if (!release) {
		/* push object back to objpool for reuse */
		objpool_push(on, pool);
		return;
	}

	/* sanity check: the pool's context must be our enclosing ot_context */
	WARN_ON(container_of(pool, struct ot_context, pool) != pool->context);

	/* unref objpool with nod removed forever */
	objpool_drop(on, pool);
}
500 
501 static void ot_bulk_async(struct ot_item *item, int irq)
502 {
503 	struct ot_test *test = item->test;
504 	struct ot_node *nods[OT_NR_MAX_BULK];
505 	int i, stop;
506 
507 	for (i = 0; i < item->bulk[irq]; i++)
508 		nods[i] = objpool_pop(item->pool);
509 
510 	if (!irq) {
511 		if (item->delay || !(++(item->niters) & 0x7FFF))
512 			msleep(item->delay);
513 		get_cpu();
514 	}
515 
516 	stop = atomic_read_acquire(&test->data.stop);
517 
518 	/* drop all objects and deref objpool */
519 	while (i-- > 0) {
520 		struct ot_node *on = nods[i];
521 
522 		if (on) {
523 			on->refs++;
524 			ot_nod_recycle(on, item->pool, stop);
525 			item->stat[irq].nhits++;
526 		} else {
527 			item->stat[irq].nmiss++;
528 		}
529 	}
530 
531 	if (!irq)
532 		put_cpu();
533 }
534 
/*
 * Run one asynchronous testcase: like ot_start_sync(), but after the
 * stop flag is raised the pool is finalized either by the RCU
 * callback or by the workers dropping their last objects, and the
 * controller waits on data->rcu to be certain teardown completed.
 * Returns 0, or -ENOMEM if the pool could not be built.
 */
static int ot_start_async(struct ot_test *test)
{
	struct ot_context *sop;
	ktime_t start;
	u64 duration;
	unsigned long timeout;
	int cpu;

	/* initialize objpool for asynchronous testcase */
	sop = g_ot_async_ops[test->mode].init(test);
	if (!sop)
		return -ENOMEM;

	/* grab rwsem to block testing threads */
	down_write(&test->data.start);

	for_each_possible_cpu(cpu) {
		struct ot_item *item = per_cpu_ptr(&ot_pcup_items, cpu);
		struct task_struct *work;

		/* reset every item (even offline cpus) so stale stats are cleared */
		ot_init_cpu_item(item, test, &sop->pool, ot_bulk_async);

		/* skip offline cpus */
		if (!cpu_online(cpu))
			continue;

		work = kthread_run_on_cpu(ot_thread_worker, item, cpu, "ot_worker_%d");
		if (IS_ERR(work))
			pr_err("failed to create thread for cpu %d\n", cpu);
	}

	/* wait a while to make sure all threads waiting at start line */
	msleep(20);

	/* drop the controller's initial nthreads reference;
	 * in case no threads were created: memory insufficient ? */
	if (atomic_dec_and_test(&test->data.nthreads))
		complete(&test->data.wait);

	/* start objpool testing threads */
	start = ktime_get();
	up_write(&test->data.start);

	/* yield cpu to worker threads for duration ms */
	timeout = msecs_to_jiffies(test->duration);
	schedule_timeout_interruptible(timeout);

	/* tell workers threads to quit */
	atomic_set_release(&test->data.stop, 1);

	/* do async-finalization */
	g_ot_async_ops[test->mode].fini(sop);

	/* wait all workers threads finish and quit */
	wait_for_completion(&test->data.wait);
	duration = (u64) ktime_us_delta(ktime_get(), start);

	/* assure rcu callback is triggered */
	wait_for_completion(&test->data.rcu);

	/*
	 * now we are sure that objpool is finalized either
	 * by rcu callback or by worker threads
	 */

	/* report testing summary and performance results */
	ot_perf_report(test, duration);

	/* report memory allocation summary */
	ot_mem_report(test);

	return 0;
}
607 
608 /*
609  * predefined testing cases:
610  *   synchronous case / overrun case / async case
611  *
612  * async: synchronous or asynchronous testing
613  * mode: only mode 0 supported
614  * objsz: object size
615  * duration: int, total test time in ms
616  * delay: int, delay (in ms) between each iteration
617  * bulk_normal: int, repeat times for thread worker
618  * bulk_irq: int, repeat times for irq consumer
 * hrtimer: unsigned long, hrtimer interval in ms
620  * name: char *, tag for current test ot_item
621  */
622 
623 #define NODE_COMPACT sizeof(struct ot_node)
624 #define NODE_VMALLOC (512)
625 
static struct ot_test g_testcases[] = {

	/* positional fields: async, mode, objsz, duration(ms), delay(ms),
	 *                    bulk_normal, bulk_irq, hrtimer(ms), name */

	/* sync & normal */
	{0, 0, NODE_COMPACT, 1000, 0,  1,  0,  0, "sync: percpu objpool"},
	{0, 0, NODE_VMALLOC, 1000, 0,  1,  0,  0, "sync: percpu objpool from vmalloc"},

	/* sync & hrtimer */
	{0, 0, NODE_COMPACT, 1000, 0,  1,  1,  4, "sync & hrtimer: percpu objpool"},
	{0, 0, NODE_VMALLOC, 1000, 0,  1,  1,  4, "sync & hrtimer: percpu objpool from vmalloc"},

	/* sync & overrun: bulk equals pool capacity granularity to force misses */
	{0, 0, NODE_COMPACT, 1000, 0, 16,  0,  0, "sync overrun: percpu objpool"},
	{0, 0, NODE_VMALLOC, 1000, 0, 16,  0,  0, "sync overrun: percpu objpool from vmalloc"},

	/* async mode */
	{1, 0, NODE_COMPACT, 1000, 100,  1,  0,  0, "async: percpu objpool"},
	{1, 0, NODE_VMALLOC, 1000, 100,  1,  0,  0, "async: percpu objpool from vmalloc"},

	/* async + hrtimer mode */
	{1, 0, NODE_COMPACT, 1000, 0,  4,  4,  4, "async & hrtimer: percpu objpool"},
	{1, 0, NODE_VMALLOC, 1000, 0,  4,  4,  4, "async & hrtimer: percpu objpool from vmalloc"},
};
648 
/*
 * Module entry: run every predefined testcase back to back, print a
 * one-line summary per case, then deliberately fail the load with
 * -EAGAIN so the test module never stays resident.
 */
static int __init ot_mod_init(void)
{
	int i;

	/* perform testings */
	for (i = 0; i < ARRAY_SIZE(g_testcases); i++) {
		ot_init_data(&g_testcases[i].data);
		if (g_testcases[i].async)
			ot_start_async(&g_testcases[i]);
		else
			ot_start_sync(&g_testcases[i]);
	}

	/* show tests summary */
	pr_info("\n");
	pr_info("Summary of testcases:\n");
	for (i = 0; i < ARRAY_SIZE(g_testcases); i++) {
		pr_info("    duration: %lluus \thits: %10lu \tmiss: %10lu \t%s\n",
			g_testcases[i].data.duration, g_testcases[i].data.objects.nhits,
			g_testcases[i].data.objects.nmiss, g_testcases[i].name);
	}

	/* intentional: all work is done here, so refuse to stay loaded */
	return -EAGAIN;
}
673 
/* empty on purpose: init always returns -EAGAIN, so there is nothing
 * to undo; kept so the module definition stays well-formed */
static void __exit ot_mod_exit(void)
{
}
677 
678 module_init(ot_mod_init);
679 module_exit(ot_mod_exit);
680 
681 MODULE_DESCRIPTION("Test module for lockless object pool");
682 MODULE_LICENSE("GPL");
683